Example
async def main() -> None:
    """Send one POST request with a dict payload through HttpCrawler.

    The request declares a JSON content type in its headers but supplies the
    payload as a plain dict via ``data``; the handler logs the response body
    so the way the payload was actually transmitted can be inspected.
    """
    crawler = HttpCrawler()

    # Define the default request handler, which will be called for every request.
    @crawler.router.default_handler
    async def request_handler(context: HttpCrawlingContext) -> None:
        """Log the URL being processed and the UTF-8 decoded response body."""
        context.log.info(f'Processing {context.request.url} ...')
        response = context.http_response.read().decode('utf-8')
        context.log.info(f'Response: {response}')  # To see the response in the logs.

    # Prepare a POST request to the form endpoint.
    # The payload is intentionally passed as a dict through `data` together
    # with a JSON content-type header: this is the combination under test.
    request = Request.from_url(
        url='https://httpbin.org/post',
        method='POST',
        headers={'content-type': 'application/json'},
        data={
            'custname': 'John Doe',
            'custtel': '1234567890',
            'custemail': 'johndoe@example.com',
            'size': 'large',
            'topping': ['bacon', 'cheese', 'mushroom'],
            'delivery': '13:00',
            'comments': 'Please ring the doorbell upon arrival.',
        },
    )

    await crawler.run([request])
Current response format
{
"args": {},
"data": "custname=John+Doe&custtel=1234567890&custemail=johndoe%40example.com&size=large&topping=bacon&topping=cheese&topping=mushroom&delivery=13%3A00&comments=Please+ring+the+doorbell+upon+arrival.",
"files": {},
"form": {},
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
"Content-Length": "190",
"Content-Type": "application/json",
"Host": "httpbin.org",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"X-Amzn-Trace-Id": "Root=1-66fc90cf-11644d4f5483e1096211b721"
},
"json": null,
"origin": "91.240.96.149",
"url": "https://httpbin.org/post"
}
Expected response format
{
"args": {},
"data": "{\"custname\": \"John Doe\", \"custtel\": \"1234567890\", \"custemail\": \"johndoe@example.com\", \"size\": \"large\", \"topping\": [\"bacon\", \"cheese\", \"mushroom\"], \"delivery\": \"13:00\", \"comments\": \"Please ring the doorbell upon arrival.\"}",
"files": {},
"form": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate, br",
"Content-Length": "221",
"Content-Type": "application/json",
"Host": "httpbin.org",
"User-Agent": "python-httpx/0.27.2",
"X-Amzn-Trace-Id": "Root=1-66fc91c2-6db9989347fef25b150615e2"
},
"json": {
"comments": "Please ring the doorbell upon arrival.",
"custemail": "johndoe@example.com",
"custname": "John Doe",
"custtel": "1234567890",
"delivery": "13:00",
"size": "large",
"topping": [
"bacon",
"cheese",
"mushroom"
]
},
"origin": "91.240.96.149",
"url": "https://httpbin.org/post"
}
Both HTTPX and curl_impersonate allow creating a POST request with a JSON payload in two ways:
# Payload shared by both variants below.
data = {
    'custname': 'John Doe',
    'custtel': '1234567890',
    'custemail': 'johndoe@example.com',
    'size': 'large',
    'topping': ['bacon', 'cheese', 'mushroom'],
    'delivery': '13:00',
    'comments': 'Please ring the doorbell upon arrival.',
}

# Way 1: hand the dict to the `json` parameter and let the client serialize it.
response = httpx.post(url, json=data)

# Way 2: serialize the dict yourself and pass the resulting string as `data`.
response = httpx.post(url, data=json.dumps(data))
But we can't reproduce this behavior in Crawlee, because the `json` parameter is not passed on when the request is created, and the `data` parameter cannot be a string.
Example
Current response format
{ "args": {}, "data": "custname=John+Doe&custtel=1234567890&custemail=johndoe%40example.com&size=large&topping=bacon&topping=cheese&topping=mushroom&delivery=13%3A00&comments=Please+ring+the+doorbell+upon+arrival.", "files": {}, "form": {}, "headers": { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Accept-Encoding": "gzip, deflate, br", "Accept-Language": "en-US,en;q=0.9", "Content-Length": "190", "Content-Type": "application/json", "Host": "httpbin.org", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", "X-Amzn-Trace-Id": "Root=1-66fc90cf-11644d4f5483e1096211b721" }, "json": null, "origin": "91.240.96.149", "url": "https://httpbin.org/post" }
Expected response format
{ "args": {}, "data": "{\"custname\": \"John Doe\", \"custtel\": \"1234567890\", \"custemail\": \"johndoe@example.com\", \"size\": \"large\", \"topping\": [\"bacon\", \"cheese\", \"mushroom\"], \"delivery\": \"13:00\", \"comments\": \"Please ring the doorbell upon arrival.\"}", "files": {}, "form": {}, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate, br", "Content-Length": "221", "Content-Type": "application/json", "Host": "httpbin.org", "User-Agent": "python-httpx/0.27.2", "X-Amzn-Trace-Id": "Root=1-66fc91c2-6db9989347fef25b150615e2" }, "json": { "comments": "Please ring the doorbell upon arrival.", "custemail": "johndoe@example.com", "custname": "John Doe", "custtel": "1234567890", "delivery": "13:00", "size": "large", "topping": [ "bacon", "cheese", "mushroom" ] }, "origin": "91.240.96.149", "url": "https://httpbin.org/post" }
Both HTTPX and curl_impersonate allow creating a POST request with a JSON payload in two ways.
But we can't reproduce this behavior in Crawlee, because the `json` parameter is not passed on when the request is created, and the `data` parameter cannot be a string.