HTTPX adalah klien HTTP Python modern dengan dukungan async dan HTTP/2. Panduan ini menunjukkan cara menggunakannya dengan CaptchaAI untuk penyelesaian CAPTCHA secara sinkron maupun asinkron.
Prasyarat
| Item | Detail |
|---|---|
| Python | 3.8+ |
| httpx | 0.24+ |
| API key CaptchaAI | Daftar di sini |
# Install the httpx package (the prerequisites table above calls for 0.24+).
pip install httpx
Klien Sinkron
import httpx
import time
import os
class CaptchaAISync:
    """Blocking CaptchaAI client built on ``httpx.Client``.

    A task is submitted to ``/in.php`` and its result is polled from
    ``/res.php``. Instances can be used as context managers so the
    underlying HTTP client is closed even when solving fails.
    """

    def __init__(self, api_key):
        """Store *api_key* and create an ``httpx.Client`` with a 30 s timeout."""
        self.api_key = api_key
        self.base_url = "https://ocr.captchaai.com"
        self.client = httpx.Client(timeout=30)

    def __enter__(self):
        """Return the solver itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Close the HTTP client when leaving the ``with`` block."""
        self.close()

    def solve(self, params, timeout=300, poll_interval=5):
        """Submit a CAPTCHA task and block until its answer is available.

        Args:
            params: Task parameters sent to ``/in.php``. The mapping is
                copied; the caller's object is never modified.
            timeout: Maximum number of seconds to keep polling.
            poll_interval: Seconds to sleep before each poll request.

        Returns:
            Everything after the first ``|`` of the ``OK|...`` poll response.

        Raises:
            Exception: If the submit or poll response reports a failure.
            TimeoutError: If no answer arrives before *timeout* elapses.
        """
        # Build a fresh dict so the API key is not written into the caller's
        # params (which may be reused or logged by the caller).
        request_params = {**params, "key": self.api_key}

        # Submit
        resp = self.client.get(f"{self.base_url}/in.php", params=request_params)
        text = resp.text
        if not text.startswith("OK|"):
            raise Exception(f"Submit failed: {text}")
        task_id = text.split("|")[1]

        # Poll
        # A monotonic clock keeps the deadline correct if the system clock
        # is adjusted while waiting.
        deadline = time.monotonic() + timeout
        poll_params = {"key": self.api_key, "action": "get", "id": task_id}
        while time.monotonic() < deadline:
            time.sleep(poll_interval)
            result = self.client.get(
                f"{self.base_url}/res.php", params=poll_params
            )
            body = result.text
            if body == "CAPCHA_NOT_READY":
                continue
            if body.startswith("OK|"):
                return body.split("|", 1)[1]
            raise Exception(f"Solve failed: {body}")
        raise TimeoutError(f"Task {task_id} timed out")

    def get_balance(self):
        """Return the account balance reported by ``/res.php`` as a float.

        Raises:
            ValueError: If the response body is not a number; the message
                includes the raw response text.
        """
        resp = self.client.get(
            f"{self.base_url}/res.php",
            params={"key": self.api_key, "action": "getbalance"},
        )
        text = resp.text
        try:
            return float(text)
        except ValueError as err:
            # Surface the raw response text instead of a bare float() error.
            raise ValueError(f"Unexpected balance response: {text!r}") from err

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()
# Usage
solver = CaptchaAISync(os.environ["CAPTCHAAI_API_KEY"])
try:
    token = solver.solve({
        "method": "userrecaptcha",
        "googlekey": "6Le-wvkS...",
        "pageurl": "https://example.com",
    })
    print(f"Token: {token[:50]}...")
finally:
    # Close the HTTP client even when solve() raises.
    solver.close()
Klien Asinkron
import httpx
import asyncio
import os
class CaptchaAIAsync:
    """Asynchronous CaptchaAI client built on ``httpx.AsyncClient``.

    A task is submitted to ``/in.php`` and its result is polled from
    ``/res.php``. Instances can be used with ``async with`` so the
    underlying HTTP client is closed even when solving fails.
    """

    def __init__(self, api_key):
        """Store *api_key* and create an ``httpx.AsyncClient`` (30 s timeout)."""
        self.api_key = api_key
        self.base_url = "https://ocr.captchaai.com"
        self.client = httpx.AsyncClient(timeout=30)

    async def __aenter__(self):
        """Return the solver itself for use in an ``async with`` statement."""
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Close the HTTP client when leaving the ``async with`` block."""
        await self.close()

    async def solve(self, params, timeout=300, poll_interval=5):
        """Submit a CAPTCHA task and await its answer.

        Args:
            params: Task parameters sent to ``/in.php``. The mapping is
                copied; the caller's object is never modified.
            timeout: Maximum number of seconds to keep polling.
            poll_interval: Seconds to sleep before each poll request.

        Returns:
            Everything after the first ``|`` of the ``OK|...`` poll response.

        Raises:
            Exception: If the submit or poll response reports a failure.
            TimeoutError: If no answer arrives before *timeout* elapses.
        """
        # Build a fresh dict so the API key is not written into the caller's
        # params (which may be shared between concurrent tasks or logged).
        request_params = {**params, "key": self.api_key}

        # Submit
        resp = await self.client.get(
            f"{self.base_url}/in.php", params=request_params
        )
        text = resp.text
        if not text.startswith("OK|"):
            raise Exception(f"Submit failed: {text}")
        task_id = text.split("|")[1]

        # Poll
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; its clock is used for the deadline.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        poll_params = {"key": self.api_key, "action": "get", "id": task_id}
        while loop.time() < deadline:
            await asyncio.sleep(poll_interval)
            result = await self.client.get(
                f"{self.base_url}/res.php", params=poll_params
            )
            body = result.text
            if body == "CAPCHA_NOT_READY":
                continue
            if body.startswith("OK|"):
                return body.split("|", 1)[1]
            raise Exception(f"Solve failed: {body}")
        raise TimeoutError(f"Task {task_id} timed out")

    async def get_balance(self):
        """Return the account balance reported by ``/res.php`` as a float.

        Raises:
            ValueError: If the response body is not a number; the message
                includes the raw response text.
        """
        resp = await self.client.get(
            f"{self.base_url}/res.php",
            params={"key": self.api_key, "action": "getbalance"},
        )
        text = resp.text
        try:
            return float(text)
        except ValueError as err:
            # Surface the raw response text instead of a bare float() error.
            raise ValueError(f"Unexpected balance response: {text!r}") from err

    async def close(self):
        """Close the underlying async HTTP client."""
        await self.client.aclose()
# Usage
async def main():
    """Solve five reCAPTCHA tasks concurrently and print each outcome."""
    solver = CaptchaAIAsync(os.environ["CAPTCHAAI_API_KEY"])
    try:
        # Solve multiple concurrently
        tasks = [
            solver.solve({
                "method": "userrecaptcha",
                "googlekey": "6Le-wvkS...",
                "pageurl": f"https://example.com/page{i}",
            })
            for i in range(5)
        ]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for i, r in enumerate(results):
            # With return_exceptions=True a cancelled task yields
            # CancelledError, which derives from BaseException rather than
            # Exception, so check the broader base class.
            if isinstance(r, BaseException):
                print(f"Page {i}: FAILED - {r}")
            else:
                print(f"Page {i}: solved ({len(r)} chars)")
    finally:
        # Close the HTTP client even if something above raises.
        await solver.close()


asyncio.run(main())
Dukungan HTTP/2
HTTPX mendukung HTTP/2, mengurangi overhead koneksi:
# Quote the requirement so shells such as zsh do not treat [http2] as a glob pattern.
pip install "httpx[http2]"
# Enable HTTP/2 on the client; this relies on the httpx[http2] extra installed above.
client = httpx.AsyncClient(http2=True, timeout=30)
HTTP/2 meng-multiplex request melalui satu koneksi, meningkatkan performa saat submit dan poll beberapa CAPTCHA secara bersamaan.
Contoh Scraping dengan Penanganan CAPTCHA
import httpx
import re
import os
async def scrape_with_captcha(url, solver):
    """Fetch *url* and, if a reCAPTCHA site key is present, solve and submit it.

    Args:
        url: Page to fetch; also used as the POST target for the token.
        solver: Object exposing an awaitable ``solve(params)`` method.

    Returns:
        The HTML of the first response when no ``data-sitekey`` attribute is
        found, otherwise the body of the POST response carrying the token.
    """
    sitekey_pattern = r'data-sitekey=["\']([A-Za-z0-9_-]+)["\']'

    async with httpx.AsyncClient() as client:
        # Fetch page
        page = await client.get(url)
        html = page.text

        # Check for reCAPTCHA
        found = re.search(sitekey_pattern, html)
        if found is None:
            return html

        captcha_job = {
            "method": "userrecaptcha",
            "googlekey": found.group(1),
            "pageurl": url,
        }
        token = await solver.solve(captcha_job)

        # Submit form with token
        form_fields = {"g-recaptcha-response": token}
        submitted = await client.post(url, data=form_fields)
        return submitted.text
async def main():
    """Scrape example.com, solving a reCAPTCHA if one is found, and report the size."""
    solver = CaptchaAIAsync(os.environ["CAPTCHAAI_API_KEY"])
    try:
        content = await scrape_with_captcha("https://example.com", solver)
        print(f"Got {len(content)} chars")
    finally:
        # Close the solver's HTTP client even when scraping or solving fails.
        await solver.close()


asyncio.run(main())
Perbandingan: httpx vs requests vs aiohttp
| Fitur | httpx (sync) | httpx (async) | requests | aiohttp |
|---|---|---|---|---|
| Dukungan async | ❌ | ✅ | ❌ | ✅ |
| HTTP/2 | ✅ | ✅ | ❌ | ❌ |
| Connection pooling | ✅ | ✅ | ✅ | ✅ |
| Kompatibilitas API | seperti requests | seperti requests | – | Berbeda |
| Terbaik untuk | Drop-in replacement | Kode async modern | Script cepat | Concurrency tinggi |
Pertanyaan Umum
Haruskah saya menggunakan httpx daripada requests?
Untuk proyek baru, ya. httpx memiliki API yang kompatibel dengan requests ditambah dukungan async dan HTTP/2. Untuk kode yang sudah ada yang menggunakan requests, migrasinya mudah.
Apakah httpx lebih cepat dari aiohttp?
aiohttp memiliki overhead sedikit lebih rendah untuk beban kerja async murni. httpx lebih cepat untuk koneksi HTTP/2 dan lebih nyaman untuk kode sync/async campuran.
Bisakah saya menggunakan httpx dengan Scrapy?
Tidak secara langsung — Scrapy menggunakan event loop Twisted. Gunakan httpx dalam script mandiri atau dengan framework berbasis asyncio seperti FastAPI.
Panduan Terkait
- aiohttp + CaptchaAI Async
- Axios + CaptchaAI (Tanpa Browser)
- Panduan Python Scraping CAPTCHA