aiohttp memungkinkan permintaan HTTP non-pemblokiran dengan Python. Gabungkan dengan CaptchaAI untuk menyelesaikan beberapa CAPTCHA secara bersamaan tanpa memblokir event loop Anda.
Persyaratan
| Persyaratan | Detail |
|---|---|
| Python | 3.8+ |
| aiohttp | 3.8+ |
| Kunci API CaptchaAI | Dapatkan satu di sini |
pip install aiohttp
Klien Async CaptchaAI
import aiohttp
import asyncio
class AsyncCaptchaAI:
    """Small asynchronous client for the CaptchaAI HTTP API.

    The client owns no network resources. Every method takes the HTTP
    session to use as its first argument; the session only needs a
    ``get(url, params=...)`` method returning an async context manager
    whose response exposes an awaitable ``text()`` (as
    ``aiohttp.ClientSession`` does).
    """

    def __init__(self, api_key):
        """Store the API key that is sent with every request."""
        self.api_key = api_key
        # Root URL; "/in.php" (submit) and "/res.php" (results) hang off it.
        self.base_url = "https://ocr.captchaai.com"

    async def _fetch_text(self, session, endpoint, params):
        """GET ``base_url/endpoint`` and return the body without outer whitespace."""
        async with session.get(
            f"{self.base_url}/{endpoint}", params=params
        ) as resp:
            # Strip so a trailing newline cannot defeat the exact-match and
            # prefix checks performed by the callers.
            return (await resp.text()).strip()

    async def submit(self, session, params):
        """Submit a CAPTCHA task and return the task ID.

        Args:
            session: HTTP session used to send the request.
            params: Task parameters. The mapping is copied, so the caller's
                dict is left untouched and never receives the API key.

        Returns:
            Everything after the first ``|`` of the ``OK|...`` response.

        Raises:
            RuntimeError: If the response does not start with ``OK|``.
        """
        # Build a new dict instead of writing into the caller's: the same
        # params dict may be reused by the caller for several submissions.
        payload = {**params, "key": self.api_key}
        text = await self._fetch_text(session, "in.php", payload)
        if not text.startswith("OK|"):
            raise RuntimeError(f"Submit failed: {text}")
        return text.split("|", 1)[1]

    async def poll(self, session, task_id, timeout=300, poll_interval=5):
        """Poll for the result of a task until it is ready or time runs out.

        Args:
            session: HTTP session used to send the requests.
            task_id: ID previously returned by ``submit``.
            timeout: Seconds after which no further poll is started.
            poll_interval: Seconds slept before each poll.

        Returns:
            Everything after the first ``|`` of the ``OK|...`` response.

        Raises:
            RuntimeError: If the API answers with anything other than
                "not ready" or ``OK|...``.
            TimeoutError: If the deadline passes without a result.
        """
        params = {
            "key": self.api_key,
            "action": "get",
            "id": task_id,
        }
        # Use the loop's monotonic clock so wall-clock jumps cannot distort
        # the deadline; get_running_loop() is the call meant for coroutines.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while loop.time() < deadline:
            # Sleep first: the first poll happens one interval after entry.
            await asyncio.sleep(poll_interval)
            text = await self._fetch_text(session, "res.php", params)
            # "CAPCHA" (sic) is the literal value this client compares
            # against; verify with the API before "correcting" the spelling.
            if text == "CAPCHA_NOT_READY":
                continue
            if text.startswith("OK|"):
                return text.split("|", 1)[1]
            raise RuntimeError(f"Solve failed: {text}")
        raise TimeoutError(f"Task {task_id} timed out after {timeout}s")

    async def solve(self, session, params, timeout=300, poll_interval=5):
        """Submit a task and poll for its result in one call.

        Args:
            session: HTTP session used for all requests.
            params: Task parameters, passed to ``submit``.
            timeout: Polling timeout in seconds, passed to ``poll``.
            poll_interval: Seconds between polls, passed to ``poll``.

        Returns:
            The solution string returned by ``poll``.

        Raises:
            RuntimeError: If submitting or solving fails.
            TimeoutError: If no result arrives before the timeout.
        """
        task_id = await self.submit(session, params)
        return await self.poll(session, task_id, timeout, poll_interval)

    async def get_balance(self, session):
        """Return the account balance as a float.

        Raises:
            ValueError: If the response body is not a number.
        """
        params = {"key": self.api_key, "action": "getbalance"}
        return float(await self._fetch_text(session, "res.php", params))
Pecahkan CAPTCHA Tunggal
import asyncio
import os
async def main():
    """Print the account balance, then solve one reCAPTCHA v2 task.

    The API key is read from the ``CAPTCHAAI_API_KEY`` environment variable.
    Only the first 50 characters of the returned token are printed.
    """
    client = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])
    recaptcha_task = {
        "method": "userrecaptcha",
        "googlekey": "6Le-wvkS...",
        "pageurl": "https://example.com",
    }

    async with aiohttp.ClientSession() as http:
        # Report the balance before starting the solve.
        balance = await client.get_balance(http)
        print(f"Balance: ${balance:.2f}")

        # One session serves both the balance check and the solve.
        token = await client.solve(http, recaptcha_task)
        print(f"Token: {token[:50]}...")


asyncio.run(main())
Selesaikan Beberapa CAPTCHA Secara Bersamaan
async def solve_batch(urls, site_key):
    """Solve one reCAPTCHA task per URL concurrently and report each outcome.

    Args:
        urls: Page URLs; one task is submitted per entry.
        site_key: Site key sent as ``googlekey`` with every task.

    Returns:
        The list produced by ``asyncio.gather``: one entry per URL, holding
        either the solution or the exception raised for that URL.
    """
    client = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])

    def task_for(page):
        # Parameters for a single task targeting ``page``.
        return {
            "method": "userrecaptcha",
            "googlekey": site_key,
            "pageurl": page,
        }

    async with aiohttp.ClientSession() as http:
        pending = [client.solve(http, task_for(page)) for page in urls]
        # return_exceptions=True keeps one failure from aborting the batch.
        results = await asyncio.gather(*pending, return_exceptions=True)

        for url, result in zip(urls, results):
            if not isinstance(result, Exception):
                print(f"SOLVED {url}: {len(result)} chars")
                continue
            print(f"FAILED {url}: {result}")

        return results
# Example pages to solve for; one task is submitted per entry.
urls = [
    "https://example.com/page1",
    "https://example.com/page2",
    "https://example.com/page3",
    "https://example.com/page4",
    "https://example.com/page5",
]
# Run the whole batch; the second argument is the site key used for every URL.
asyncio.run(solve_batch(urls, "6Le-wvkS..."))
Scraping dengan Penanganan CAPTCHA
async def scrape_with_captcha(url, site_key):
    """Fetch a page, solving its reCAPTCHA first when one is present.

    Args:
        url: Page to fetch; also reported as the task's ``pageurl``.
        site_key: Site key sent as ``googlekey`` when a solve is needed.

    Returns:
        The body of the initial GET when it does not contain the
        ``g-recaptcha`` marker; otherwise the body of the POST that submits
        the solved token to the same URL.
    """
    client = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])

    async with aiohttp.ClientSession() as http:
        async with http.get(url) as page:
            html = await page.text()

        # The widget marker in the markup is the only CAPTCHA signal used.
        captcha_present = "g-recaptcha" in html
        if not captcha_present:
            return html

        task = {
            "method": "userrecaptcha",
            "googlekey": site_key,
            "pageurl": url,
        }
        token = await client.solve(http, task)

        # Send the solved token back to the page as form data.
        form = {"g-recaptcha-response": token}
        async with http.post(url, data=form) as reply:
            return await reply.text()
Semaphore untuk Kontrol Kecepatan
Batasi jumlah penyelesaian yang berjalan bersamaan agar API tidak kewalahan:
async def solve_with_limit(urls, site_key, max_concurrent=10):
    """Solve one reCAPTCHA task per URL with a cap on concurrent solves.

    Args:
        urls: Page URLs; one task is created per entry.
        site_key: Site key sent as ``googlekey`` with every task.
        max_concurrent: Maximum number of solves in flight at once.

    Returns:
        The list produced by ``asyncio.gather``: one entry per URL, holding
        either the solution or the exception raised for that URL.
    """
    client = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])
    gate = asyncio.Semaphore(max_concurrent)

    async def solve_one(session, url):
        task = {
            "method": "userrecaptcha",
            "googlekey": site_key,
            "pageurl": url,
        }
        # Holding the semaphore for the whole solve enforces the cap.
        async with gate:
            return await client.solve(session, task)

    async with aiohttp.ClientSession() as http:
        results = await asyncio.gather(
            *(solve_one(http, page) for page in urls),
            return_exceptions=True,
        )

    failures = [outcome for outcome in results if isinstance(outcome, Exception)]
    solved = len(results) - len(failures)
    print(f"Solved {solved}/{len(urls)} CAPTCHAs")
    return results
Contoh Turnstile
async def solve_turnstile(url, sitekey):
    """Solve a single Turnstile task and return its token.

    Args:
        url: Page the challenge belongs to, sent as ``pageurl``.
        sitekey: Site key sent as ``sitekey``.

    Returns:
        The solution string returned by the solver.
    """
    client = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])
    task = {
        "method": "turnstile",
        "sitekey": sitekey,
        "pageurl": url,
    }
    async with aiohttp.ClientSession() as http:
        return await client.solve(http, task)
Pemecahan Masalah
| Kesalahan | Penyebab | Solusi |
|---|---|---|
| `ClientConnectorError` | Masalah jaringan | Periksa konektivitas |
| `Submit failed: ERROR_ZERO_BALANCE` | Tidak ada dana | Isi ulang akun |
| `TimeoutError` | Penyelesaian yang lambat | Tingkatkan parameter batas waktu (`timeout`) |
| `RuntimeError: Event loop is closed` | Menggunakan `asyncio.run` di Jupyter | Gunakan `nest_asyncio` |
Pertanyaan Umum
Mengapa aiohttp, bukan httpx?
aiohttp adalah pustaka HTTP async paling matang untuk Python dengan performa terbaik untuk beban kerja konkurensi tinggi. httpx juga berfungsi – lihat panduan integrasi httpx.
Berapa banyak penyelesaian bersamaan yang dapat saya jalankan?
CaptchaAI menangani 100+ permintaan bersamaan. Gunakan semaphore untuk mengontrol konkurensi berdasarkan kebutuhan dan saldo akun Anda.
Bisakah saya menggunakan kembali sesi ini untuk beberapa penyelesaian?
Ya, dan Anda harus melakukannya. Sesi aiohttp memelihara kumpulan koneksi, membuat permintaan berikutnya lebih cepat.
Panduan Terkait
- Integrasi HTTPX + CaptchaAI
- Pemecahan CAPTCHA Paralel
- Integrasi Scrapy + CaptchaAI