Integrasi

aiohttp + CaptchaAI: Pemecahan CAPTCHA Asinkron

aiohttp memungkinkan permintaan HTTP non-blocking (tanpa pemblokiran) di Python. Gabungkan dengan CaptchaAI untuk menyelesaikan beberapa CAPTCHA secara bersamaan tanpa memblokir event loop Anda.

Persyaratan

Persyaratan | Detail
Python | 3.8+
aiohttp | 3.8+
Kunci API CaptchaAI | Dapatkan satu di sini
pip install aiohttp

Klien Async CaptchaAI

import aiohttp
import asyncio


class AsyncCaptchaAI:
    """Minimal asynchronous client for the CaptchaAI HTTP API.

    The client keeps no connection state of its own. Every method receives
    the session to send requests through: any object exposing an
    aiohttp-style ``session.get(url, params=...)`` async context manager
    whose response offers an awaitable ``text()``. One session can therefore
    be shared by many concurrent solves.
    """

    def __init__(self, api_key):
        """Store the API key and the service base URL."""
        self.api_key = api_key
        self.base_url = "https://ocr.captchaai.com"

    async def submit(self, session, params):
        """Submit a CAPTCHA task and return the task ID.

        Args:
            session: HTTP session used to send the request.
            params: Task parameters (method, site key, page URL, ...). The
                mapping is not modified; the API key is added to a copy.

        Returns:
            The task ID string that follows ``OK|`` in the response.

        Raises:
            RuntimeError: If the response does not start with ``OK|``.
        """
        # Work on a copy: writing the key into the caller's dict would leak
        # the credential into an object the caller may log or reuse.
        request_params = {**params, "key": self.api_key}
        async with session.get(
            f"{self.base_url}/in.php", params=request_params
        ) as resp:
            text = (await resp.text()).strip()

        if not text.startswith("OK|"):
            raise RuntimeError(f"Submit failed: {text}")

        return text.split("|", 1)[1]

    async def poll(self, session, task_id, timeout=300, *, interval=5):
        """Poll for the result of a submitted task until it is ready.

        Args:
            session: HTTP session used to send the requests.
            task_id: ID returned by :meth:`submit`.
            timeout: Maximum number of seconds to keep polling.
            interval: Seconds to wait before each status request.

        Returns:
            Everything after the first ``|`` of the ``OK|...`` response.

        Raises:
            RuntimeError: If the service answers with anything other than
                ``CAPCHA_NOT_READY`` or ``OK|...``.
            TimeoutError: If no result arrives within ``timeout`` seconds.
        """
        params = {
            "key": self.api_key,
            "action": "get",
            "id": task_id,
        }
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout

        while loop.time() < deadline:
            # Wait first (a fresh task is never ready immediately), but never
            # sleep past the deadline so ``timeout`` is actually honoured.
            remaining = max(0.0, deadline - loop.time())
            await asyncio.sleep(min(interval, remaining))

            async with session.get(
                f"{self.base_url}/res.php", params=params
            ) as resp:
                text = (await resp.text()).strip()

            # "CAPCHA_NOT_READY" is the literal status string checked here.
            if text == "CAPCHA_NOT_READY":
                continue
            if text.startswith("OK|"):
                # Split once only: the solution itself may contain "|".
                return text.split("|", 1)[1]
            raise RuntimeError(f"Solve failed: {text}")

        raise TimeoutError(f"Task {task_id} timed out after {timeout}s")

    async def solve(self, session, params, timeout=300):
        """Submit a task and poll for its result in one call.

        Args:
            session: HTTP session used for both requests.
            params: Task parameters, passed to :meth:`submit`.
            timeout: Maximum number of seconds to wait in :meth:`poll`.

        Returns:
            The solution string returned by :meth:`poll`.
        """
        task_id = await self.submit(session, params)
        return await self.poll(session, task_id, timeout)

    async def get_balance(self, session):
        """Return the account balance as a float.

        Raises:
            ValueError: If the response body is not a number (for example an
                error code); the message includes the raw response.
        """
        params = {"key": self.api_key, "action": "getbalance"}
        async with session.get(
            f"{self.base_url}/res.php", params=params
        ) as resp:
            text = (await resp.text()).strip()

        try:
            return float(text)
        except ValueError as err:
            raise ValueError(f"Balance check failed: {text}") from err

Pecahkan CAPTCHA Tunggal

import asyncio
import os

async def main():
    """Print the account balance, then solve one reCAPTCHA v2 challenge.

    The API key is read from the ``CAPTCHAAI_API_KEY`` environment variable;
    a missing variable raises ``KeyError`` before any request is made.
    """
    api_key = os.environ["CAPTCHAAI_API_KEY"]
    client = AsyncCaptchaAI(api_key)

    # reCAPTCHA v2 task description: site key plus the page it is embedded on.
    recaptcha_task = {
        "method": "userrecaptcha",
        "googlekey": "6Le-wvkS...",
        "pageurl": "https://example.com",
    }

    async with aiohttp.ClientSession() as http:
        # Report the balance before solving anything.
        balance = await client.get_balance(http)
        print(f"Balance: ${balance:.2f}")

        token = await client.solve(http, recaptcha_task)
        # Only the first 50 characters of the token are printed.
        print(f"Token: {token[:50]}...")


# Script entry point: run the coroutine to completion.
asyncio.run(main())

Selesaikan Beberapa CAPTCHA Secara Bersamaan

async def solve_batch(urls, site_key):
    """Solve one reCAPTCHA v2 challenge per URL concurrently and report each.

    Args:
        urls: Iterable of page URLs; it is materialised once so generators
            are supported as well as lists.
        site_key: reCAPTCHA site key sent as ``googlekey`` for every URL.

    Returns:
        A list aligned with ``urls`` holding, per URL, either the solved
        token or the exception that the solve attempt raised.
    """
    # The URLs are walked twice (task creation and reporting), so a one-shot
    # iterable must be turned into a list up front.
    urls = list(urls)
    solver = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])

    async with aiohttp.ClientSession() as session:
        tasks = [
            solver.solve(session, {
                "method": "userrecaptcha",
                "googlekey": site_key,
                "pageurl": url,
            })
            for url in urls
        ]

        # return_exceptions=True: one failed solve must not abort the others.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        for url, result in zip(urls, results):
            # BaseException, not Exception: a cancelled solve is returned as
            # asyncio.CancelledError, which is not an Exception subclass on
            # Python 3.8+, and would otherwise reach len() and crash.
            if isinstance(result, BaseException):
                print(f"FAILED {url}: {result}")
            else:
                print(f"SOLVED {url}: {len(result)} chars")

        return results


# Five sample pages (page1 .. page5) on the same host, solved as one batch.
urls = [f"https://example.com/page{index}" for index in range(1, 6)]
asyncio.run(solve_batch(urls, "6Le-wvkS..."))

Scraping dengan Penanganan CAPTCHA

async def scrape_with_captcha(url, site_key):
    """Fetch a page and, if it carries a reCAPTCHA marker, solve and re-submit.

    Args:
        url: Page to fetch; also used as the POST target and as ``pageurl``.
        site_key: reCAPTCHA site key sent as ``googlekey``.

    Returns:
        The body of the initial GET when it does not contain ``g-recaptcha``;
        otherwise the body of the POST that carries the solved token.
    """
    client = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])

    async with aiohttp.ClientSession() as http:
        async with http.get(url) as page:
            body = await page.text()

        # The substring "g-recaptcha" is the only CAPTCHA detection performed.
        needs_captcha = "g-recaptcha" in body
        if not needs_captcha:
            return body

        task = {
            "method": "userrecaptcha",
            "googlekey": site_key,
            "pageurl": url,
        }
        token = await client.solve(http, task)

        # Post the token back to the same URL under the form field name
        # "g-recaptcha-response".
        form = {"g-recaptcha-response": token}
        async with http.post(url, data=form) as reply:
            return await reply.text()

Semaphore untuk Kontrol Kecepatan

Batasi jumlah penyelesaian yang berjalan bersamaan agar API tidak kewalahan:

async def solve_with_limit(urls, site_key, max_concurrent=10):
    """Solve one reCAPTCHA v2 challenge per URL with bounded concurrency.

    Args:
        urls: Iterable of page URLs; materialised once so that generators
            and other unsized iterables are supported.
        site_key: reCAPTCHA site key sent as ``googlekey`` for every URL.
        max_concurrent: Maximum number of solves in flight at the same time.

    Returns:
        A list aligned with ``urls`` holding, per URL, either the solved
        token or the exception that the solve attempt raised.
    """
    # len() is needed for the summary, which an unsized iterable lacks.
    urls = list(urls)
    solver = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])
    semaphore = asyncio.Semaphore(max_concurrent)

    async def solve_one(session, url):
        # The semaphore is held for the whole submit-and-poll cycle, so at
        # most max_concurrent tasks are being solved at any moment.
        async with semaphore:
            return await solver.solve(session, {
                "method": "userrecaptcha",
                "googlekey": site_key,
                "pageurl": url,
            })

    async with aiohttp.ClientSession() as session:
        tasks = [solve_one(session, url) for url in urls]
        results = await asyncio.gather(*tasks, return_exceptions=True)

    # BaseException, not Exception: a cancelled solve comes back as
    # asyncio.CancelledError (a BaseException on Python 3.8+) and must not
    # be counted as solved.
    solved = sum(1 for r in results if not isinstance(r, BaseException))
    print(f"Solved {solved}/{len(urls)} CAPTCHAs")
    return results

Contoh Turnstile

async def solve_turnstile(url, sitekey):
    """Solve a Turnstile challenge for the given page and return the token.

    Args:
        url: Page URL sent as ``pageurl``.
        sitekey: Site key sent as ``sitekey``.

    Returns:
        The value returned by ``AsyncCaptchaAI.solve`` for the task.
    """
    client = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])
    turnstile_task = {
        "method": "turnstile",
        "sitekey": sitekey,
        "pageurl": url,
    }

    # A session is opened for this single solve and closed on exit.
    async with aiohttp.ClientSession() as http:
        return await client.solve(http, turnstile_task)

Pemecahan Masalah

Kesalahan | Penyebab | Solusi
ClientConnectorError | Masalah jaringan | Periksa konektivitas
Submit failed: ERROR_ZERO_BALANCE | Tidak ada dana | Isi ulang akun
TimeoutError | Penyelesaian yang lambat | Tingkatkan parameter batas waktu
RuntimeError: Event loop is closed | Menggunakan asyncio.run di Jupyter | Gunakan nest_asyncio

Pertanyaan Umum

Mengapa aiohttp, bukan httpx?

aiohttp adalah pustaka HTTP async paling matang untuk Python dengan performa terbaik untuk beban kerja konkurensi tinggi. httpx juga berfungsi – lihat panduan integrasi httpx.

Berapa banyak penyelesaian bersamaan yang dapat saya jalankan?

CaptchaAI menangani 100+ permintaan bersamaan. Gunakan semaphore untuk mengontrol konkurensi berdasarkan kebutuhan dan saldo akun Anda.

Bisakah saya menggunakan kembali sesi ini untuk beberapa penyelesaian?

Ya, dan Anda harus melakukannya. Sesi aiohttp memelihara kumpulan koneksi, membuat permintaan berikutnya lebih cepat.

Panduan Terkait

  • Integrasi HTTPX + CaptchaAI
  • Pemecahan CAPTCHA Paralel
  • Integrasi Scrapy + CaptchaAI
Komentar dinonaktifkan untuk artikel ini.