Colly adalah framework web scraping Go yang populer. Berikut cara mengintegrasikan CaptchaAI untuk menangani CAPTCHA di scraper Go Anda.
Klien CaptchaAI di Go
package captchaai
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"time"
)
// Client talks to the CaptchaAI HTTP API.
type Client struct {
	// APIKey is sent as the "key" parameter on every API request.
	APIKey string

	// HTTPClient performs the submit and poll requests.
	HTTPClient *http.Client
}
// apiResponse is the JSON envelope decoded from both the submit (in.php)
// and the poll (res.php) responses.
type apiResponse struct {
	// Status is compared against 1 by the callers to detect success.
	Status int `json:"status"`

	// Request carries the payload: callers read it as the task ID after a
	// successful submit, as the token after a successful poll, and as the
	// error/status code otherwise.
	Request string `json:"request"`
}
// NewClient returns a Client that authenticates with apiKey and uses an
// HTTP client whose per-request timeout is 30 seconds.
func NewClient(apiKey string) *Client {
	client := new(Client)
	client.APIKey = apiKey
	client.HTTPClient = &http.Client{Timeout: 30 * time.Second}
	return client
}
// SolveRecaptchaV2 submits a reCAPTCHA v2 task for the given sitekey and page
// URL, then polls for the result.
//
// It waits 15 seconds before the first poll and 5 seconds between subsequent
// polls, for at most 24 polls. It returns the solved token on success. It
// returns an error when the submit request fails, when the API rejects the
// task, when the API reports a terminal error while polling, or when no
// result arrives within the polling budget.
//
// If c.HTTPClient is nil, a client with a 30-second timeout is used instead
// of panicking on a nil pointer.
func (c *Client) SolveRecaptchaV2(sitekey, pageurl string) (string, error) {
	const (
		submitURL    = "https://ocr.captchaai.com/in.php"
		resultURL    = "https://ocr.captchaai.com/res.php"
		initialWait  = 15 * time.Second
		pollInterval = 5 * time.Second
		maxPolls     = 24
	)

	httpClient := c.HTTPClient
	if httpClient == nil {
		httpClient = &http.Client{Timeout: 30 * time.Second}
	}

	// Submit task.
	form := url.Values{
		"key":       {c.APIKey},
		"method":    {"userrecaptcha"},
		"googlekey": {sitekey},
		"pageurl":   {pageurl},
		"json":      {"1"},
	}
	resp, err := httpClient.PostForm(submitURL, form)
	if err != nil {
		return "", fmt.Errorf("submit error: %w", err)
	}
	defer resp.Body.Close()

	var result apiResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", fmt.Errorf("decode error: %w", err)
	}
	if result.Status != 1 {
		return "", fmt.Errorf("submit failed: %s", result.Request)
	}
	taskID := result.Request
	if taskID == "" {
		return "", errors.New("submit failed: empty task id")
	}

	// Build the poll URL once. url.Values.Encode escapes the key and task ID,
	// which plain string formatting would not.
	query := url.Values{
		"key":    {c.APIKey},
		"action": {"get"},
		"id":     {taskID},
		"json":   {"1"},
	}
	pollURL := resultURL + "?" + query.Encode()

	// Poll for result. Transient failures (transport errors, undecodable
	// bodies) are retried; the most recent one is reported on timeout.
	var lastErr error
	wait := initialWait
	for i := 0; i < maxPolls; i++ {
		time.Sleep(wait)
		wait = pollInterval

		pollResp, err := httpClient.Get(pollURL)
		if err != nil {
			// *url.Error embeds the request URL, which contains the API
			// key; keep only the underlying cause so the key cannot leak
			// into logs through the returned error.
			var urlErr *url.Error
			if errors.As(err, &urlErr) {
				err = urlErr.Err
			}
			lastErr = fmt.Errorf("poll error: %w", err)
			continue
		}

		var pollResult apiResponse
		err = json.NewDecoder(pollResp.Body).Decode(&pollResult)
		pollResp.Body.Close()
		if err != nil {
			lastErr = fmt.Errorf("decode error: %w", err)
			continue
		}

		if pollResult.Status == 1 {
			return pollResult.Request, nil
		}
		// NOTE(review): the literal's spelling is kept exactly as in the
		// original code; presumably it matches what the API sends.
		if pollResult.Request != "CAPCHA_NOT_READY" {
			return "", fmt.Errorf("solve error: %s", pollResult.Request)
		}
	}

	if lastErr != nil {
		return "", fmt.Errorf("solve timeout: %w", lastErr)
	}
	return "", errors.New("solve timeout")
}
Integrasi Colly
package main
import (
"fmt"
"log"
"os"
"strings"
"github.com/gocolly/colly/v2"
)
// main crawls https://example.com/data with Colly, solves any reCAPTCHA v2
// widget it finds through CaptchaAI, resubmits the page with the token, and
// prints every table row it encounters.
//
// NOTE(review): this function uses the captchaai package, which is not in
// the import block above it; add its import path (it depends on your module
// layout) before building.
func main() {
	apiKey := os.Getenv("CAPTCHAAI_API_KEY")
	if apiKey == "" {
		// Keep going: pages without a CAPTCHA can still be scraped.
		log.Print("CAPTCHAAI_API_KEY is not set; CAPTCHA solving will fail")
	}
	solver := captchaai.NewClient(apiKey)

	c := colly.NewCollector(
		colly.AllowedDomains("example.com"),
		colly.MaxDepth(2),
	)

	// Detect CAPTCHA pages: any element carrying a data-sitekey attribute.
	c.OnHTML("[data-sitekey]", func(e *colly.HTMLElement) {
		sitekey := e.Attr("data-sitekey")
		if sitekey == "" {
			// The attribute is present but empty; there is nothing to solve.
			return
		}
		pageURL := e.Request.URL.String()
		log.Printf("CAPTCHA detected on %s, solving...", pageURL)

		token, err := solver.SolveRecaptchaV2(sitekey, pageURL)
		if err != nil {
			log.Printf("Solve failed: %v", err)
			return
		}
		log.Printf("CAPTCHA solved, token length: %d", len(token))

		// Post the form with the token through the originating request so
		// the new request inherits its context and depth. The collector's
		// MaxDepth then bounds repeated solve/resubmit cycles when the
		// response shows a CAPTCHA again.
		err = e.Request.Post(pageURL, map[string]string{
			"g-recaptcha-response": token,
		})
		if err != nil {
			log.Printf("Form submit failed: %v", err)
		}
	})

	// Extract data: print the trimmed cell texts of every table row.
	c.OnHTML("table tr", func(e *colly.HTMLElement) {
		var cols []string
		e.ForEach("td", func(_ int, td *colly.HTMLElement) {
			cols = append(cols, strings.TrimSpace(td.Text))
		})
		if len(cols) > 0 {
			fmt.Printf("Row: %s\n", strings.Join(cols, " | "))
		}
	})

	c.OnError(func(r *colly.Response, err error) {
		log.Printf("Error %s: %v", r.Request.URL, err)
	})

	// Surface a failed start instead of exiting silently with no output.
	if err := c.Visit("https://example.com/data"); err != nil {
		log.Fatalf("Visit failed: %v", err)
	}
}
Colly dengan Rate Limiting
package main
import (
"time"
"github.com/gocolly/colly/v2"
)
// main builds a collector whose requests are rate limited and then visits
// https://example.com.
//
// Errors from Limit and Visit are no longer discarded. panic is used to
// report them because this snippet imports no logging package.
func main() {
	c := colly.NewCollector()

	// Rate limit for every domain: one request at a time, with a fixed
	// 3-second delay plus a random extra delay of up to 2 seconds.
	err := c.Limit(&colly.LimitRule{
		DomainGlob:  "*",
		Parallelism: 1,
		Delay:       3 * time.Second,
		RandomDelay: 2 * time.Second,
	})
	if err != nil {
		panic(err)
	}

	// ... CAPTCHA handling as above ...

	if err := c.Visit("https://example.com"); err != nil {
		panic(err)
	}
}
Turnstile Solving di Go
// SolveTurnstile submits a Turnstile task for the given sitekey and page URL,
// then polls for the result.
//
// It waits 5 seconds before the first poll and 3 seconds between subsequent
// polls, for at most 20 polls. It returns the solved token on success. It
// returns an error when the submit request fails, when the submit response
// cannot be decoded, when the API rejects the task, when the API reports a
// terminal error while polling, or when no result arrives within the polling
// budget.
//
// If c.HTTPClient is nil, a client with a 30-second timeout is used instead
// of panicking on a nil pointer.
func (c *Client) SolveTurnstile(sitekey, pageurl string) (string, error) {
	const (
		submitURL    = "https://ocr.captchaai.com/in.php"
		resultURL    = "https://ocr.captchaai.com/res.php"
		initialWait  = 5 * time.Second
		pollInterval = 3 * time.Second
		maxPolls     = 20
	)

	httpClient := c.HTTPClient
	if httpClient == nil {
		httpClient = &http.Client{Timeout: 30 * time.Second}
	}

	// Submit task.
	form := url.Values{
		"key":     {c.APIKey},
		"method":  {"turnstile"},
		"sitekey": {sitekey},
		"pageurl": {pageurl},
		"json":    {"1"},
	}
	resp, err := httpClient.PostForm(submitURL, form)
	if err != nil {
		return "", fmt.Errorf("submit error: %w", err)
	}
	defer resp.Body.Close()

	// A body that is not valid JSON is reported as such rather than as a
	// "submit failed" error with an empty reason.
	var result apiResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", fmt.Errorf("decode error: %w", err)
	}
	if result.Status != 1 {
		return "", fmt.Errorf("submit failed: %s", result.Request)
	}
	taskID := result.Request
	if taskID == "" {
		return "", errors.New("submit failed: empty task id")
	}

	// Build the poll URL once. url.Values.Encode escapes the key and task ID,
	// which plain string formatting would not.
	query := url.Values{
		"key":    {c.APIKey},
		"action": {"get"},
		"id":     {taskID},
		"json":   {"1"},
	}
	pollURL := resultURL + "?" + query.Encode()

	// Poll for result. Transient failures (transport errors, undecodable
	// bodies) are retried; the most recent one is reported on timeout.
	var lastErr error
	wait := initialWait
	for i := 0; i < maxPolls; i++ {
		time.Sleep(wait)
		wait = pollInterval

		pollResp, err := httpClient.Get(pollURL)
		if err != nil {
			// *url.Error embeds the request URL, which contains the API
			// key; keep only the underlying cause so the key cannot leak
			// into logs through the returned error.
			var urlErr *url.Error
			if errors.As(err, &urlErr) {
				err = urlErr.Err
			}
			lastErr = fmt.Errorf("poll error: %w", err)
			continue
		}

		var pr apiResponse
		err = json.NewDecoder(pollResp.Body).Decode(&pr)
		pollResp.Body.Close()
		if err != nil {
			lastErr = fmt.Errorf("decode error: %w", err)
			continue
		}

		if pr.Status == 1 {
			return pr.Request, nil
		}
		// NOTE(review): the literal's spelling is kept exactly as in the
		// original code; presumably it matches what the API sends.
		if pr.Request != "CAPCHA_NOT_READY" {
			return "", fmt.Errorf("error: %s", pr.Request)
		}
	}

	if lastErr != nil {
		return "", fmt.Errorf("timeout: %w", lastErr)
	}
	return "", errors.New("timeout")
}
Pertanyaan Umum
Mengapa menggunakan Colly dibandingkan scraper Go lainnya?
Colly adalah framework scraping Go paling populer, dengan fitur bawaan berupa caching, rate limiting, dan penanganan request secara konkuren. Framework ini cocok dipadukan dengan HTTP API CaptchaAI.
Bisakah saya menggunakan Colly dengan headless browser?
Untuk halaman yang memerlukan rendering JavaScript, gunakan chromedp atau rod bersama Colly. Gunakan Colly untuk halaman statis dan headless browser untuk halaman dinamis yang dilindungi CAPTCHA.
Apakah API CaptchaAI kompatibel dengan Go?
Ya. CaptchaAI menggunakan endpoint HTTP standar yang dapat dipanggil langsung dengan package net/http bawaan Go, sehingga tidak diperlukan SDK tambahan.
Panduan Terkait
- Integrasi Crawlee + CaptchaAI
- Cara Solve reCAPTCHA v2 dengan API CaptchaAI
Tambahkan CAPTCHA solving ke scraper Go Anda — dapatkan CaptchaAI.