Go Integration
Integrate ScrapeHub into your Go applications
Installation
go get github.com/scrapehub/scrapehub-go
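
The SDK is distributed as a standard Go module, so the command above records the dependency in your go.mod. A minimal module file looks roughly like this (the module path is yours; the Go and SDK versions shown are placeholders, not pinned requirements):

module example.com/myscraper

go 1.21

require github.com/scrapehub/scrapehub-go v1.0.0 // placeholder version
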
Quick Start
main.go
package main

import (
	"fmt"
	"log"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	// Initialize client
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	// Simple scrape
	result, err := client.Scrape(&scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	})
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Extracted %d items\n", len(result.Data))
	for _, item := range result.Data {
		fmt.Println(item)
	}
}
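
The examples in this guide hardcode a placeholder live key for brevity. In real code, prefer loading the key from the environment, as the HTTP handler example later on this page does. A minimal sketch (it assumes NewClient returns *scrapehub.Client; the guide only shows the constructor, not the concrete type):

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/scrapehub/scrapehub-go"
)

// newClientFromEnv reads SCRAPEHUB_API_KEY, the same variable the
// HTTP handler example below uses, and fails fast when it is missing.
func newClientFromEnv() *scrapehub.Client {
	key := os.Getenv("SCRAPEHUB_API_KEY")
	if key == "" {
		log.Fatal("SCRAPEHUB_API_KEY is not set")
	}
	return scrapehub.NewClient(key)
}

func main() {
	client := newClientFromEnv()
	fmt.Println("client ready:", client != nil)
}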

Advanced Configuration
advanced.go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	// Initialize client with options
	client := scrapehub.NewClient(
		"sk_live_xxxx_449x",
		scrapehub.WithTimeout(5 * time.Minute),
		scrapehub.WithRetries(3),
	)

	// Advanced scraping with custom config
	ctx := context.Background()
	result, err := client.ScrapeWithContext(ctx, &scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
		Format: "json",

		// Pagination
		Pagination: &scrapehub.PaginationConfig{
			Enabled:  true,
			MaxPages: 10,
			Selector: "a.next-page",
		},

		// Custom headers
		Headers: map[string]string{
			"User-Agent":      "Mozilla/5.0...",
			"Accept-Language": "en-US,en;q=0.9",
		},

		// JavaScript rendering
		RenderJS:        true,
		WaitForSelector: ".product-list",

		// Proxy settings
		Proxy: &scrapehub.ProxyConfig{
			Enabled:     true,
			Region:      "us-east",
			Residential: true,
		},
	})
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Status: %s\n", result.Status)
	fmt.Printf("Pages scraped: %d\n", result.PagesScraped)
	fmt.Printf("Items extracted: %d\n", len(result.Data))
	fmt.Printf("Time taken: %.2fs\n", result.Duration)
}
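
Because ScrapeWithContext takes a context.Context, an individual call can be bounded independently of the client-level WithTimeout option. A sketch using context.WithTimeout (the two-minute limit is an arbitrary choice):

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	// Bound this single call to two minutes, independent of any
	// client-level timeout configured via WithTimeout.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	result, err := client.ScrapeWithContext(ctx, &scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	})
	if err != nil {
		// If the deadline fires first, the error should wrap
		// context.DeadlineExceeded.
		log.Fatal(err)
	}
	fmt.Printf("Extracted %d items\n", len(result.Data))
}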

Concurrent Processing
concurrent.go
package main

import (
	"fmt"
	"sync"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	urls := []string{
		"https://example.com/category/1",
		"https://example.com/category/2",
		"https://example.com/category/3",
	}

	// Use a WaitGroup to track the goroutines
	var wg sync.WaitGroup
	results := make(chan *scrapehub.ScrapeResult, len(urls))

	for _, url := range urls {
		wg.Add(1)
		go func(u string) {
			defer wg.Done()
			result, err := client.Scrape(&scrapehub.ScrapeConfig{
				URL:    u,
				Engine: "neural-x1",
			})
			if err != nil {
				fmt.Printf("Error scraping %s: %v\n", u, err)
				return
			}
			results <- result
		}(url)
	}

	// Close the channel once all goroutines finish
	go func() {
		wg.Wait()
		close(results)
	}()

	// Collect results
	allItems := []map[string]interface{}{}
	for result := range results {
		allItems = append(allItems, result.Data...)
	}
	fmt.Printf("Total items extracted: %d\n", len(allItems))
}
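
One goroutine per URL is fine for three URLs, but with hundreds of them you risk tripping the API's rate limits. A common mitigation is a buffered-channel semaphore that caps in-flight requests; the limit of 5 below is an arbitrary value to tune against your plan:

package main

import (
	"fmt"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	urls := []string{
		"https://example.com/category/1",
		// ... potentially hundreds more
	}

	sem := make(chan struct{}, 5) // at most 5 scrapes in flight
	results := make(chan *scrapehub.ScrapeResult, len(urls))

	for _, url := range urls {
		sem <- struct{}{} // acquire a slot before spawning
		go func(u string) {
			defer func() { <-sem }() // release the slot
			result, err := client.Scrape(&scrapehub.ScrapeConfig{
				URL:    u,
				Engine: "neural-x1",
			})
			if err != nil {
				fmt.Printf("Error scraping %s: %v\n", u, err)
				results <- nil
				return
			}
			results <- result
		}(url)
	}

	// Every goroutine sends exactly once, so we can simply count receives.
	for range urls {
		if result := <-results; result != nil {
			fmt.Printf("Got %d items\n", len(result.Data))
		}
	}
}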

Job Management
jobs.go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")
	ctx := context.Background()

	// Start a scrape job (non-blocking)
	job, err := client.CreateJob(ctx, &scrapehub.JobConfig{
		URL:    "https://example.com/large-dataset",
		Engine: "neural-x1",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Job ID: %s\n", job.ID)
	fmt.Printf("Status: %s\n", job.Status)

	// Poll job status
	for {
		job, err = client.GetJob(ctx, job.ID)
		if err != nil {
			log.Fatal(err)
		}
		if job.IsComplete() {
			break
		}
		fmt.Printf("Progress: %d%%\n", job.Progress)
		time.Sleep(5 * time.Second)
	}

	// Get results when complete
	if job.IsSuccessful() {
		results, err := job.GetResults()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("Extracted %d items\n", len(results))
	} else {
		fmt.Printf("Job failed: %s\n", job.ErrorMessage)
	}

	// List all jobs
	jobs, err := client.ListJobs(ctx, &scrapehub.ListJobsOptions{
		Limit:  10,
		Status: "completed",
	})
	if err != nil {
		log.Fatal(err)
	}
	for _, j := range jobs {
		fmt.Printf("%s: %s - %s\n", j.ID, j.Status, j.CreatedAt)
	}
}
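
The fixed five-second polling loop can be wrapped in a helper that also honors a context deadline, so a stalled job cannot block the program forever. A sketch (waitForJob and the *scrapehub.Job type name are our own assumptions; the guide only shows the returned value, not its concrete type):

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

// waitForJob polls GetJob until the job completes or ctx expires,
// whichever comes first.
func waitForJob(ctx context.Context, client *scrapehub.Client, id string) (*scrapehub.Job, error) {
	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()
	for {
		job, err := client.GetJob(ctx, id)
		if err != nil {
			return nil, err
		}
		if job.IsComplete() {
			return job, nil
		}
		select {
		case <-ctx.Done():
			return nil, ctx.Err() // the deadline wins over polling
		case <-ticker.C:
		}
	}
}

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	job, err := client.CreateJob(ctx, &scrapehub.JobConfig{
		URL:    "https://example.com/large-dataset",
		Engine: "neural-x1",
	})
	if err != nil {
		log.Fatal(err)
	}

	job, err = waitForJob(ctx, client, job.ID)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Job %s finished with status %s\n", job.ID, job.Status)
}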

HTTP Handler Integration
server.go
package main

import (
	"encoding/json"
	"log"
	"net/http"
	"os"

	"github.com/scrapehub/scrapehub-go"
)

type ScrapeRequest struct {
	URL    string `json:"url"`
	Engine string `json:"engine"`
}

func main() {
	client := scrapehub.NewClient(os.Getenv("SCRAPEHUB_API_KEY"))

	http.HandleFunc("/api/scrape", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
			return
		}

		var req ScrapeRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		result, err := client.Scrape(&scrapehub.ScrapeConfig{
			URL:    req.URL,
			Engine: req.Engine,
		})
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]interface{}{
			"success":   true,
			"itemCount": len(result.Data),
			"data":      result.Data,
		})
	})

	log.Println("Server starting on :8080")
	log.Fatal(http.ListenAndServe(":8080", nil))
}
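
To exercise the endpoint, POST a JSON body matching ScrapeRequest. A minimal Go caller (any HTTP tool works just as well):

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// The field names match the ScrapeRequest struct in server.go.
	body := bytes.NewBufferString(`{"url": "https://example.com/products", "engine": "neural-x1"}`)

	resp, err := http.Post("http://localhost:8080/api/scrape", "application/json", body)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	out, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d: %s\n", resp.StatusCode, out)
}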

Webhook Handler
webhooks.go
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"net/http"

	"github.com/scrapehub/scrapehub-go"
)

type WebhookPayload struct {
	Event string `json:"event"`
	JobID string `json:"job_id"`
}

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	// Create job with webhook
	job, err := client.CreateJob(context.Background(), &scrapehub.JobConfig{
		URL:        "https://example.com/products",
		Engine:     "neural-x1",
		WebhookURL: "https://your-server.com/webhook",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Job created: %s\n", job.ID)

	// Webhook handler
	http.HandleFunc("/webhook", func(w http.ResponseWriter, r *http.Request) {
		var payload WebhookPayload
		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		switch payload.Event {
		case "job.completed":
			fmt.Printf("Job %s completed!\n", payload.JobID)
			// Fetch results
			job, err := client.GetJob(r.Context(), payload.JobID)
			if err != nil {
				log.Printf("Error fetching job: %v\n", err)
				return
			}
			results, err := job.GetResults()
			if err != nil {
				log.Printf("Error fetching results: %v\n", err)
				return
			}
			fmt.Printf("Extracted %d items\n", len(results))
		case "job.failed":
			fmt.Printf("Job %s failed!\n", payload.JobID)
		}

		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(map[string]string{"status": "received"})
	})

	log.Fatal(http.ListenAndServe(":8080", nil))
}
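
A production webhook endpoint should verify that requests really come from ScrapeHub before acting on them. The scheme below (HMAC-SHA256 over the raw body, hex digest in an X-ScrapeHub-Signature header, a whsec_-style secret) is an illustrative assumption, not documented API; check your dashboard for the actual signing details. You would run the check before decoding the payload in the handler above:

package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"io"
	"log"
	"net/http"
)

// verifySignature is a hypothetical check: it assumes ScrapeHub signs the
// raw request body with HMAC-SHA256 and sends the hex digest in an
// X-ScrapeHub-Signature header. It returns the body so the caller can
// still decode it after verification.
func verifySignature(r *http.Request, secret []byte) ([]byte, bool) {
	body, err := io.ReadAll(r.Body)
	if err != nil {
		return nil, false
	}
	mac := hmac.New(sha256.New, secret)
	mac.Write(body)
	expected := hex.EncodeToString(mac.Sum(nil))
	got := r.Header.Get("X-ScrapeHub-Signature")
	// hmac.Equal compares in constant time, avoiding timing leaks.
	return body, hmac.Equal([]byte(expected), []byte(got))
}

func main() {
	secret := []byte("whsec_example") // hypothetical signing secret
	http.HandleFunc("/webhook", func(w http.ResponseWriter, r *http.Request) {
		if _, ok := verifySignature(r, secret); !ok {
			http.Error(w, "invalid signature", http.StatusUnauthorized)
			return
		}
		w.WriteHeader(http.StatusOK)
	})
	log.Fatal(http.ListenAndServe(":8080", nil))
}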

Error Handling
errors.go
package main

import (
	"errors"
	"fmt"
	"log"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	result, err := client.Scrape(&scrapehub.ScrapeConfig{
		URL: "https://example.com",
	})
	if err != nil {
		var authErr *scrapehub.AuthenticationError
		var rateLimitErr *scrapehub.RateLimitError
		var invalidReqErr *scrapehub.InvalidRequestError
		var scraperErr *scrapehub.ScraperError

		switch {
		case errors.As(err, &authErr):
			fmt.Println("Authentication failed:", err)
			fmt.Println("Check your API key")
		case errors.As(err, &rateLimitErr):
			fmt.Println("Rate limit exceeded:", err)
			fmt.Printf("Retry after: %d seconds\n", rateLimitErr.RetryAfter)
		case errors.As(err, &invalidReqErr):
			fmt.Println("Invalid request:", err)
			fmt.Println("Error details:", invalidReqErr.Details)
		case errors.As(err, &scraperErr):
			fmt.Println("Scraper failed:", err)
			fmt.Println("Target URL:", scraperErr.URL)
			fmt.Println("Error code:", scraperErr.Code)
		default:
			fmt.Println("Unexpected error:", err)
		}
		log.Fatal(err)
	}

	fmt.Printf("Success! Extracted %d items\n", len(result.Data))
}
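
The RetryAfter field on RateLimitError makes a simple retry loop straightforward. A sketch with a fixed attempt cap (scrapeWithRetry is our own helper name, not part of the SDK):

package main

import (
	"errors"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

// scrapeWithRetry retries only on rate limiting, sleeping for the
// server-provided RetryAfter delay (in seconds, per the example above).
// All other errors are returned immediately.
func scrapeWithRetry(client *scrapehub.Client, cfg *scrapehub.ScrapeConfig, attempts int) (*scrapehub.ScrapeResult, error) {
	var lastErr error
	for i := 0; i < attempts; i++ {
		result, err := client.Scrape(cfg)
		if err == nil {
			return result, nil
		}
		var rateLimitErr *scrapehub.RateLimitError
		if !errors.As(err, &rateLimitErr) {
			return nil, err // not retryable
		}
		lastErr = err
		time.Sleep(time.Duration(rateLimitErr.RetryAfter) * time.Second)
	}
	return nil, lastErr
}

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")
	result, err := scrapeWithRetry(client, &scrapehub.ScrapeConfig{
		URL: "https://example.com",
	}, 3)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Success! Extracted %d items\n", len(result.Data))
}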

Testing
scraper_test.go
package main

import (
	"testing"

	"github.com/scrapehub/scrapehub-go"
	"github.com/scrapehub/scrapehub-go/mock"
	"github.com/stretchr/testify/assert"
)

func TestScrapeProducts(t *testing.T) {
	// Create mock client
	mockClient := mock.NewClient()

	// Set up mock response
	mockClient.OnScrape(&scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	}).Return(&scrapehub.ScrapeResult{
		Data: []map[string]interface{}{
			{"name": "Product 1", "price": 29.99},
			{"name": "Product 2", "price": 39.99},
		},
		Status: "completed",
	}, nil)

	// Test scraping
	result, err := mockClient.Scrape(&scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	})
	assert.NoError(t, err)
	assert.Equal(t, "completed", result.Status)
	assert.Len(t, result.Data, 2)
}

func TestScrapeError(t *testing.T) {
	mockClient := mock.NewClient()

	mockClient.OnScrape(&scrapehub.ScrapeConfig{
		URL: "https://example.com/invalid",
	}).Return(nil, &scrapehub.ScraperError{
		Message: "Failed to scrape",
		URL:     "https://example.com/invalid",
	})

	_, err := mockClient.Scrape(&scrapehub.ScrapeConfig{
		URL: "https://example.com/invalid",
	})
	assert.Error(t, err)
	assert.IsType(t, &scrapehub.ScraperError{}, err)
}