Go Integration
Integrate ScrapeHub into your Go applications
Installation
go get github.com/scrapehub/scrapehub-go
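
The SDK is distributed as a standard Go module, so the command above records the dependency in your go.mod. A minimal module file looks roughly like this (the module path is yours; the Go and SDK versions shown are placeholders, not pinned requirements):

module example.com/myscraper

go 1.21

require github.com/scrapehub/scrapehub-go v1.0.0 // placeholder version
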
Quick Start
main.go
package main

import (
	"fmt"
	"log"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	// Initialize client
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	// Simple scrape
	result, err := client.Scrape(&scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	})
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Extracted %d items\n", len(result.Data))
	for _, item := range result.Data {
		fmt.Println(item)
	}
}
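
The examples in this guide hardcode a placeholder live key for brevity. In real code, prefer loading the key from the environment, as the HTTP handler example later on this page does. A minimal sketch (it assumes NewClient returns *scrapehub.Client; the guide only shows the constructor, not the concrete type):

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/scrapehub/scrapehub-go"
)

// newClientFromEnv reads SCRAPEHUB_API_KEY, the same variable the
// HTTP handler example below uses, and fails fast when it is missing.
func newClientFromEnv() *scrapehub.Client {
	key := os.Getenv("SCRAPEHUB_API_KEY")
	if key == "" {
		log.Fatal("SCRAPEHUB_API_KEY is not set")
	}
	return scrapehub.NewClient(key)
}

func main() {
	client := newClientFromEnv()
	fmt.Println("client ready:", client != nil)
}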

Advanced Configuration
advanced.go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	// Initialize client with options
	client := scrapehub.NewClient(
		"sk_live_xxxx_449x",
		scrapehub.WithTimeout(5 * time.Minute),
		scrapehub.WithRetries(3),
	)

	// Advanced scraping with custom config
	ctx := context.Background()
	result, err := client.ScrapeWithContext(ctx, &scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
		Format: "json",

		// Pagination
		Pagination: &scrapehub.PaginationConfig{
			Enabled:  true,
			MaxPages: 10,
			Selector: "a.next-page",
		},

		// Custom headers
		Headers: map[string]string{
			"User-Agent":      "Mozilla/5.0...",
			"Accept-Language": "en-US,en;q=0.9",
		},

		// JavaScript rendering
		RenderJS:        true,
		WaitForSelector: ".product-list",

		// Proxy settings
		Proxy: &scrapehub.ProxyConfig{
			Enabled:     true,
			Region:      "us-east",
			Residential: true,
		},
	})
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Status: %s\n", result.Status)
	fmt.Printf("Pages scraped: %d\n", result.PagesScraped)
	fmt.Printf("Items extracted: %d\n", len(result.Data))
	fmt.Printf("Time taken: %.2fs\n", result.Duration)
}
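
Because ScrapeWithContext takes a context.Context, an individual call can be bounded independently of the client-level WithTimeout option. A sketch using context.WithTimeout (the two-minute limit is an arbitrary choice):

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	// Bound this single call to two minutes, independent of any
	// client-level timeout configured via WithTimeout.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	result, err := client.ScrapeWithContext(ctx, &scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	})
	if err != nil {
		// If the deadline fires first, the error should wrap
		// context.DeadlineExceeded.
		log.Fatal(err)
	}
	fmt.Printf("Extracted %d items\n", len(result.Data))
}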

Concurrent Processing
concurrent.go
package main

import (
	"fmt"
	"sync"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	urls := []string{
		"https://example.com/category/1",
		"https://example.com/category/2",
		"https://example.com/category/3",
	}

	// Use a WaitGroup to track the goroutines
	var wg sync.WaitGroup
	results := make(chan *scrapehub.ScrapeResult, len(urls))

	for _, url := range urls {
		wg.Add(1)
		go func(u string) {
			defer wg.Done()
			result, err := client.Scrape(&scrapehub.ScrapeConfig{
				URL:    u,
				Engine: "neural-x1",
			})
			if err != nil {
				fmt.Printf("Error scraping %s: %v\n", u, err)
				return
			}
			results <- result
		}(url)
	}

	// Close the channel once all goroutines finish
	go func() {
		wg.Wait()
		close(results)
	}()

	// Collect results
	allItems := []map[string]interface{}{}
	for result := range results {
		allItems = append(allItems, result.Data...)
	}
	fmt.Printf("Total items extracted: %d\n", len(allItems))
}
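
One goroutine per URL is fine for three URLs, but with hundreds of them you risk tripping the API's rate limits. A common mitigation is a buffered-channel semaphore that caps in-flight requests; the limit of 5 below is an arbitrary value to tune against your plan:

package main

import (
	"fmt"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	urls := []string{
		"https://example.com/category/1",
		// ... potentially hundreds more
	}

	sem := make(chan struct{}, 5) // at most 5 scrapes in flight
	results := make(chan *scrapehub.ScrapeResult, len(urls))

	for _, url := range urls {
		sem <- struct{}{} // acquire a slot before spawning
		go func(u string) {
			defer func() { <-sem }() // release the slot
			result, err := client.Scrape(&scrapehub.ScrapeConfig{
				URL:    u,
				Engine: "neural-x1",
			})
			if err != nil {
				fmt.Printf("Error scraping %s: %v\n", u, err)
				results <- nil
				return
			}
			results <- result
		}(url)
	}

	// Every goroutine sends exactly once, so we can simply count receives.
	for range urls {
		if result := <-results; result != nil {
			fmt.Printf("Got %d items\n", len(result.Data))
		}
	}
}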

Job Management
jobs.go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")
	ctx := context.Background()

	// Start a scrape job (non-blocking)
	job, err := client.CreateJob(ctx, &scrapehub.JobConfig{
		URL:    "https://example.com/large-dataset",
		Engine: "neural-x1",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Job ID: %s\n", job.ID)
	fmt.Printf("Status: %s\n", job.Status)

	// Poll job status
	for {
		job, err = client.GetJob(ctx, job.ID)
		if err != nil {
			log.Fatal(err)
		}
		if job.IsComplete() {
			break
		}
		fmt.Printf("Progress: %d%%\n", job.Progress)
		time.Sleep(5 * time.Second)
	}

	// Get results when complete
	if job.IsSuccessful() {
		results, err := job.GetResults()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("Extracted %d items\n", len(results))
	} else {
		fmt.Printf("Job failed: %s\n", job.ErrorMessage)
	}

	// List all jobs
	jobs, err := client.ListJobs(ctx, &scrapehub.ListJobsOptions{
		Limit:  10,
		Status: "completed",
	})
	if err != nil {
		log.Fatal(err)
	}
	for _, j := range jobs {
		fmt.Printf("%s: %s - %s\n", j.ID, j.Status, j.CreatedAt)
	}
}
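
The fixed five-second polling loop can be wrapped in a helper that also honors a context deadline, so a stalled job cannot block the program forever. A sketch (waitForJob and the *scrapehub.Job type name are our own assumptions; the guide only shows the returned value, not its concrete type):

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

// waitForJob polls GetJob until the job completes or ctx expires,
// whichever comes first.
func waitForJob(ctx context.Context, client *scrapehub.Client, id string) (*scrapehub.Job, error) {
	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()
	for {
		job, err := client.GetJob(ctx, id)
		if err != nil {
			return nil, err
		}
		if job.IsComplete() {
			return job, nil
		}
		select {
		case <-ctx.Done():
			return nil, ctx.Err() // the deadline wins over polling
		case <-ticker.C:
		}
	}
}

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	job, err := client.CreateJob(ctx, &scrapehub.JobConfig{
		URL:    "https://example.com/large-dataset",
		Engine: "neural-x1",
	})
	if err != nil {
		log.Fatal(err)
	}

	job, err = waitForJob(ctx, client, job.ID)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Job %s finished with status %s\n", job.ID, job.Status)
}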

HTTP Handler Integration
server.go
package main

import (
	"encoding/json"
	"log"
	"net/http"
	"os"

	"github.com/scrapehub/scrapehub-go"
)

type ScrapeRequest struct {
	URL    string `json:"url"`
	Engine string `json:"engine"`
}

func main() {
	client := scrapehub.NewClient(os.Getenv("SCRAPEHUB_API_KEY"))

	http.HandleFunc("/api/scrape", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
			return
		}

		var req ScrapeRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		result, err := client.Scrape(&scrapehub.ScrapeConfig{
			URL:    req.URL,
			Engine: req.Engine,
		})
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]interface{}{
			"success":   true,
			"itemCount": len(result.Data),
			"data":      result.Data,
		})
	})

	log.Println("Server starting on :8080")
	log.Fatal(http.ListenAndServe(":8080", nil))
}
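
To exercise the endpoint, POST a JSON body matching ScrapeRequest. A minimal Go caller (any HTTP tool works just as well):

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// The field names match the ScrapeRequest struct in server.go.
	body := bytes.NewBufferString(`{"url": "https://example.com/products", "engine": "neural-x1"}`)

	resp, err := http.Post("http://localhost:8080/api/scrape", "application/json", body)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	out, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d: %s\n", resp.StatusCode, out)
}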

Webhook Handler
webhooks.go
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"net/http"

	"github.com/scrapehub/scrapehub-go"
)

type WebhookPayload struct {
	Event string `json:"event"`
	JobID string `json:"job_id"`
}

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	// Create job with webhook
	job, err := client.CreateJob(context.Background(), &scrapehub.JobConfig{
		URL:        "https://example.com/products",
		Engine:     "neural-x1",
		WebhookURL: "https://your-server.com/webhook",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Job created: %s\n", job.ID)

	// Webhook handler
	http.HandleFunc("/webhook", func(w http.ResponseWriter, r *http.Request) {
		var payload WebhookPayload
		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		switch payload.Event {
		case "job.completed":
			fmt.Printf("Job %s completed!\n", payload.JobID)
			// Fetch results
			job, err := client.GetJob(r.Context(), payload.JobID)
			if err != nil {
				log.Printf("Error fetching job: %v\n", err)
				return
			}
			results, err := job.GetResults()
			if err != nil {
				log.Printf("Error fetching results: %v\n", err)
				return
			}
			fmt.Printf("Extracted %d items\n", len(results))
		case "job.failed":
			fmt.Printf("Job %s failed!\n", payload.JobID)
		}

		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(map[string]string{"status": "received"})
	})

	log.Fatal(http.ListenAndServe(":8080", nil))
}
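
A production webhook endpoint should verify that requests really come from ScrapeHub before acting on them. The scheme below (HMAC-SHA256 over the raw body, hex digest in an X-ScrapeHub-Signature header, a whsec_-style secret) is an illustrative assumption, not documented API; check your dashboard for the actual signing details. You would run the check before decoding the payload in the handler above:

package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"io"
	"log"
	"net/http"
)

// verifySignature is a hypothetical check: it assumes ScrapeHub signs the
// raw request body with HMAC-SHA256 and sends the hex digest in an
// X-ScrapeHub-Signature header. It returns the body so the caller can
// still decode it after verification.
func verifySignature(r *http.Request, secret []byte) ([]byte, bool) {
	body, err := io.ReadAll(r.Body)
	if err != nil {
		return nil, false
	}
	mac := hmac.New(sha256.New, secret)
	mac.Write(body)
	expected := hex.EncodeToString(mac.Sum(nil))
	got := r.Header.Get("X-ScrapeHub-Signature")
	// hmac.Equal compares in constant time, avoiding timing leaks.
	return body, hmac.Equal([]byte(expected), []byte(got))
}

func main() {
	secret := []byte("whsec_example") // hypothetical signing secret
	http.HandleFunc("/webhook", func(w http.ResponseWriter, r *http.Request) {
		if _, ok := verifySignature(r, secret); !ok {
			http.Error(w, "invalid signature", http.StatusUnauthorized)
			return
		}
		w.WriteHeader(http.StatusOK)
	})
	log.Fatal(http.ListenAndServe(":8080", nil))
}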

Error Handling
errors.go
package main

import (
	"errors"
	"fmt"
	"log"

	"github.com/scrapehub/scrapehub-go"
)

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")

	result, err := client.Scrape(&scrapehub.ScrapeConfig{
		URL: "https://example.com",
	})
	if err != nil {
		var authErr *scrapehub.AuthenticationError
		var rateLimitErr *scrapehub.RateLimitError
		var invalidReqErr *scrapehub.InvalidRequestError
		var scraperErr *scrapehub.ScraperError

		switch {
		case errors.As(err, &authErr):
			fmt.Println("Authentication failed:", err)
			fmt.Println("Check your API key")
		case errors.As(err, &rateLimitErr):
			fmt.Println("Rate limit exceeded:", err)
			fmt.Printf("Retry after: %d seconds\n", rateLimitErr.RetryAfter)
		case errors.As(err, &invalidReqErr):
			fmt.Println("Invalid request:", err)
			fmt.Println("Error details:", invalidReqErr.Details)
		case errors.As(err, &scraperErr):
			fmt.Println("Scraper failed:", err)
			fmt.Println("Target URL:", scraperErr.URL)
			fmt.Println("Error code:", scraperErr.Code)
		default:
			fmt.Println("Unexpected error:", err)
		}
		log.Fatal(err)
	}

	fmt.Printf("Success! Extracted %d items\n", len(result.Data))
}
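
The RetryAfter field on RateLimitError makes a simple retry loop straightforward. A sketch with a fixed attempt cap (scrapeWithRetry is our own helper name, not part of the SDK):

package main

import (
	"errors"
	"fmt"
	"log"
	"time"

	"github.com/scrapehub/scrapehub-go"
)

// scrapeWithRetry retries only on rate limiting, sleeping for the
// server-provided RetryAfter delay (in seconds, per the example above).
// All other errors are returned immediately.
func scrapeWithRetry(client *scrapehub.Client, cfg *scrapehub.ScrapeConfig, attempts int) (*scrapehub.ScrapeResult, error) {
	var lastErr error
	for i := 0; i < attempts; i++ {
		result, err := client.Scrape(cfg)
		if err == nil {
			return result, nil
		}
		var rateLimitErr *scrapehub.RateLimitError
		if !errors.As(err, &rateLimitErr) {
			return nil, err // not retryable
		}
		lastErr = err
		time.Sleep(time.Duration(rateLimitErr.RetryAfter) * time.Second)
	}
	return nil, lastErr
}

func main() {
	client := scrapehub.NewClient("sk_live_xxxx_449x")
	result, err := scrapeWithRetry(client, &scrapehub.ScrapeConfig{
		URL: "https://example.com",
	}, 3)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Success! Extracted %d items\n", len(result.Data))
}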

Testing
scraper_test.go
package main

import (
	"testing"

	"github.com/scrapehub/scrapehub-go"
	"github.com/scrapehub/scrapehub-go/mock"
	"github.com/stretchr/testify/assert"
)

func TestScrapeProducts(t *testing.T) {
	// Create mock client
	mockClient := mock.NewClient()

	// Set up mock response
	mockClient.OnScrape(&scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	}).Return(&scrapehub.ScrapeResult{
		Data: []map[string]interface{}{
			{"name": "Product 1", "price": 29.99},
			{"name": "Product 2", "price": 39.99},
		},
		Status: "completed",
	}, nil)

	// Test scraping
	result, err := mockClient.Scrape(&scrapehub.ScrapeConfig{
		URL:    "https://example.com/products",
		Engine: "neural-x1",
	})
	assert.NoError(t, err)
	assert.Equal(t, "completed", result.Status)
	assert.Len(t, result.Data, 2)
}

func TestScrapeError(t *testing.T) {
	mockClient := mock.NewClient()

	mockClient.OnScrape(&scrapehub.ScrapeConfig{
		URL: "https://example.com/invalid",
	}).Return(nil, &scrapehub.ScraperError{
		Message: "Failed to scrape",
		URL:     "https://example.com/invalid",
	})

	_, err := mockClient.Scrape(&scrapehub.ScrapeConfig{
		URL: "https://example.com/invalid",
	})
	assert.Error(t, err)
	assert.IsType(t, &scrapehub.ScraperError{}, err)
}