Quickstart Guide

Get started with ScrapeHub in under 5 minutes

Prerequisites

  • An active ScrapeHub account
  • Your API key (found in Settings → API Keys)
  • Python 3.7+ or Node.js 14+ installed

Step 1: Get Your API Key

First, log in to your ScrapeHub dashboard and navigate to the API Keys section to generate your authentication token.

Terminal
# Your API key will look like this: sk_live_xxxx_449x
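
Avoid hard-coding the key in source files. One common approach is to read it from an environment variable; the sketch below assumes you have exported a variable named SCRAPEHUB_API_KEY (an illustrative name of our choosing, not one ScrapeHub requires).

get_key.py
python
import os

# SCRAPEHUB_API_KEY is an illustrative variable name (our assumption),
# set beforehand with: export SCRAPEHUB_API_KEY="sk_live_xxxx_449x"
API_KEY = os.environ["SCRAPEHUB_API_KEY"]

headers = {
    "X-API-KEY": API_KEY,
    "Content-Type": "application/json"
}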

Step 2: Make Your First Request

Using Python

quickstart.py
python
import requests

url = "https://api.scrapehub.io/v4/scrape"

headers = {
    "X-API-KEY": "sk_live_xxxx_449x",
    "Content-Type": "application/json"
}

payload = {
    "target": "https://example.com/products",
    "engine": "neural-x1",
    "format": "json"
}

response = requests.post(url, json=payload, headers=headers)
response.raise_for_status()  # Fail fast on HTTP errors (4xx/5xx)
data = response.json()

print(f"Job ID: {data['job_id']}")
print(f"Status: {data['status']}")
# 'results' may be absent until the job completes (see Step 3)
print(f"Results: {len(data.get('results', []))} items extracted")

Using Node.js

quickstart.js
javascript
const axios = require('axios');

const scrape = async () => {
  const response = await axios.post(
    'https://api.scrapehub.io/v4/scrape',
    {
      target: 'https://example.com/products',
      engine: 'neural-x1',
      format: 'json'
    },
    {
      headers: {
        'X-API-KEY': 'sk_live_xxxx_449x',
        'Content-Type': 'application/json'
      }
    }
  );

  console.log('Job ID:', response.data.job_id);
  console.log('Status:', response.data.status);
  console.log('Results:', response.data.results.length, 'items extracted');
};

scrape().catch(console.error); // Log request failures instead of an unhandled rejection

Using cURL

Terminal
curl -X POST https://api.scrapehub.io/v4/scrape \
  -H "X-API-KEY: sk_live_xxxx_449x" \
  -H "Content-Type: application/json" \
  -d '{
    "target": "https://example.com/products",
    "engine": "neural-x1",
    "format": "json"
  }'
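
All three calls return the same JSON body. As a rough sketch (the values are illustrative and only fields used elsewhere in this guide are shown), a response looks like:

Response
json
{
  "job_id": "job_abc123",
  "status": "completed",
  "results": [ ... ]
}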

Step 3: Check Job Status

For long-running scrapes, you can poll the job status endpoint:

check_status.py
python
import requests
import time

job_id = "job_abc123"
url = f"https://api.scrapehub.io/v4/jobs/{job_id}"

headers = {
    "X-API-KEY": "sk_live_xxxx_449x"
}

while True:
    response = requests.get(url, headers=headers)
    data = response.json()

    if data['status'] == 'completed':
        print("Scrape completed!")
        print(f"Extracted {len(data['results'])} items")
        break
    elif data['status'] == 'failed':
        print("Scrape failed:", data['error'])
        break
    else:
        print(f"Status: {data['status']} ({data['progress']}%)")
        time.sleep(5)  # Wait 5 seconds before checking again
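
The loop above polls indefinitely. If you would rather cap the wait, a small variant with a timeout and gentle backoff looks like this (the 10-minute limit and doubling backoff are arbitrary choices of ours, not ScrapeHub requirements):

poll_with_timeout.py
python
import requests
import time

job_id = "job_abc123"
url = f"https://api.scrapehub.io/v4/jobs/{job_id}"
headers = {"X-API-KEY": "sk_live_xxxx_449x"}

deadline = time.monotonic() + 600  # Give up after 10 minutes (arbitrary)
delay = 5                          # Start with the 5-second wait used above

while time.monotonic() < deadline:
    data = requests.get(url, headers=headers).json()
    if data['status'] in ('completed', 'failed'):
        print(f"Final status: {data['status']}")
        break
    time.sleep(delay)
    delay = min(delay * 2, 60)  # Back off, capped at 60 seconds
else:
    print("Timed out waiting for the job to finish")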

Step 4: Download Results

Export your scraped data in multiple formats:

download_results.py
python
import requests

job_id = "job_abc123"
headers = {
    "X-API-KEY": "sk_live_xxxx_449x"
}

# Download as JSON
response = requests.get(
    f"https://api.scrapehub.io/v4/jobs/{job_id}/export?format=json",
    headers=headers
)

with open('results.json', 'wb') as f:
    f.write(response.content)

# Download as CSV
response = requests.get(
    f"https://api.scrapehub.io/v4/jobs/{job_id}/export?format=csv",
    headers=headers
)

with open('results.csv', 'wb') as f:
    f.write(response.content)
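
Once downloaded, the JSON export can be loaded back with the standard library. This sketch assumes the export is a JSON array of items (a reasonable reading of format=json, though we have not confirmed the exact schema):

load_results.py
python
import json

# Load the file saved by the export step above
with open('results.json') as f:
    results = json.load(f)  # Assumes a JSON array of scraped items

print(f"Loaded {len(results)} items")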

Need Help?

Join our community or contact support if you run into any issues.