Node.js Integration

Integrate ScrapeHub into your Node.js applications

Installation

Terminal
# Using npm
npm install @scrapehub/node

# Using yarn
yarn add @scrapehub/node

# Using pnpm
pnpm add @scrapehub/node

Quick Start

basic-usage.js
const { ScrapeHubClient } = require('@scrapehub/node');

// Initialize the client (in production, load the key from an environment variable)
const client = new ScrapeHubClient({
  apiKey: 'sk_live_xxxx_449x'
});

// Simple scrape
async function main() {
  const result = await client.scrape({
    url: 'https://example.com/products',
    engine: 'neural-x1'
  });

  console.log(`Extracted ${result.data.length} items`);
  console.log(result.data);
}

main().catch(console.error);
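
If your project uses ES modules instead of require, the same client should work with import syntax, assuming the package provides an ESM entry point or resolves through Node's CommonJS interop (a sketch, not from the official docs):

esm-usage.mjs
import { ScrapeHubClient } from '@scrapehub/node';

const client = new ScrapeHubClient({ apiKey: process.env.SCRAPEHUB_API_KEY });

// Top-level await is available in .mjs files on modern Node versions
const result = await client.scrape({
  url: 'https://example.com/products',
  engine: 'neural-x1'
});

console.log(`Extracted ${result.data.length} items`);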

TypeScript Support

typescript-usage.ts
import { ScrapeHubClient, ScrapeConfig, ScrapeResult } from '@scrapehub/node';

interface Product {
  name: string;
  price: number;
  rating: number;
  url: string;
}

const client = new ScrapeHubClient({
  apiKey: process.env.SCRAPEHUB_API_KEY!
});

async function scrapeProducts(): Promise<Product[]> {
  const config: ScrapeConfig = {
    url: 'https://example.com/products',
    engine: 'neural-x1',
    format: 'json'
  };

  const result: ScrapeResult<Product> = await client.scrape(config);
  return result.data;
}

scrapeProducts()
  .then(products => {
    console.log(`Found ${products.length} products`);
    products.forEach(product => {
      console.log(`${product.name}: $${product.price}`);
    });
  })
  .catch(console.error);

Advanced Configuration

advanced-config.js
const { ScrapeHubClient } = require('@scrapehub/node');

const client = new ScrapeHubClient({
  apiKey: 'sk_live_xxxx_449x',
  timeout: 300000, // 5 minutes
  retries: 3,
  baseUrl: 'https://api.scrapehub.io/v4'
});

async function advancedScrape() {
  const result = await client.scrape({
    url: 'https://example.com/products',
    engine: 'neural-x1',
    format: 'json',

    // Pagination
    pagination: {
      enabled: true,
      maxPages: 10,
      selector: 'a.next-page'
    },

    // Custom headers
    headers: {
      'User-Agent': 'Mozilla/5.0...',
      'Accept-Language': 'en-US,en;q=0.9'
    },

    // JavaScript rendering
    renderJs: true,
    waitForSelector: '.product-list',

    // Proxy settings
    proxy: {
      enabled: true,
      region: 'us-east',
      residential: true
    },

    // Rate limiting
    rateLimit: {
      requestsPerSecond: 2,
      delayBetweenPages: 1000
    }
  });

  return result;
}
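
Calling it works like any other scrape; with pagination enabled, result.data presumably aggregates the items from every page visited (behavior to confirm against the API reference):

advancedScrape()
  .then(result => console.log(`Extracted ${result.data.length} items`))
  .catch(console.error);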

Async/Promises

async-promises.js
const { ScrapeHubClient } = require('@scrapehub/node');

const client = new ScrapeHubClient({ apiKey: 'sk_live_xxxx_449x' });

// Multiple URLs with Promise.all
async function scrapeMultipleUrls() {
  const urls = [
    'https://example.com/category/1',
    'https://example.com/category/2',
    'https://example.com/category/3'
  ];

  const results = await Promise.all(
    urls.map(url => client.scrape({ url, engine: 'neural-x1' }))
  );

  const allItems = results.flatMap(result => result.data);
  console.log(`Total items: ${allItems.length}`);
  return allItems;
}

// With Promise.allSettled for error handling
async function scrapeWithErrorHandling() {
  const urls = [
    'https://example.com/category/1',
    'https://example.com/category/2',
    'https://example.com/category/3'
  ];

  const results = await Promise.allSettled(
    urls.map(url => client.scrape({ url, engine: 'neural-x1' }))
  );

  results.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      console.log(`URL ${index}: ${result.value.data.length} items`);
    } else {
      console.error(`URL ${index} failed: ${result.reason}`);
    }
  });
}
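
Promise.all and Promise.allSettled fire every request at once. For long URL lists you may want to cap concurrency on the client side as well; a minimal batching sketch (the batch size of 5 is an arbitrary choice):

// Scrape URLs in fixed-size batches to limit concurrent requests
async function scrapeInBatches(urls, batchSize = 5) {
  const allItems = [];

  for (let i = 0; i < urls.length; i += batchSize) {
    const batch = urls.slice(i, i + batchSize);
    const settled = await Promise.allSettled(
      batch.map(url => client.scrape({ url, engine: 'neural-x1' }))
    );

    for (const outcome of settled) {
      if (outcome.status === 'fulfilled') {
        allItems.push(...outcome.value.data);
      } else {
        console.error(`Scrape failed: ${outcome.reason}`);
      }
    }
  }

  return allItems;
}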

Job Management

job-management.js
const { ScrapeHubClient } = require('@scrapehub/node');

const client = new ScrapeHubClient({ apiKey: 'sk_live_xxxx_449x' });

// Create and monitor job
async function createAndMonitorJob() {
  // Create job
  const job = await client.createJob({
    url: 'https://example.com/large-dataset',
    engine: 'neural-x1'
  });

  console.log(`Job created: ${job.id}`);

  // Poll until the job finishes; a sequential loop avoids the overlapping
  // requests and silently dropped errors that setInterval would allow
  let status = await job.getStatus();
  while (!status.isComplete) {
    console.log(`Progress: ${status.progress}%`);
    await new Promise(resolve => setTimeout(resolve, 5000));
    status = await job.getStatus();
  }

  if (status.isSuccessful) {
    const results = await job.getResults();
    console.log(`Extracted ${results.length} items`);
  } else {
    console.error(`Job failed: ${status.errorMessage}`);
  }
}

// List jobs
async function listJobs() {
  const jobs = await client.listJobs({
    limit: 10,
    status: 'completed'
  });

  jobs.forEach(job => {
    console.log(`${job.id}: ${job.status} - ${job.createdAt}`);
  });
}

Express.js Integration

express-integration.js
const express = require('express');
const { ScrapeHubClient } = require('@scrapehub/node');

const app = express();
const client = new ScrapeHubClient({ apiKey: process.env.SCRAPEHUB_API_KEY });

app.use(express.json());

// Scrape endpoint
app.post('/api/scrape', async (req, res) => {
  try {
    const { url, engine = 'neural-x1' } = req.body;

    if (!url) {
      return res.status(400).json({ success: false, error: 'url is required' });
    }

    const result = await client.scrape({ url, engine });

    res.json({
      success: true,
      itemCount: result.data.length,
      data: result.data
    });
  } catch (error) {
    res.status(500).json({
      success: false,
      error: error.message
    });
  }
});

// Webhook endpoint
app.post('/api/webhook', (req, res) => {
  const { event, jobId } = req.body;

  if (event === 'job.completed') {
    console.log(`Job ${jobId} completed`);
    // Process results
  }

  res.json({ status: 'received' });
});

app.listen(3000, () => {
  console.log('Server running on port 3000');
});
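
In production you will likely want to verify that webhook requests actually come from ScrapeHub before acting on them. The header name and signing scheme below are hypothetical placeholders (check the platform's webhook documentation for the real ones); the sketch only illustrates the standard HMAC pattern:

const crypto = require('crypto');

// Hypothetical scheme: assumes the raw request body is signed with
// HMAC-SHA256 and the hex digest arrives in an 'x-scrapehub-signature'
// header. HMACs must be computed over the raw body, which body-parser
// can expose via express.json({ verify: (req, res, buf) => { req.rawBody = buf; } })
function isValidSignature(rawBody, signature, secret) {
  if (!signature) return false;
  const expected = crypto.createHmac('sha256', secret).update(rawBody).digest('hex');
  try {
    return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(signature));
  } catch {
    return false; // signature length mismatch
  }
}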

Error Handling

error-handling.js
const {
  ScrapeHubClient,
  AuthenticationError,
  RateLimitError,
  InvalidRequestError,
  ScraperError
} = require('@scrapehub/node');

const client = new ScrapeHubClient({ apiKey: 'sk_live_xxxx_449x' });

async function scrapeWithErrorHandling() {
  try {
    const result = await client.scrape({
      url: 'https://example.com'
    });
    return result.data;

  } catch (error) {
    if (error instanceof AuthenticationError) {
      console.error('Authentication failed:', error.message);
      // Check your API key

    } else if (error instanceof RateLimitError) {
      console.error('Rate limit exceeded:', error.message);
      console.log(`Retry after: ${error.retryAfter} seconds`);
      // Implement exponential backoff

    } else if (error instanceof InvalidRequestError) {
      console.error('Invalid request:', error.message);
      console.log('Error details:', error.details);
      // Fix request parameters

    } else if (error instanceof ScraperError) {
      console.error('Scraper failed:', error.message);
      console.log('Target URL:', error.url);
      console.log('Error code:', error.code);
      // Handle scraper-specific errors

    } else {
      console.error('Unexpected error:', error);
    }

    throw error;
  }
}
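
The RateLimitError branch above suggests exponential backoff. One minimal sketch that retries only on rate limits and prefers the server-supplied retryAfter when present (the attempt count and base delay are arbitrary choices):

async function scrapeWithBackoff(config, maxAttempts = 3) {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await client.scrape(config);
    } catch (error) {
      if (!(error instanceof RateLimitError) || attempt === maxAttempts) {
        throw error;
      }
      // Prefer the server-provided delay; otherwise back off exponentially
      const delayMs = error.retryAfter ? error.retryAfter * 1000 : 2 ** attempt * 1000;
      console.log(`Rate limited; retrying in ${delayMs} ms (attempt ${attempt})`);
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
}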

API Reference

For complete API documentation, visit our Node.js SDK reference:

View Node.js SDK Docs →