PHP Integration

Integrate ScrapeHub into your PHP applications

Installation

Terminal
composer require scrapehub/scrapehub-php

Quick Start

basic_usage.php
php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;

// Authenticate against the ScrapeHub API with a secret key.
$client = new Client('sk_live_xxxx_449x');

// One-shot scrape of a single page using the neural-x1 engine.
$result = $client->scrape([
    'url' => 'https://example.com/products',
    'engine' => 'neural-x1'
]);

// Summarise the run, then dump every extracted record.
$itemCount = count($result->data);
echo "Extracted {$itemCount} items\n";
foreach ($result->data as $record) {
    print_r($record);
}

Advanced Configuration

advanced.php
php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;
use ScrapeHub\Config\ScrapeConfig;

// Client tuned for long-running scrapes: generous timeout, automatic retries.
$client = new Client([
    'api_key' => 'sk_live_xxxx_449x',
    'timeout' => 300, // 5 minutes
    'retries' => 3
]);

// Follow "next page" links, capped at 10 pages.
$pagination = [
    'enabled' => true,
    'max_pages' => 10,
    'selector' => 'a.next-page'
];

// Headers sent with every request to the target site.
$headers = [
    'User-Agent' => 'Mozilla/5.0...',
    'Accept-Language' => 'en-US,en;q=0.9'
];

// Route traffic through residential proxies in the us-east region.
$proxy = [
    'enabled' => true,
    'region' => 'us-east',
    'residential' => true
];

// Assemble the full scrape configuration from the parts above.
$config = new ScrapeConfig([
    'url' => 'https://example.com/products',
    'engine' => 'neural-x1',
    'format' => 'json',
    'pagination' => $pagination,
    'headers' => $headers,
    // Render client-side JavaScript and wait until the product list appears.
    'render_js' => true,
    'wait_for_selector' => '.product-list',
    'proxy' => $proxy
]);

$result = $client->scrapeWithConfig($config);

// Summarise the run.
echo "Status: {$result->status}\n";
echo "Pages scraped: {$result->pages_scraped}\n";
echo "Items extracted: " . count($result->data) . "\n";
echo "Time taken: {$result->duration}s\n";

Async Promises

async.php
php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;
use GuzzleHttp\Promise;

$client = new Client('sk_live_xxxx_449x');

$urls = [
    'https://example.com/category/1',
    'https://example.com/category/2',
    'https://example.com/category/3'
];

// Kick off one async scrape per URL; each call returns a promise immediately.
$promises = array_map(
    static function (string $url) use ($client) {
        return $client->scrapeAsync([
            'url' => $url,
            'engine' => 'neural-x1'
        ]);
    },
    $urls
);

// Block until every promise settles (throws if any scrape rejected).
$results = Promise\Utils::unwrap($promises);

// Flatten the per-URL result sets into a single list of items.
$allItems = [];
foreach ($results as $result) {
    foreach ($result->data as $item) {
        $allItems[] = $item;
    }
}

echo "Total items extracted: " . count($allItems) . "\n";

Job Management

jobs.php
php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;

$client = new Client('sk_live_xxxx_449x');

// Start a scrape job (non-blocking): the API returns immediately with a handle.
$job = $client->createJob([
    'url' => 'https://example.com/large-dataset',
    'engine' => 'neural-x1'
]);

echo "Job ID: {$job->id}\n";
echo "Status: {$job->status}\n";

// Poll every 5 seconds, but bail out after ~10 minutes so a stuck remote job
// cannot hang this script forever (the original loop had no upper bound).
$maxAttempts = 120;
$attempts = 0;
while (!$job->isComplete()) {
    if (++$attempts > $maxAttempts) {
        fwrite(STDERR, "Timed out waiting for job {$job->id}\n");
        exit(1);
    }
    $job->refresh();
    echo "Progress: {$job->progress}%\n";
    sleep(5);
}

// Fetch results once the job has finished.
if ($job->isSuccessful()) {
    $results = $job->getResults();
    echo "Extracted " . count($results) . " items\n";
} else {
    echo "Job failed: {$job->error_message}\n";
}

// List recent completed jobs. Use a distinct loop variable so we do not
// clobber the $job handle created above.
$jobs = $client->listJobs([
    'limit' => 10,
    'status' => 'completed'
]);

foreach ($jobs as $listedJob) {
    echo "{$listedJob->id}: {$listedJob->status} - {$listedJob->created_at}\n";
}

Laravel Integration

app/Services/ScraperService.php
php
<?php

namespace App\Services;

use ScrapeHub\Client;
use App\Models\Product;

class ScraperService
{
    /**
     * ScrapeHub API client used for all scrape calls.
     *
     * @var Client
     */
    protected $client;

    /**
     * Accepts an optional pre-built client so tests can inject a mock;
     * falls back to building one from config, exactly as before.
     *
     * @param Client|null $client Optional injected ScrapeHub client.
     */
    public function __construct(?Client $client = null)
    {
        $this->client = $client ?? new Client(config('services.scrapehub.api_key'));
    }

    /**
     * Scrape product data from the given URL and persist each item.
     *
     * @param string $url Target listing page.
     * @return array Raw item payloads returned by the scraper.
     */
    public function scrapeProducts(string $url): array
    {
        $result = $this->client->scrape([
            'url' => $url,
            'engine' => 'neural-x1',
            'format' => 'json'
        ]);

        // Persist every scraped item. NOTE(review): assumes each payload has
        // 'name', 'price' and 'url' keys — confirm against the engine schema.
        foreach ($result->data as $productData) {
            Product::create([
                'name' => $productData['name'],
                'price' => $productData['price'],
                'url' => $productData['url']
            ]);
        }

        return $result->data;
    }
}

// config/services.php
return [
    'scrapehub' => [
        // Read the key from the environment; never hard-code secrets in config.
        'api_key' => env('SCRAPEHUB_API_KEY'),
    ],
];

Laravel Queue Jobs

app/Jobs/ScrapeJob.php
php
<?php

namespace App\Jobs;

use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use ScrapeHub\Client;
use App\Models\ScrapeRecord;

class ScrapeJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    // Property names are kept as-is: queued jobs are serialized, so renaming
    // them would break payloads already sitting in the queue.
    protected $url;
    protected $userId;

    /**
     * Capture the target URL and the user who requested the scrape.
     */
    public function __construct(string $url, int $userId)
    {
        $this->url = $url;
        $this->userId = $userId;
    }

    /**
     * Submit the scrape to ScrapeHub and record the remote job locally.
     */
    public function handle()
    {
        $client = new Client(config('services.scrapehub.api_key'));

        // Ask ScrapeHub to notify our webhook route when the job finishes.
        $remoteJob = $client->createJob([
            'url' => $this->url,
            'engine' => 'neural-x1',
            'webhook_url' => route('scrape.webhook')
        ]);

        // Persist the remote job id so the webhook handler can match it later.
        ScrapeRecord::create([
            'user_id' => $this->userId,
            'job_id' => $remoteJob->id,
            'url' => $this->url,
            'status' => 'pending'
        ]);
    }
}

// Controller
namespace App\Http\Controllers;

// Imports were missing in this namespaced snippet: Request, ScrapeJob,
// ScrapeRecord and Client are all referenced below.
use App\Jobs\ScrapeJob;
use App\Models\ScrapeRecord;
use Illuminate\Http\Request;
use ScrapeHub\Client;

class ScraperController extends Controller
{
    /**
     * Queue a scrape for the authenticated user.
     */
    public function create(Request $request)
    {
        // Validate before dispatching: never queue work for a malformed URL.
        $request->validate(['url' => 'required|url']);

        ScrapeJob::dispatch($request->url, auth()->id());

        return response()->json(['message' => 'Scrape job started']);
    }

    /**
     * Receive status callbacks from ScrapeHub.
     *
     * Webhook input is untrusted: the job id may not match any record, so
     * guard against a null lookup instead of dereferencing blindly.
     */
    public function webhook(Request $request)
    {
        $jobId = $request->input('job_id');
        $status = $request->input('status');

        $record = ScrapeRecord::where('job_id', $jobId)->first();
        if ($record === null) {
            // Unknown job id — acknowledge without touching the database.
            return response()->json(['status' => 'unknown job'], 404);
        }

        $record->update(['status' => $status]);

        if ($status === 'completed') {
            // Process results
            $client = new Client(config('services.scrapehub.api_key'));
            $job = $client->getJob($jobId);
            // ... handle results
        }

        return response()->json(['status' => 'received']);
    }
}

Data Export

export.php
php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;

$client = new Client('sk_live_xxxx_449x');

// Get job results
$job = $client->getJob('job_abc123');
$data = $job->getResults();

// Export to JSON. JSON_THROW_ON_ERROR surfaces encoding failures (e.g. bad
// UTF-8) instead of silently writing `false` to disk.
file_put_contents(
    'results.json',
    json_encode($data, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR)
);

// Export to CSV. fopen() returns false on failure, so check it explicitly
// before handing the resource to fputcsv().
$fp = fopen('results.csv', 'w');
if ($fp === false) {
    throw new RuntimeException('Unable to open results.csv for writing');
}

if (!empty($data)) {
    // Header row comes from the keys of the first record.
    fputcsv($fp, array_keys($data[0]));

    // Write rows
    foreach ($data as $row) {
        fputcsv($fp, $row);
    }
}

fclose($fp);

// Or use built-in export
$job->exportToFile('results.json', 'json');
$job->exportToFile('results.csv', 'csv');

Error Handling

error_handling.php
php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;
use ScrapeHub\Exceptions\AuthenticationException;
use ScrapeHub\Exceptions\RateLimitException;
use ScrapeHub\Exceptions\InvalidRequestException;
use ScrapeHub\Exceptions\ScraperException;
use ScrapeHub\Exceptions\ScrapeHubException;

$client = new Client('sk_live_xxxx_449x');

// Catch blocks are ordered most-specific first; ScrapeHubException appears to
// be the SDK-wide base class, and plain Exception is the final safety net.
try {
    $result = $client->scrape(['url' => 'https://example.com']);

} catch (AuthenticationException $e) {
    // Credential problems: the API key is missing, revoked, or malformed.
    echo "Authentication failed: {$e->getMessage()}\n";
    echo "Check your API key\n";

} catch (RateLimitException $e) {
    // Too many requests; the exception carries the server's retry hint.
    echo "Rate limit exceeded: {$e->getMessage()}\n";
    echo "Retry after: {$e->getRetryAfter()} seconds\n";
    sleep($e->getRetryAfter());
    // Retry logic here

} catch (InvalidRequestException $e) {
    // The request payload was rejected; getDetails() holds field-level errors.
    echo "Invalid request: {$e->getMessage()}\n";
    echo "Error details: " . json_encode($e->getDetails()) . "\n";

} catch (ScraperException $e) {
    // The request was accepted but the scrape of the target URL failed.
    echo "Scraper failed: {$e->getMessage()}\n";
    echo "Target URL: {$e->getUrl()}\n";
    echo "Error code: {$e->getCode()}\n";

} catch (ScrapeHubException $e) {
    // Any other SDK-raised error not matched above.
    echo "General error: {$e->getMessage()}\n";

} catch (Exception $e) {
    // Non-SDK failures (network layer, JSON, etc.).
    echo "Unexpected error: {$e->getMessage()}\n";
}

Symfony Integration

src/Service/ScraperService.php
php
<?php

namespace App\Service;

use ScrapeHub\Client;
use Psr\Log\LoggerInterface;

class ScraperService
{
    // Typed properties (PHP 7.4+): the file already uses scalar param/return
    // types, so untyped properties were an inconsistency.
    private Client $client;
    private LoggerInterface $logger;

    /**
     * @param string          $apiKey ScrapeHub secret key (wired from %env(SCRAPEHUB_API_KEY)%).
     * @param LoggerInterface $logger Channel for scrape telemetry.
     */
    public function __construct(string $apiKey, LoggerInterface $logger)
    {
        $this->client = new Client($apiKey);
        $this->logger = $logger;
    }

    /**
     * Scrape a URL and return the extracted items.
     *
     * @param string $url     Page to scrape.
     * @param array  $options Extra engine options; caller values override defaults.
     * @return array Extracted item payloads.
     * @throws \Exception Re-thrown after logging when the scrape fails.
     */
    public function scrape(string $url, array $options = []): array
    {
        try {
            // array_merge: later arrays win on key collision, so caller
            // options override the defaults below.
            $result = $this->client->scrape(array_merge([
                'url' => $url,
                'engine' => 'neural-x1'
            ], $options));

            $this->logger->info('Scrape completed', [
                'url' => $url,
                'items' => count($result->data)
            ]);

            return $result->data;

        } catch (\Exception $e) {
            // Log with context, then propagate so callers decide retry policy.
            $this->logger->error('Scrape failed', [
                'url' => $url,
                'error' => $e->getMessage()
            ]);

            throw $e;
        }
    }
}

// config/services.yaml
services:
    App\Service\ScraperService:
        arguments:
            # Resolve the API key from the SCRAPEHUB_API_KEY environment variable.
            $apiKey: '%env(SCRAPEHUB_API_KEY)%'

Testing with PHPUnit

tests/ScraperServiceTest.php
php
<?php

namespace Tests;

use PHPUnit\Framework\TestCase;
use ScrapeHub\Client;
use App\Services\ScraperService;

class ScraperServiceTest extends TestCase
{
    /**
     * Demonstrates stubbing the ScrapeHub client with PHPUnit mocks.
     *
     * NOTE(review): as written this exercises the mock itself rather than
     * ScraperService — inject the mock into the service to make it a real
     * unit test.
     */
    public function testScrapeProducts()
    {
        $client = $this->createMock(Client::class);

        // The exact request we expect the caller to issue.
        $expectedRequest = [
            'url' => 'https://example.com/products',
            'engine' => 'neural-x1',
            'format' => 'json'
        ];

        // Canned API response: two products, successful completion.
        $stubResponse = (object)[
            'data' => [
                ['name' => 'Product 1', 'price' => 29.99],
                ['name' => 'Product 2', 'price' => 39.99]
            ],
            'status' => 'completed'
        ];

        $client->expects($this->once())
            ->method('scrape')
            ->with($expectedRequest)
            ->willReturn($stubResponse);

        $result = $client->scrape($expectedRequest);

        $this->assertEquals('completed', $result->status);
        $this->assertCount(2, $result->data);
    }

    /**
     * A scrape failure should surface as an exception with the API message.
     */
    public function testScrapeError()
    {
        $client = $this->createMock(Client::class);

        $client->expects($this->once())
            ->method('scrape')
            ->willThrowException(new \Exception('Failed to scrape'));

        // Expectations must be registered before the call that throws.
        $this->expectException(\Exception::class);
        $this->expectExceptionMessage('Failed to scrape');

        $client->scrape(['url' => 'https://example.com']);
    }
}

API Reference

For complete API documentation, visit our PHP package reference:

View PHP Package Docs →