PHP Integration
Integrate ScrapeHub into your PHP applications
Installation
composer require scrapehub/scrapehub-php
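If you prefer to declare the dependency in composer.json yourself, the entry looks like this (the version constraint is illustrative; check Packagist for the current release):
{
    "require": {
        "scrapehub/scrapehub-php": "^1.0"
    }
}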
Quick Start
basic_usage.php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;

// Initialize client
$client = new Client('sk_live_xxxx_449x');

// Simple scrape
$result = $client->scrape([
    'url' => 'https://example.com/products',
    'engine' => 'neural-x1'
]);

echo "Extracted " . count($result->data) . " items\n";

foreach ($result->data as $item) {
    print_r($item);
}
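A scrape is not guaranteed to return items, so it is worth guarding before you iterate. A minimal sketch continuing the script above; the name and price field names are examples and depend on the page being scraped:
// Sketch: guard against an empty result before processing.
if (empty($result->data)) {
    echo "No items extracted\n";
    exit(1);
}

// Each item is an associative array; field names vary by target page.
foreach ($result->data as $item) {
    printf("%s - %s\n", $item['name'] ?? 'n/a', $item['price'] ?? 'n/a');
}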
Advanced Configuration
advanced.php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;
use ScrapeHub\Config\ScrapeConfig;

$client = new Client([
    'api_key' => 'sk_live_xxxx_449x',
    'timeout' => 300, // 5 minutes
    'retries' => 3
]);

// Advanced scraping with custom config
$config = new ScrapeConfig([
    'url' => 'https://example.com/products',
    'engine' => 'neural-x1',
    'format' => 'json',

    // Pagination
    'pagination' => [
        'enabled' => true,
        'max_pages' => 10,
        'selector' => 'a.next-page'
    ],

    // Custom headers
    'headers' => [
        'User-Agent' => 'Mozilla/5.0...',
        'Accept-Language' => 'en-US,en;q=0.9'
    ],

    // JavaScript rendering
    'render_js' => true,
    'wait_for_selector' => '.product-list',

    // Proxy settings
    'proxy' => [
        'enabled' => true,
        'region' => 'us-east',
        'residential' => true
    ]
]);

$result = $client->scrapeWithConfig($config);

echo "Status: {$result->status}\n";
echo "Pages scraped: {$result->pages_scraped}\n";
echo "Items extracted: " . count($result->data) . "\n";
echo "Time taken: {$result->duration}s\n";
Async Promises
async.php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;
use GuzzleHttp\Promise;

$client = new Client('sk_live_xxxx_449x');

$urls = [
    'https://example.com/category/1',
    'https://example.com/category/2',
    'https://example.com/category/3'
];

// Scrape all URLs concurrently
$promises = [];
foreach ($urls as $url) {
    $promises[] = $client->scrapeAsync([
        'url' => $url,
        'engine' => 'neural-x1'
    ]);
}

// Wait for all to complete
$results = Promise\Utils::unwrap($promises);

// Process results
$allItems = [];
foreach ($results as $result) {
    $allItems = array_merge($allItems, $result->data);
}

echo "Total items extracted: " . count($allItems) . "\n";
Job Management
jobs.php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;

$client = new Client('sk_live_xxxx_449x');

// Start a scrape job (non-blocking)
$job = $client->createJob([
    'url' => 'https://example.com/large-dataset',
    'engine' => 'neural-x1'
]);

echo "Job ID: {$job->id}\n";
echo "Status: {$job->status}\n";

// Poll job status
while (!$job->isComplete()) {
    $job->refresh();
    echo "Progress: {$job->progress}%\n";
    sleep(5);
}

// Get results when complete
if ($job->isSuccessful()) {
    $results = $job->getResults();
    echo "Extracted " . count($results) . " items\n";
} else {
    echo "Job failed: {$job->error_message}\n";
}

// List all jobs
$jobs = $client->listJobs([
    'limit' => 10,
    'status' => 'completed'
]);

foreach ($jobs as $job) {
    echo "{$job->id}: {$job->status} - {$job->created_at}\n";
}
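The polling loop above runs until the job completes, however long that takes. A sketch that adds an upper bound, using only the job methods shown above (the 10-minute deadline is an arbitrary choice):
// Sketch: poll with a deadline so a stalled job cannot block forever.
$deadline = time() + 600;

while (!$job->isComplete()) {
    if (time() >= $deadline) {
        throw new RuntimeException("Job {$job->id} did not finish in time");
    }
    sleep(5);
    $job->refresh();
}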
Laravel Integration
app/Services/ScraperService.php
<?php

namespace App\Services;

use ScrapeHub\Client;
use App\Models\Product;

class ScraperService
{
    protected $client;

    public function __construct()
    {
        $this->client = new Client(config('services.scrapehub.api_key'));
    }

    public function scrapeProducts(string $url): array
    {
        $result = $this->client->scrape([
            'url' => $url,
            'engine' => 'neural-x1',
            'format' => 'json'
        ]);

        // Save to database
        foreach ($result->data as $productData) {
            Product::create([
                'name' => $productData['name'],
                'price' => $productData['price'],
                'url' => $productData['url']
            ]);
        }

        return $result->data;
    }
}

// config/services.php
return [
    'scrapehub' => [
        'api_key' => env('SCRAPEHUB_API_KEY'),
    ],
];
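Optionally, bind the service as a singleton so the underlying client is constructed only once per container. A sketch for app/Providers/AppServiceProvider.php:
<?php

namespace App\Providers;

use App\Services\ScraperService;
use Illuminate\Support\ServiceProvider;

class AppServiceProvider extends ServiceProvider
{
    public function register(): void
    {
        // One shared ScraperService (and ScrapeHub client) per container
        $this->app->singleton(ScraperService::class);
    }
}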
Laravel Queue Jobs
app/Jobs/ScrapeJob.php
<?php

namespace App\Jobs;

use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use ScrapeHub\Client;
use App\Models\ScrapeRecord;

class ScrapeJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    protected $url;
    protected $userId;

    public function __construct(string $url, int $userId)
    {
        $this->url = $url;
        $this->userId = $userId;
    }

    public function handle()
    {
        $client = new Client(config('services.scrapehub.api_key'));

        // Create async job; ScrapeHub calls the webhook when it finishes
        $job = $client->createJob([
            'url' => $this->url,
            'engine' => 'neural-x1',
            'webhook_url' => route('scrape.webhook')
        ]);

        // Store job ID for tracking
        ScrapeRecord::create([
            'user_id' => $this->userId,
            'job_id' => $job->id,
            'url' => $this->url,
            'status' => 'pending'
        ]);
    }
}

// app/Http/Controllers/ScraperController.php
namespace App\Http\Controllers;

use Illuminate\Http\Request;
use App\Jobs\ScrapeJob;
use App\Models\ScrapeRecord;
use ScrapeHub\Client;

class ScraperController extends Controller
{
    public function create(Request $request)
    {
        ScrapeJob::dispatch($request->url, auth()->id());

        return response()->json(['message' => 'Scrape job started']);
    }

    public function webhook(Request $request)
    {
        $jobId = $request->input('job_id');
        $status = $request->input('status');

        $record = ScrapeRecord::where('job_id', $jobId)->firstOrFail();
        $record->update(['status' => $status]);

        if ($status === 'completed') {
            // Process results
            $client = new Client(config('services.scrapehub.api_key'));
            $job = $client->getJob($jobId);
            // ... handle results
        }

        return response()->json(['status' => 'received']);
    }
}
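The controller above assumes two routes, including the named scrape.webhook route that route('scrape.webhook') resolves. A sketch for routes/api.php; the URIs and middleware are assumptions:
<?php
// Sketch: routes assumed by ScraperController above.
use App\Http\Controllers\ScraperController;
use Illuminate\Support\Facades\Route;

Route::post('/scrape', [ScraperController::class, 'create'])
    ->middleware('auth');

// Must stay publicly reachable so ScrapeHub can deliver status updates
Route::post('/scrape/webhook', [ScraperController::class, 'webhook'])
    ->name('scrape.webhook');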
Data Export
export.php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;

$client = new Client('sk_live_xxxx_449x');

// Get job results
$job = $client->getJob('job_abc123');
$data = $job->getResults();

// Export to JSON
file_put_contents('results.json', json_encode($data, JSON_PRETTY_PRINT));

// Export to CSV
$fp = fopen('results.csv', 'w');
if (!empty($data)) {
    // Write headers
    fputcsv($fp, array_keys($data[0]));
    // Write rows
    foreach ($data as $row) {
        fputcsv($fp, $row);
    }
}
fclose($fp);

// Or use built-in export
$job->exportToFile('results.json', 'json');
$job->exportToFile('results.csv', 'csv');
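One caveat with the manual CSV export: fputcsv() writes values in array order, so items whose keys differ from the first row will misalign. A sketch that normalizes every row against the header first (assumes $data is non-empty, as checked above):
// Sketch: align each row to the header keys, filling gaps with ''.
$headers = array_keys($data[0]);
$fp = fopen('results.csv', 'w');
fputcsv($fp, $headers);

foreach ($data as $row) {
    $line = [];
    foreach ($headers as $key) {
        $line[] = $row[$key] ?? '';
    }
    fputcsv($fp, $line);
}

fclose($fp);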
Error Handling
error_handling.php
<?php

require 'vendor/autoload.php';

use ScrapeHub\Client;
use ScrapeHub\Exceptions\AuthenticationException;
use ScrapeHub\Exceptions\RateLimitException;
use ScrapeHub\Exceptions\InvalidRequestException;
use ScrapeHub\Exceptions\ScraperException;
use ScrapeHub\Exceptions\ScrapeHubException;

$client = new Client('sk_live_xxxx_449x');

try {
    $result = $client->scrape(['url' => 'https://example.com']);
} catch (AuthenticationException $e) {
    echo "Authentication failed: {$e->getMessage()}\n";
    echo "Check your API key\n";
} catch (RateLimitException $e) {
    echo "Rate limit exceeded: {$e->getMessage()}\n";
    echo "Retry after: {$e->getRetryAfter()} seconds\n";
    sleep($e->getRetryAfter());
    // Retry logic here
} catch (InvalidRequestException $e) {
    echo "Invalid request: {$e->getMessage()}\n";
    echo "Error details: " . json_encode($e->getDetails()) . "\n";
} catch (ScraperException $e) {
    echo "Scraper failed: {$e->getMessage()}\n";
    echo "Target URL: {$e->getUrl()}\n";
    echo "Error code: {$e->getCode()}\n";
} catch (ScrapeHubException $e) {
    echo "General error: {$e->getMessage()}\n";
} catch (Exception $e) {
    echo "Unexpected error: {$e->getMessage()}\n";
}
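The catch blocks above print and move on; in practice you usually want to retry transient failures. A sketch of a small retry wrapper built on the same exceptions, continuing the script above (the attempt count and backoff are arbitrary choices):
// Sketch: retry rate limits using the server-supplied delay, and
// scraper failures with exponential backoff.
function scrapeWithRetry(Client $client, array $params, int $maxAttempts = 3)
{
    for ($attempt = 1; ; $attempt++) {
        try {
            return $client->scrape($params);
        } catch (RateLimitException $e) {
            if ($attempt >= $maxAttempts) {
                throw $e;
            }
            sleep($e->getRetryAfter());
        } catch (ScraperException $e) {
            if ($attempt >= $maxAttempts) {
                throw $e;
            }
            sleep(2 ** $attempt); // 2s, 4s, 8s, ...
        }
    }
}

$result = scrapeWithRetry($client, ['url' => 'https://example.com']);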
Symfony Integration
src/Service/ScraperService.php
<?php

namespace App\Service;

use ScrapeHub\Client;
use Psr\Log\LoggerInterface;

class ScraperService
{
    private $client;
    private $logger;

    public function __construct(string $apiKey, LoggerInterface $logger)
    {
        $this->client = new Client($apiKey);
        $this->logger = $logger;
    }

    public function scrape(string $url, array $options = []): array
    {
        try {
            $result = $this->client->scrape(array_merge([
                'url' => $url,
                'engine' => 'neural-x1'
            ], $options));

            $this->logger->info('Scrape completed', [
                'url' => $url,
                'items' => count($result->data)
            ]);

            return $result->data;
        } catch (\Exception $e) {
            $this->logger->error('Scrape failed', [
                'url' => $url,
                'error' => $e->getMessage()
            ]);

            throw $e;
        }
    }
}

// config/services.yaml
services:
    App\Service\ScraperService:
        arguments:
            $apiKey: '%env(SCRAPEHUB_API_KEY)%'
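With the service wired up, any controller can receive it through autowiring. A sketch assuming Symfony 6.2+ attribute routing; the route, controller name, and response shape are illustrative:
<?php

namespace App\Controller;

use App\Service\ScraperService;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\JsonResponse;
use Symfony\Component\Routing\Attribute\Route;

class ScrapeController extends AbstractController
{
    #[Route('/scrape', methods: ['POST'])]
    public function scrape(ScraperService $scraper): JsonResponse
    {
        // ScraperService is autowired using the services.yaml binding above
        $items = $scraper->scrape('https://example.com/products');

        return $this->json(['count' => count($items), 'items' => $items]);
    }
}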
Testing with PHPUnit
tests/ScraperServiceTest.php
<?php

namespace Tests;

use PHPUnit\Framework\TestCase;
use ScrapeHub\Client;

class ScraperServiceTest extends TestCase
{
    public function testScrapeProducts()
    {
        // Create mock client
        $mockClient = $this->createMock(Client::class);

        $mockResult = (object) [
            'data' => [
                ['name' => 'Product 1', 'price' => 29.99],
                ['name' => 'Product 2', 'price' => 39.99]
            ],
            'status' => 'completed'
        ];

        $mockClient->expects($this->once())
            ->method('scrape')
            ->with([
                'url' => 'https://example.com/products',
                'engine' => 'neural-x1',
                'format' => 'json'
            ])
            ->willReturn($mockResult);

        // Exercise the mocked client
        $result = $mockClient->scrape([
            'url' => 'https://example.com/products',
            'engine' => 'neural-x1',
            'format' => 'json'
        ]);

        $this->assertEquals('completed', $result->status);
        $this->assertCount(2, $result->data);
    }

    public function testScrapeError()
    {
        $mockClient = $this->createMock(Client::class);

        $mockClient->expects($this->once())
            ->method('scrape')
            ->willThrowException(new \Exception('Failed to scrape'));

        $this->expectException(\Exception::class);
        $this->expectExceptionMessage('Failed to scrape');

        $mockClient->scrape(['url' => 'https://example.com']);
    }
}
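As written, testScrapeProducts() only exercises the mock itself: the ScraperService from the Laravel section constructs its own Client, so a mock can never reach it. One common fix is constructor injection, sketched here as an assumed refactor of that service (PHP 8 promoted-property syntax, not SDK API):
<?php
// Sketch: inject the client so tests can substitute a mock.
class ScraperService
{
    public function __construct(protected Client $client)
    {
    }

    // ... scrapeProducts() as before, using $this->client
}

// In the test:
$service = new ScraperService($mockClient);
$products = $service->scrapeProducts('https://example.com/products');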
API Reference
For complete API documentation, visit our PHP package reference:
View PHP Package Docs →