Puppeteer Integration
Puppeteer is Google's official Node.js library for controlling Chrome.
Connect it to Scrapfly Cloud Browser for scalable automation with built-in proxies and fingerprinting.
Beta Feature: Cloud Browser is currently in beta.
Installation
Install puppeteer-core (the browser-agnostic version):
npm install puppeteer-core
Use puppeteer-core instead of puppeteer since Cloud Browser provides the browser instance.
Quick Start
const puppeteer = require('puppeteer-core');

const API_KEY = '';
const BROWSER_WS = `wss://browser.scrapfly.io?api_key=${API_KEY}&proxy_pool=datacenter&os=linux`;

async function run() {
  // Connect to Cloud Browser
  const browser = await puppeteer.connect({
    browserWSEndpoint: BROWSER_WS,
  });

  const page = await browser.newPage();

  // Navigate and interact
  await page.goto('https://web-scraping.dev');
  const title = await page.title();
  console.log('Page title:', title);

  // Take a screenshot
  await page.screenshot({ path: 'screenshot.png' });

  await browser.close();
}

run();
Connection Parameters
Configure your Cloud Browser connection with these WebSocket URL parameters:
| Parameter | Required | Description |
| --- | --- | --- |
| api_key | Required | Your Scrapfly API key |
| proxy_pool | Optional | datacenter (default) or residential |
| os | Optional | OS fingerprint: linux, windows, or macos |
| session | Optional | Session ID for persistent browser state |
| country | Optional | Proxy country code (ISO 3166-1 alpha-2) |
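As a reference, a connection URL with every parameter set can be assembled with Node's built-in URLSearchParams; the session ID and country values below are illustrative, and the API key is assumed to live in an environment variable:

const params = new URLSearchParams({
  api_key: process.env.SCRAPFLY_API_KEY, // assumes the key is in an env variable
  proxy_pool: 'residential',
  os: 'windows',
  session: 'checkout-flow-1',            // illustrative session ID
  country: 'us',
});
const BROWSER_WS = `wss://browser.scrapfly.io?${params.toString()}`;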
Data Extraction
Extract data from a dynamic page:
const puppeteer = require('puppeteer-core');

const API_KEY = '';
const BROWSER_WS = `wss://browser.scrapfly.io?api_key=${API_KEY}&proxy_pool=datacenter`;

async function scrapeProducts() {
  let browser = null;
  try {
    browser = await puppeteer.connect({
      browserWSEndpoint: BROWSER_WS,
    });

    const page = await browser.newPage();

    // Navigate to the page
    await page.goto('https://web-scraping.dev/products', {
      waitUntil: 'networkidle2',
    });

    // Extract product data
    const products = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('.product')).map(el => ({
        title: el.querySelector('.product-title')?.textContent?.trim(),
        price: el.querySelector('.product-price')?.textContent?.trim(),
        url: el.querySelector('a')?.href,
      }));
    });

    console.log('Products:', products);
    return products;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}

scrapeProducts();
Form Interaction
Fill forms and handle login flows:
const puppeteer = require('puppeteer-core');

const API_KEY = '';
const BROWSER_WS = `wss://browser.scrapfly.io?api_key=${API_KEY}&proxy_pool=datacenter`;

async function login() {
  let browser = null;
  try {
    browser = await puppeteer.connect({
      browserWSEndpoint: BROWSER_WS,
    });

    const page = await browser.newPage();
    await page.goto('https://web-scraping.dev/login');

    // Fill the login form
    await page.type('#username', 'myuser');
    await page.type('#password', 'mypassword');

    // Click submit and wait for navigation
    await Promise.all([
      page.waitForNavigation(),
      page.click('#submit-button'),
    ]);

    // Check if login was successful
    const isLoggedIn = await page.$('.user-profile') !== null;
    console.log('Login successful:', isLoggedIn);
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}

login();
Session Persistence
Maintain browser state across connections using the session parameter:
const puppeteer = require('puppeteer-core');

const API_KEY = '';
const SESSION_ID = 'my-persistent-session';

// First connection: Login and set cookies
async function firstConnection() {
  let browser = null;
  try {
    browser = await puppeteer.connect({
      browserWSEndpoint: `wss://browser.scrapfly.io?api_key=${API_KEY}&session=${SESSION_ID}`,
    });

    const page = await browser.newPage();
    await page.goto('https://web-scraping.dev/login');
    // ... perform login ...
  } finally {
    if (browser) {
      await browser.close(); // Session is preserved
    }
  }
}

// Second connection: Reuse the logged-in session
async function secondConnection() {
  let browser = null;
  try {
    browser = await puppeteer.connect({
      browserWSEndpoint: `wss://browser.scrapfly.io?api_key=${API_KEY}&session=${SESSION_ID}`,
    });

    const page = await browser.newPage();
    await page.goto('https://web-scraping.dev/dashboard');
    // Already logged in from previous session!
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
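Unlike the earlier examples, these two functions aren't invoked; a minimal driver runs them back to back to demonstrate the state carry-over:

// Run both connections in order: the second reuses the first's session
(async () => {
  await firstConnection();
  await secondConnection();
})();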
Proxy Options
| Proxy Pool | Use Case | Cost |
| --- | --- | --- |
| datacenter | General scraping, high speed, lower cost | 1 credit/30s + 2 credits/MB |
| residential | Protected sites, geo-targeting, anti-bot bypass | 1 credit/30s + 10 credits/MB |
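For example, a geo-targeted scrape of a protected site would combine the residential pool with a country code; the country value here is illustrative, and the API key is assumed to be in an environment variable:

// Residential proxy routed through Germany
const BROWSER_WS =
  `wss://browser.scrapfly.io?api_key=${process.env.SCRAPFLY_API_KEY}` +
  `&proxy_pool=residential&country=de`;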
Best Practices
- Use puppeteer-core: don't download bundled Chrome; Cloud Browser provides the browser instance.
- Handle disconnects: wrap connections in try/catch/finally blocks.
- Close browsers: always call browser.close() in a finally block to stop billing.
- Use sessions wisely: reuse sessions for multi-step flows.
- Block unnecessary resources: use request interception to reduce bandwidth (see the sketch after this list).
- Set timeouts: add reasonable timeouts to prevent hanging connections, e.g. timeout: 30000 in page.goto().
- Handle errors specifically: catch timeouts and WebSocket connection failures separately for easier debugging.
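A minimal sketch combining several of these practices (request interception to block heavy assets, a navigation timeout, and targeted error handling); the blocked resource types and the scrapeLean helper are illustrative choices, not part of the Cloud Browser API:

const puppeteer = require('puppeteer-core');

const BROWSER_WS = `wss://browser.scrapfly.io?api_key=${process.env.SCRAPFLY_API_KEY}&proxy_pool=datacenter`;

async function scrapeLean(url) {
  let browser = null;
  try {
    browser = await puppeteer.connect({ browserWSEndpoint: BROWSER_WS });
    const page = await browser.newPage();

    // Block heavy resources to cut bandwidth (and per-MB credits)
    await page.setRequestInterception(true);
    page.on('request', req => {
      if (['image', 'media', 'font'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Reasonable timeout so a stuck page doesn't hang the connection
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    return await page.title();
  } catch (err) {
    // Distinguish navigation timeouts from other failures (e.g. WebSocket errors)
    if (err.name === 'TimeoutError') {
      console.error('Navigation timed out:', err.message);
    } else {
      console.error('Browser error:', err.message);
    }
    throw err;
  } finally {
    if (browser) {
      await browser.close(); // stop billing even on failure
    }
  }
}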