How to Scrape Ebay using Python

In this web scraping tutorial, we'll take a look at how to scrape Ebay - the biggest peer-to-peer e-commerce marketplace in the world.

We'll be scraping product details like pricing, variant information, features and descriptions.

To scrape Ebay data with Python we'll be using a few popular community packages and some clever parsing techniques.

We'll also take a look at how to scrape Ebay's search system to discover new item listings, so we can be the first to know when a new deal appears.

Why Scrape Ebay?

Ebay is one of the biggest product marketplaces in the world, especially for more niche and rare items. This makes Ebay a great target for e-commerce data analytics.

Scraping Ebay data (like seller reviews) can also empower Ebay sellers by enabling easy market and competitor analysis.

Available Ebay Data Fields

In this Ebay web scraping tutorial we'll be scraping common product data like pricing, stock, features and performance metadata. For more, see this example output:

Example Product Dataset
{
  "url": "https://www.ebay.com/itm/393531906094",
  "id": "393531906094",
  "price": "C $579.00",
  "price_converted": "US $427.32",
  "name": "Apple iPhone 11 Pro Max - Unlocked - 64GB / 256GB / 512GB - CA - Grade A",
  "seller_name": "device_care",
  "seller_url": "https://www.ebay.com/str/devicecare",
  "photos": [
    "https://i.ebayimg.com/images/g/93cAAOSwvEJgbLW8/s-l64.jpg",
    "https://i.ebayimg.com/images/g/l0sAAOSwextgbLYj/s-l64.jpg",
    "https://i.ebayimg.com/images/g/qxEAAOSwP~BgbLa5/s-l64.jpg",
    "https://i.ebayimg.com/images/g/7usAAOSwRbZgbLbE/s-l64.jpg",
    "https://i.ebayimg.com/images/g/ffMAAOSwhAxgbLbO/s-l64.jpg",
    "https://i.ebayimg.com/images/g/93cAAOSwvEJgbLW8/s-l500.jpg"
  ],
  "description_url": "https://vi.vipr.ebaydesc.com/ws/eBayISAPI.dll?ViewItemDescV4&item=393531906094&t=1631237959000&category=9355&seller=device_care&excSoj=1&excTrk=1&lsite=2&ittenable=true&domain=ebay.com&descgauge=1&cspheader=1&oneClk=2&secureDesc=1",
  "features": {
    "Condition": "Excellent - Refurbished: The item is in like-new condition, backed by a one year warranty. It has ... Read moreExcellent - Refurbished: The item is in like-new condition, backed by a one year warranty. It has been professionally refurbished, inspected and cleaned to excellent condition by qualified sellers. The item includes original or new accessories and will come in new generic packaging. See the seller's listing for full details. See all condition definitions",
    "Camera Resolution": "12.0 MP",
    "Operating System": "iOS",
    "Contract": "Without Contract",
    "Connectivity": "5G, Bluetooth, GPS, Lightning",
    "Features": "4K Video Recording, Accelerometer, Bluetooth Enabled, Camera, Facial Recognition",
    "Model Number": "A2161 (CDMA + GSM)",
    "RAM": "4 GB",
    "Lock Status": "Factory Unlocked",
    "Network": "1&1, Unlocked",
    "SIM Card Slot": "Dual SIM (SIM + eSIM)",
    "Brand": "Apple",
    "Processor": "Hexa Core",
    "Screen Size": "6.5 in"
  },
  "variants": {
    "Apple iPhone 11 Pro Max 512 GB Midnight Green": {
      "id": "662315637180",
      "price": "C $779.00",
      "price_converted": "US $574.93",
      "vat_price": null,
      "quantity": 5,
      "in_stock": false,
      "sold": 5,
      "available": 0,
      "watch_count": 27,
      "epid": "9034209121",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "512 GB",
        "Color": "Midnight Green"
      }
    },
    "Apple iPhone 11 Pro Max 512 GB Gold": {
      "id": "662315637181",
      "price": "C $779.00",
      "price_converted": "US $574.93",
      "vat_price": null,
      "quantity": 5,
      "in_stock": false,
      "sold": 5,
      "available": 0,
      "watch_count": 10,
      "epid": "9034209182",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "512 GB",
        "Color": "Gold"
      }
    },
    "Apple iPhone 11 Pro Max 512 GB Space Gray": {
      "id": "662315637182",
      "price": "C $779.00",
      "price_converted": "US $574.93",
      "vat_price": null,
      "quantity": 9,
      "in_stock": true,
      "sold": 4,
      "available": 5,
      "watch_count": 29,
      "epid": "19034211488",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "512 GB",
        "Color": "Space Gray"
      }
    },
    "Apple iPhone 11 Pro Max 256 GB    Midnight Green": {
      "id": "662315637176",
      "price": "C $639.00",
      "price_converted": "US $471.60",
      "vat_price": null,
      "quantity": 134,
      "in_stock": false,
      "sold": 134,
      "available": 0,
      "watch_count": 165,
      "epid": "11037566785",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "256 GB   ",
        "Color": "Midnight Green"
      }
    },
    "Apple iPhone 11 Pro Max 256 GB    Gold": {
      "id": "662315637177",
      "price": "C $639.00",
      "price_converted": "US $471.60",
      "vat_price": null,
      "quantity": 77,
      "in_stock": false,
      "sold": 77,
      "available": 0,
      "watch_count": 104,
      "epid": "27041453299",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "256 GB   ",
        "Color": "Gold"
      }
    },
    "Apple iPhone 11 Pro Max 256 GB    Space Gray": {
      "id": "662315637178",
      "price": "C $639.00",
      "price_converted": "US $471.60",
      "vat_price": null,
      "quantity": 161,
      "in_stock": false,
      "sold": 161,
      "available": 0,
      "watch_count": 169,
      "epid": "10057225571",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "256 GB   ",
        "Color": "Space Gray"
      }
    },
    "Apple iPhone 11 Pro Max 512 GB Silver": {
      "id": "662315637179",
      "price": "C $779.00",
      "price_converted": "US $574.93",
      "vat_price": null,
      "quantity": 4,
      "in_stock": true,
      "sold": 3,
      "available": 1,
      "watch_count": 10,
      "epid": "9034209212",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "512 GB",
        "Color": "Silver"
      }
    },
    "Apple iPhone 11 Pro Max 64 GB Midnight Green": {
      "id": "662315637172",
      "price": "C $579.00",
      "price_converted": "US $427.32",
      "vat_price": null,
      "quantity": 236,
      "in_stock": true,
      "sold": 199,
      "available": 37,
      "watch_count": 183,
      "epid": "19042851646",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "64 GB",
        "Color": "Midnight Green"
      }
    },
    "Apple iPhone 11 Pro Max 64 GB Gold": {
      "id": "662315637173",
      "price": "C $579.00",
      "price_converted": "US $427.32",
      "vat_price": null,
      "quantity": 257,
      "in_stock": true,
      "sold": 211,
      "available": 46,
      "watch_count": 161,
      "epid": "21042400312",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "64 GB",
        "Color": "Gold"
      }
    },
    "Apple iPhone 11 Pro Max 64 GB Space Gray": {
      "id": "662315637174",
      "price": "C $579.00",
      "price_converted": "US $427.32",
      "vat_price": null,
      "quantity": 279,
      "in_stock": true,
      "sold": 221,
      "available": 58,
      "watch_count": 226,
      "epid": "7034220649",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "64 GB",
        "Color": "Space Gray"
      }
    },
    "Apple iPhone 11 Pro Max 256 GB    Silver": {
      "id": "662315637175",
      "price": "C $639.00",
      "price_converted": "US $471.60",
      "vat_price": null,
      "quantity": 19,
      "in_stock": false,
      "sold": 19,
      "available": 0,
      "watch_count": 35,
      "epid": "23034220736",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "256 GB   ",
        "Color": "Silver"
      }
    },
    "Apple iPhone 11 Pro Max 64 GB Silver": {
      "id": "662315637171",
      "price": "C $579.00",
      "price_converted": "US $427.32",
      "vat_price": null,
      "quantity": 72,
      "in_stock": true,
      "sold": 53,
      "available": 19,
      "watch_count": 55,
      "epid": "9034209203",
      "top_product": false,
      "traits": {
        "Model": "Apple iPhone 11 Pro Max",
        "Storage Capacity": "64 GB",
        "Color": "Silver"
      }
    }
  }
}

We'll also be scraping Ebay's search, which provides product preview datasets. See this example output:

Example Search Dataset
[
  {
    "url": "https://www.ebay.com/itm/394406593931",
    "title": "iPhone 11 Pro Max 256GB Space Gray (Unlocked) CRACKED FRONT BACK CLEAN ESN",
    "price": "$289.94",
    "shipping": "Free shipping",
    "list_date": "Jan-3 20:32",
    "subtitles": [
      "Apple iPhone 11 Pro Max",
      "256 GB",
      "Unlocked"
    ],
    "condition": "Parts Only",
    "photo": "https://i.ebayimg.com/thumbs/images/g/74AAAOSwRahjtQBg/s-l225.webp",
    "rating": "4.5 out of 5 stars.",
    "rating_count": "80 product ratings"
  },
...
]

Ebay's pages contain a lot of data. In our example Ebay web scraper we'll stick to the most important data fields, but the techniques covered in this guide can be applied to scrape any other part of Ebay.

Setup

In this tutorial, we'll be using Python with two important community libraries:

  • httpx - HTTP client library which will let us communicate with ebay.com's servers and retrieve raw page data.
  • parsel - HTML parsing library which will help us parse our scraped HTML data using CSS selectors or XPath.

These packages can be easily installed via the pip install command:

$ pip install httpx parsel

Alternatively, feel free to swap httpx out for any other HTTP client package such as requests, as we'll only need basic HTTP functions which are almost interchangeable between libraries. As for parsel, another great alternative is the beautifulsoup package.
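
For example, here's a minimal sketch of the same fetch-and-parse flow using requests and beautifulsoup4 instead - purely illustrative, as the rest of this tutorial sticks with httpx and parsel:

import requests
from bs4 import BeautifulSoup

# the same basic flow with the alternative packages (pip install requests beautifulsoup4)
response = requests.get("https://www.ebay.com/itm/332562282948")
soup = BeautifulSoup(response.text, "html.parser")
# BeautifulSoup supports CSS selectors too, via .select() and .select_one()
title = soup.select_one("h1 span")
print(title.get_text(strip=True) if title else "no title found")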

Scraping Ebay Listings

Let's start with parsing a single Ebay listing page.
For this, we'll be using httpx to retrieve the product's HTML page and parsel to parse it using CSS selectors.

To start, we can separate the Ebay listings into two types:

  • listings with multiple variants - like tech devices, clothes, shoes. Things that have multiple options.
  • listings with a single variant - usually simple products that have no options. Like toys or second hand items.

Let's begin with single variant listings as they are much simpler. For this example, let's use this product: ebay.com/itm/332562282948

markup for fields that'll be scraped from Ebay.com
We'll capture the most important fields: pricing, description and product and seller details

In the image above, we marked our fields. To build CSS selectors for these fields, we can use the browser's developer tools (F12 key or right click -> Inspect).
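
Once we have a candidate selector from the developer tools, it's worth testing it quickly in a Python shell before wiring it into the scraper. A minimal check, using the price selector we'll rely on below, might look like this:

import httpx
from parsel import Selector

# fetch the example listing and test a selector found through the browser's dev tools
response = httpx.get("https://www.ebay.com/itm/332562282948")
sel = Selector(response.text)
print(sel.css('span[itemprop="price"] .ux-textspans ::text').get())
# should print something like "US $13.94"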

With this, our single listing Ebay scraper in Python would look something like this:

from parsel import Selector
import httpx

def parse_item(sel: Selector):
    # parsing shortcuts to avoid repetition:
    css_join = lambda css: "".join(sel.css(css).getall()).strip()  # join all selected elements
    css = lambda css: sel.css(css).get("").strip()  # take first selected element and strip leading/trailing spaces

    item = {}
    item["url"] = css('link[rel="canonical"]::attr(href)')
    item["id"] = item["url"].split("/itm/")[1].split("?")[0]  # we can take ID from the URL
    item["price"] = css('span[itemprop="price"] .ux-textspans ::text')
    item["price_converted"] = css("span.x-price-approx__price ::text")  # ebay automatically converts price for some regions

    item["name"] = css_join("h1 span::text")
    item["seller_name"] = css_join("div[data-testid=str-title] a ::text")
    item["seller_url"] = css("div[data-testid=str-title] a::attr(href)").split("?")[0]
    item["photos"] = sel.css('.ux-image-filmstrip-carousel-item.image img::attr("src")').getall()  # carousel images
    item["photos"].extend(sel.css('.ux-image-carousel-item.image img::attr("src")').getall())  # main image
    # description is an iframe (independent page). We can keep it as a URL or scrape it later.
    item["description_url"] = css("div.d-item-description iframe::attr(src)")
    if not item["description_url"]:  # description can be in 2 locations - check both
        item["description_url"] = css("div#desc_div iframe::attr(src)")
    # feature details from the description table:
    feature_table = sel.css("div.ux-layout-section__item--table-view")
    features = {}
    for ft_label in feature_table.css(".ux-labels-values__labels"):
        # iterate through each label of the table and select first sibling for value:
        label = "".join(ft_label.css(".ux-textspans::text").getall()).strip(":\n ")
        ft_value = ft_label.xpath("following-sibling::div[1]")
        value = "".join(ft_value.css(".ux-textspans::text").getall()).strip()
        features[label] = value
    item["features"] = features
    return item

response = httpx.get("https://www.ebay.com/itm/332562282948")
selector = Selector(response.text)
item = parse_item(selector)
print(item)
Example Output
{
  "url": "https://www.ebay.com/itm/332562282948",
  "id": "332562282948",
  "price": "US $13.94",
  "price_converted": "",
  "name": "Sanei Kirby 5.5\" Plush Stuffed Doll (KP01) - Kirby Adventure All Star Collection",
  "seller_name": "ToysCollections",
  "seller_url": "https://www.ebay.com/str/huskylover228",
  "photos": [
    "https://i.ebayimg.com/images/g/ITEAAOSw9p9ajK16/s-l500.jpg"
  ],
  "description_url": "https://vi.vipr.ebaydesc.com/ws/eBayISAPI.dll?ViewItemDescV4&item=332562282948&t=1653362457000&category=69528&seller=the_northeshop&excSoj=1&excTrk=1&lsite=0&ittenable=true&domain=ebay.com&descgauge=1&cspheader=1&oneClk=2&secureDesc=1",
  "features": {
    "Condition": "New: A brand-new, unused, unopened, undamaged item (including handmade items). See the seller's ... Read moreNew: A brand-new, unused, unopened, undamaged item (including handmade items). See the seller's listing for full details. See all condition definitions",
    "Brand": "unbranded",
    "Type": "Plush",
    "UPC": "4905330122810",
    "Featured Refinements": "Kirby Plush",
    "Recommended Age Range": "4+",
    "Gender": "Boys & Girls",
    "Character Family": "Kirby Adventure"
  }
}

In the example above, we did some basic CSS selector-based HTML parsing to extract item details such as the price, name, features and photos.
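
Note that we only captured the description as a URL, since it lives in a separate iframe page. If we want the description text itself, we can fetch that URL in a follow-up request - a minimal sketch, assuming the description_url field extracted above is present:

import httpx
from parsel import Selector

# follow the description iframe URL we stored in item["description_url"]
if item["description_url"]:
    description_page = httpx.get(item["description_url"])
    description_sel = Selector(description_page.text)
    # join all visible text nodes of the description document
    item["description"] = " ".join(description_sel.css("body ::text").getall()).strip()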

Next, for products with variants we'll have to go a bit further and extract hidden web data - let's take a look at how to do that.

Ebay Variant Data

Ebay's listings can contain multiple products through a feature called variants.

For example, let's take this iPhone listing: ebay.com/itm/393531906094

markup for ebay.com variant options
Listings with variants have multiple selection options

We can see several variant options: Model, Storage Capacity and Color. The price is updated every time we choose a different option. How can we scrape this?

Ebay uses javascript to update the page with a different price every time we choose a different option. That means the price data is available somewhere in a javascript variable, and all we have to do is extract this variable to scrape the variant dataset.

We're not going to go in-depth on how to capture javascript variable data here - for that, refer to our article:

How to Scrape Hidden Web Data

So, to capture variants we'll extract the hidden variant data and pair it together with our HTML scraper from before:

import json
from collections import defaultdict

import httpx
from parsel import Selector


def find_json_objects(text: str, decoder=json.JSONDecoder()):
    """Find JSON objects in text, and generate decoded JSON data"""
    pos = 0
    while True:
        match = text.find("{", pos)
        if match == -1:
            break
        try:
            result, index = decoder.raw_decode(text[match:])
            yield result
            pos = match + index
        except ValueError:
            pos = match + 1


def parse_variants(sel: Selector) -> dict:
    # find script that contains itemVariationsMaps variable:
    script = sel.xpath('//script[contains(., "itemVariationsMap")]/text()').get()
    if not script:
        return {}

    # find all JSON objects in the script text
    all_data = list(find_json_objects(script))
    # find the JSON object that contains the itemVariationsMap variable:
    variants = next(d for d in all_data if "itemVariationsMap" in str(d))["itemVariationsMap"]

    # extract option values for mapping variant trait ids to human labels
    selections = defaultdict(dict)
    for selection in sel.css(".x-msku__box-cont select"):
        name = selection.xpath("@selectboxlabel").get()
        for option in selection.xpath("option"):
            value = int(option.xpath("@value").get())
            if value == -1:  # that's the placeholder
                continue
            label = option.xpath("text()").get().strip()
            label = label.split("(Out ")[0]
            selections[name][value] = label

    # map variant trait ids to human labels
    for variant_id, variant in variants.items():
        for trait, trait_id in variant["traitValuesMap"].items():
            variant["traitValuesMap"][trait] = selections[trait][trait_id]

    # parse variants to something more usable
    parsed_variants = {}
    for variant_id, variant in variants.items():
        label = " ".join(variant["traitValuesMap"].values())
        parsed_variants[label] = {
            "id": variant_id,
            "price": variant["price"],
            "price_converted": variant["convertedPrice"],
            "vat_price": variant["vatPrice"],
            "quantity": variant["quantity"],
            "in_stock": variant["inStock"],
            "sold": variant["quantitySold"],
            "available": variant["quantityAvailable"],
            "watch_count": variant["watchCount"],
            "epid": variant["epid"],
            "top_product": variant["topProduct"],
            "traits": variant["traitValuesMap"],
        }
    return parsed_variants
Run Code & Example Output
response = httpx.get("https://www.ebay.com/itm/393531906094")
selector = Selector(response.text)
item = parse_item(selector)
item['variants'] = parse_variants(selector)
print(item)

For an example output dataset, see the Available Data Fields section.

In this example Ebay scraper, we used the hidden web data scraping technique to extract the javascript variable containing the listing's variant data.
We then extended it with option names and cleaned it up to be more presentable using basic Python data types.

Next, let's take a look at how we can find listings on Ebay using the search system.

Scraping Ebay Search

To start scraping Ebay's search, let's first take a look at how it works.

When we input a search keyword, we can see that Ebay redirects us to a different URL where the search results are located. For example, if we search for the term iphone we'll be taken to a URL similar to ebay.com/sch/i.html?_nkw=iphone&_sacat=0.

This page is using several URL parameters to define the search query:

  • _nkw is the search keyword.
  • _sacat is the category restriction.
  • _sop is the sorting type.
  • _pgn is the page number.
  • _ipg is the number of listings per page (default is 60).

We can find more arguments by clicking around and exploring the search, though for this example let's stick with these 5 parameters, illustrated below.
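
As a quick illustration, here's how these parameters translate to a search URL using Python's urlencode (the keyword and values here are just an example):

from urllib.parse import urlencode

params = {
    "_nkw": "iphone",  # search keyword
    "_sacat": 0,       # category (0 means all categories)
    "_sop": 10,        # sorting (10 is "newly listed")
    "_pgn": 1,         # page number
    "_ipg": 240,       # listings per page
}
print("https://www.ebay.com/sch/i.html?" + urlencode(params))
# https://www.ebay.com/sch/i.html?_nkw=iphone&_sacat=0&_sop=10&_pgn=1&_ipg=240

With the URL format covered, here's the full search scraper: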

import asyncio
import math
import httpx
from typing import TypedDict, List, Literal
from urllib.parse import urlencode

from parsel import Selector


session = httpx.AsyncClient(follow_redirects=True)


class ProductPreviewResult(TypedDict):
    """type hint for search scrape results for product preview data"""

    url: str  # url to full product page
    title: str
    price: str
    shipping: str
    list_date: str
    subtitles: List[str]
    condition: str
    photo: str  # image url
    rating: str
    rating_count: str


def parse_search(sel: Selector) -> List[ProductPreviewResult]:
    """parse ebay's search page for listing preview details"""
    previews = []
    # each listing has its own HTML box where all of the data is contained
    listing_boxes = sel.css(".srp-results li.s-item")
    for box in listing_boxes:
        # quick helpers to extract first element and all elements
        css = lambda css: box.css(css).get("").strip()
        css_all = lambda css: box.css(css).getall()
        previews.append(
            {
                "url": css("a.s-item__link::attr(href)").split("?")[0],
                "title": css(".s-item__title>span::text"),
                "price": css(".s-item__price::text"),
                "shipping": css(".s-item__shipping::text"),
                "list_date": css(".s-item__listingDate span::text"),
                "subtitles": css_all(".s-item__subtitle::text"),
                "condition": css(".s-item__subtitle .SECONDARY_INFO::text"),
                "photo": css("img.s-item__image-img::attr(src)"),
                "rating": css(".s-item__reviews .clipped::text"),
                "rating_count": css(".s-item__reviews-count span::text"),
            }
        )
    return previews


SORTING_MAP = {
    "best_match": 12,
    "ending_soonest": 1,
    "newly_listed": 10,
}


async def scrape_search(
    query,
    max_pages=1,
    category=0,
    items_per_page=240,
    sort: Literal["best_match", "ending_soonest", "newly_listed"] = "newly_listed",
) -> List[ProductPreviewResult]:
    """Scrape Ebay's search for product preview data for given"""

    def make_request(page):
        return "https://www.ebay.com/sch/i.html?" + urlencode(
            {
                "_nkw": query,
                "_sacat": category,
                "_ipg": items_per_page,
                "_sop": SORTING_MAP[sort],
                "_pgn": page,
            }
        )

    first_page = await session.get(make_request(page=1))
    first_page_sel = Selector(first_page.text)
    results = parse_search(first_page_sel)
    if max_pages == 1:
        return results
    # find total amount of results for concurrent pagination
    total_results = first_page_sel.css(".srp-controls__count-heading>span::text").get()
    total_results = int(total_results.replace(",", ""))
    total_pages = math.ceil(total_results / items_per_page)
    if total_pages > max_pages:
        total_pages = max_pages
    other_pages = [session.get(make_request(page=i)) for i in range(2, total_pages + 1)]
    for response in asyncio.as_completed(other_pages):
        response = await response
        try:
            results.extend(parse_search(Selector(response.text)))
        except Exception:
            print(f"failed to scrape search page {response.url}")
    return results
Run Code & Example Output
session = httpx.AsyncClient(follow_redirects=True)


async def run():
    search_results = await scrape_search("iphone", items_per_page=60, max_pages=2)
    print(search_results)


if __name__ == "__main__":
    asyncio.run(run())

Which will result in a dataset similar to:

[
    {
        "url": "https://www.ebay.com/itm/354493525522",
        "title": "Apple iPhone 11 - 128GB - Black (Unlocked) A2111 (CDMA + GSM)",
        "price": "$1,200.99",
        "shipping": "+$25.00 shipping",
        "list_date": "Jan-3 04:32",
        "subtitles": [
            "Apple iPhone 11",
            "128 GB",
            "Unlocked"
        ],
        "condition": "Pre-Owned",
        "photo": "https://i.ebayimg.com/thumbs/images/g/m5QAAOSwrsxjtB~R/s-l225.webp",
        "rating": "4.5 out of 5 stars.",
        "rating_count": "68 product ratings"
    },
    ...  # truncated for the blog
]

In the example above, we wrote a small scraper for Ebay's search. We built a search URL using Python's urlencode function to turn dictionary parameters into URL parameters.

Then, we parsed the scraped data using CSS selectors: we selected all of the listing box containers and iterated through them to safely extract each listing's details.

We could further use our listing scraper from the previous section to extract full listing details if we'd like to expand this search dataset.
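
For instance, here's a rough sketch of feeding search preview URLs into the listing scraper - it assumes the parse_item() and parse_variants() functions from the previous sections are in scope:

import asyncio
import httpx
from parsel import Selector

async def scrape_full_listings(urls):
    """scrape full product details for a list of listing URLs found through search"""
    async with httpx.AsyncClient(follow_redirects=True) as client:
        responses = await asyncio.gather(*[client.get(url) for url in urls])
    results = []
    for response in responses:
        sel = Selector(response.text)
        item = parse_item(sel)  # listing parser from the "Scraping Ebay Listings" section
        item["variants"] = parse_variants(sel)  # variant parser from the "Ebay Variant Data" section
        results.append(item)
    return results

# usage sketch:
# search_results = await scrape_search("iphone", max_pages=1)
# full_items = await scrape_full_listings([preview["url"] for preview in search_results])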

There are a lot of listings on Ebay, and when we scale our scraper up to thousands of listings we might start encountering blocking. Next, let's take a look at how to avoid being blocked by Ebay.

Avoiding Ebay Blocking

Web scraping Ebay is not too difficult; however, when scaling our scraper beyond a few listing scrapes, we might start to run into captchas and scraper blocking.

To scale up our Ebay scraper, let's take advantage of the ScrapFly API, which offers several powerful features that can help us scale our web scrapers and avoid Ebay's blocking.

For this, we'll be using the scrapfly-sdk python package and the Anti Scraping Protection Bypass feature. To start, let's install scrapfly-sdk using pip:

$ pip install scrapfly-sdk

To take advantage of ScrapFly's API in our Ebay web scraper, all we need to do is replace our httpx session code with scrapfly-sdk client requests:

import httpx

response = httpx.get("some ebay.com url")
# in ScrapFly SDK becomes
from scrapfly import ScrapflyClient, ScrapeConfig
client = ScrapflyClient("YOUR SCRAPFLY KEY")
result = client.scrape(ScrapeConfig(
    # some ebay listing URL
    "https://www.ebay.com/itm/393531906094",
    # we can select specific proxy country
    country="US",
    # and enable anti scraping protection bypass:
    asp=True,
))

For more on how to scrape Ebay.com using ScrapFly, see the Full Scraper Code section.

FAQ

To wrap this guide up, let's take a look at some frequently asked questions about scraping data from Ebay:

Is it legal to scrape Ebay?

Yes. Ebay's data is publicly available, and scraping Ebay at slow, respectful rates falls under the ethical scraping definition.
That being said, be aware of GDPR compliance in the EU when storing personal data such as sellers' details like names or locations. For more, see our Is Web Scraping Legal? article.

How to crawl Ebay.com?

To web crawl Ebay we can adapt the scraping techniques covered in this article. Every Ebay listing contains related products which we can extract and feed into our scraping loop, turning our scraper into a crawler capable of discovering new listings to scrape.
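
As a rough sketch, such a crawl loop could look like the snippet below. Note the CSS selector for related listing links is an assumption for illustration and may need adjusting against the live page:

import httpx
from parsel import Selector

def find_related_listings(sel: Selector) -> list:
    """collect related /itm/ listing URLs from a listing page (selector is a rough assumption)"""
    urls = sel.css('a[href*="/itm/"]::attr(href)').getall()
    return list({url.split("?")[0] for url in urls})

def crawl_listings(start_url: str, max_items: int = 20) -> list:
    """breadth-first crawl of Ebay listings starting from a single listing URL"""
    seen, queue, results = set(), [start_url], []
    while queue and len(results) < max_items:
        url = queue.pop(0)
        if url in seen:
            continue
        seen.add(url)
        response = httpx.get(url, follow_redirects=True)
        sel = Selector(response.text)
        results.append(parse_item(sel))  # listing parser from earlier in this guide
        queue.extend(find_related_listings(sel))
    return results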

Is there an Ebay API?

No. While Ebay does have a private catalog API, it contains only metadata fields like product IDs. For product prices and other details, the only way is to scrape Ebay as described in this guide.

Ebay Scraping Summary

In this guide, we wrote a Python Ebay scraper for product listing data using nothing but Python and a few community packages: httpx for retrieving the content and parsel for parsing it.

We've discovered two types of product listings: single variant and multiple variant ones. For the former, we used CSS selectors to parse listing data from the HTML. For the latter, however, we had to employ hidden web data scraping techniques to extract variation data from hidden javascript variables.

To find listings on Ebay, we've taken a look at how the search system works and how we can scrape it by replicating its behavior.

Finally, to avoid being blocked we used ScrapFly's API which smartly configures every web scraper connection to avoid being blocked. For more about ScrapFly, see our documentation and try it out for FREE!

Full Ebay Scraper Code

import asyncio
import json
import math
from collections import defaultdict
from pathlib import Path
from typing import List, Literal, Optional, TypedDict
from urllib.parse import urlencode

from scrapfly import ScrapeApiResponse, ScrapeConfig, ScrapflyClient

scrapfly = ScrapflyClient(key="YOUR SCRAPFLY KEY", max_concurrency=10)

# -------------------------------------------------------------------------
# UTILITY
# -------------------------------------------------------------------------


def find_json_objects(text: str, decoder=json.JSONDecoder()):
    """Find JSON objects in text, and generate decoded JSON data"""
    pos = 0
    while True:
        match = text.find("{", pos)
        if match == -1:
            break
        try:
            result, index = decoder.raw_decode(text[match:])
            yield result
            pos = match + index
        except ValueError:
            pos = match + 1


# -------------------------------------------------------------------------
# PRODUCT scraping
# -------------------------------------------------------------------------


def parse_variants(result: ScrapeApiResponse) -> dict:
    """Parse variant data from ebay's listing page of a product with variants. This data is located in a js variable"""
    sel = result.selector
    script = sel.xpath('//script[contains(., "itemVariationsMap")]/text()').get()
    if not script:
        return {}

    all_data = list(find_json_objects(script))
    variants = next(d for d in all_data if "itemVariationsMap" in str(d))["itemVariationsMap"]

    # extract option values for mapping variant trait ids to human labels
    selections = defaultdict(dict)
    for selection in sel.css(".x-msku__box-cont select"):
        name = selection.xpath("@selectboxlabel").get()
        for option in selection.xpath("option"):
            value = int(option.xpath("@value").get())
            if value == -1:  # that's the placeholder
                continue
            label = option.xpath("text()").get().strip()
            label = label.split("(Out ")[0]
            selections[name][value] = label

    # map variant trait ids to human labels
    for variant_id, variant in variants.items():
        for trait, trait_id in variant["traitValuesMap"].items():
            variant["traitValuesMap"][trait] = selections[trait][trait_id]

    # parse variants to something more usable
    parsed_variants = {}
    for variant_id, variant in variants.items():
        label = " ".join(variant["traitValuesMap"].values())
        parsed_variants[label] = {
            "id": variant_id,
            "price": variant["price"],
            "price_converted": variant["convertedPrice"],
            "vat_price": variant["vatPrice"],
            "quantity": variant["quantity"],
            "in_stock": variant["inStock"],
            "sold": variant["quantitySold"],
            "available": variant["quantityAvailable"],
            "watch_count": variant["watchCount"],
            "epid": variant["epid"],
            "top_product": variant["topProduct"],
            "traits": variant["traitValuesMap"],
        }
    return parsed_variants


def parse_product(result: ScrapeApiResponse):
    """Parse Ebay's product listing page for core product data"""
    sel = result.selector
    css_join = lambda css: "".join(sel.css(css).getall()).strip()  # join all selected elements
    css = lambda css: sel.css(css).get("").strip()  # take first selected element and strip leading/trailing spaces

    item = {}
    item["url"] = css('link[rel="canonical"]::attr(href)')
    item["id"] = item["url"].split("/itm/")[1].split("?")[0]  # we can take ID from the URL
    item["price"] = css('span[itemprop="price"] .ux-textspans ::text')
    item["price_converted"] = css(
        "span.x-price-approx__price ::text"
    )  # ebay automatically converts price for some regions

    item["name"] = css_join("h1 span::text")
    item["seller_name"] = css_join("div[data-testid=str-title] a ::text")
    item["seller_url"] = css("div[data-testid=str-title] a::attr(href)").split("?")[0]
    item["photos"] = sel.css('.ux-image-filmstrip-carousel-item.image img::attr("src")').getall()  # carousel images
    item["photos"].extend(sel.css('.ux-image-carousel-item.image img::attr("src")').getall())  # main image
    # description is an iframe (independent page). We can keep it as a URL or scrape it later.
    item["description_url"] = css("div.d-item-description iframe::attr(src)")
    if not item["description_url"]:
        item["description_url"] = css("div#desc_div iframe::attr(src)")
    # feature details from the description table:
    feature_table = sel.css("div.ux-layout-section__item--table-view")
    features = {}
    for ft_label in feature_table.css(".ux-labels-values__labels"):
        # iterate through each label of the table and select first sibling for value:
        label = "".join(ft_label.css(".ux-textspans::text").getall()).strip(":\n ")
        ft_value = ft_label.xpath("following-sibling::div[1]")
        value = "".join(ft_value.css(".ux-textspans::text").getall()).strip()
        features[label] = value
    item["features"] = features
    return item


async def scrape_product(url):
    page = await scrapfly.async_scrape(ScrapeConfig(url=url, asp=True, country="US"))
    product = parse_product(page)
    product["variants"] = parse_variants(page)
    return product


# -------------------------------------------------------------------------
# SEARCH scraping
# -------------------------------------------------------------------------


def parse_search(result: ScrapeApiResponse):
    previews = []
    for box in result.selector.css(".srp-results li.s-item"):
        css = lambda css: box.css(css).get("").strip()
        css_all = lambda css: box.css(css).getall()
        previews.append(
            {
                "url": css("a.s-item__link::attr(href)").split("?")[0],
                "title": css(".s-item__title>span::text"),
                "price": css(".s-item__price::text"),
                "shipping": css(".s-item__shipping::text"),
                "list_date": css(".s-item__listingDate span::text"),
                "subtitles": css_all(".s-item__subtitle::text"),
                "condition": css(".s-item__subtitle .SECONDARY_INFO::text"),
                "photo": css("img.s-item__image-img::attr(src)"),
                "rating": css(".s-item__reviews .clipped::text"),
                "rating_count": css(".s-item__reviews-count span::text"),
            }
        )
    return previews


SORTING_MAP = {
    "best_match": 12,
    "ending_soonest": 1,
    "newly_listed": 10,
}


class ProductPreviewResult(TypedDict):
    """type hint for search scrape results for product preview data"""

    url: str  # url to full product page
    title: str
    price: str
    shipping: str
    list_date: str
    subtitles: List[str]
    condition: str
    photo: str  # image url
    rating: str
    rating_count: str


async def scrape_search(
    query,
    max_pages=1,
    category=0,
    items_per_page=240,
    sort: Literal["best_match", "ending_soonest", "newly_listed"] = "newly_listed",
) -> List[ProductPreviewResult]:
    """Scrape Ebay's search for product preview data for given"""

    def make_request(page):
        url = "https://www.ebay.com/sch/i.html?" + urlencode(
            {
                "_nkw": query,
                "_sacat": category,
                "_ipg": items_per_page,
                "_sop": SORTING_MAP[sort],
                "_pgn": page,
            }
        )
        return ScrapeConfig(url=url, asp=True, country="US")

    first_page = await scrapfly.async_scrape(make_request(page=1))
    results = parse_search(first_page)
    if max_pages == 1:
        return results
    # find total amount of results for concurrent pagination
    total_results = first_page.selector.css(".srp-controls__count-heading>span::text").get()
    total_results = int(total_results.replace(",", ""))
    total_pages = math.ceil(total_results / items_per_page)
    if total_pages > max_pages:
        total_pages = max_pages
    other_pages = [make_request(page=i) for i in range(2, total_pages + 1)]
    async for result in scrapfly.concurrent_scrape(other_pages):
        try:
            results.extend(parse_search(result))
        except Exception as e:
            print(f"failed to scrape search page {result.context['url']}")
    return results


async def run():
    # this example run will scrape search and 2 different products (with variants and without)
    save_dir = Path(__file__).parent.joinpath("results")
    save_dir.mkdir(exist_ok=True)

    search_results = await scrape_search("iphone", items_per_page=60, max_pages=2)
    save_dir.joinpath("search.json").write_text(json.dumps(search_results, indent=2))

    product_result = await scrape_product("https://www.ebay.com/itm/332562282948")
    save_dir.joinpath("product.json").write_text(json.dumps(product_result, indent=2))

    product_with_variants_result = await scrape_product("https://www.ebay.com/itm/393531906094")
    save_dir.joinpath("product-with-variants.json").write_text(json.dumps(product_with_variants_result, indent=2))


if __name__ == "__main__":
    asyncio.run(run())
