Apr-21-2021, 04:38 PM
Hi,
I am writing this code for scraping off of a website into an excel spreadsheet, i am having an issue where the website doesn't use a list of the same length and so it means that I get an attribute error for the find_next function, wondering if anyone knows of a workaround.
My coding is a bit of mess
I am writing this code for scraping off of a website into an excel spreadsheet, i am having an issue where the website doesn't use a list of the same length and so it means that I get an attribute error for the find_next function, wondering if anyone knows of a workaround.
My coding is a bit of mess
import requests from bs4 import BeautifulSoup import pandas as pd page_number = 1 url = 'https://www.autotrader.co.uk/car-search?advertClassification=standard&postcode=la94py&onesearchad=Used&onesearchad=Nearly%20New&onesearchad=New&advertising-location=at_cars&is-quick-search=TRUE&include-delivery-option=on&page=' agent = {"User-Agent":'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'} car_spec = [] car_age = [] car_style = [] car_mileage =[] car_engine_size =[] car_BHP = [] price_lst = [] car_detail = [] car_gearbox_style = [] car_fuel_type = [] car_next=[] while page_number < 100: all_car = [] page_number += 1 pg_no = str(page_number) print(page_number) url2= url+pg_no response = requests.get(url2, headers=agent) soup = BeautifulSoup(response.content, 'lxml') car_elements = soup.find_all('div', class_='product-card-content__car-info') for tag in car_elements: price = tag.find('div', class_='product-card-pricing__price') price_lst.append(price.text.strip()) for tag in car_elements: car = tag.find('h3', class_='product-card-details__title') car_detail.append(car.text.strip()) for tag in car_elements: car = tag.find('li', class_='atc-type-picanto--medium') if car is None: car='0' car_age.append(car) else: car_age.append(car.text) car = tag.find('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium') if car is None: car='0' car_style.append(car) else: car_style.append(car.text) car = tag.find('li', class_ ='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium') if car is None: car='0' car_mileage.append(car) else: car_mileage.append(car.text) car = tag.find('li',class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium') if car is None: car='0' car_engine_size.append(car) else: car_engine_size.append(car.text) car= tag.find('li', class_ ='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium') if car is None: car='0' car_gearbox_style.append(car) else: car_gearbox_style.append(car.text) car = tag.find('li', class_ ='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium') if car is None: car='0' car_fuel_type.append(car) else: car_fuel_type.append(car.text) all_car = zip(car_detail, price_lst, car_age,car_style,car_mileage,car_engine_size,car_gearbox_style,car_fuel_type) # Create the pandas DataFrame df = pd.DataFrame(all_car) df.to_excel("car_info.xlsx", index=False, sheet_name='car_info')