Answer the question
In order to leave comments, you need to log in
Garbled characters (mojibake) when exporting data to .csv. How to solve?
Garbled characters (mojibake) when exporting data to .csv. How to solve?
import requests
from bs4 import BeautifulSoup
import csv
import os
# Example listing URL. parse() asks the user for a URL and does not read this constant.
URL = "https://cars.av.by/subaru"
# HTTP headers sent by get_html() with every request (browser-like User-Agent, any content type).
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" , "Accept": "*/*"}
# Path of the CSV file that parse() passes to save_files().
FILE = "cars.csv"
def get_html(url, params=None):
    """Download ``url`` using the module-level ``HEADERS`` and return the response.

    Args:
        url: Address of the page to request.
        params: Optional mapping of query-string parameters. It is forwarded
            to ``requests.get``; ``None`` (the default) adds nothing to the
            URL.

    Returns:
        The ``requests`` response object. Its ``encoding`` is overwritten
        with ``apparent_encoding`` (the encoding guessed from the body), so
        ``response.text`` is decoded with the detected charset.
    """
    # Forward ``params`` so that query arguments supplied by the caller
    # actually reach the request instead of being silently dropped.
    response = requests.get(url, headers=HEADERS, params=params)
    response.encoding = response.apparent_encoding
    return response
def get_pages_count(html):
    """Return the number of result pages announced by the pagination element.

    The first ``<li class="pages-arrows-index">`` element is looked up; its
    text has the ``"1 из "`` prefix removed and the remainder is converted
    to ``int``. When no such element exists, 1 is returned.
    """
    page_index = BeautifulSoup(html, "html.parser").find(
        "li", class_="pages-arrows-index"
    )
    if page_index is None:
        # No pagination element: treat the listing as a single page.
        return 1
    label = page_index.get_text()
    return int(label.replace("1 из ", ""))
def get_content(html):
    """Extract one dict per ``div.listing-item`` found in ``html``.

    Each dict carries the keys ``title``, ``link``, ``bny``, ``usd`` and
    ``сity``, read from the item's title, price and "other" sub-blocks.
    A missing sub-element raises ``AttributeError``.
    """

    def describe(card):
        # Title text and link both come from the same anchor element.
        anchor = card.find("div", class_="listing-item-title").find("a")
        title = anchor.get_text().replace("\n ","").replace(" ","")
        link = anchor.get("href")

        # Both prices live inside the same price container.
        price_box = card.find("div", class_="listing-item-price")
        bny = price_box.find("strong").get_text()
        usd = price_box.find("small").get_text() + " $"

        city = card.find("div", class_="listing-item-other").find("p").get_text()
        return {
            "title": title,
            "link": link,
            "bny": bny,
            "usd": usd,
            "сity": city,
        }

    document = BeautifulSoup(html, "html.parser")
    return [describe(card) for card in document.find_all('div', class_="listing-item")]
def save_files(items, path):
    """Write the scraped cars to a semicolon-delimited CSV file at ``path``.

    Args:
        items: Iterable of dicts with the keys ``title``, ``link``, ``bny``,
            ``usd`` and ``сity``.
        path: Destination file; it is created or overwritten.

    The file is encoded as ``utf-8-sig`` (UTF-8 with a leading byte-order
    mark). Without the BOM, spreadsheet programs such as Excel on Windows
    read the file in the legacy ANSI code page and display the Cyrillic
    header and values as garbled characters.
    """
    with open(path, "w", newline="", encoding="utf-8-sig") as file:
        writer = csv.writer(file, delimiter=";")
        writer.writerow(["Марка", "Ссылка", "Цена в BNY", "Цена в $", "Город"])
        writer.writerows(
            [item["title"], item["link"], item["bny"], item["usd"], item["сity"]]
            for item in items
        )
def parse():
    """Prompt for a listing URL, scrape every result page and export the cars.

    The entered URL is fetched once to check the status code and to read the
    page count. Each page (including page 1) is then requested as
    ``<url>/page/<n>``, its items are collected, and the combined list is
    written to ``FILE`` via ``save_files``. Any first-response status other
    than 200 prints ``"Error"`` and nothing is saved.
    """
    base_url = input("Введите url: ").strip()
    first_response = get_html(base_url)
    print(first_response.url)

    if first_response.status_code != 200:
        print("Error")
        return

    collected = []
    total_pages = get_pages_count(first_response.text)
    for page in range(1, total_pages + 1):
        page_response = get_html(base_url + f'/page/{page}')
        print(f"Парсинг страницы {page} из {total_pages}...{page_response.url}")
        collected.extend(get_content(page_response.text))

    save_files(collected, FILE)
    print(collected)
    print(f'Получено {len(collected)} автомобилей')
# Start the interactive scrape. The call is unguarded, so it also runs when the module is imported.
parse()
Answer the question
In order to leave comments, you need to log in
Add the language to the request. In theory the server should return UTF-8; if it does not, there are packages for re-encoding the text. It is important to understand what the original encoding is.
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question