A
A
Andrei Sayevich, 2020-06-02 18:39:29
Python
Andrei Sayevich, 2020-06-02 18:39:29

Garbled characters (mojibake) are displayed when exporting data to .csv. How can this be solved?

Garbled characters (mojibake) are displayed when exporting data to .csv. How can this be solved?

import requests
from bs4 import BeautifulSoup
import csv
import os

# Subaru listing page on cars.av.by.
URL = "https://cars.av.by/subaru"

# Headers that get_html() sends with every request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/83.0.4103.61 Safari/537.36"
    ),
    "Accept": "*/*",
}

# Output path that parse() hands to save_files().
FILE = "cars.csv"

def get_html(url, params=None, timeout=30):
    """Download *url* and return the ``requests.Response``.

    Args:
        url: Address of the page to fetch.
        params: Optional mapping of query-string parameters, forwarded to
            ``requests.get``.
        timeout: Seconds to wait for the server; without a timeout
            ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        The response object. Its ``encoding`` is overwritten with
        ``apparent_encoding`` so that ``.text`` is decoded using the
        encoding detected from the body.
    """
    r = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    # Decode .text with the encoding guessed from the content itself.
    r.encoding = r.apparent_encoding
    return r


def get_pages_count(html):
    """Return the number of result pages shown by the pagination element.

    Args:
        html: Markup of a listing page.

    Returns:
        The total page count read from the ``li.pages-arrows-index``
        element, or ``1`` when that element is absent or has no text.

    Raises:
        ValueError: If the last token of the pagination label is not an
            integer.
    """
    soup = BeautifulSoup(html, "html.parser")
    pagination = soup.find("li", class_="pages-arrows-index")
    if pagination is None:
        return 1
    # The label has the form "<current> из <total>". Taking the last token
    # works for any current page, unlike stripping a literal "1 из " prefix
    # (which would also turn "11 из 15" into "115").
    tokens = pagination.get_text().split()
    if not tokens:
        return 1
    return int(tokens[-1])


def get_content(html):
    """Extract car listings from the markup of one listing page.

    Args:
        html: Markup of a listing page.

    Returns:
        A list with one dict per ``div.listing-item``, holding the keys
        ``title``, ``link``, ``bny``, ``usd`` and the city key exactly as
        ``save_files`` reads it.

    Raises:
        AttributeError: If a listing lacks one of the expected child
            elements (``find`` returns ``None`` for it).
    """
    soup = BeautifulSoup(html, "html.parser")
    cars = []
    for item in soup.find_all("div", class_="listing-item"):
        # Look each container up once instead of once per field.
        title_link = item.find("div", class_="listing-item-title").find("a")
        price = item.find("div", class_="listing-item-price")
        cars.append({
            # Collapse all whitespace runs so the title does not depend on
            # how many spaces the page's markup happens to be indented by.
            "title": " ".join(title_link.get_text().split()),
            "link": title_link.get("href"),
            "bny": price.find("strong").get_text(),
            "usd": price.find("small").get_text() + " $",
            "сity": item.find("div", class_="listing-item-other").find("p").get_text(),
        })
    return cars

def save_files(items, path):
    """Write the scraped cars to a semicolon-delimited CSV file.

    Args:
        items: Iterable of dicts as produced by ``get_content`` (keys
            ``title``, ``link``, ``bny``, ``usd`` and the city key).
        path: Destination file; it is created or overwritten.

    Raises:
        KeyError: If an item lacks one of the expected keys.
    """
    # "utf-8-sig" writes a UTF-8 byte-order mark first. Spreadsheet programs
    # such as Excel use it to recognise the file as UTF-8; without it the
    # Cyrillic text is decoded with a legacy code page and shows as mojibake.
    with open(path, "w", newline="", encoding="utf-8-sig") as file:
        writer = csv.writer(file, delimiter=";")
        writer.writerow(["Марка", "Ссылка", "Цена в BNY", "Цена в $", "Город"])
        writer.writerows(
            [item["title"], item["link"], item["bny"], item["usd"], item["сity"]]
            for item in items
        )


def parse():
    """Interactively scrape a car listing and save it to ``FILE``.

    Prompts for a listing URL, reads the page count from the first
    response, downloads every ``<url>/page/<n>`` page, writes all collected
    cars to ``FILE`` via ``save_files`` and prints the result. Prints
    ``"Error"`` and stops if the first request does not return HTTP 200;
    a later page that does not return 200 is reported and skipped.
    """
    # A lower-case local keeps the module-level URL constant unshadowed.
    # The trailing slash is dropped so "/page/N" never becomes "//page/N".
    url = input("Введите url: ").strip().rstrip("/")
    response = get_html(url)
    print(response.url)
    if response.status_code != 200:
        print("Error")
        return

    cars = []
    pages_count = get_pages_count(response.text)
    for page in range(1, pages_count + 1):
        response = get_html(f"{url}/page/{page}")
        print(f"Парсинг страницы {page} из {pages_count}...{response.url}")
        if response.status_code != 200:
            # Do not feed an error page to the listing parser.
            print("Error")
            continue
        cars.extend(get_content(response.text))

    save_files(cars, FILE)
    print(cars)
    print(f'Получено {len(cars)} автомобилей')


if __name__ == "__main__":
    # Start the interactive scraper only when run as a script, so importing
    # this module does not prompt for input or hit the network.
    parse()

Answer the question

In order to leave comments, you need to log in

2 answer(s)
V
Vladimir Korotenko, 2020-06-02
@firedragon

Add the language to the request. In theory the server should return UTF-8; if it does not, there are packages for re-encoding. It is important to understand what encoding the data is in initially.

S
soremix, 2020-06-02
@SoreMix

The file is created correctly for me:
5ed678974b756984208471.jpeg

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question