A
A
Alex_Kuzen2021-10-31 23:40:26
Python
Alex_Kuzen, 2021-10-31 23:40:26

Why isn't the parser working?

import requests
from bs4 import BeautifulSoup
import csv

URL = 'https://www.olx.kz/transport/moto/alma-ata/'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0',
           'accept': '*/*'}

FILE_CSV = 'motocycles.csv'



def get_html(url,params= None):
    r = requests.get(url, headers= HEADERS,params = params )
    return r


def get_page(html):
    soup = BeautifulSoup(html, 'html.parser')
    pagenation = soup.find_all('span', class_='item')
    if pagenation:
        return int(pagenation[-1].get_text())
    else:
        return 1


def get_content(html):
    soup = BeautifulSoup(html,'html.parser')
    items = soup.find_all('tr',class_= 'wrap')
    moto = []
    for item in items:
        moto.append({
            'title': item.find('h3', class_='lheight22 margintop5').get_text(strip = True),
            'price': item.find('p', class_='price').get_text(strip=True),
            'link': item.find('a', class_='link').get('href'),
            'Adress-time': item.find('td', class_='bottom-cell').get_text(strip = True),

        })

    return moto


def save_csv(items, path):
    with open (path,'w', newline='') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(['Марка', 'Цена', 'Ссылка', 'Адрес-Время'])
        for item in items:
            writer.writerow([item['title'], item['price'],item['link'],item['Adress-time']])

def pars():
    html = get_html(URL)
    if html.status_code == 200:
        motos_while = []
        #moto = get_content(html.text)
        pages = get_page(html.text)
        for page in range(1,pages + 1 ):
            print(f'Парсинг страницы{page} из {pages}')
            html = get_html(URL,params ={'page': page})
            motos_while.extend(get_content(html.text))
        save_csv(motos_while, FILE_CSV)
        print(motos_while)

    else:
        print('error')

pars()


Gives this error
Traceback (most recent call last):
File "C:\Users\AlexK\PycharmProjects\numpy\main.py", line 66, in
pars()
File "C:\Users\AlexK\PycharmProjects\numpy\main.py" , line 60, in pars
save_csv(motos_while, FILE_CSV)
File "C:\Users\AlexK\PycharmProjects\numpy\main.py", line 48, in save_csv
writer.writerow([item['title'], item[' price'],item['link'],item['Adress-time']])
File "C:\Users\AlexK\AppData\Local\Programs\Python\Python39\lib\encodings\cp1251.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u0493' in position 20:character maps to


I have no idea why.

Answer the question

In order to leave comments, you need to log in

1 answer(s)
S
Sergey Gornostaev, 2021-10-31
@Alex_Kuzen

with open (path,'w', newline='', encoding='utf-8' ) as file:

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question