Parsing a page in python gives an error when creating a csv file?

I

Ivan Koryakin2020-11-10 16:43:21

Python

Ivan Koryakin, 2020-11-10 16:43:21

from bs4 import BeautifulSoup
import requests
import csv

CSV = 'cards.csv'
HOST = 'https://zeon18.ru'
URL = 'https://zeon18.ru/page/search/?name=%D0%A2%D0%B5%D1%80%D0%BC%D0%BE%D0%BF%D0%B0%D1%81%D1%82%D0%B0'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19041'
}
def get_html(url, params = 'params'):
    r = requests.get(url, headers = HEADERS, params = params)
    return r

def get_content(html):
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_='catalog-grid-cell')

    cards = []
    
    for item in items:
        cards.append(
            {
            'title':item.find('a', class_ = 'catalog-item-title').get_text(),
            'brand':item.find('div', class_ = 'catalog-item-price-main').get_text()
     
            }
        )
    return cards

def save_doc(items, path):
    with open(path, "w", newline='') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow()
        writer.writerow(['Термопаста', 'Цена'])
        for item in items:
            writer.writerow([item['title'], item['brand']])

def parser():
    PAGENATION = input('Укажите номер: ')
    PAGENATION = int(PAGENATION.strip())
    html = get_html(URL)
    if html.status_code == 200:
        cards = []
        for p in range(1, PAGENATION):
            print(f'Парсим страницу:  {p}')
            html = get_html(URL, params={'p' : p})
            cards.extend(get_content(html.text))
            save_doc(cards, CSV)
        pass
    else: 
        print('Error')
    
parser()

<Error>
Traceback (most recent call last):
File "C:\python\main.py", line 53, in
parser()
File "C:\python\main.py", line 48, in parser
save_doc(cards , CSV)
File "C:\python\main.py", line 33, in save_doc
writer.writerow()
TypeError: writer.writerow() takes exactly one argument (0 given)
Error>

Reply

Answer the question

In order to leave comments, you need to log in

2 answer(s)

S

sswwssww, 2020-11-10
@valera228822

def save_doc(items, path):
    with open(path, "w", newline='') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow()
        writer.writerow(['Термопаста', 'Цена'])
        for item in items:
            writer.writerow([item['title'], item['brand']])

- from here, remove the extra call to writer.writerow()
"PermissionError: [Errno 13] Permission denied: 'cards.csv'" - judging by the error, you do not have enough permissions to write. Try running the script as an administrator, or change the path where you write the file to one that does not require special rights.

A

Andrey Dugin, 2020-11-13
@adugin

I would do something like this:

import requests
import pandas as pd
from lxml import html

url = 'https://zeon18.ru/page/search/?name=%D0%A2%D0%B5%D1%80%D0%BC%D0%BE%D0%BF%D0%B0%D1%81%D1%82%D0%B0'

tree = html.fromstring(requests.get(url).text)

def get_elements_by_xpath(xpath):
    return [
        html.tostring(element, method='text', encoding='unicode', with_tail=False)
        for element in tree.xpath(xpath)            
    ]

titles = get_elements_by_xpath('.//a[@class="catalog-item-title"]')
prices = get_elements_by_xpath('.//div[@class="catalog-item-price-main"]/span[@class="value"]')

pd.DataFrame(zip(titles, prices), columns=['title', 'price']).to_csv('catalog.csv', index=False)