Answer the question
In order to leave comments, you need to log in
Why does parsing a page in Python raise an error when writing the results to a CSV file?
from bs4 import BeautifulSoup
import requests
import csv
# Output CSV file name.
CSV = 'cards.csv'
# Site root (available for building absolute links from relative hrefs).
HOST = 'https://zeon18.ru'
# Search URL; the query string is the URL-encoded Russian word "Термопаста" (thermal paste).
URL = 'https://zeon18.ru/page/search/?name=%D0%A2%D0%B5%D1%80%D0%BC%D0%BE%D0%BF%D0%B0%D1%81%D1%82%D0%B0'
# Desktop-browser User-Agent so the site serves the normal HTML page.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19041'
}
def get_html(url, params=None):
    """Fetch *url* with the shared HEADERS and return the requests.Response.

    params: optional dict of query parameters forwarded to requests.get.
            The caller is responsible for checking ``status_code``.
    """
    # BUG FIX: the original default was the literal string 'params', which
    # requests appended verbatim to the query string ("?params") on every
    # call that did not pass params explicitly. None disables that.
    return requests.get(url, headers=HEADERS, params=params)
def get_content(html):
    """Parse one search-results page into a list of product dicts.

    html: raw HTML text of the page.
    Returns a list of {'title': ..., 'brand': ...} dicts, one per
    catalog grid cell that contains both a title link and a price block.
    """
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_='catalog-grid-cell')
    cards = []
    for item in items:
        title = item.find('a', class_='catalog-item-title')
        price = item.find('div', class_='catalog-item-price-main')
        # Robustness fix: find() returns None when the sub-element is
        # missing, and .get_text() on None raised AttributeError.
        # Skip malformed cells instead of crashing the whole parse.
        if title is None or price is None:
            continue
        cards.append({
            'title': title.get_text(),
            'brand': price.get_text(),
        })
    return cards
def save_doc(items, path):
    """Write the parsed cards to a ';'-delimited CSV file.

    items: list of dicts with 'title' and 'brand' keys (see get_content).
    path:  destination file path; the file is overwritten.
    """
    # encoding='utf-8' so the Cyrillic header row is written portably
    # regardless of the platform's default locale encoding.
    with open(path, "w", newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter=';')
        # BUG FIX: the original code called writer.writerow() with no
        # argument, which raises "TypeError: writerow() takes exactly one
        # argument (0 given)" — the error the question asks about.
        writer.writerow(['Термопаста', 'Цена'])
        for item in items:
            writer.writerow([item['title'], item['brand']])
def parser():
    """Ask the user for a page count, scrape that many result pages, save CSV.

    Prompts on stdin, fetches the search URL once to validate connectivity,
    then requests pages 1..N via the 'p' query parameter and writes every
    collected card to the CSV file.
    """
    pages = int(input('Укажите номер: ').strip())
    response = get_html(URL)
    if response.status_code == 200:
        cards = []
        # BUG FIX: range(1, N) parsed only N-1 pages; include page N.
        for page in range(1, pages + 1):
            print(f'Парсим страницу: {page}')
            response = get_html(URL, params={'p': page})
            cards.extend(get_content(response.text))
        save_doc(cards, CSV)
    else:
        print('Error')

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    parser()
Answer the question
In order to leave comments, you need to log in
def save_doc(items, path):
    """Write the parsed cards to a ';'-delimited CSV file.

    items: list of dicts with 'title' and 'brand' keys.
    path:  destination file path; the file is overwritten.
    """
    with open(path, "w", newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter=';')
        # FIX: writer.writerow() with no argument is the source of the
        # reported TypeError — writerow requires exactly one iterable.
        writer.writerow(['Термопаста', 'Цена'])
        for item in items:
            writer.writerow([item['title'], item['brand']])
I would do something like this:
import requests
import pandas as pd
from lxml import html
# Search-results URL; the query is the URL-encoded word "Термопаста".
url = 'https://zeon18.ru/page/search/?name=%D0%A2%D0%B5%D1%80%D0%BC%D0%BE%D0%BF%D0%B0%D1%81%D1%82%D0%B0'
# Fetch the page and parse the HTML into an lxml element tree at import time.
tree = html.fromstring(requests.get(url).text)
def get_elements_by_xpath(xpath):
    """Return the plain-text content of every node matching *xpath* in ``tree``."""
    texts = []
    for node in tree.xpath(xpath):
        text = html.tostring(node, method='text', encoding='unicode', with_tail=False)
        texts.append(text)
    return texts
# Extract product titles and their price values by CSS class via XPath.
titles = get_elements_by_xpath('.//a[@class="catalog-item-title"]')
prices = get_elements_by_xpath('.//div[@class="catalog-item-price-main"]/span[@class="value"]')
# Pair titles with prices positionally and write them to CSV without the index column.
pd.DataFrame(zip(titles, prices), columns=['title', 'price']).to_csv('catalog.csv', index=False)
Didn't find what you were looking for?
Didn't find an answer? Ask a question.
731 491 924 answers to any question