Answer the question
In order to leave comments, you need to log in
Bs4 stopped seeing text attribute?
Everything was fine, the script parsed the site normally, but once again refused to work with words
line = pagination.text
AttributeError: 'NoneType' object has no attribute 'text'
import requests
from bs4 import BeautifulSoup
import os
import csv
from datetime import datetime
start_time = datetime.now()
URL = 'https://www.avito.ru/murmanskaya_oblast/avtomobili/mitsubishi-ASgBAgICAUTgtg3ymCg?cd=1'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/81.0.4044.138 Safari/537.36', 'accept': '*/*'}
HOST = 'https://www.avito.ru'
FILE = 'Cars.svc'
def get_html(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
return r
def get_pages_count(html):
soup = BeautifulSoup(html, 'html.parser')
pagination = soup.find('div', class_='pagination-root-2oCjZ')
line = pagination.text
p_count = int(line[-8])
if p_count > 1:
return p_count
else:
return 1
def get_content(html):
soup = BeautifulSoup(html, 'html.parser')
items = soup.find_all('div', class_='snippet-horizontal item item_table clearfix js-catalog-item-enum'
' item-with-contact js-item-extended')
cars = []
for item in items:
cars.append({
'Cars': item.find('a', class_='snippet-link').get_text(),
'Settings': item.find('div', class_='specific-params specific-params_block').get_text().replace('\n ', ''),
'Price': item.find('span', class_='snippet-price').get_text(strip=True).replace('\n ', ''),
'City': item.find('span', class_='item-address-georeferences-item__content').get_text(),
'Link': HOST + item.find('a', class_='snippet-link').get('href'),
})
return cars
def save_file(items, path):
with open(path, 'w', newline='', encoding='UTF-8') as file:
writer = csv.writer(file, delimiter=';')
writer.writerow(['Авто', 'Параметры', 'Цена', 'город', 'ссылка'])
for item in items:
writer.writerow([item['Cars'], item['Settings'], item['Price'], item['City'], item['Link']])
def parse():
html = get_html(URL)
if html.status_code == 200:
cars = []
pages_count = get_pages_count(html.text)
for page in range(1, pages_count):
print(f'Парсинг старницы {page} из {pages_count}...')
html = get_html(URL, params={"p": page})
cars.extend(get_content(html.text))
save_file(cars, FILE)
print(cars)
os.startfile(FILE)
else:
print('Error')
parse()
print(datetime.now() - start_time)
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question