B
B
Banki002020-05-17 00:58:06
Python
Banki00, 2020-05-17 00:58:06

Bs4 stopped seeing text attribute?

Everything was fine, the script parsed the site normally, but once again refused to work with words

line = pagination.text
AttributeError: 'NoneType' object has no attribute 'text'


Restart didn't help...

import requests
from bs4 import BeautifulSoup
import os
import csv
from datetime import datetime


start_time = datetime.now()

URL = 'https://www.avito.ru/murmanskaya_oblast/avtomobili/mitsubishi-ASgBAgICAUTgtg3ymCg?cd=1'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/81.0.4044.138 Safari/537.36', 'accept': '*/*'}
HOST = 'https://www.avito.ru'
FILE = 'Cars.svc'


def get_html(url, params=None):
    r = requests.get(url, headers=HEADERS, params=params)
    return r


def get_pages_count(html):
    soup = BeautifulSoup(html, 'html.parser')
    pagination = soup.find('div', class_='pagination-root-2oCjZ')
    line = pagination.text
    p_count = int(line[-8])
    if p_count > 1:
        return p_count
    else:
        return 1


def get_content(html):
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_='snippet-horizontal item item_table clearfix js-catalog-item-enum'
                                        ' item-with-contact js-item-extended')

    cars = []
    for item in items:
        cars.append({
            'Cars': item.find('a', class_='snippet-link').get_text(),
            'Settings': item.find('div', class_='specific-params specific-params_block').get_text().replace('\n ', ''),
            'Price': item.find('span', class_='snippet-price').get_text(strip=True).replace('\n ', ''),
            'City': item.find('span', class_='item-address-georeferences-item__content').get_text(),
            'Link': HOST + item.find('a', class_='snippet-link').get('href'),
        })

    return cars


def save_file(items, path):
    with open(path, 'w', newline='', encoding='UTF-8') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(['Авто', 'Параметры', 'Цена', 'город', 'ссылка'])
        for item in items:
            writer.writerow([item['Cars'], item['Settings'], item['Price'], item['City'], item['Link']])


def parse():
    html = get_html(URL)
    if html.status_code == 200:
        cars = []
        pages_count = get_pages_count(html.text)
        for page in range(1, pages_count):
            print(f'Парсинг старницы {page} из {pages_count}...')
            html = get_html(URL, params={"p": page})
            cars.extend(get_content(html.text))
        save_file(cars, FILE)
        print(cars)
        os.startfile(FILE)
    else:
        print('Error')


parse()
print(datetime.now() - start_time)

Answer the question

In order to leave comments, you need to log in

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question