I
I
Ivan Voronin2020-07-19 01:21:17
Python
Ivan Voronin, 2020-07-19 01:21:17

The parser does not change the page, what should I do?

For some reason, the pages do not change, and the program adds the same information to the apt list every time.

import requests
from bs4 import BeautifulSoup
import csv

URL = 'https://www.avito.ru/sankt-peterburg/kvartiry/sdam/2-komnatnye-ASgBAQICAUSSA8gQAUDMCBSQWQ?user=1&f=ASgBAQICAkSSA8gQ8AeQUgFAzAgUkFk'
HEADERS = {'user-agent': '********************************************', 'accept': '*/*'}
FILE = 'apt.csv'

def get_html(url, params=None):
    r = requests.get(url, headers=HEADERS, params=params)
    return r

def get_pages_count(html):
    soup = BeautifulSoup(html, 'html.parser')
    pages = soup.find_all('span', class_='pagination-item-1WyVp')
    if pages:
        return int(pages[-2].get_text())
        print(int(pages[-2].get_text()))
    else:
        return 1

def get_content(html):
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_='snippet-horizontal')
    apt = []
    for item in items:
        try:
            apt.append({
                'adress' : item.find('span', class_='item-address__string').get_text(strip=True),
                'metro' : item.find('span', class_='item-address-georeferences-item__content').get_text(strip=True),
                'cost' : item.find(itemprop='price').get('content'),
                'link' : 'https://www.avito.ru/' + item.find('a', class_='snippet-link').get('href')
            })
        except AttributeError:
            pass
    return apt


def save_file(items, path):
    with open(path, 'w', newline='') as file:
        writer = csv.writer(file, delimiter = ';')
        writer.writerow(['Адрес','Метро','Цена','Ссылка'])
        for item in items:
            writer.writerow([item['adress'],item['metro'],item['cost'],item['link']])

def parse():
    html = get_html(URL)
    if html.status_code == 200:
        apt=[]
        pages_count = get_pages_count(html.text)
        for page in range(1, pages_count + 1):
            print(f'Парсинг страницы {page} из {pages_count}...')
            html = get_html(URL, params={'&p': page})
            apt.extend(get_content(html.text))
        save_file(apt, FILE)
        print(apt)
        print(f'Получено {len(apt)} квартир.')
    else:
        print('ERROR')

parse()

Answer the question

In order to leave comments, you need to log in

1 answer(s)
S
soremix, 2020-07-19
@sabaseera

params={'&p': page}
Most likely here is the problem. Remove the ampersand

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question