Answer the question
In order to leave comments, you need to log in
The problem of moving from page to page when parsing?
Here is the code:
import requests
from bs4 import BeautifulSoup
import csv
import os
URL = 'https://rezka.ag/?filter=last&genre=1'
HEADERS = {'user-agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'}
FILE = 'muvies.csv'
user_param_y = int(input('Введите год:\n '))
user_param_j = input('Введите жанр. Пример: Комедии\n ')
pages = int(input('Введите кол-во страниц:\n'))
FILE = 'musics.csv'
def get_html(url, params = None):
r = requests.get(url, headers=HEADERS, params=params)
return r
def get_content(html):
soup = BeautifulSoup(html, 'html.parser')
items = soup.find_all('div', class_='b-content__inline_item')
muvies = []
for item in items:
parametr = item.find('div', class_='b-content__inline_item-link').find_next('div').get_text()
name1 = item.find('div', class_='b-content__inline_item-link').find_next('a').get_text()
parametr1 = parametr.split(', ')
muvie_y = int(parametr1[0])
muvie_j = parametr1[2]
if user_param_y == muvie_y and user_param_j == muvie_j :
muvies.append({
'name': name1,
'param' : parametr,
})
return muvies
def save_file(items, path):
with open(path, 'w', newline='', encoding='utf-8') as file:
writer = csv.writer(file, delimiter=';')
writer.writerow(['Год', 'Данные'])
for item in items:
writer.writerow([item['name'], item['param']])
def parse():
html = get_html(URL)
if html.status_code == 200:
muviee = []
for page in range(1, pages + 1):
print(f'Парсинг страницы {page} из {pages}...')
html = get_html(URL, params={'page': page})
muviee.extend(get_content(html.text))
print(f'Получено {len(muviee)} фильмов')
print(muviee)
#save_file(muviee, FILE)
#os.startfile(FILE)
else:
print('Error')
parse()
Answer the question
In order to leave comments, you need to log in
The transition through the pages is carried out not through the "page" parameter, but by editing the uri.
def parse():
url = 'https://rezka.ag/page/{}/?filter=last&genre=1'
muviee = []
for page in range(1, pages+1):
print('Парсинг страницы', page, 'из', pages)
r = get_html(url.format(page))
if r.status_code == 200:
muviee.extend(get_content(r.text))
print('Получено', len(muviee), 'фильмов')
else:
print('Error')
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question