Answer the question
In order to leave comments, you need to log in
How to eliminate parsing of the same picture?
Hello!
I wrote a code for parsing url-images, but the problem is that the same image is parsed. As I understand it, the value of index does not change when parsing? How can this be fixed? Tell me please.
import requests
from bs4 import BeautifulSoup
import json
URL = 'https://www.luscious.net/albums/chikan-densha_268925/read/?index=0&view=slideshow&sorting=rating_all_time'
HEADERS = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'user-agent': '...'
}
def get_html(url, params = ' '):
r = requests.get(url, headers = HEADERS, params = params)
return r
def get_content(html):
soup = BeautifulSoup(html, 'html.parser')
items = soup.find_all('div', class_ = 'o-flex-column-center')
img_url = []
for item in items:
img_url.append(item.find('div', class_ = 'o-flex-center picture-frame-wrapper').find('img').get('src')) # получение url картинки
return img_url
def parser():
PAGENATION = input('Введите кол-во картинок: ') # ввод до какого index идёт парсинг
PAGENATION = int(PAGENATION.strip())
html = get_html(URL)
if html.status_code == 200:
img_url = []
for index in range(1, PAGENATION):
print(f'Парсим {index} картинку')
html = get_html(URL, params= {'index': index}) # получение html и смена значения index
img_url.extend(get_content(html.text))
with open('test.txt', 'w') as f: # запись результата
f.write(json.dumps(img_url))
else:
print('error')
parser()
Answer the question
In order to leave comments, you need to log in
And what exactly should you do here
He also add one more index parameter to the URL
html = get_html(URL, params= {'index': index})
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question