How to eliminate parsing of the same picture?

N

NewUser99012020-12-17 14:32:08

Python

NewUser9901, 2020-12-17 14:32:08

Hello!
I wrote a code for parsing url-images, but the problem is that the same image is parsed. As I understand it, the value of index does not change when parsing? How can this be fixed? Tell me please.

import requests
from bs4 import BeautifulSoup
import json

URL = 'https://www.luscious.net/albums/chikan-densha_268925/read/?index=0&view=slideshow&sorting=rating_all_time' 
HEADERS = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'user-agent': '...'
}

def get_html(url, params = ' '):
    r = requests.get(url, headers = HEADERS, params = params)
    return r

def get_content(html):
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_ = 'o-flex-column-center')
    img_url = []
    for item in items:
        img_url.append(item.find('div', class_ = 'o-flex-center picture-frame-wrapper').find('img').get('src')) # получение url картинки

    return img_url

def parser():
    PAGENATION = input('Введите кол-во картинок: ') # ввод до какого index идёт парсинг
    PAGENATION = int(PAGENATION.strip())
    html = get_html(URL)
    if html.status_code == 200:
        img_url = []
        for index in range(1, PAGENATION):
            print(f'Парсим {index} картинку')
            html = get_html(URL, params= {'index': index}) # получение html и смена значения index
            img_url.extend(get_content(html.text))
        with open('test.txt', 'w') as f: # запись результата
            f.write(json.dumps(img_url))
    else:
        print('error')

parser()

Reply

Answer the question

In order to leave comments, you need to log in

1 answer(s)

D

Denis Melnikov, 2020-12-17
@Mi11er

And what exactly should you do here He also add one more index parameter to the URL
html = get_html(URL, params= {'index': index})