Answer the question
In order to leave comments, you need to log in
How to keep track of changes on the site?
The task: I need to be the first to see a newly posted ad on Avito.
I don't really know Python, but I still managed to put something together on my own.
This is where I got stuck: how do I now detect when a new ad appears? (This is a matter of seconds: I need to get the data of a newly posted ad within a few seconds of it appearing.)
Which direction should I look in?
Here is the code:
import requests
from bs4 import BeautifulSoup
# Listing page that parse() downloads and scans for ads.
URL = 'https://www.avito.ru/novosibirsk/kvartiry/prodam-ASgBAgICAUSSA8YQ?cd=1&f=ASgBAQICAUSSA8YQAUCQvg0Ulq41&proprofile=1&s=104'
# Headers sent with every request made by get_html(); the user-agent string
# is that of a desktop Chrome browser.
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36', 'accept': '*/*'}
# Site root, prepended to the href taken from each ad to build its 'link'.
HOST = 'https://www.avito.ru'
def get_html(url, params=None, timeout=10):
    """Fetch *url* using the module-level ``HEADERS``.

    Args:
        url: Address to request.
        params: Optional query-string parameters, passed through to
            ``requests.get`` unchanged.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which is fatal for a script meant to poll a page.

    Returns:
        The ``requests.Response`` object; callers inspect ``status_code``
        and ``text`` themselves.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def get_content(html):
    """Parse the first ad block found in *html*, print it and return it.

    Only the first ``div.item__line`` element is processed. The extracted
    fields are collected into a one-element list of dicts with the keys
    ``price``, ``rooms``, ``meters``, ``floor``, ``street``, ``house`` and
    ``link``.

    Args:
        html: Markup of a listing page as a string.

    Returns:
        The list that was printed: one dict for the first ad, or an empty
        list when the page contains no ``div.item__line`` element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    item = soup.find('div', class_='item__line')
    aparts = []

    if item is None:
        # find() returns None when nothing matches (empty result page,
        # changed markup, anti-bot stub); report "no ads" instead of
        # crashing with AttributeError on the lookups below.
        print(aparts)
        return aparts

    # Look up each element once instead of re-querying the tree per field.
    title = item.find('span', class_='snippet-link-name').get_text(strip=True)
    address = item.find('span', class_='item-address__string').get_text(strip=True)

    # Positions of the first 'м' and first '/' in the title; the area ends
    # just before the former, the floor sits between the two.
    m_pos = title.find('м')
    slash_pos = title.find('/')

    # NOTE(review): the fixed offsets 14 and 17 presumably match titles such
    # as '2-к квартира, 54 м², 3/9 эт.' and 'Квартира-студия, 25 м², ...' -
    # confirm against live markup.
    rooms = title[:1]
    if rooms == 'К':
        # A title starting with 'К' is reported as a one-room flat.
        rooms = '1'
        meters = title[17:m_pos - 1]
    else:
        meters = title[14:m_pos - 1]

    address_parts = address.split(', ')
    if address.startswith('у'):
        # Address begins with the street ('ул. ...'): it is the first part.
        street = address_parts[0].replace('ул. ', '')
    else:
        # Otherwise the street is the part before the house number. Fall
        # back to the only part when there is no ', ' separator, so a short
        # address does not raise IndexError.
        if len(address_parts) >= 2:
            street_part = address_parts[-2]
        else:
            street_part = address_parts[0]
        street = street_part.replace(' ул.', '')

    house = address.replace('д. ', '').replace('стр. ', '')

    aparts.append({
        'price': item.find('span', class_='snippet-price').get_text(strip=True).replace(' ₽', '').replace(' ', '')[:-3],
        'rooms': rooms,
        'meters': meters,
        'floor': title[m_pos + 4:slash_pos],
        'street': street,
        'house': house.split(', ')[-1],
        'link': HOST + item.find('a', class_='snippet-link').get('href'),
    })
    print(aparts)
    # Returned as well as printed so a caller can compare successive polls.
    return aparts
def parse():
    """Download the page at ``URL`` and hand its markup to ``get_content``.

    Prints ``'Error'`` instead when the response status is anything other
    than 200.
    """
    response = get_html(URL)
    if response.status_code != 200:
        print('Error')
        return
    get_content(response.text)
if __name__ == '__main__':
    # Run the scraper only when the file is executed as a script, so that
    # importing it does not trigger a network request.
    parse()
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your question · Ask a Question
731 491 924 answers to any question