Answer the question
In order to leave comments, you need to log in
How to fix a problem in Avito parsing?
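I ran into a problem while parsing Avito: every request sent through my proxies either fails with a proxy/connection error or comes back with HTTP 403. The code and its log output are below.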
from tqdm import tqdm
from bs4 import BeautifulSoup
import asyncio
from scripts import config, search, excel, headers, asyncfetch, Files
import os, sys
import time, random
import requests
class Counter(object):
    """Minimal mutable integer counter.

    The current value is exposed directly as the ``counter`` attribute.
    """

    def __init__(self, startingValue: int) -> None:
        """Start the counter at ``startingValue``."""
        self.counter = startingValue

    def increaseCounter(self) -> None:
        """Increment the stored value by one, in place."""
        self.counter += 1
async def makeRequest(f: asyncfetch.Fetch, url: str, mode: str, maxAttempts=None):
    """Fetch ``url`` through randomly chosen proxies until a 200 status is seen.

    Every attempt picks a random entry from the module-level ``proxyList`` and
    a random entry from ``headers.headers`` and hands them to ``f.fetch``. The
    outcome of each attempt is printed with a timestamp formatted according to
    ``config.settings['timeFormat']``. Any non-200 status (for example 403) and
    any exception raised by the fetch lead to another attempt with a freshly
    chosen proxy; there is no delay between attempts.

    Args:
        f: Fetcher whose awaited ``fetch(url=, mode=, headers=, proxy=)``
            result is indexed as ``result[0]`` (returned to the caller) and
            ``result[1]`` (compared against 200).
        url: Address to request.
        mode: Forwarded unchanged to ``f.fetch``.
        maxAttempts: Optional upper bound on the number of attempts. ``None``
            (the default) retries without limit, as before.

    Returns:
        ``result[0]`` of the first attempt whose ``result[1]`` equals 200, or
        ``None`` if ``maxAttempts`` attempts were made without success.

    Raises:
        ValueError: If ``proxyList`` is empty.
        NameError: If ``proxyList`` has not been defined (it is only assigned
            when the file is run as a script).
        IndexError: If ``headers.headers`` is empty.
    """
    # Checked outside the try block on purpose: inside it, a missing or empty
    # proxy list would be swallowed by ``except Exception`` and, because no
    # ``await`` is ever reached, the loop would spin forever and starve the
    # event loop.
    if not proxyList:
        raise ValueError('proxyList is empty; cannot choose a proxy')

    attempts = 0
    while maxAttempts is None or attempts < maxAttempts:
        attempts += 1
        # Same reasoning as above: selection errors must surface, not loop.
        cProxy = 'http://{}'.format(random.choice(proxyList))
        requestHeaders = random.choice(headers.headers)
        try:
            result = await f.fetch(url=url, mode=mode, headers=requestHeaders, proxy=cProxy)
            print(time.strftime(config.settings['timeFormat'], time.localtime()),
                  'Made request. Status: {}'.format(result[1]))
            if result[1] == 200:
                return result[0]
        except Exception as e:
            # Best effort: a broken proxy is expected, so log it and move on
            # to the next randomly chosen one.
            print(time.strftime(config.settings['timeFormat'], time.localtime()),
                  'Made request. Status: Error. {}'.format(e))
    return None
async def findMaxPage(f: asyncfetch.Fetch, url: str, mode: str):
    """Request ``url`` and return whatever ``makeRequest`` returns for it.

    NOTE(review): despite the name, no page number is extracted here; the
    fetched result is returned unchanged. Presumably the parsing step is still
    to be written - confirm with the author.

    Args:
        f: Fetcher forwarded to ``makeRequest``.
        url: Address to request.
        mode: Forwarded to ``makeRequest`` as ``mode``.

    Returns:
        The value returned by ``makeRequest(f, url, mode=mode)``.
    """
    return await makeRequest(f, url, mode=mode)
# Module-level shared state: two counters, both starting at 1, and the single
# fetcher instance that main() passes to findMaxPage().
pageCounter, itemsCounter = Counter(1), Counter(1)
f = asyncfetch.Fetch()
async def main():
    """Fetch the Avito front page through the module-level fetcher ``f``.

    The result is bound to ``maxPage`` but not used further yet; the debugging
    print below is left disabled as in the original script.
    """
    maxPage = await findMaxPage(f, 'https://www.avito.ru/', mode='text')
    # print(maxPage)
if __name__ == '__main__':
    # Script entry point: load the proxy list, configure asyncio, run main()
    # to completion and report the output path.
    print(time.strftime(config.settings['timeFormat'], time.localtime()), 'Started.')
    # NOTE(review): this format has weekday, month and year but no day of the
    # month - confirm that is intended for the output file name.
    finishTime = time.strftime('%a-%b-%Y-%H-%M', time.localtime())
    print(time.strftime(config.settings['timeFormat'], time.localtime()), 'Initializing...')
    # makeRequest() reads this module-level name when choosing a proxy.
    proxyList = Files.Files.readFile(config.PROXIES)
    # WindowsSelectorEventLoopPolicy is only defined on Windows; referencing it
    # unconditionally raises AttributeError on every other platform.
    if sys.platform == 'win32':
        policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(policy)
    loop = asyncio.get_event_loop()
    print(time.strftime(config.settings['timeFormat'], time.localtime()), 'Initialized.')
    loop.run_until_complete(main())
    print(time.strftime(config.settings['timeFormat'], time.localtime()), 'Authorization done.')
    # links = [
    # search.link + '&p=' + str(i) for i in range(1, int(maxPage) + 1)
    # ]
    # print(links)
    path = os.path.join(os.getcwd(), 'avito-{}.csv'.format(finishTime))
    items = []
    print(time.strftime(config.settings['timeFormat'], time.localtime()), 'Finished. {}'.format(path))
[17:36:41] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://122.232.151.67:8085')
[17:36:42] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://36.22.141.194:8908')
[17:36:45] Made request. Status: Bad proxy. Cannot connect to host 49.82.27.184:8089 ssl:default [Connect call failed ('49.82.27.184', 8089)]
[17:36:46] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://36.22.163.47:8908')
[17:37:07] Made request. Status: Bad proxy. Cannot connect to host 113.11.20.187:8080 ssl:default [Connect call failed ('113.11.20.187', 8080)]
[17:37:14] Made request. Status: Bad proxy. Cannot connect to host www.avito.ru:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1124)')]
[17:37:14] Made request. Status: Bad proxy. Server disconnected
[17:37:16] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://175.17.182.45:8085')
[17:37:18] Made request. Status: 403
[17:37:19] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://60.169.221.188:8908')
[17:37:26] Made request. Status: Bad proxy. [WinError 10054] An existing connection was forcibly closed by the remote host
[17:37:40] Made request. Status: 403
[17:38:14] Made request. Status: 403
[17:38:15] Made request. Status: Bad proxy. [WinError 10054] An existing connection was forcibly closed by the remote host
[17:38:16] Made request. Status: 403
[17:38:17] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://199.19.225.250:80')
[17:38:38] Made request. Status: Bad proxy. Cannot connect to host 114.233.70.65:9000 ssl:default [Connect call failed ('114.233.70.65', 9000)]
[17:38:44] Made request. Status: 403
[17:38:50] Made request. Status: 403
[17:38:57] Made request. Status: 403
[17:38:58] Made request. Status: 403
[17:38:59] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://199.19.225.250:80')
[17:39:20] Made request. Status: Bad proxy. Cannot connect to host 27.153.140.36:23180 ssl:default [Connect call failed ('27.153.140.36', 23180)]
[17:39:20] Made request. Status: Bad proxy. 400, message='Bad Request', url=URL('http://115.46.81.147:8085')
[17:39:42] Made request. Status: Bad proxy. Cannot connect to host 89.140.125.17:80 ssl:default [Connect call failed ('89.140.125.17', 80)]
[17:39:42] Made request. Status: 403
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your question | Ask a Question
731 491 924 answers to any question