Python invalid literal for int() with base 10: ''?
import requests
from bs4 import BeautifulSoup
from time import sleep

print('Telegram Parser v1.4\nCreator: vk.com/lucifer\nLast update: 07.03.2021\n')
print('\nStarting the bot...\n')

class code():
    def __init__(self):
        with open('cookie.txt', mode='r') as file2:
            cookie = file2.read()
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0",
            "Cookie": cookie
        }
        self.session = requests.session()
        self.page_count = int(self.count_page())
        self.parse_page()

    def zapis_in_file(self, telegram):
        with open('list.txt', mode='a') as file:
            file.write(f'{telegram}\n')

    def parse_tg_from_profile(self, massiv):
        for b in massiv:
            try:
                info2 = self.session.get(b, headers=self.headers).text
                main_2 = BeautifulSoup(info2, 'lxml')
                simp = main_2.find_all('div', class_='count')[1].get_text()
                if int(simp) >= 180:
                    telega = main_2.find_all('a', rel="nofollow noopener")
                    if str(telega) == '[]':
                        pass
                    else:
                        if 'tg:' in str(telega[0]):
                            self.zapis_in_file(telega[0]['href'].split('=')[1])
                            print('{} | {} likes | TG: {}'.format(b, simp, telega[0]['href'].split('=')[1]))
                        elif len(telega) == 2:
                            if 'tg:' in str(telega[1]):
                                self.zapis_in_file(telega[1]['href'].split('=')[1])
                                print('{} | @{}'.format(b, telega[1]['href'].split('=')[1]))
                sleep(0.8)
            except:
                sleep(15)

    def parse_page(self):
        for stranitsa in range(1, self.page_count + 1):
            try:
                info = self.session.get(f'https://lolz.guru/online/?type=registered&page={stranitsa}', headers=self.headers)
                main_2 = BeautifulSoup(info.text, 'lxml')
                links_to_profile = main_2.find_all('a', class_='username StatusTooltip')
                links_lolz = []
                for link in links_to_profile:
                    links_lolz.append('https://lolz.guru/{}'.format(link['href']))
                self.parse_tg_from_profile(links_lolz)
            except:
                sleep(15)

    def count_page(self):
        page_count = self.session.get('https://lolz.guru/online/?type=registered&page=1', headers=self.headers)
        main_text = BeautifulSoup(page_count.text, 'лксмл')
        result = main_text.find_all('a', _class="")[81].get_text()
        print('Got {} pages. Starting to parse:'.format(result))
        return result

code()
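The exception in the title comes from int(self.count_page()) in __init__: count_page() evidently returns an empty string, and int('') raises exactly this ValueError. A minimal illustration, with a hypothetical fallback that is not part of the original code:

raw = ''  # what count_page() effectively returns when the lookup finds nothing
try:
    pages = int(raw)
except ValueError as e:
    print(e)   # invalid literal for int() with base 10: ''
    pages = 1  # hypothetical fallback so the rest of the script can still run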
With cookies disabled and the User-Agent changed to "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36", this method gives the following result:
def count_page(self):
    page_count = self.session.get('https://lolz.guru/online/?type=registered&page=1', headers=self.headers)
    print(page_count.text)
<!doctype html><html><head><script src="/process-qv9ypsgmv9.js"></script></head><body><script>window.onload=function(){process();}</script><noscript><p>Please enable JavaScript and Cookies in your browser.</p></noscript></body></html>
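That response is not the forum page at all but a JavaScript/cookie challenge, so BeautifulSoup never sees the real markup. A hedged check that could go inside count_page before parsing (the marker string is taken from the response above and may vary):

resp = self.session.get('https://lolz.guru/online/?type=registered&page=1', headers=self.headers)
if 'Please enable JavaScript and Cookies' in resp.text:
    raise RuntimeError('Anti-bot challenge page received; the Cookie header is probably missing or expired')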
import requests
from multiprocessing.dummy import Pool as ThreadPool

url = 'https://lolz.guru/online/?type=registered&page='
# Don't forget to attach the headers and cookies, otherwise it definitely won't work
urls = [url + str(i) for i in range(1, 8)]  # from the first page to the maximum page
print(urls)

def get_url(url):
    r = requests.get(url)
    print(r.text)

pool = ThreadPool(20)  # Number of threads: usually the number of cores; you can set more, but it will still effectively run at the number of cores
results = pool.map(get_url, urls)
pool.close()
pool.join()
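As the comment above says, the headers and the cookie still have to be attached; one possible way, reusing cookie.txt and the User-Agent from the question (an untested sketch that plugs into the get_url used by pool.map):

import requests

with open('cookie.txt') as f:
    cookie = f.read()

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0',
    'Cookie': cookie,
}

def get_url(url):
    # same headers for every threaded request
    r = requests.get(url, headers=headers, timeout=15)
    print(url, r.status_code)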
1) As an aside, keep the code in Python's native alphabet: it will work once the string 'лксмл' is replaced with 'lxml'.
2)
main_text = BeautifulSoup(page_count.text,'лксмл')
result = main_text.find_all('a',_class="")[81].get_text()
I have an IndexError here, most likely due to the fact that the cookie.txt file is empty. This is not a very good method of finding links.
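Putting both points together, a possible rewrite of count_page: the parser name in Latin letters ('lxml'), class_ instead of _class (BeautifulSoup maps class_ to the HTML class attribute, while _class filters on an attribute that does not exist and returns an empty list), and a length check so a failed lookup raises a clear error instead of turning into int('') later. The selector and the hard-coded index are kept from the question and remain fragile; this is a sketch, not tested against the real page:

def count_page(self):
    resp = self.session.get('https://lolz.guru/online/?type=registered&page=1', headers=self.headers)
    soup = BeautifulSoup(resp.text, 'lxml')  # parser name in Latin letters
    links = soup.find_all('a', class_='')    # class_, not _class
    if len(links) <= 81:
        raise RuntimeError('Pagination link not found; check cookie.txt and whether the challenge page was returned')
    result = links[81].get_text().strip()
    print('Got {} pages. Starting to parse:'.format(result))
    return result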