Answer the question
In order to leave comments, you need to log in
Working with CSV in Python or importing files?
I welcome everyone!
I am practicing on an Avito parser. The first file parses the ads and saves the information to a CSV file — specifically the "ad name", "price", "publication date" and "link".
The second one is the file responsible for parsing a picture with a phone number and then converting it to a string using the tesseract-ocr library.
I imported the second file into the first, and the problem is how to wire it in there cleanly and efficiently.
So that the first file, when it reaches the first ad, opens it and also parses the picture with the number, with further translation into text, as well as saving it to the same csv plate.
Tell me how to tie it there, or is there an option to work with csv files so that in the second file I can refer to the csv table, where the links to the ads have already been parsed?
Many thanks in advance to all who respond! Please don't throw tomatoes — I'm still green at this :)
The first file.
import requests
from bs4 import BeautifulSoup
import csv
from number import Bot
def get_html(url, timeout=30):
    """Fetch *url* and return the response body as text.

    Args:
        url: The page address to download.
        timeout: Seconds to wait for the server before giving up; without
            it requests.get() can block forever on a dead connection.

    Returns:
        The decoded response body as a string.
    """
    r = requests.get(url, timeout=timeout)
    return r.text
def get_total_pages(html):
    """Return the total number of result pages, read from the pagination bar.

    The last 'pagination-page' link's href carries the highest page number
    as the value of its first query parameter (e.g. '...?p=42&...').
    """
    soup = BeautifulSoup(html, 'lxml')
    pagination = soup.find('div', class_='pagination-pages')
    last_link = pagination.find_all('a', class_='pagination-page')[-1]
    href = last_link.get('href')
    # Take the text between the first '=' and the following '&'.
    page_number = href.split('=')[1].split('&')[0]
    return int(page_number)
def write_csv(data):
    """Append one ad as a semicolon-separated row to avito.csv.

    Args:
        data: Dict with the keys 'title', 'price', 'published' and 'url'.

    newline='' is required by the csv module — without it the writer emits
    an extra blank line between rows on Windows. encoding='utf-8' keeps
    Cyrillic titles intact; errors='ignore' still drops anything the codec
    cannot represent, preserving the original best-effort behavior.
    """
    with open('avito.csv', 'a', newline='', encoding='utf-8', errors='ignore') as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerow((data['title'],
                         data['price'],
                         data['published'],
                         data['url']))
def get_page_data(html):
    """Parse one catalog page and append every matching ad to avito.csv.

    Only ads whose title contains 'бытовка' are kept. Each field falls back
    to an empty string when its element is missing, so one malformed ad
    does not abort the whole page. Catching AttributeError (not a bare
    except) targets exactly the failure mode of a missing tag — find()
    returning None — without hiding unrelated bugs.
    """
    soup = BeautifulSoup(html, 'lxml')
    ads = soup.find('div', class_='js-catalog_serp').find_all('div', class_='item_table')
    for ad in ads:
        # Hoist the shared 'description' lookup instead of re-finding it
        # for the name, title and url fields.
        description = ad.find('div', class_='description')
        name = description.find('h3').text.strip().lower()
        if 'бытовка' not in name:
            continue
        try:
            title = description.find('h3').text.strip()
        except AttributeError:
            title = ''
        try:
            url = 'https://www.avito.ru' + description.find('h3').find('a').get('href').strip()
            print(url)
        except AttributeError:
            url = ''
        try:
            price = ad.find('div', class_='about').text.strip()
            print(price)
        except AttributeError:
            price = ''
        try:
            published = ad.find('div', class_='item-date').text.strip()
            print(published)
        except AttributeError:
            published = ''
        write_csv({'title': title,
                   'price': price,
                   'published': published,
                   'url': url})
def main():
    """Walk every result page of the search query and scrape each one.

    The original code hard-coded the first-page URL as a separate literal
    that duplicated base_url + category_part + page_part + '1'; building it
    from the parts removes the risk of the two copies drifting apart.
    """
    base_url = 'https://www.avito.ru/irkutsk?'
    # URL-encoded query: 'вагончик бытовка'
    category_part = 'q=%D0%B2%D0%B0%D0%B3%D0%BE%D0%BD%D1%87%D0%B8%D0%BA+%D0%B1%D1%8B%D1%82%D0%BE%D0%B2%D0%BA%D0%B0&'
    page_part = 'p='
    # Page 1 doubles as the probe for the total page count.
    first_page = base_url + category_part + page_part + '1'
    total_pages = get_total_pages(get_html(first_page))
    for i in range(1, total_pages + 1):
        url_gen = base_url + category_part + page_part + str(i)
        print(url_gen)
        html = get_html(url_gen)
        get_page_data(html)

if __name__ == '__main__':
    main()
from selenium import webdriver
from time import sleep
from PIL import Image
from pytesseract import image_to_string
class Bot(object):
    """Selenium bot that screenshots an Avito ad's phone-number image and
    OCRs it with tesseract (via pytesseract.image_to_string).

    NOTE(review): the driver is never quit, so each Bot leaks a Firefox
    process; navigate() also runs from __init__, so constructing a Bot
    immediately performs all the browser work.
    """
    def __init__(self):
        # Opens a real Firefox window and immediately starts navigating.
        self.driver = webdriver.Firefox()
        self.navigate()

    def take_screenshot(self):
        """Save a full-page screenshot to avito_screenshot.png."""
        self.driver.save_screenshot('avito_screenshot.png')

    def vagon_recon(self):
        """OCR the cropped phone-number image (vagon.gif) and print the text."""
        image = Image.open('vagon.gif')
        print(image_to_string(image))

    def crop(self, location, size):
        """Crop the phone-number region out of the full screenshot.

        location/size come from selenium's element.location / element.size
        (dicts with x/y and width/height in page pixels), so the crop box
        is (x, y, x + width, y + height).

        NOTE(review): if the page is scrolled, element.location is relative
        to the document, not the screenshot viewport — verify the crop
        lands on the number.
        """
        image = Image.open('avito_screenshot.png')
        x = location['x']
        y = location['y']
        width = size['width']
        height = size['height']
        image.crop((x, y, x + width, y + height)).save('vagon.gif')
        self.vagon_recon()

    def navigate(self):
        """Open an ad page, reveal the phone number, screenshot and OCR it."""
        # TODO: the target URL is empty — fill in the ad link (or pass it
        # in as a parameter) before this can work.
        self.driver.get('')
        # Presumably the "show phone number" button; the class string is a
        # generated one and will break when Avito redeploys — verify.
        button = self.driver.find_element_by_xpath('//button[@class="button-button-2Fo5k button-size-l-3LVJf button-primary-1RhOG width-width-12-2VZLz"]')
        button.click()
        # Give the phone-number image time to render before screenshotting.
        sleep(3)
        self.take_screenshot()
        image = self.driver.find_element_by_class_name('contacts-phone-3KtSI')
        location = image.location
        size = image.size
        self.crop(location, size)
def main():
    """Entry point: constructing the Bot runs the whole navigate/OCR flow."""
    b = Bot()

if __name__ == '__main__':
    main()
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your question
731 491 924 answers to any question