I am scraping phone numbers from olx.kz using uBlock Origin; after a while uBlock starts blocking the number reveal. What should I do?

K

kopelev2000, 2020-02-01 21:16:02

Python

kopelev2000, 2020-02-01 21:16:02

I am scraping phone numbers from olx.kz and use uBlock Origin so that the captcha and similar pop-ups do not appear. After a certain period of time (about 3-4 pages), uBlock starts blocking the phone numbers from being revealed. What can I do about it?
The code:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

# Output file, opened in append mode: get_content() writes one "Тел: ..." line
# to it for every phone number it manages to read.
f = open('text-for-OLX.txt', 'a', encoding='utf8')
# Input file: every line is used as a starting URL for one scraping session.
urls = open("input.txt", "r")

# NOTE: all helper functions are defined inside this loop body, so they are
# re-created for every URL and read the current `url` as a free variable.
# Lines yielded by a text file keep their trailing newline.
for url in urls:

    def get_url(driver):
        """Open the current starting URL in the browser and wait for it to load.

        Reads ``url`` from the enclosing ``for url in urls`` loop.

        Args:
            driver: Selenium WebDriver instance used for navigation.
        """
        # `url` is a raw line from the input file and still carries its line
        # terminator; strip it so the browser receives a clean address.
        driver.get(url.strip())
        print("GOT URL")
        # Fixed pause to give the page time to finish rendering.
        time.sleep(3)


    def press_cookie_btn(driver):
        """Click the close link of the cookie bar, then pause briefly.

        Any exception raised by the element lookup or the click propagates to
        the caller.

        Args:
            driver: Selenium WebDriver instance with the listing page loaded.
        """
        close_link_xpath = (
            "//div[@class='topinfo rel']"
            "/a[@class='cookiesBarClose abs close']"
        )
        driver.find_element_by_xpath(close_link_xpath).click()
        print("COOKIE")
        time.sleep(2)


    def get_content(driver):
        """Reveal the phone number on the currently open ad page and save it.

        Clicks the "show phone" spoiler, reads the number and appends it to the
        module-level output file ``f`` as a ``"Тел: ..."`` line. The function is
        best-effort: if anything goes wrong the ad is skipped and a short
        notice is printed, so one bad page does not stop the run.

        Only ``Exception`` subclasses are suppressed; ``KeyboardInterrupt`` and
        ``SystemExit`` propagate so the script can still be stopped with Ctrl-C.

        Args:
            driver: Selenium WebDriver instance with an ad page loaded.
        """
        try:
            time.sleep(1)
            # Heading/description extraction is currently disabled. Selectors
            # kept for reference:
            #   //div[@class='offer-titlebox']/h1
            #   //div[@class='clr lheight20 large']
            driver.find_element_by_xpath("//span[@class='link spoiler small nowrap']/span").click()
            # Give the page time to replace the spoiler with the real number.
            time.sleep(2)
            try:
                phone = driver.find_element_by_xpath("//strong[@class='fnormal xx-large']").text
                print(phone)
                f.write("Тел: " + phone + '\n')
                time.sleep(1)
            except Exception:
                # Fallback: read the two numbers from their separate <span class="block"> children.
                phone_1 = driver.find_element_by_xpath("//strong[@class='fnormal xx-large']/span[@class='block'][1]").text
                phone_2 = driver.find_element_by_xpath("//strong[@class='fnormal xx-large']/span[@class='block'][2]").text
                print(phone_1, phone_2)
                f.write("Тел: " + phone_1 + phone_2 + '\n')
                time.sleep(1)
        except Exception as exc:
            # Deliberately non-fatal, but no longer silent: report the skip.
            print("SKIPPED:", type(exc).__name__)


    def page_pagination(driver):
        """Visit every ad linked from the listing page currently open in the driver.

        Collects the ``href`` of each ad link on the page, then opens the ads
        one by one and hands each to ``get_content``.

        Args:
            driver: Selenium WebDriver instance with a listing page loaded.
        """
        ad_links = driver.find_elements_by_xpath("//a[@class='marginright5 link linkWithHash detailsLink']")
        # Read all hrefs before navigating: the loop below leaves this page,
        # so the link elements are not touched again afterwards.
        ad_urls = [link.get_attribute("href") for link in ad_links]
        for ad_url in ad_urls:
            driver.get(ad_url)
            time.sleep(1.5)
            get_content(driver)
            time.sleep(1.5)


    def pages_pagination(driver, last_elem):
        """Scrape the listing page that is already open, then pages 2..last.

        Reads ``url`` from the enclosing ``for url in urls`` loop to build the
        address of each further page.

        Args:
            driver: Selenium WebDriver instance with page 1 of the listing loaded.
            last_elem: Number of the last listing page (text or int); it is
                converted with ``int()``.

        Raises:
            ValueError: If ``last_elem`` cannot be converted to an integer.
        """
        # Page 1 is whatever the driver currently shows.
        page_pagination(driver)
        # `url` is a raw input-file line; drop its line terminator before
        # appending the query string.
        base_url = url.strip()
        # Stop at the last page itself: the previous upper bound of
        # `int(last_elem) + 2` also requested the non-existent page last+1.
        for page_no in range(2, int(last_elem) + 1):
            driver.get(base_url + "/?page=" + str(page_no))
            page_pagination(driver)


    def main():
        """Run one scraping session for the current starting URL.

        Starts Chrome with a custom user agent and a packed extension, opens
        the URL, dismisses the cookie bar and scrapes either all listing pages
        (when a "last page" link is found) or just the current one.

        The browser is always shut down, even when scraping fails. Errors
        raised while paginating propagate to the caller instead of being
        silently turned into a second "single page" pass.
        """
        options = Options()
        options.add_argument('user-agent=Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7')
        options.add_extension("D:\\UB\\cjpalhdlnbpafiamejdnhcphjbkeiagm.crx")
        driver = webdriver.Chrome(options=options)
        try:
            # Element lookups wait up to 10 s before giving up.
            driver.implicitly_wait(10)
            get_url(driver)
            press_cookie_btn(driver)
            try:
                # Keep the guarded region minimal: only the lookup of the
                # "last page" link decides between the two modes.
                last_elem = driver.find_element_by_xpath("//span[@class='item fleft'][last()]/a[@data-cy='page-link-last']/span").text
            except Exception:
                # No pagination control found: treat the listing as a single page.
                page_pagination(driver)
            else:
                print(last_elem)
                pages_pagination(driver, last_elem)
        finally:
            # Always release the browser process, including on errors and Ctrl-C.
            driver.quit()


    # Run one complete scraping session (with its own browser) for this URL.
    main()

# Both files were opened without a context manager, so close them explicitly
# once every URL has been processed.
urls.close()
f.close()

Reply

Answer the question

In order to leave comments, you need to log in

2 answer(s)

D

Dr. Bacon, 2020-02-01
@bacon

Heh, what made you think that if you block the captcha, the site won't block you for it? ;)
All your questions are related to spam, so no one will help you much.