There is a piece of code that works like this: first it collects all the page links, and only then does it start parsing them. How can I make it take one link and start parsing it right away? If an account has 1000 pages to go through, it spends a lot of time just filling the "urls" list and only then starts scraping the information. How can this be reworked?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import *
import time
import random
f = open('udemy_titles', 'a', encoding='utf8')
file = open("input_1.txt", 'r')
login_pass_dict = dict()
for x in [line.strip().split(':') for line in file]:
    login_pass_dict.update({x[0]: x[1]})

for login, password in login_pass_dict.items():
    f.write("\n" + "USERNAME: ")
    f.write(login + "\n")
    f.write("PASSWORD: ")
    f.write(password + "\n")

    myProxy = random.choice(open('proxy.txt').readlines())
    Proxy_list = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': myProxy,
        'httpsProxy': myProxy,
        'ftpProxy': myProxy,
        'sslProxy': myProxy,
        'noProxy': ''
    })
    print(myProxy + "\n")

    driver = webdriver.Firefox(proxy=Proxy_list)
    driver.get('https://www.udemy.com')
    print("GOT URL\n")
    time.sleep(5)

    driver.find_element_by_xpath("//button[@data-purpose='header-login']").click()
    print("OPEN LOGIN FORM\n")
    time.sleep(5)

    webdriver.ActionChains(driver).move_by_offset(570, 295).click().send_keys(login).perform()
    print("PRINT MAIL\n")
    time.sleep(5)

    webdriver.ActionChains(driver).move_by_offset(100, 65).click().send_keys(password).perform()
    print("PRINT PASSWORD\n")
    time.sleep(5)

    try:
        webdriver.ActionChains(driver).move_by_offset(0, 60).click().perform()
        print("AUTORIZATION\n")
        time.sleep(6)

        driver.find_element_by_xpath("//a[@data-purpose='my-courses']").click()
        print("GO TO URL\n")
        time.sleep(5)

        try:
            def parse_pagination(driver):
                ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
                li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
                count_page = int(li_pagination.find_element_by_css_selector("a").text)
                urls = []
                for i in range(2, count_page + 1):
                    urls.append("https://www.udemy.com/home/my-courses/learning/?p=" + str(i))
                    time.sleep(2)
                return urls

            def parse_list(driver):
                div_card_wrapper = WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located((By.CSS_SELECTOR, "div.card-wrapper")))
                div_cards = div_card_wrapper.find_elements_by_css_selector("div.card.card--learning")
                for div_card in div_cards:
                    a = div_card.find_element_by_css_selector("a.card--learning__details > div > strong")
                    name = a.text
                    print(name)
                    f.write(name + "\n")
        except:
            pass

        try:
            div = driver.find_element_by_xpath("//div[@class='pager-label']").text
            div_1 = div.split(" ")[-2]
            f.write("TOTAL COURSES: ")
            f.write(div_1 + "\n")
        except:
            pass

        try:
            parse_list(driver)
            urls = parse_pagination(driver)
            for url in urls:
                driver.get(url)
                time.sleep(2)
                parse_list(driver)
        except:
            pass

        driver.close()
        time.sleep(2)
    except:
        f.write("LOGIN OR PASSWORD IS INCORRECTLY PROVIDED" + "\n")
        driver.close()
        time.sleep(1)

f.close()
file.close()
def url_parse(driver):
    ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
    li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
    count_page = int(li_pagination.find_element_by_css_selector("a").text)
    for page in range(2, count_page):
        urls = driver.get("https://www.udemy.com/home/my-courses/learning/?p=" + str(page))
        time.sleep(2)
    return urls

def parse_list(driver):
    div_card_wrapper = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "div.card-wrapper")))
    div_cards = div_card_wrapper.find_elements_by_css_selector("div.card.card--learning")
    for div_card in div_cards:
        a = div_card.find_element_by_css_selector("a.card--learning__details > div > strong")
        name = a.text
        print(name)

parse_list(driver)
ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
count_page = int(li_pagination.find_element_by_css_selector("a").text)
for number_page in range(2, count_page):
    parse_list(driver)
    url_parse(driver)
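A side note on the second snippet: urls = driver.get(...) always assigns None, because Selenium's get() only navigates and returns nothing, so url_parse() never produces usable links, and parsing still starts only after the whole loop. One way to "take 1 link and start parsing" is to read the page count once and then yield the page URLs one at a time, parsing each page as soon as it is opened. This is only a sketch under the assumptions of the question (same selectors, same old-style Selenium API, driver already logged in and sitting on the first My Courses page); the name iter_page_urls is made up for illustration:

def iter_page_urls(driver):
    # Read the total number of pages once from the pagination block.
    ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
    li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
    count_page = int(li_pagination.find_element_by_css_selector("a").text)
    # Yield the remaining page URLs one at a time; no sleeping here,
    # building the strings is instant.
    for page in range(2, count_page + 1):
        yield "https://www.udemy.com/home/my-courses/learning/?p=" + str(page)

# Page 1 is already open after logging in, so parse it first,
# then fetch and parse every further page as soon as its URL is produced.
parse_list(driver)
for url in iter_page_urls(driver):
    driver.get(url)
    time.sleep(2)
    parse_list(driver)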
This is where you get the URL in the original code:
urls.append("https://www.udemy.com/home/my-courses/learning/?p=" + str(i))
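If you keep the original structure, the minimal change (a sketch, assuming the same selectors) is to open and scrape each page right at that spot instead of appending the link to a list, and to drop the time.sleep(2) that only slows down building the strings. The page count has to be read before the first driver.get(), otherwise the old pagination elements go stale:

def parse_pagination(driver):
    ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
    li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
    count_page = int(li_pagination.find_element_by_css_selector("a").text)
    # Instead of collecting urls and looping over them later,
    # open each page and scrape it straight away.
    for i in range(2, count_page + 1):
        driver.get("https://www.udemy.com/home/my-courses/learning/?p=" + str(i))
        time.sleep(2)
        parse_list(driver)

With that change the urls list and the later "for url in urls" loop in the question's code are no longer needed.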