How can I log in to a site with several different accounts and parse the course titles available in each account?

K

kopelev20002019-02-09 19:58:34

Python

kopelev2000, 2019-02-09 19:58:34

The essence of the question is:
1) The script logs in, enters the section with courses and then extracts everything that is needed, writing it all to a .txt file.
2) It is necessary to make the script take the login and password from the .txt file (login and password are located there, like this:
login:password
login_1:password_1
login_n:password_n), parse everything that is needed, and then switch to the next account; if an error occurs, the account should be skipped and the script should move on to the next account.
Roughly speaking, the first stage was completed successfully, but the second one is not going well. Like I can't think of anything. Thanks in advance

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

# Read the account credentials from stdin; each prompt is printed on its own
# line before the corresponding value is read.
print("ENTER MAIL: ")
mail = input()
print("ENTER PASSWORD: ")
pas = input()

# Report file: it starts with the credentials that were entered, and the
# scraping code further down appends to the same handle before closing it.
f = open('udemy_titles', mode='w', encoding='utf8')
f.writelines((
    "USERNAME: ", mail + "\n",
    "PASSWORD: ", pas + "\n",
))

def parse_pagination(driver):
    """Build the URLs of the remaining "my courses" pages.

    Reads the page count from the second-to-last item of the pagination
    widget on the currently loaded page and returns the URLs for pages
    2..N (page 1 is not included in the result).

    Args:
        driver: Selenium WebDriver showing the course list.

    Returns:
        list[str]: URLs for page 2 through the last page. The list is empty
        when the pagination widget is missing or its page count cannot be
        read, so callers can always iterate over the result.
    """
    try:
        ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
        # The page count is taken from the second-to-last <li>; presumably
        # the last one is the "next page" control - TODO confirm on the site.
        li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
        count_page = int(li_pagination.find_element_by_css_selector("a").text)
    except Exception:
        # Best effort: no readable pagination means there are no extra pages.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        return []

    base_url = "https://www.udemy.com/home/my-courses/learning/?p="
    # Only strings are built here, so no delay between iterations is needed.
    return [base_url + str(page) for page in range(2, count_page + 1)]

def parse_list(driver):
    """Print and record the titles of the courses shown on the current page.

    Waits up to 10 seconds for the course-card container to become visible,
    then prints every course title found inside it and appends each title as
    a line to the module-level report file ``f``.

    Args:
        driver: Selenium WebDriver positioned on a course-list page.

    Returns:
        None. Failures (timeout, missing elements, write errors) are reported
        on stdout and the rest of the page is skipped instead of raising.
    """
    try:
        div_card_wrapper = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "div.card-wrapper")))
        div_cards = div_card_wrapper.find_elements_by_css_selector("div.card.card--learning")
        for div_card in div_cards:
            a = div_card.find_element_by_css_selector("a.card--learning__details > div > strong")
            name = a.text
            print(name)
            f.write(name + "\n")
    except Exception as exc:
        # Best effort: keep the script running, but say what went wrong.
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        print("COULD NOT PARSE COURSE LIST: " + repr(exc))

# Main flow: open Udemy, log in with the credentials read earlier, open the
# "my courses" section and dump the course titles into the report file.
# The try/finally pairs guarantee that the report file and the browser window
# are closed even when a login or navigation step raises.
try:
    driver = webdriver.Firefox()
    try:
        driver.get('https://www.udemy.com')
        print("\nGOT URL\n")
        time.sleep(5)

        driver.find_element_by_xpath("//button[@data-purpose='header-login']").click()
        print("OPEN LOGIN FORM\n")
        time.sleep(5)

        # The login form fields are reached by clicking at pixel offsets.
        # NOTE(review): these offsets presumably depend on the window size
        # and page layout - confirm they still hit the intended fields.
        webdriver.ActionChains(driver).move_by_offset(570, 295).click().send_keys(mail).perform()
        print("PRINT MAIL\n")
        time.sleep(5)

        webdriver.ActionChains(driver).move_by_offset(100, 65).click().send_keys(pas).perform()
        print("PRINT PASSWORD\n")
        time.sleep(5)

        webdriver.ActionChains(driver).move_by_offset(0, 60).click().perform()
        print("AUTORIZATION\n")
        time.sleep(5)

        driver.find_element_by_xpath("//a[@data-purpose='my-courses']").click()
        print("GO TO URL\n")
        time.sleep(5)

        # The total-courses line is optional: it is skipped when the pager
        # label is missing or has fewer than two space-separated words.
        try:
            div = driver.find_element_by_xpath("//div[@class='pager-label']").text
            div_1 = div.split(" ")[-2]
            f.write("TOTAL COURSES: ")
            f.write(div_1 + "\n")
        except Exception:
            pass

        # Parse the first page, then every additional page reported by the
        # pagination widget. `or []` tolerates a None result from
        # parse_pagination so the loop never iterates over None.
        try:
            parse_list(driver)
            urls = parse_pagination(driver) or []
            for url in urls:
                driver.get(url)
                parse_list(driver)
        except Exception as exc:
            print("COULD NOT PARSE COURSES: " + repr(exc))
    finally:
        driver.close()
        time.sleep(2)
finally:
    f.close()

Reply

Answer the question

In order to leave comments, you need to log in

1 answer(s)

D

Dimonchik, 2019-02-09
@kopelev2000

Like I can't think of anything.

can't read from a file?