Answer the question
In order to leave comments, you need to log in
It is necessary to parse questions and answers to them from the site, but in order to get to them, you need to go to the discussion of this issue, how to implement it?
It is necessary to parse questions and answers to them from the site, but in order to get to them, you need to go to the discussion of this issue, how to implement it?
Here is the site itself with the necessary section: https://www.avvo.com/topics/landlord-tenant-law/ad... I'm trying to do this: 1) I go to the URL 2) I go to the 1st post, I collect the necessary information , I exit 3) I try to go to the next (does not exit) + everything at the second stage knocks out the error "selenium.common.exceptions.StaleElementReferenceException: Message: The element reference of is stale; either the element is no longer attached to the DOM, it is not in the current frame context, or the document has been refreshed" The code itself:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException as TE
import time
driver = webdriver.Firefox()
url = "https://www.avvo.com/topics/landlord-tenant-law/advice?order=recency&page=1&search_topic_advice_search[content_type]=Q%26A&search_topic_advice_search[query]=security+deposit&search_topic_advice_search[state]=CA"
def get_url(driver, url):
driver.get(url)
time.sleep(3)
def page_pagination(driver):
divs_pagination = driver.find_elements_by_xpath("//div[@class='col-xs-12 advice-content']/div[@class='js-documents-list gtm-context']/div[@class='v-topic-page-card-list']/div[@class='card topic-advice-question-card']")
for div_pagination in divs_pagination:
div_pagination.click()
time.sleep(2)
def get_content(driver):
# divs = driver.find_elements_by_xpath("//div[@class='col-xs-12 advice-content']/div[@class='js-documents-list gtm-context']/div[@class='v-topic-page-card-list']/div[@class='card topic-advice-question-card']")
# for div in divs:
# div = driver.find_element_by_xpath("//div[@class='card topic-advice-question-card']/div[@class='row']/div[@class='col-xs-12 u-margin-top-half']/a[@class='block-link']").click()
# time.sleep(3)
try:
span = driver.find_element_by_xpath("//div[@class='col-xs-12']/div[@id='qa-body-display']/p[@class='a button btn btn-link u-vertical-padding-0']/span[@class='icon-chevron-down-after-blue']").click()
time.sleep(2)
except:
pass
try:
mores_lawyer = driver.find_elements_by_xpath("//div[@class='col-xs-12 gtm-context']/div[@id='answers_container']/div[@class='card qa-lawyer-card qa-answer v-borderless']/div[@itemscope='itemscope']/div[@id='answer-9088247']/div[@class='row answer-body']/div[@class='col-xs-12']/button[@class='btn btn-link u-vertical-padding-0']")
for more_lawyer in mores_lawyer:
more_lawyer.click()
time.sleep(2)
except:
pass
date_question_post = driver.find_element_by_xpath("//div[@class='col-xs-12 gtm-context']/div[@id='answers_container']/div[@class='card qa-lawyer-card qa-answer v-borderless']/div[@itemscope='itemscope']/div[@id='answer-9088247']/div[@class='row answer-metadata']/div[@class='col-xs-12']/ul[@class='inline-list']/li[@class='text-muted small hidden-xs']")
question_short = driver.find_element_by_xpath("//h1[@itemprop='name']")
question_long = driver.find_element_by_xpath("//p[@itemprop='text']")
name_lawyer = driver.find_element_by_xpath("//span[@itemprop='name']")
lawyer_answer = driver.find_element_by_xpath("//div[@class='col-xs-12 gtm-context']/div[@id='answers_container']/div[@class='card qa-lawyer-card qa-answer v-borderless']/div[@itemscope='itemscope']/div[@id='answer-9088247']/div[@class='row answer-body']/div[@class='col-xs-12']/div[@id='answer-body-9088247']")
name_data_question_post = date_question_post.text
name_short = question_short.text
name_long = question_long.text
name_of_lawyer = name_lawyer.text
name_of_lawyer_answer = lawyer_answer.text
print(name_short, "\n" ,name_long, "\n" ,name_data_question_post ,"\nLawyer name: ", name_of_lawyer, "\nLawyer answer:", name_of_lawyer_answer)
return (name_short, name_long, name_of_lawyer, name_data_question_post, name_of_lawyer_answer)
get_url(driver, url)
urls = page_pagination(driver)
for url in urls:
get_content(driver)
get_url(driver, url)
driver.close()
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question