Answer the question
In order to leave comments, you need to log in
Python only parse last value?
#!/home/artddss/parse/bin/python3.4
from urllib.request import urlopen
from lxml.etree import XMLSyntaxError
from lxml.html import fromstring
from pandas import DataFrame, ExcelWriter
from urllib.parse import urljoin
URL = 'http://test.exete.ru/Aastra.html'
PAG_PATH = 'a'
DESR_PATH = '.prdbrief_name'
ART_PATH = '.prdbrief_name i'
def parse_soud():
f = urlopen(URL) # Открывает http://test.exete.ru/Aastra.html
list_html = f.read().decode('utf-8')
list_doc = fromstring(list_html)
df = DataFrame(columns=('zagalovok', 'articul'))
for elem in list_doc.cssselect(PAG_PATH): #переходит по сылкам
a = elem.cssselect('a')[0]
href = a.get('href')
details_html = urlopen(href).read().decode('utf-8') #открывает сылки
try:
details_doc = fromstring(details_html)
except XMLSyntaxError:
continue
for ter in details_doc.cssselect(DESR_PATH): #парсит загаловок и артикул
b = ter.cssselect('a')[0]
x = b.text
c = ter.cssselect('i')[0]
v = c.text
haret_elems_list = [('zagalovok', x), ('articul', v)]
df = df.append(dict(haret_elems_list), ignore_index=True)
print(haret_elems_list)
writer = ExcelWriter('second.xlsx', engine='xlsxwriter') #сохроняет в excel
df.to_excel(writer, sheet_name='astra', header=True, index=False)
writer.save()
def main():
parse_soud()
if __name__ == '__main__':
main()
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question