A
A
artds2016-06-12 11:24:17
Python
artds, 2016-06-12 11:24:17

Python only parse last value?

#!/home/artddss/parse/bin/python3.4
from urllib.request import urlopen
from lxml.etree import XMLSyntaxError
from lxml.html import fromstring
from pandas import DataFrame, ExcelWriter
from urllib.parse import urljoin
URL = 'http://test.exete.ru/Aastra.html'
PAG_PATH = 'a'
DESR_PATH = '.prdbrief_name'
ART_PATH = '.prdbrief_name i'
def parse_soud(): 
    f = urlopen(URL)  # Открывает http://test.exete.ru/Aastra.html
    list_html = f.read().decode('utf-8')
    list_doc = fromstring(list_html)

    df = DataFrame(columns=('zagalovok', 'articul'))
    
    for elem in list_doc.cssselect(PAG_PATH): #переходит по сылкам
        a = elem.cssselect('a')[0]
        href = a.get('href')
        
        details_html = urlopen(href).read().decode('utf-8') #открывает сылки

        try:
                details_doc = fromstring(details_html)
        except XMLSyntaxError:
                continue
  
        for ter in details_doc.cssselect(DESR_PATH): #парсит загаловок и артикул
           b = ter.cssselect('a')[0]
           x = b.text
           c = ter.cssselect('i')[0]
           v = c.text
        haret_elems_list = [('zagalovok', x), ('articul', v)]
        df = df.append(dict(haret_elems_list), ignore_index=True)
        print(haret_elems_list)
    writer = ExcelWriter('second.xlsx', engine='xlsxwriter') #сохроняет в excel
    df.to_excel(writer, sheet_name='astra', header=True, index=False)
    writer.save()     


def main():
    parse_soud()

if __name__ == '__main__':
    main()

why is only one value article and title parsed from my list and not the whole list?
the list itself is shop.secondlevel.ru/category/vse-komplektujushie-p...
and only the last one is taken

Answer the question

In order to leave comments, you need to log in

1 answer(s)
G
GavriKos, 2016-06-12
@artds

Well maybe because

haret_elems_list = [('zagalovok', x), ('articul', v)]
df = df.append(dict(haret_elems_list), ignore_index=True)
print(haret_elems_list)

should it also be done inside the cycle that parses the title and article?

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question