Why does the parser return None?

G

Gorin432020-11-05 16:49:31

Python

Gorin43, 2020-11-05 16:49:31

I wrote a simple parser, but it prints None instead of the parsed data.
Do I need to use Selenium for this?

import requests
from bs4 import BeautifulSoup



# Address of the page that is fetched and parsed.
URL = 'http://www.zagrya.ru/'

# Browser-like headers sent with every request.
HEADERS = {
    # The last quality value must be a bare number ("q=0.9"); the original
    # string ended in "q=0.9/", which is not a valid q-value.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
}

def get_html(url, params = ''):
    """Send a GET request to ``url`` and return the ``requests`` response.

    ``params`` is forwarded as the query-string parameters, and the
    module-level ``HEADERS`` dict is sent as the request headers.
    """
    return requests.get(url, params=params, headers=HEADERS)

def get_content(html):
    """Extract the menu category names from an HTML document.

    Args:
        html: Markup of the page, as a string.

    Returns:
        A list of dicts, one per matched menu item, each holding the item's
        text under the ``'karegoria'`` key. The list is empty when nothing
        matches the selector.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): confirm that the menu items really are <span> elements on
    # the page; if nothing matches, the result is simply an empty list.
    items = soup.find_all('span', class_='hor-menu__item has-subm')
    tovari = []

    for item in items:
        link = item.find('a', {'class': 'hor-menu__lnk'})
        text = link.find('span', {'class': 'hor-menu__text'}).get_text()
        tovari.append({'karegoria': text})

    # Return only after the loop has finished. The original returned from
    # inside the loop body and misspelled the name as ``tovati``: with no
    # matches the function fell off the end and returned None, and with a
    # match it raised NameError.
    return tovari

# Fetch the page, parse its markup and print whatever the parser extracted.
html = get_html(URL)
parsed = get_content(html.text)
print(parsed)

Thanks in advance for your reply :)

Reply

Answer the question

In order to leave comments, you need to log in

2 answer(s)

S

Sand, 2020-11-05
@sand3001

items = soup.find_all('span', class_='hor-menu__item has-subm')

Does an element matching this selector really exist on the site?

V

Valery Mamontov, 2020-11-06
@vmamontov

Gorin43, hello!
You have a lot of errors in your code. Here is a corrected version:

import requests
from bs4 import BeautifulSoup


# Address of the page that is fetched and parsed.
url = 'http://www.zagrya.ru/'

# Browser-like headers sent with every request.
headers = {
    # The last quality value must be a bare number ("q=0.9"); the original
    # string ended in "q=0.9/", which is not a valid q-value.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
}


def get_html(url):
    """Send a GET request to ``url`` and return the ``requests`` response.

    The module-level ``headers`` dict is sent as the request headers.
    """
    return requests.get(url, headers=headers)


def get_content(html):
    """Collect the menu category names from a fetched page.

    Args:
        html: Response object whose ``content`` attribute holds the page
            markup.

    Returns:
        A dict with the single key ``'karegorii'`` mapping to the list of
        category names, in document order.
    """
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed, emits a warning, and may build a
    # different tree on a different machine.
    soup = BeautifulSoup(html.content, 'html.parser')
    items = soup.find_all('li', {"class": "hor-menu__item has-subm"})

    tovari = [
        item.find('a', {'class': 'hor-menu__lnk'})
        .find('span', {'class': 'hor-menu__text'})
        .get_text()
        for item in items
    ]
    return {'karegorii': tovari}

# Fetch the page and print the parsed categories. This script defines the
# address as lowercase ``url``; the original call used ``URL``, which is not
# defined here and raised NameError.
html = get_html(url)
print(get_content(html))