Answer the question
In order to leave comments, you need to log in
How to parse the entire table?
I need to parse a table containing my university's class schedule. Here is the link I need to parse from. I tried the classic approach using BeautifulSoup, printing the results through a loop, but the output turns out to be a mess. Does anyone know how to parse this entire table cleanly, without a loop?
Answer the question
In order to leave comments, you need to log in
from bs4 import BeautifulSoup
from urllib.request import urlopen
from json import dumps

# Schedule page for group ІПЗ-21-3 (percent-encoded in the path).
URL = "https://rozklad.ztu.edu.ua/schedule/group/%D0%86%D0%9F%D0%97-21-3?new"


def parse_schedule(html):
    """Parse the <table class="schedule"> out of *html*.

    Returns a list of dicts, one per <td> cell: each has the cell's
    ``day`` (with its two-character suffix stripped) and ``hour``
    attributes, and — for cells that carry a class (i.e. non-empty
    slots) — a nested ``content.variative`` dict with subject, room
    and teacher details.
    """
    bs = BeautifulSoup(html, features="lxml")
    schedule = []
    schedule_table = bs.find("table", attrs={"class": "schedule"})
    for tr in schedule_table.findAll("tr"):
        # Skip rows whose header cell is blank (layout/spacer rows).
        if str(tr.th.text).strip() == "":
            continue
        for td in tr.findAll("td"):
            newitem = {
                # The "day" attribute ends with a two-char suffix; drop it.
                "day": td.get("day")[0:-2],
                "hour": td.get("hour"),
            }
            # Empty slots may have no class attribute at all:
            # td.get("class") returns None then, and len(None) would
            # raise TypeError — fall back to an empty list.
            if len(td.get("class") or []) > 0:
                # Query the room <span> once and reuse it.
                room_span = td.find("span", attrs={"class": "room"})
                newitem.update({
                    "content": {
                        "variative": {
                            "subject": td.find("div", attrs={"class": "subject"}).text.strip(),
                            "room": room_span.parent.text.strip(),
                            "room_span": room_span.text.strip(),
                            "room_changed": "changed" in room_span.get("class"),
                            "teacher": td.find("div", attrs={"class": "teacher"}).text.strip(),
                        }
                    }
                })
            schedule.append(newitem)
    return schedule


def main():
    """Fetch the schedule page, print it, then show a filtered view."""
    schedule = parse_schedule(urlopen(URL).read())
    print(dumps(schedule, indent=4, ensure_ascii=False))
    # Entries can be filtered on any flat key, e.g. all Wednesday slots:
    wednesdays = [d for d in schedule if d["day"] == "Середа"]
    print(dumps(wednesdays, indent=4, ensure_ascii=False))


if __name__ == "__main__":
    main()
Didn't find what you were looking for?
Ask your question
731 491 924 answers to any question