43 lines
1.2 KiB
Python

#!/usr/bin/env python3
import re
import urllib
import urllib.request
from string import Template

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup
def excludeAllergens(html_soup):
    """Remove every ``<sup>`` element from ``html_soup`` in place.

    Going by the function name, the page presumably uses ``<sup>`` for
    allergen markers -- confirm against the live markup.

    Args:
        html_soup: A BeautifulSoup tag/document exposing ``find_all``.

    Returns:
        None. The tree passed in is modified directly.
    """
    for sup_tag in html_soup.find_all("sup"):
        # extract() detaches the element from its parent tree.
        sup_tag.extract()
def excludeForm(html_soup):
    """Remove the first ``<form>`` element from ``html_soup`` in place.

    Args:
        html_soup: A BeautifulSoup tag/document exposing ``find``.

    Returns:
        None. The tree passed in is modified directly; if it contains no
        ``<form>`` element it is left untouched.
    """
    form_tag = html_soup.find('form')
    # find() yields None when nothing matches; calling extract() on that
    # would raise AttributeError, so a missing form is simply a no-op.
    if form_tag is not None:
        form_tag.extract()
def excludeImages(html_soup):
    """Remove every ``<img>`` element from ``html_soup`` in place.

    Args:
        html_soup: A BeautifulSoup tag/document exposing ``find_all``.

    Returns:
        None. The tree passed in is modified directly.
    """
    for image_tag in html_soup.find_all("img"):
        # extract() detaches the element from its parent tree.
        image_tag.extract()
if __name__ == '__main__':
    # Download the menu page, strip the form, <sup> and <img> elements from
    # its second <body>, and save the first <div> of that body wrapped in a
    # white-on-black container as speiseplan.html.
    url = 'https://www.werkswelt.de/?id=mohm'

    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode()

    # Turns '</br>' into '<br>' before parsing.
    # NOTE(review): the pattern matches any '/br', so it also rewrites URLs
    # or paths containing that sequence -- confirm this is harmless here.
    correctedPage = re.sub('/br', 'br', page)
    html_soup = BeautifulSoup(correctedPage, features='html.parser')

    # The content of interest is taken from the SECOND <body> in document
    # order (presumably the page nests one document inside another --
    # verify against the live markup).
    bodies = html_soup.find_all('body')
    if len(bodies) < 2:
        raise SystemExit(
            'Expected at least two <body> elements in %s, found %d'
            % (url, len(bodies))
        )
    menu_body = bodies[1]

    excludeForm(menu_body)
    excludeAllergens(menu_body)
    excludeImages(menu_body)

    menu_div = menu_body.div
    if menu_div is None:
        raise SystemExit('No <div> found inside the second <body> of %s' % url)

    template = Template('<html>\n<div style="background:black; color:white">\n$parsed_html\n</div>\n</html>')
    # Render before opening the file so a failure above never truncates an
    # existing speiseplan.html.
    rendered = template.substitute(parsed_html=menu_div.decode_contents())

    # Write the HTML file; UTF-8 matches the decoding of the downloaded page
    # and keeps the output independent of the platform's default encoding.
    with open("speiseplan.html", "w", encoding="utf-8") as out_file:
        out_file.write(rendered)