43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
#!/usr/bin/env python3
|
|
import re
|
|
from string import Template
|
|
import urllib
|
|
try:
|
|
from BeautifulSoup import BeautifulSoup
|
|
except ImportError:
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def excludeAllergens(html_soup):
    """Remove every ``<sup>`` element from *html_soup* in place.

    The function name suggests the ``<sup>`` tags carry allergen markers on
    the scraped menu page -- presumably footnote-style labels; verify against
    the page markup.

    Args:
        html_soup: A parsed tree (or sub-tree) offering ``find_all``; each
            element it yields must offer ``extract``.

    Returns:
        None. The tree is modified as a side effect.
    """
    # find_all() hands back a finished result list, so detaching elements
    # while walking it does not disturb the iteration.
    for sup_tag in html_soup.find_all("sup"):
        sup_tag.extract()
|
|
|
|
|
|
def excludeForm(html_soup):
    """Remove the first ``<form>`` element from *html_soup* in place.

    Only the first match is removed, because ``find`` returns a single
    element. If the tree contains no ``<form>`` at all the function does
    nothing instead of failing.

    Args:
        html_soup: A parsed tree (or sub-tree) offering ``find``.

    Returns:
        None. The tree is modified as a side effect.
    """
    form_tag = html_soup.find('form')
    # find() yields None when nothing matches; calling extract() on that
    # would raise AttributeError, so a missing form is treated as a no-op.
    if form_tag is not None:
        form_tag.extract()
|
|
|
|
|
|
def excludeImages(html_soup):
    """Remove every ``<img>`` element from *html_soup* in place.

    Args:
        html_soup: A parsed tree (or sub-tree) offering ``find_all``; each
            element it yields must offer ``extract``.

    Returns:
        None. The tree is modified as a side effect.
    """
    # The result list is complete before the loop starts, so detaching
    # elements one by one is safe.
    for img_tag in html_soup.find_all("img"):
        img_tag.extract()
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: download the menu page, strip the form, the <sup>
    # markers and the images from it, and write what is left into a small
    # standalone HTML file.

    # A bare ``import urllib`` does not load the ``request`` submodule, so
    # ``urllib.request`` is only reachable if some other module happened to
    # import it first. Import it explicitly here.
    import urllib.request

    url = 'https://www.werkswelt.de/?id=mohm'
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode()

    # Rewrite every '/br' to 'br' (e.g. '</br>' becomes '<br>') before the
    # markup is handed to the parser.
    correctedPage = re.sub('/br', 'br', page)
    html_soup = BeautifulSoup(correctedPage, features='html.parser')

    # The script works on the SECOND <body> element of the document.
    # NOTE(review): this presumably relies on the page nesting a full HTML
    # document inside its body (html > body > html > body) -- confirm
    # against the live page; with fewer than two <body> tags the indexing
    # below raises IndexError.
    parsed_html = html_soup.findAll('body')
    menu_body = parsed_html[1]

    excludeForm(menu_body)
    excludeAllergens(menu_body)
    excludeImages(menu_body)

    template = Template('<html>\n<div style="background:black; color:white">\n$parsed_html\n</div>\n</html>')

    # Write the html file: only the inner markup of the first <div> inside
    # the selected body goes into the template.
    with open("speiseplan.html", "w") as out_file:
        out_file.write(template.substitute(parsed_html=menu_body.div.decode_contents()))
|