partoschph68073
/
SmartMirror-Projektarbeit


			
							123456789101112131415161718192021222324252627282930313233343536373839404142
							#!/usr/bin/env python3
import re
from string import Template
import urllib
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup


def excludeAllergens(html_soup):
    """Remove every ``<sup>`` element from *html_soup* in place.

    The function name suggests these elements carry the allergen markers
    of the menu (not verifiable from this file alone).

    Args:
        html_soup: A parsed tree (or subtree) offering ``find_all``.

    Returns:
        None. The tree is modified directly.
    """
    for sup_tag in html_soup.find_all("sup"):
        # extract() detaches the element from its parent tree.
        sup_tag.extract()


def excludeForm(html_soup):
    """Remove the first ``<form>`` element from *html_soup* in place.

    If the tree contains no ``<form>`` element the call is a no-op, in
    line with the other ``exclude*`` helpers, which also tolerate zero
    matches.

    Args:
        html_soup: A parsed tree (or subtree) offering ``find``.

    Returns:
        None. The tree is modified directly.
    """
    form_tag = html_soup.find('form')
    # find() yields None when nothing matches; calling extract() on it
    # would raise AttributeError.
    if form_tag is not None:
        form_tag.extract()


def excludeImages(html_soup):
    """Remove every ``<img>`` element from *html_soup* in place.

    Args:
        html_soup: A parsed tree (or subtree) offering ``find_all``.

    Returns:
        None. The tree is modified directly.
    """
    for img_tag in html_soup.find_all("img"):
        # extract() detaches the element from its parent tree.
        img_tag.extract()


if __name__ == '__main__':
    # ``import urllib`` alone does not load the ``request`` submodule, so
    # it is imported explicitly before ``urllib.request.urlopen`` is used.
    import urllib.request

    url = 'https://www.werkswelt.de/?id=mohm'
    # Read the page inside a ``with`` block so the HTTP response is closed
    # as soon as the body has been fetched.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode()

    # Rewrite every "/br" to "br" (so e.g. "</br>" becomes "<br>") before
    # handing the markup to the parser.
    correctedPage = re.sub('/br', 'br', page)
    html_soup = BeautifulSoup(correctedPage, features='html.parser')

    # NOTE(review): index 1 assumes the parsed page contains at least two
    # <body> elements and that the second one holds the menu; this raises
    # IndexError otherwise. Confirm against the live page.
    parsed_html = html_soup.find_all('body')
    menu_body = parsed_html[1]

    # Strip the elements that should not appear in the generated page.
    excludeForm(menu_body)
    excludeAllergens(menu_body)
    excludeImages(menu_body)
    template = Template('<html>\n<div style="background:black; color:white">\n$parsed_html\n</div>\n</html>')

    # Write the HTML file. The encoding is given explicitly so non-ASCII
    # characters are written the same way regardless of the platform's
    # default locale encoding.
    with open("speiseplan.html", "w", encoding="utf-8") as output_file:
        # Only the inner markup of the first <div> inside the selected
        # <body> is embedded into the template.
        output_file.write(template.substitute(parsed_html=menu_body.div.decode_contents()))