hurl/integration/test_html_output.py

38 lines
865 B
Python
Raw Normal View History

2021-11-29 15:58:18 +03:00
#!/usr/bin/env python3
2021-09-23 17:26:04 +03:00
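# Check Hurl's HTML output: extract the text of each HTML file and compare it
# with the content of the corresponding .hurl source file.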
import sys
from bs4 import BeautifulSoup
import os
import codecs
2021-09-23 17:26:04 +03:00
def test(html_file):
    """Check that an HTML file still contains the text of its Hurl source.

    The text extracted from ``html_file`` is compared, ignoring leading and
    trailing whitespace, with the content of the sibling file that has the
    same base name and a ``.hurl`` extension. Nothing is compared when that
    sibling file does not exist.

    Args:
        html_file: Path to the HTML file to check.

    Raises:
        SystemExit: With exit code 1 when the two texts differ.
    """
    print(html_file)
    actual = extract_hurl_content(html_file)

    hurl_file = os.path.splitext(html_file)[0] + ".hurl"
    if not os.path.isfile(hurl_file):
        return

    # "utf-8-sig" because the input file can be saved with a BOM. The context
    # manager closes the handle instead of leaving it to the garbage collector.
    with codecs.open(hurl_file, encoding="utf-8-sig") as f:
        expected = f.read()

    if actual.strip() != expected.strip():
        print(">>> error in html file")
        print(f"actual: <{actual}>\nexpected: <{expected}>")
        sys.exit(1)
def extract_hurl_content(hurl_file):
    """Return the text content of an HTML file, with the markup removed.

    Args:
        hurl_file: Path to the file to read. Despite the parameter name, the
            caller in this script passes the path of the HTML file.

    Returns:
        The ``.text`` of the document as parsed by BeautifulSoup with the
        "lxml" parser.
    """
    # Decode explicitly so the result does not depend on the platform's
    # default encoding (assumes the HTML output is UTF-8, like the .hurl
    # source it is compared with), and close the file deterministically.
    with open(hurl_file, encoding="utf-8") as f:
        s = f.read()
    return BeautifulSoup(s, "lxml").text
def main():
    """Run the HTML output check on every path given on the command line."""
    print("** test html output")
    html_files = sys.argv[1:]
    for path in html_files:
        test(path)
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()