mirror of
https://github.com/Orange-OpenSource/hurl.git
synced 2024-11-22 15:42:20 +03:00
Replace lxml.HTMLParser with BeautifulSoup parser.
lxml.HTMLParser has trouble with emoji content (see https://github.com/Orange-OpenSource/hurl/issues/959). Besides, BeautifulSoup is already a dependency of our test suite.
This commit is contained in:
parent
b38d108eb2
commit
1c796d8f0c
@ -2,15 +2,15 @@
|
||||
# Check that issues in CHANGELOG are up-to-date
|
||||
set -eu
|
||||
|
||||
#version=$(head -1 <CHANGELOG.md| cut -d" " -f1 | cut -d'[' -f2)
|
||||
#changelog=$(bin/release/changelog_extract.py "$version" <CHANGELOG.md| grep '^ \* ')
|
||||
#issues=$(bin/release/get_release_note.py "$version" 2>/dev/null | grep '^ \* ')
|
||||
#
|
||||
#if [ "$changelog" != "$issues" ]; then
|
||||
# echo "Diff in issues in CHANGELOG"
|
||||
# diff <(echo "$changelog") <(echo "$issues")
|
||||
# exit 1
|
||||
#fi
|
||||
version=$(head -1 <CHANGELOG.md| cut -d" " -f1 | cut -d'[' -f2)
|
||||
changelog=$(bin/release/changelog_extract.py "$version" <CHANGELOG.md| grep '^\* ')
|
||||
issues=$(bin/release/get_release_note.py "$version" 2>/dev/null | grep '^\* ')
|
||||
|
||||
if [ "$changelog" != "$issues" ]; then
|
||||
echo "Diff in issues in CHANGELOG"
|
||||
diff <(echo "$changelog") <(echo "$issues")
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
|
||||
|
@ -8,11 +8,10 @@ Example:
|
||||
import datetime
|
||||
import json
|
||||
import sys
|
||||
from io import StringIO
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
from lxml import etree
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
hurl_repo_url = "https://github.com/Orange-OpenSource/hurl"
|
||||
|
||||
@ -135,15 +134,14 @@ def get_linked_pulls(issue_number) -> List[Pull]:
|
||||
|
||||
|
||||
def webscrapping_linked_pulls(html) -> List[Pull]:
|
||||
parser = etree.HTMLParser()
|
||||
tree = etree.parse(StringIO(html), parser)
|
||||
links = tree.xpath("//development-menu//a")
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
links = soup.select("development-menu a")
|
||||
pulls = []
|
||||
for link in links:
|
||||
url = link.attrib["href"]
|
||||
url = link["href"]
|
||||
if url == "/Orange-OpenSource/hurl":
|
||||
continue
|
||||
description = "".join(link.itertext()).strip()
|
||||
description = "".join(link.getText()).strip()
|
||||
pull = Pull(url, description)
|
||||
pulls.append(pull)
|
||||
return pulls
|
||||
|
@ -153,6 +153,22 @@ ISSUE_HTML = """<development-menu data-catalyst="">
|
||||
"""
|
||||
|
||||
|
||||
ISSUE_WITH_EMOJI_HTML = """
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="description" content="👋 hello">
|
||||
<title>Issue</title>
|
||||
</head>
|
||||
<body>
|
||||
<development-menu>
|
||||
<a href="/Orange-OpenSource/hurl/pull/958">Issue 958</a>
|
||||
</development-menu>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class GetReleaseNoteTest(unittest.TestCase):
|
||||
def test_authors_from_issues(self):
|
||||
self.assertEqual(["bob", "bill"], authors_from_issues(ISSUES))
|
||||
@ -166,6 +182,12 @@ class GetReleaseNoteTest(unittest.TestCase):
|
||||
webscrapping_linked_pulls(ISSUE_HTML),
|
||||
)
|
||||
|
||||
def test_webscrapping_issue_with_emoji(self):
|
||||
self.assertEqual(
|
||||
[Pull("/Orange-OpenSource/hurl/pull/958", "Issue 958", [], [])],
|
||||
webscrapping_linked_pulls(ISSUE_WITH_EMOJI_HTML),
|
||||
)
|
||||
|
||||
def test_generate_md(self):
|
||||
self.assertEqual(
|
||||
"""[1.0.0 (2022-01-01)](https://github.com/Orange-OpenSource/hurl/blob/master/CHANGELOG.md#1.0.0)
|
||||
@ -178,16 +200,16 @@ Thanks to
|
||||
|
||||
Enhancements:
|
||||
|
||||
* pull1 [#1](https://github.com/Orange-OpenSource/hurl/issues/1)
|
||||
* pull1 [#1](https://github.com/Orange-OpenSource/hurl/issues/1)
|
||||
|
||||
* pull4 [#3](https://github.com/Orange-OpenSource/hurl/issues/3) [#4](https://github.com/Orange-OpenSource/hurl/issues/4)
|
||||
* pull4 [#3](https://github.com/Orange-OpenSource/hurl/issues/3) [#4](https://github.com/Orange-OpenSource/hurl/issues/4)
|
||||
|
||||
|
||||
Bugs Fixed:
|
||||
|
||||
* pull2 [#2](https://github.com/Orange-OpenSource/hurl/issues/2)
|
||||
* pull2 [#2](https://github.com/Orange-OpenSource/hurl/issues/2)
|
||||
|
||||
* pull3 [#2](https://github.com/Orange-OpenSource/hurl/issues/2)
|
||||
* pull3 [#2](https://github.com/Orange-OpenSource/hurl/issues/2)
|
||||
""",
|
||||
generate_md(
|
||||
milestone="1.0.0",
|
||||
|
@ -3,8 +3,7 @@ beautifulsoup4==4.11.1
|
||||
black==22.6.0
|
||||
blinker==1.4
|
||||
Brotli==1.0.9
|
||||
bs4==0.0.1
|
||||
certifi==2022.6.15
|
||||
certifi==2022.9.24
|
||||
cffi==1.15.1
|
||||
charset-normalizer==2.1.1
|
||||
click==8.1.3
|
||||
@ -14,7 +13,7 @@ h11==0.13.0
|
||||
h2==4.1.0
|
||||
hpack==4.0.0
|
||||
hyperframe==6.0.1
|
||||
idna==3.3
|
||||
idna==3.4
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.2
|
||||
kaitaistruct==0.9
|
||||
@ -26,9 +25,9 @@ msgpack==1.0.4
|
||||
mypy==0.971
|
||||
mypy-extensions==0.4.3
|
||||
passlib==1.7.4
|
||||
pathspec==0.9.0
|
||||
pathspec==0.10.1
|
||||
platformdirs==2.5.2
|
||||
protobuf==3.19.5
|
||||
protobuf==3.19.6
|
||||
publicsuffix2==2.20191221
|
||||
pyasn1==0.4.8
|
||||
pycparser==2.21
|
||||
@ -37,12 +36,12 @@ pyparsing==3.0.9
|
||||
pyperclip==1.8.2
|
||||
requests==2.28.1
|
||||
ruamel.yaml==0.17.21
|
||||
ruamel.yaml.clib==0.2.6
|
||||
ruamel.yaml.clib==0.2.7
|
||||
sortedcontainers==2.4.0
|
||||
soupsieve==2.3.2.post1
|
||||
tomli==2.0.1
|
||||
tornado==6.2
|
||||
typing_extensions==4.3.0
|
||||
typing_extensions==4.4.0
|
||||
urllib3==1.26.12
|
||||
urwid==2.1.2
|
||||
Werkzeug==2.0.3
|
||||
|
@ -1,5 +1,5 @@
|
||||
black==22.6.0
|
||||
bs4==0.0.1
|
||||
beautifulsoup4==4.11.1
|
||||
Flask==2.0.3
|
||||
lxml==4.9.1
|
||||
mitmproxy==8.0.0
|
||||
|
Loading…
Reference in New Issue
Block a user