diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 209fb67..735c103 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,10 @@ Changelog 0.16.0 (unreleased) ------------------- +- Refine support for GitHub /archive/refs/tags/ URLs in ``url2purl``. + The whole tag is now captured as the version. + This allows ``purl2url`` to properly reconstruct valid URLs. + 0.15.5 (2024-07-24) ------------------- diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index aa3bd93..0d2cfb5 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -502,6 +502,13 @@ def build_github_purl(url): Return a PackageURL object from GitHub `url`. """ + # https://github.com/apache/nifi/archive/refs/tags/rel/nifi-2.0.0-M3.tar.gz + archive_tags_pattern = ( + r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)" + r"/archive/refs/tags/" + r"(?P<version>.+).(zip|tar.gz|tar.bz2|.tgz)" + ) + # https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip archive_pattern = ( r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)" @@ -538,6 +545,7 @@ def build_github_purl(url): git_pattern = r"https?://github.com/(?P<namespace>.+)/(?P<name>.+).(git)" patterns = ( + archive_tags_pattern, archive_pattern, raw_pattern, blob_pattern, diff --git a/tests/contrib/data/url2purl.json b/tests/contrib/data/url2purl.json index 877d0c4..2de615b 100644 --- a/tests/contrib/data/url2purl.json +++ b/tests/contrib/data/url2purl.json @@ -193,6 +193,7 @@ "https://github.com/TG1999/fetchcode/fetchcode/src": "pkg:github/tg1999/fetchcode@fetchcode#src", "https://github.com/NEXB/SCANCODE-TOOLKIT/tree/develop/PLUGINS/scancode-ctags-macosx_10_9_intel": "pkg:github/nexb/scancode-toolkit@develop#PLUGINS/scancode-ctags-macosx_10_9_intel", "https://github.com/NEXB/SCANCODE-TOOLKIT/tree/DEVELOP/PLUGINS/scancode-ctags-macosx_10_9_intel": "pkg:github/nexb/scancode-toolkit@DEVELOP#PLUGINS/scancode-ctags-macosx_10_9_intel", + "https://github.com/apache/nifi/archive/refs/tags/rel/nifi-2.0.0-M3.tar.gz": 
"pkg:github/apache/nifi@rel/nifi-2.0.0-M3", "https://raw.githubusercontent.com/volatilityfoundation/dwarf2json/master/LICENSE.txt": "pkg:github/volatilityfoundation/dwarf2json@master#LICENSE.txt", "https://raw.githubusercontent.com/LeZuse/flex-sdk/master/frameworks/projects/mx/src/mx/containers/accordionClasses/AccordionHeader.as": "pkg:github/lezuse/flex-sdk@master#frameworks/projects/mx/src/mx/containers/accordionClasses/AccordionHeader.as", "https://raw.githubusercontent.com/NCIP/lexevs/master/lgSharedLibraries/jettison/jettison-1.1.jar": "pkg:github/ncip/lexevs@master#lgSharedLibraries/jettison/jettison-1.1.jar", diff --git a/tests/contrib/test_url2purl.py b/tests/contrib/test_url2purl.py index 626aeea..f1cc7f3 100644 --- a/tests/contrib/test_url2purl.py +++ b/tests/contrib/test_url2purl.py @@ -61,7 +61,7 @@ def python_safe(s): def get_url2purl_test_method(test_url, expected_purl): def test_method(self): - self.assertEqual(expected_purl, get_purl(test_url)) + self.assertEqual(expected_purl, get_purl(test_url), msg=test_url) return test_method