diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d46e46c..b314ab3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,13 +37,13 @@ jobs: - name: install package run: python -m pip install .[dev] - # - name: test - # run: - # python -m unittest discover --verbose --start-directory tests/ + - name: test + run: + python -m unittest discover --verbose --start-directory tests/ - name: lint run: flake8 - name: mypy run: - mypy --strict license_tools/ + mypy --strict license_tools/ tests/ if: ${{ matrix.python != '3.7' }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e108a9..90488f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Development version +* Switch to *mypy* strict mode. +* Add unit tests. +* Fix handling of license clues. + # Version 0.3.2 - 2023-08-21 * Fix type hints. diff --git a/license_tools/scancode_tools.py b/license_tools/scancode_tools.py index 38130e5..b2abd9c 100644 --- a/license_tools/scancode_tools.py +++ b/license_tools/scancode_tools.py @@ -14,6 +14,7 @@ import math import shutil import subprocess +import sys import zipfile from collections import defaultdict from dataclasses import dataclass, field as dataclass_field @@ -74,9 +75,9 @@ class Copyrights: authors: list[Author] = dataclass_field(default_factory=list) def __post_init__(self) -> None: - self.copyrights = [Copyright(**x) for x in self.copyrights] # type: ignore[arg-type] - self.holders = [Holder(**x) for x in self.holders] # type: ignore[arg-type] - self.authors = [Author(**x) for x in self.authors] # type: ignore[arg-type] + self.copyrights = [Copyright(**x) if not isinstance(x, Copyright) else x for x in self.copyrights] # type: ignore[arg-type] + self.holders = [Holder(**x) if not isinstance(x, Holder) else x for x in self.holders] # type: ignore[arg-type] + self.authors = [Author(**x) if not isinstance(x, Author) else x for x in self.authors] # type: ignore[arg-type] @dataclass @@ -99,7 +100,7 @@ class Emails: emails: list[Email] = dataclass_field(default_factory=list) def __post_init__(self) -> None: - self.emails = [Email(**x) for x in self.emails] # type: ignore[arg-type] + self.emails = [Email(**x) if not isinstance(x, Email) else x for x in self.emails] # type: ignore[arg-type] @dataclass @@ -122,7 +123,7 @@ class Urls: urls: list[Url] = dataclass_field(default_factory=list) def __post_init__(self) -> None: - self.urls = [Url(**x) for x in self.urls] # type: ignore[arg-type] + self.urls = [Url(**x) if not isinstance(x, Url) else x for x in self.urls] # type: ignore[arg-type] @dataclass @@ -166,13 +167,22 @@ class LicenseMatch: license_expression: str rule_identifier: str rule_relevance: int - rule_url: str + rule_url: str | None + + +@dataclass +class LicenseClue(LicenseMatch): + """ + Enriched matching information about a license. + """ + + pass @dataclass class LicenseDetection: """ - Information an a specific detected license. + Information on a specific detected license. """ license_expression: str @@ -180,7 +190,7 @@ class LicenseDetection: matches: list[LicenseMatch] = dataclass_field(default_factory=list) def __post_init__(self) -> None: - self.matches = [LicenseMatch(**x) for x in self.matches] # type: ignore[arg-type] + self.matches = [LicenseMatch(**x) if not isinstance(x, LicenseMatch) else x for x in self.matches] # type: ignore[arg-type] @dataclass @@ -189,15 +199,18 @@ class Licenses: Information on all detected licenses. """ - detected_license_expression: str - detected_license_expression_spdx: str + detected_license_expression: str | None + detected_license_expression_spdx: str | None percentage_of_license_text: float license_detections: list[LicenseDetection] = dataclass_field(default_factory=list) - license_clues: list[str] = dataclass_field(default_factory=list) + license_clues: list[LicenseClue] = dataclass_field(default_factory=list) def __post_init__(self) -> None: self.license_detections = [ - LicenseDetection(**x) for x in self.license_detections # type: ignore[arg-type] + LicenseDetection(**x) if not isinstance(x, LicenseDetection) else x for x in self.license_detections # type: ignore[arg-type] + ] + self.license_clues = [ + LicenseClue(**x) if not isinstance(x, LicenseClue) else x for x in self.license_clues # type: ignore[arg-type] ] def get_scores_of_detected_license_expression_spdx(self) -> list[float]: @@ -286,6 +299,19 @@ def to_int( cls.LDD_DATA * retrieve_ldd_data ) + @classmethod + def all(cls, as_kwargs: bool = False) -> int | dict[str, bool]: + """ + Utility method to enable all flags. + + :param: If enabled, return kwargs instead of the integer value. + :return: The value for all flags enabled. + """ + value = cls.to_int(True, True, True, True, True) + if as_kwargs: + return cls.to_kwargs(value) + return value + @classmethod def is_set(cls, flags: int, flag: int) -> bool: """ @@ -321,10 +347,11 @@ def check_shared_objects(path: Path) -> str | None: :param path: The file path to analyze. :return: The analysis results if the path points to a shared object, `None` otherwise. """ - if path.suffix != '.so' and not (path.suffixes and path.suffixes[0] == '.so'): + # TODO: Handle binary files here as well (like `/usr/bin/bc`). + if path.suffix != ".so" and not (path.suffixes and path.suffixes[0] == ".so"): return None - output = subprocess.check_output(['ldd', path], stderr=subprocess.PIPE) - return output.decode('UTF-8') + output = subprocess.check_output(["ldd", path], stderr=subprocess.PIPE) + return output.decode("UTF-8") def run_on_file( @@ -343,10 +370,10 @@ def run_on_file( retrieval_kwargs = RetrievalFlags.to_kwargs(flags=retrieval_flags) # This data is not yet part of the dataclasses above, as it is a custom analysis. - if retrieval_kwargs.pop('retrieve_ldd_data'): + if retrieval_kwargs.pop("retrieve_ldd_data"): results = check_shared_objects(path=path) if results: - print(short_path + '\n' + results) + print(short_path + "\n" + results) # Register this here as each parallel process has its own directory. atexit.register(cleanup, scancode_config.scancode_temp_dir) @@ -359,6 +386,24 @@ def run_on_file( ) +def get_files_from_directory(directory: str | Path) -> Generator[tuple[Path, str], None, None]: + """ + Get the files from the given directory, recursively. + + :param directory: The directory to walk through. + :return: For each file, the complete Path object as well as the path string + relative to the given directory. + """ + directory_string = str(directory) + common_prefix_length = len(directory_string) + int(not directory_string.endswith("/")) + + for path in sorted(Path(directory).rglob("*"), key=str): + if path.is_dir(): + continue + distribution_path = str(path)[common_prefix_length:] + yield path, distribution_path + + def run_on_directory( directory: str, job_count: int = 4, @@ -367,26 +412,17 @@ def run_on_directory( """ Run the analysis on the given directory. - :param path: The directory to analyze. + :param directory: The directory to analyze. :param job_count: The number of parallel jobs to use. :param retrieval_flags: Values to retrieve. :return: The requested results per file. """ - common_prefix_length = len(directory) + int(not directory.endswith("/")) - - def get_paths() -> Generator[tuple[Path, str], None, None]: - for path in sorted(Path(directory).rglob("*"), key=str): - if path.is_dir(): - continue - distribution_path = str(path)[common_prefix_length:] - yield path, distribution_path - results = Parallel(n_jobs=job_count)( delayed(run_on_file)( path=path, short_path=short_path, retrieval_flags=retrieval_flags, - ) for path, short_path in get_paths() + ) for path, short_path in get_files_from_directory(directory) ) yield from results @@ -399,7 +435,7 @@ def run_on_package_archive_file( """ Run the analysis on the given package archive file. - :param path: The package archive path to analyze. + :param archive_path: The package archive path to analyze. :param job_count: The number of parallel jobs to use. :param retrieval_flags: Values to retrieve. :return: The requested results. @@ -435,6 +471,8 @@ def run_on_downloaded_package_file( """ with TemporaryDirectory() as download_directory: command = [ + sys.executable, + "-m", "pip", "download", "--no-deps", @@ -444,7 +482,14 @@ def run_on_downloaded_package_file( ] if index_url: command += ["--index-url", index_url] - subprocess.check_output(command) + try: + subprocess.run(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True) + except subprocess.CalledProcessError as exception: + if exception.stdout: + sys.stdout.write(exception.stdout.decode("UTF-8")) + if exception.stderr: + sys.stderr.write(exception.stderr.decode("UTF-8")) + raise name = list(Path(download_directory).glob("*"))[0] yield from run_on_package_archive_file( archive_path=name.resolve(), @@ -505,9 +550,9 @@ def run( assert _check_that_exactly_one_value_is_set( [directory, file_path, archive_path, package_definition] - ), 'Exactly one source is required.' + ), "Exactly one source is required." - license_counts: dict[str, int] = defaultdict(int) + license_counts: dict[str | None, int] = defaultdict(int) retrieval_flags = RetrievalFlags.to_int( retrieve_copyrights=retrieve_copyrights, retrieve_emails=retrieve_emails, @@ -550,6 +595,8 @@ def run( retrieval_flags=retrieval_flags, ) ] + else: + return [] # Display the file-level results. max_path_length = max(len(result.short_path) for result in results) diff --git a/setup.py b/setup.py index 29926df..9852038 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,8 @@ 'flake8-bugbear', 'pep8-naming', 'mypy', + 'requests', + 'types-requests', ] }, classifiers=[ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data.py b/tests/data.py new file mode 100644 index 0000000..2d6b896 --- /dev/null +++ b/tests/data.py @@ -0,0 +1,226 @@ +from pathlib import Path + +from license_tools.scancode_tools import FileResults, LicenseClue, LicenseDetection, LicenseMatch, Licenses, NOT_REQUESTED + +# Generated by `scancode-toolkit==32.0.8`. +SETUP_PY_LICENSES = Licenses( + detected_license_expression='apache-2.0 AND (unknown-license-reference AND apache-2.0)', + detected_license_expression_spdx='Apache-2.0 AND (LicenseRef-scancode-unknown-license-reference AND Apache-2.0)', + percentage_of_license_text=22.44, + license_detections=[ + LicenseDetection( + license_expression='apache-2.0', + identifier='apache_2_0-627405aa-ea35-7b6c-9436-402113a6866a', + matches=[ + LicenseMatch( + score=100.0, start_line=2, end_line=2, matched_length=6, match_coverage=100.0, matcher='1-spdx-id', + license_expression='apache-2.0', rule_identifier='spdx-license-identifier-apache-2.0-8b7d7ba520c6ab392deaea36b8b1f018b637027e', + rule_relevance=100, rule_url=None + ), + LicenseMatch( + score=100.0, start_line=3, end_line=3, matched_length=13, match_coverage=100.0, matcher='2-aho', + license_expression='apache-2.0', rule_identifier='apache-2.0_1251.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_1251.RULE' + ) + ] + ), + LicenseDetection( + license_expression='unknown-license-reference AND apache-2.0', + identifier='unknown_license_reference_and_apache_2_0-1a5d5a31-4478-b9fc-43db-706bd5353d3d', + matches=[ + LicenseMatch( + score=80.0, start_line=15, end_line=15, matched_length=3, match_coverage=100.0, matcher='2-aho', + license_expression='unknown-license-reference', rule_identifier='unknown-license-reference_331.RULE', rule_relevance=80, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown-license-reference_331.RULE' + ), + LicenseMatch( + score=100.0, start_line=17, end_line=17, matched_length=4, match_coverage=100.0, matcher='2-aho', + license_expression='apache-2.0', rule_identifier='apache-2.0_65.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_65.RULE' + ) + ] + ), + LicenseDetection( + license_expression='apache-2.0', identifier='apache_2_0-e267f9d9-ae62-e9c9-9cc2-8cd0a1e4928f', + matches=[ + LicenseMatch( + score=95.0, start_line=44, end_line=44, matched_length=6, match_coverage=100.0, matcher='2-aho', + license_expression='apache-2.0', rule_identifier='pypi_apache_no-version.RULE', rule_relevance=95, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pypi_apache_no-version.RULE' + ) + ] + ) + ], + license_clues=[ + LicenseClue( + score=50.0, start_line=50, end_line=50, matched_length=3, match_coverage=100.0, matcher='2-aho', + license_expression='free-unknown', rule_identifier='free-unknown_88.RULE', rule_relevance=50, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/free-unknown_88.RULE' + ) + ] +) + +TYPING_EXTENSION_4_8_0__WHEEL_FILES = [ + "typing_extensions-4.8.0.dist-info/LICENSE", + "typing_extensions-4.8.0.dist-info/METADATA", + "typing_extensions-4.8.0.dist-info/RECORD", + "typing_extensions-4.8.0.dist-info/WHEEL", + "typing_extensions.py" +] + +TYPING_EXTENSION_4_8_0__SOURCE_FILES = [ + "typing_extensions-4.8.0/CHANGELOG.md", + "typing_extensions-4.8.0/LICENSE", + "typing_extensions-4.8.0/PKG-INFO", + "typing_extensions-4.8.0/README.md", + "typing_extensions-4.8.0/pyproject.toml", + "typing_extensions-4.8.0/src/_typed_dict_test_helper.py", + "typing_extensions-4.8.0/src/test_typing_extensions.py", + "typing_extensions-4.8.0/src/typing_extensions.py", + "typing_extensions-4.8.0/tox.ini" +] + +# Generated by `scancode-toolkit==32.0.8`. +# `retrieve_licenses` has been set to `False` to avoid actually retrieving them. +TYPING_EXTENSION_4_8_0__LICENSES = [ + FileResults( + path=Path('/tmp/tmpr6n2cx2i/typing_extensions-4.8.0.dist-info/LICENSE'), + short_path='typing_extensions-4.8.0.dist-info/LICENSE', + retrieve_copyrights=False, retrieve_emails=False, retrieve_urls=False, retrieve_licenses=False, retrieve_file_info=False, + copyrights=NOT_REQUESTED, emails=NOT_REQUESTED, urls=NOT_REQUESTED, + licenses=Licenses( + detected_license_expression='python AND (python AND bsd-new) AND (python AND bsd-new AND bsd-zero)', + detected_license_expression_spdx='Python-2.0 AND (Python-2.0 AND BSD-3-Clause) AND (Python-2.0 AND BSD-3-Clause AND 0BSD)', + percentage_of_license_text=96.26, + license_detections=[ + LicenseDetection( + license_expression='python', identifier='python-0a1026f6-4441-3a49-a425-36ae51b9b171', + matches=[ + LicenseMatch( + score=20.38, start_line=5, end_line=59, matched_length=400, match_coverage=20.38, matcher='3-seq', + license_expression='python', rule_identifier='python_43.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/python_43.RULE' + ), + LicenseMatch( + score=100.0, start_line=62, end_line=63, matched_length=10, match_coverage=100.0, matcher='2-aho', + license_expression='python', rule_identifier='python_16.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/python_16.RULE' + ) + ] + ), + LicenseDetection( + license_expression='python AND bsd-new', identifier='python_and_bsd_new-ef6a0b00-3e20-7b5f-60ad-13fd68dfafaa', + matches=[ + LicenseMatch( + score=100.0, start_line=66, end_line=66, matched_length=3, match_coverage=100.0, matcher='2-aho', + license_expression='unknown-license-reference', rule_identifier='lead-in_unknown_30.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/lead-in_unknown_30.RULE' + ), + LicenseMatch( + score=100.0, start_line=66, end_line=66, matched_length=2, match_coverage=100.0, matcher='2-aho', + license_expression='python', rule_identifier='python_34.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/python_34.RULE' + ), + LicenseMatch( + score=99.0, start_line=67, end_line=67, matched_length=2, match_coverage=100.0, matcher='2-aho', + license_expression='bsd-new', rule_identifier='bsd-new_26.RULE', rule_relevance=99, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_26.RULE' + ) + ] + ), + LicenseDetection( + license_expression='python AND bsd-new AND bsd-zero', identifier='python_and_bsd_new_and_bsd_zero-c0222c9a-2a19-8ee7-903b-cebffd111794', + matches=[ + LicenseMatch( + score=78.37, start_line=73, end_line=265, matched_length=1540, match_coverage=78.37, matcher='3-seq', + license_expression='python', rule_identifier='python_70.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/python_70.RULE' + ), + LicenseMatch( + score=99.0, start_line=267, end_line=267, matched_length=2, match_coverage=100.0, matcher='2-aho', + license_expression='bsd-new', rule_identifier='bsd-new_26.RULE', rule_relevance=99, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_26.RULE' + ), + LicenseMatch( + score=100.0, start_line=270, end_line=279, matched_length=98, match_coverage=100.0, matcher='2-aho', + license_expression='bsd-zero', rule_identifier='bsd-zero.LICENSE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/bsd-zero.LICENSE' + ) + ] + ) + ], + license_clues=[] + ), + file_info=NOT_REQUESTED + ), + FileResults( + path=Path('/tmp/tmpr6n2cx2i/typing_extensions-4.8.0.dist-info/METADATA'), + short_path='typing_extensions-4.8.0.dist-info/METADATA', + retrieve_copyrights=False, retrieve_emails=False, retrieve_urls=False, retrieve_licenses=False, retrieve_file_info=False, + copyrights=NOT_REQUESTED, emails=NOT_REQUESTED, urls=NOT_REQUESTED, + licenses=Licenses( + detected_license_expression='python', + detected_license_expression_spdx='Python-2.0', + percentage_of_license_text=2.01, + license_detections=[ + LicenseDetection( + license_expression='python', identifier='python-03cf89ce-88f1-7600-71f7-302015c97123', + matches=[ + LicenseMatch( + score=99.0, start_line=12, end_line=12, matched_length=8, match_coverage=100.0, matcher='2-aho', + license_expression='python', rule_identifier='pypi_python_software_foundation_license2.RULE', rule_relevance=99, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pypi_python_software_foundation_license2.RULE' # noqa: E501 + ) + ] + ) + ], + license_clues=[] + ), + file_info=NOT_REQUESTED + ), + FileResults( + path=Path('/tmp/tmpr6n2cx2i/typing_extensions-4.8.0.dist-info/RECORD'), + short_path='typing_extensions-4.8.0.dist-info/RECORD', + retrieve_copyrights=False, retrieve_emails=False, retrieve_urls=False, retrieve_licenses=False, retrieve_file_info=False, + copyrights=NOT_REQUESTED, emails=NOT_REQUESTED, urls=NOT_REQUESTED, + licenses=Licenses( + detected_license_expression=None, detected_license_expression_spdx=None, percentage_of_license_text=0, license_detections=[], license_clues=[] + ), + file_info=NOT_REQUESTED + ), + FileResults( + path=Path('/tmp/tmpr6n2cx2i/typing_extensions-4.8.0.dist-info/WHEEL'), + short_path='typing_extensions-4.8.0.dist-info/WHEEL', + retrieve_copyrights=False, retrieve_emails=False, retrieve_urls=False, retrieve_licenses=False, retrieve_file_info=False, + copyrights=NOT_REQUESTED, emails=NOT_REQUESTED, urls=NOT_REQUESTED, + licenses=Licenses( + detected_license_expression=None, detected_license_expression_spdx=None, percentage_of_license_text=0, license_detections=[], license_clues=[] + ), + file_info=NOT_REQUESTED + ), + FileResults( + path=Path('/tmp/tmpr6n2cx2i/typing_extensions.py'), + short_path='typing_extensions.py', + retrieve_copyrights=False, retrieve_emails=False, retrieve_urls=False, retrieve_licenses=False, retrieve_file_info=False, + copyrights=NOT_REQUESTED, emails=NOT_REQUESTED, urls=NOT_REQUESTED, + licenses=Licenses( + detected_license_expression=None, detected_license_expression_spdx=None, percentage_of_license_text=0, license_detections=[], license_clues=[] + ), + file_info=NOT_REQUESTED + ) +] + +# Remove the leading linebreak which is just used to improve display here. +TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT = """ + typing_extensions-4.8.0.dist-info/LICENSE Python-2.0 AND (Python-2.0 AND BSD-3-Clause) AND (Python-2.0 AND BSD-3-Clause AND 0BSD) +typing_extensions-4.8.0.dist-info/METADATA Python-2.0 [99.0] + typing_extensions-4.8.0.dist-info/RECORD + typing_extensions-4.8.0.dist-info/WHEEL + typing_extensions.py + +==================================================================================================== + + None 3 + Python-2.0 1 +Python-2.0 AND (Python-2.0 AND BSD-3-Clause) AND (Python-2.0 AND BSD-3-Clause AND 0BSD) 1 +"""[1:] # noqa: W291 diff --git a/tests/test_scancode_tools.py b/tests/test_scancode_tools.py new file mode 100644 index 0000000..e9a69a2 --- /dev/null +++ b/tests/test_scancode_tools.py @@ -0,0 +1,440 @@ +from __future__ import annotations + +import datetime +import os +import subprocess +import sys +from contextlib import contextmanager, redirect_stderr, redirect_stdout +from io import StringIO +from pathlib import Path +from tempfile import mkdtemp, NamedTemporaryFile, TemporaryDirectory +from typing import Any, cast, Generator, List +from unittest import mock, TestCase + +import requests + +from license_tools import scancode_tools +from license_tools.scancode_tools import Copyright, Copyrights, Emails, FileInfo, FileResults, Holder, Licenses, RetrievalFlags, Url, Urls +from tests.data import SETUP_PY_LICENSES, TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT, TYPING_EXTENSION_4_8_0__LICENSES, TYPING_EXTENSION_4_8_0__SOURCE_FILES, \ + TYPING_EXTENSION_4_8_0__WHEEL_FILES + +SETUP_PATH = Path(__file__).parent.parent / "setup.py" +LICENSE_PATH = SETUP_PATH.parent / "LICENSE.txt" + + +class LicensesTestCase(TestCase): + def test_get_scores_of_detected_license_expression_spdx(self) -> None: + result = SETUP_PY_LICENSES.get_scores_of_detected_license_expression_spdx() + self.assertEqual([], result) + + file_results = FileResults(path=LICENSE_PATH, short_path="LICENSE.txt", retrieve_licenses=True) + licenses = cast(Licenses, file_results.licenses) + result = licenses.get_scores_of_detected_license_expression_spdx() + self.assertEqual([100.0], result) + + +class FileResultsTestCase(TestCase): + def assert_not_requested(self, result: FileResults, fields: list[str], invert: bool = False) -> None: + method = self.assertNotEqual if invert else self.assertEqual + for field in fields: + with self.subTest(field=field): + method(scancode_tools.NOT_REQUESTED, getattr(result, field)) + + def test_full(self) -> None: + flags = cast(dict[str, bool], RetrievalFlags.all(as_kwargs=True)) + del flags["retrieve_ldd_data"] + result = FileResults(path=SETUP_PATH, short_path="setup.py", retrieve_licenses=True, **flags) + self.assertEqual(SETUP_PATH, result.path) + self.assertEqual("setup.py", result.short_path) + self.assert_not_requested(result=result, fields=["copyrights", "emails", "urls", "licenses", "file_info"], invert=True) + + def test_none(self) -> None: + result = FileResults(path=Path("/tmp/dummy.py"), short_path="dummy.py") + self.assertEqual(Path("/tmp/dummy.py"), result.path) + self.assertEqual("dummy.py", result.short_path) + self.assert_not_requested(result=result, fields=["copyrights", "emails", "urls", "licenses", "file_info"]) + + def test_retrieve_copyrights(self) -> None: + result = scancode_tools.FileResults(path=SETUP_PATH, short_path="setup.py", retrieve_copyrights=True) + self.assertEqual(SETUP_PATH, result.path) + self.assertEqual("setup.py", result.short_path) + self.assert_not_requested(result=result, fields=["emails", "urls", "licenses", "file_info"]) + expected = Copyrights( + copyrights=[Copyright(copyright="Copyright (c) stefan6419846", start_line=1, end_line=1)], + holders=[Holder(holder="stefan6419846", start_line=1, end_line=1)], + authors=[] + ) + self.assertEqual(expected, result.copyrights) + + def test_retrieve_emails(self) -> None: + result = scancode_tools.FileResults(path=SETUP_PATH, short_path="setup.py", retrieve_emails=True) + self.assertEqual(SETUP_PATH, result.path) + self.assertEqual("setup.py", result.short_path) + self.assert_not_requested(result=result, fields=["copyrights", "urls", "licenses", "file_info"]) + expected = Emails(emails=[]) + self.assertEqual(expected, result.emails) + + def test_retrieve_urls(self) -> None: + result = scancode_tools.FileResults(path=SETUP_PATH, short_path="setup.py", retrieve_urls=True) + self.assertEqual(SETUP_PATH, result.path) + self.assertEqual("setup.py", result.short_path) + self.assert_not_requested(result=result, fields=["copyrights", "emails", "licenses", "file_info"]) + expected = Urls(urls=[ + Url(url="http://www.apache.org/licenses/LICENSE-2.0", start_line=3, end_line=3), + Url(url="https://github.com/stefan6419846/license_tools", start_line=21, end_line=21) + ]) + self.assertEqual(expected, result.urls) + + def test_retrieve_licenses(self) -> None: + self.maxDiff = None + result = scancode_tools.FileResults(path=SETUP_PATH, short_path="setup.py", retrieve_licenses=True) + self.assertEqual(SETUP_PATH, result.path) + self.assertEqual("setup.py", result.short_path) + self.assert_not_requested(result=result, fields=["copyrights", "emails", "urls", "file_info"]) + self.assertEqual(SETUP_PY_LICENSES, result.licenses) + + def test_retrieve_file_info(self) -> None: + with NamedTemporaryFile(suffix=".py") as file_object: + path = Path(file_object.name) + path.write_text('print("Hello World!")\n') + result = scancode_tools.FileResults(path=path, short_path="test.py", retrieve_file_info=True) + + self.assertEqual(path, result.path) + self.assertEqual("test.py", result.short_path) + self.assert_not_requested(result=result, fields=["copyrights", "emails", "urls", "licenses"]) + expected = FileInfo( + date=datetime.date.today(), size=22, + sha1="e343a35cf2fa04782749dab102d45129cdb0b644", md5="a97c0affb458a65d8682bf0a48f36e63", + sha256="c63bf759e5502fc9f4ad863b883423a2d75992aeaebee6a713eb81fe3f714a4b", + mime_type="text/plain", file_type="ASCII text", programming_language="Python", + is_binary=False, is_text=True, is_archive=False, is_media=False, is_source=True, is_script=False + ) + self.assertEqual(expected, result.file_info) + + +class RetrievalFlagsTestCase(TestCase): + def test_to_int(self) -> None: + self.assertEqual(0, RetrievalFlags.to_int()) + self.assertEqual(21, RetrievalFlags.to_int(True, False, True, False, True)) + + def test_all(self) -> None: + self.assertEqual(31, RetrievalFlags.all()) + self.assertDictEqual( + dict(retrieve_copyrights=True, retrieve_emails=True, retrieve_file_info=True, retrieve_urls=True, retrieve_ldd_data=True), + cast(dict[str, bool], RetrievalFlags.all(as_kwargs=True)) + ) + + def test_is_set(self) -> None: + self.assertFalse(RetrievalFlags.is_set(flags=0, flag=RetrievalFlags.EMAILS)) + self.assertTrue(RetrievalFlags.is_set(flags=2, flag=RetrievalFlags.EMAILS)) + self.assertTrue(RetrievalFlags.is_set(flags=31, flag=RetrievalFlags.EMAILS)) + self.assertFalse(RetrievalFlags.is_set(flags=9, flag=RetrievalFlags.EMAILS)) + + def test_to_kwargs(self) -> None: + self.assertDictEqual( + dict(retrieve_copyrights=False, retrieve_emails=False, retrieve_file_info=False, retrieve_urls=False, retrieve_ldd_data=False), + RetrievalFlags.to_kwargs(0) + ) + self.assertDictEqual( + dict(retrieve_copyrights=True, retrieve_emails=False, retrieve_file_info=True, retrieve_urls=False, retrieve_ldd_data=True), + RetrievalFlags.to_kwargs(21) + ) + + +class CheckSharedObjectsTestCase(TestCase): + def test_so_suffix(self) -> None: + path = Path("/tmp/libdummy.so") + with mock.patch("subprocess.check_output", return_value=b"Test output\nAnother line\n") as subprocess_mock: + result = scancode_tools.check_shared_objects(path) + self.assertEqual("Test output\nAnother line\n", result) + subprocess_mock.assert_called_once_with(["ldd", path], stderr=subprocess.PIPE) + + def test_so_suffix_with_multiple_suffixes(self) -> None: + path = Path("/tmp/libdummy.so.42") + with mock.patch("subprocess.check_output", return_value=b"Test output\nAnother line\n") as subprocess_mock: + result = scancode_tools.check_shared_objects(path) + self.assertEqual("Test output\nAnother line\n", result) + subprocess_mock.assert_called_once_with(["ldd", path], stderr=subprocess.PIPE) + + def test_no_so(self) -> None: + path = Path("/tmp/libdummy.py") + with mock.patch("subprocess.check_output", return_value=b"Test output\nAnother line\n") as subprocess_mock: + result = scancode_tools.check_shared_objects(path) + self.assertIsNone(result) + subprocess_mock.assert_not_called() + + +class RunOnFileTestCase(TestCase): + def _run_mocked(self, flags: int, return_value: str | None = "") -> tuple[mock.Mock, mock.Mock, str]: + stdout = StringIO() + file_result = object() + with mock.patch.object(scancode_tools, "FileResults", return_value=file_result) as results_mock, \ + redirect_stdout(stdout), \ + mock.patch.object(scancode_tools, "check_shared_objects", return_value=return_value) as check_mock: + result = scancode_tools.run_on_file(path=SETUP_PATH, short_path="setup.py", retrieval_flags=flags) + self.assertEqual(file_result, result) + return results_mock, check_mock, stdout.getvalue() + + def test_run_on_file(self) -> None: + # 1) LDD handling is inactive. + results_mock, check_mock, stdout = self._run_mocked(flags=15) + check_mock.assert_not_called() + results_mock.assert_called_once_with( + path=SETUP_PATH, short_path="setup.py", retrieve_licenses=True, retrieve_copyrights=True, retrieve_emails=True, + retrieve_file_info=True, retrieve_urls=True + ) + self.assertEqual("", stdout) + + # 2) LDD handling is active, but has no results. + for result in ["", None]: + with self.subTest(result=result): + results_mock, check_mock, stdout = self._run_mocked(flags=31, return_value=result) + check_mock.assert_called_once_with(path=SETUP_PATH) + results_mock.assert_called_once_with( + path=SETUP_PATH, short_path="setup.py", retrieve_licenses=True, retrieve_copyrights=True, retrieve_emails=True, + retrieve_file_info=True, retrieve_urls=True + ) + self.assertEqual("", stdout) + + # 3) LDD handling is active and has results. + ldd_usr_bin_bc = """ linux-vdso.so.1 (0x00007fff30abf000) + libreadline.so.7 => /lib64/libreadline.so.7 (0x00007fbe48c00000) + libc.so.6 => /lib64/libc.so.6 (0x00007fbe48a09000) + libtinfo.so.6 => /lib64/libtinfo.so.6 (0x00007fbe48600000) + /lib64/ld-linux-x86-64.so.2 (0x00007fbe492b8000) +""" + results_mock, check_mock, stdout = self._run_mocked(flags=31, return_value=ldd_usr_bin_bc) + check_mock.assert_called_once_with(path=SETUP_PATH) + results_mock.assert_called_once_with( + path=SETUP_PATH, short_path="setup.py", retrieve_licenses=True, retrieve_copyrights=True, retrieve_emails=True, + retrieve_file_info=True, retrieve_urls=True + ) + self.assertEqual("setup.py\n" + ldd_usr_bin_bc + "\n", stdout) + + +class GetFilesFromDirectoryTestCase(TestCase): + def test_get_files_from_directory(self) -> None: + with TemporaryDirectory() as temporary_directory: + directory = Path(temporary_directory) + + directory.joinpath("module1.py").touch() + directory.joinpath("module2.py").touch() + directory.joinpath("submodule").mkdir(parents=True) + directory.joinpath("submodule").joinpath("nested.py").touch() + directory.joinpath("empty").joinpath("sub").mkdir(parents=True) + directory.joinpath("empty").joinpath("sub").joinpath("hello.py").touch() + + result = list(scancode_tools.get_files_from_directory(temporary_directory)) + self.assertListEqual( + [ + (directory / "empty" / "sub" / "hello.py", "empty/sub/hello.py"), + (directory / "module1.py", "module1.py"), + (directory / "module2.py", "module2.py"), + (directory / "submodule" / "nested.py", "submodule/nested.py"), + ], + result + ) + + +class RunOnDirectoryTestCase(TestCase): + def test_run_on_directory(self) -> None: + file_results = [object()] * 5 + file_results_iterable = iter(file_results) + paths = [(Path(f"/tmp/file{i}.py"), f"file{i}.py") for i in range(1, 6)] + + def run_on_file(path: Path, short_path: str, retrieval_flags: int = 0) -> Any: + return next(file_results_iterable) + + with mock.patch.object(scancode_tools, "run_on_file", side_effect=run_on_file) as run_mock, \ + mock.patch.object(scancode_tools, "get_files_from_directory", return_value=paths) as get_mock: + results = list(scancode_tools.run_on_directory("/tmp/dummy/directory", job_count=1, retrieval_flags=42)) + self.assertListEqual(file_results, results) + run_mock.assert_has_calls( + [mock.call(path=current_path, short_path=current_short_path, retrieval_flags=42) for current_path, current_short_path in paths], + any_order=False + ) + self.assertEqual(len(paths), run_mock.call_count, run_mock.call_args_list) + get_mock.assert_called_once_with("/tmp/dummy/directory") + + +class RunOnPackageArchiveFileTestCase(TestCase): + def _check_call(self, suffix: str, url: str, expected_files: List[str]) -> None: + with NamedTemporaryFile(suffix=suffix) as archive_file: + archive_path = Path(archive_file.name) + archive_path.write_bytes(requests.get(url).content) + + directory_result = [object(), object(), object()] + + def run_on_directory(directory: Path, job_count: int, retrieval_flags: int) -> Generator[Any, None, None]: + self.assertEqual(2, job_count) + self.assertEqual(42, retrieval_flags) + actual = [x[1] for x in scancode_tools.get_files_from_directory(directory)] + self.assertListEqual(expected_files, actual) + yield from directory_result + + with mock.patch.object(scancode_tools, "run_on_directory", side_effect=run_on_directory): + result = list(scancode_tools.run_on_package_archive_file(archive_path=archive_path, job_count=2, retrieval_flags=42)) + self.assertEqual(directory_result, result) + + def test_wheel_file(self) -> None: + url = "https://files.pythonhosted.org/packages/24/21/7d397a4b7934ff4028987914ac1044d3b7d52712f30e2ac7a2ae5bc86dd0/typing_extensions-4.8.0-py3-none-any.whl" # noqa: E501 + self._check_call(suffix=".whl", url=url, expected_files=TYPING_EXTENSION_4_8_0__WHEEL_FILES) + + def test_non_wheel_file(self) -> None: + url = "https://files.pythonhosted.org/packages/1f/7a/8b94bb016069caa12fc9f587b28080ac33b4fbb8ca369b98bc0a4828543e/typing_extensions-4.8.0.tar.gz" + self._check_call(suffix=".tar.gz", url=url, expected_files=TYPING_EXTENSION_4_8_0__SOURCE_FILES) + + +class RunOnDownloadedPackageFileTestCase(TestCase): + def test_valid_package_name(self) -> None: + stderr = StringIO() + + archive_result = [object(), object(), object()] + + def run_on_package_archive_file(archive_path: Path, job_count: int, retrieval_flags: int) -> Generator[Any, None, None]: + self.assertEqual(3, job_count) + self.assertEqual(42, retrieval_flags) + self.assertEqual(31584, len(archive_path.read_bytes())) + yield from archive_result + + with redirect_stderr(stderr), \ + mock.patch.object(scancode_tools, "run_on_package_archive_file", side_effect=run_on_package_archive_file): + result = list(scancode_tools.run_on_downloaded_package_file( + package_definition="typing_extensions==4.8.0", index_url="https://pypi.org/simple", job_count=3, retrieval_flags=42 + )) + self.assertEqual(archive_result, result) + self.assertEqual("", stderr.getvalue()) + + def test_invalid_package_name(self) -> None: + stdout, stderr = StringIO(), StringIO() + + archive_result = [object(), object(), object()] + with redirect_stdout(stdout), redirect_stderr(stderr), \ + mock.patch.object(scancode_tools, "run_on_package_archive_file", return_value=iter(archive_result)): + with self.assertRaises(subprocess.CalledProcessError): + list(scancode_tools.run_on_downloaded_package_file( + package_definition="typing_extensions==1234567890", index_url="https://pypi.org/simple", job_count=2, retrieval_flags=13 + )) + + stderr_string = stderr.getvalue() + self.assertEqual("", stdout.getvalue()) + self.assertIn("ERROR: Could not find a version that satisfies the requirement typing_extensions==1234567890 (from versions: ", stderr_string) + self.assertIn("\nERROR: No matching distribution found for typing_extensions==1234567890\n", stderr_string) + + def test_index_url_handling(self) -> None: + directories = [] + + def check_output(command: list[str | Path], *args: Any, **kwargs: Any) -> 'subprocess.CompletedProcess[bytes]': + directory = command[command.index("--dest") + 1] + directories.append(directory) + Path(directory).joinpath("dummy.py").touch() + return subprocess.CompletedProcess(args=args, returncode=0, stdout=b"") + + with mock.patch.object(scancode_tools, "run_on_package_archive_file", return_value=[]), \ + mock.patch("subprocess.run", side_effect=check_output) as subprocess_mock: + list(scancode_tools.run_on_downloaded_package_file(package_definition="testing", job_count=1, retrieval_flags=13)) + list(scancode_tools.run_on_downloaded_package_file(package_definition="testing", job_count=2, retrieval_flags=37, index_url="DUMMY")) + + subprocess_mock.assert_has_calls( + [ + mock.call( + [sys.executable, "-m", "pip", "download", "--no-deps", "testing", "--dest", directories[0]], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True + ), + mock.call( + [sys.executable, "-m", "pip", "download", "--no-deps", "testing", "--dest", directories[1], "--index-url", "DUMMY"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True + ), + ], + any_order=False + ) + self.assertEqual(2, subprocess_mock.call_count, subprocess_mock.call_args_list) + + +class CheckThatExactlyOneValueIsSetTestCase(TestCase): + def test_check_that_exactly_one_value_is_set(self) -> None: + self.assertIs( + True, + scancode_tools._check_that_exactly_one_value_is_set([None, None, "test", None]) + ) + self.assertIs( + False, + scancode_tools._check_that_exactly_one_value_is_set([]) + ) + self.assertIs( + False, + scancode_tools._check_that_exactly_one_value_is_set([None, None]) + ) + + +class CleanupTestCase(TestCase): + def test_cleanup(self) -> None: + directory_string = mkdtemp() + directory = Path(directory_string) + self.assertTrue(directory.is_dir()) + + # 1) Existing directory. + scancode_tools.cleanup(directory) + self.assertFalse(directory.is_dir()) + + # 2) Missing directory. + scancode_tools.cleanup(directory) + self.assertFalse(directory.is_dir()) + + +class Stdout: + def __init__(self) -> None: + self.stdout = StringIO() + + def __str__(self) -> str: + return self.stdout.getvalue() + + +class RunTestCase(TestCase): + @contextmanager + def record_stdout(self) -> Generator[Stdout, None, None]: + result = Stdout() + with mock.patch("shutil.get_terminal_size", return_value=os.terminal_size((100, 20))), \ + redirect_stdout(result.stdout): + yield result + + def test_package_definition(self) -> None: + with self.record_stdout() as stdout: + with mock.patch.object(scancode_tools, "run_on_downloaded_package_file", return_value=iter(TYPING_EXTENSION_4_8_0__LICENSES)) as run_mock: + result = scancode_tools.run(package_definition="typing_extensions==4.8.0", index_url="https://example.org/simple") + run_mock.assert_called_once_with(package_definition="typing_extensions==4.8.0", index_url="https://example.org/simple", retrieval_flags=0, job_count=4) + self.assertEqual(TYPING_EXTENSION_4_8_0__LICENSES, result) + self.assertEqual(TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT, str(stdout)) + + def test_directory(self) -> None: + with TemporaryDirectory() as directory: + path = Path(directory) + with self.record_stdout() as stdout: + with mock.patch.object(scancode_tools, "run_on_directory", return_value=iter(TYPING_EXTENSION_4_8_0__LICENSES)) as run_mock: + result = scancode_tools.run(directory=path, retrieve_ldd_data=True) + run_mock.assert_called_once_with(directory=directory, retrieval_flags=16, job_count=4) + self.assertEqual(TYPING_EXTENSION_4_8_0__LICENSES, result) + self.assertEqual(TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT, str(stdout)) + + def test_archive_path(self) -> None: + with self.record_stdout() as stdout: + with mock.patch.object(scancode_tools, "run_on_package_archive_file", return_value=iter(TYPING_EXTENSION_4_8_0__LICENSES)) as run_mock: + result = scancode_tools.run(archive_path=Path("/tmp/dummy/typing_extensions-4.8.0.tar.gz"), retrieve_copyrights=True, job_count=1) + run_mock.assert_called_once_with(archive_path=Path("/tmp/dummy/typing_extensions-4.8.0.tar.gz"), retrieval_flags=1, job_count=1) + self.assertEqual(TYPING_EXTENSION_4_8_0__LICENSES, result) + self.assertEqual(TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT, str(stdout)) + + def test_file_path(self) -> None: + with self.record_stdout() as stdout: + result = scancode_tools.run(file_path=SETUP_PATH, job_count=1) + self.assertIsInstance(result, list) + self.assertEqual(1, len(result), result) + first_result = result[0] + self.assertIsInstance(first_result, FileResults) + self.assertEqual(SETUP_PY_LICENSES, first_result.licenses) + self.assertEqual(f"""{SETUP_PATH} Apache-2.0 AND (LicenseRef-scancode-unknown-license-reference AND Apache-2.0) + +==================================================================================================== + +Apache-2.0 AND (LicenseRef-scancode-unknown-license-reference AND Apache-2.0) 1 +""", str(stdout)) # noqa: W291