From 965cfa8e38e7596df1800c3a7624252b11de54a2 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Wed, 18 Sep 2024 11:56:23 -0400 Subject: [PATCH 01/10] wip --- .../exchanges/clients/ncmec/hash_api.py | 41 +++- .../exchanges/clients/ncmec/tests/data.py | 202 ++++++++++++++++++ .../clients/ncmec/tests/test_hash_api.py | 189 +++------------- 3 files changed, 271 insertions(+), 161 deletions(-) create mode 100644 python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 316b45861..3ffe5cd04 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -19,7 +19,7 @@ import urllib.parse import requests -from requests.packages.urllib3.util.retry import Retry +from urllib3.util.retry import Retry from threatexchange.exchanges.clients.utils.common import TimeoutHTTPAdapter @@ -27,6 +27,7 @@ _DEFAULT_ELE = ET.Element("") T = t.TypeVar("T") +FEEDBACK = t.List[t.Dict[str, str]] def nullthrows(v: t.Optional[T]) -> T: @@ -131,6 +132,7 @@ class NCMECEntryUpdate: deleted: bool classification: t.Optional[str] fingerprints: t.Dict[str, str] + feedback: t.Optional[FEEDBACK] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -148,6 +150,18 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": fingerprints={ x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text }, + feedback=[ + { + "type": x.tag, # "affirmativeFeedback" or "negativeFeedback" + "members": x.maybe( + "members" # "timestamp", "member.id", "member.name" + ), + "reasons": x.maybe( + "reasons" + ), # "reason" with "guid", "name", "type" | "members" + } + for x in xml.maybe("feedback") + ], ) @@ -215,6 +229,23 @@ def estimated_entries_in_range(self) -> int: ) +# TODO: check http code until we have update response shape, we also might not care about it +@dataclass +class UpdateEntryResponse: + updates: t.List[NCMECEntryUpdate] + + @classmethod + def from_xml(cls, xml: _XMLWrapper, fallback_max_time: int) -> "GetEntriesResponse": + updates: t.List[NCMECEntryUpdate] = [] + + for content_xml in (xml.maybe("images"), xml.maybe("videos")): + if not content_xml or not len(content_xml): + continue + updates.extend(NCMECEntryUpdate.from_xml(c) for c in content_xml) + + return cls(updates) + + @unique class NCMECEndpoint(Enum): status = "status" @@ -401,6 +432,14 @@ def get_entries_iter( has_more = bool(next_) yield result + # TODO: split into 2, submit upvote and downvote + def submit_feedback(self, entry_id: str, is_good: bool) -> GetEntriesResponse: + # TODO + # 1. Prepare the XML payload + # 2. Send the POST request using _post + # 3. Parse the response using GetEntriesResponse.from_xml + return + def _date_format(timestamp: int) -> str: """ISO 8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ""" diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py new file mode 100644 index 000000000..81929b7ba --- /dev/null +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -0,0 +1,202 @@ +STATUS_XML = """ + + + 127.0.0.1 + testington + Sir Testington + +""".strip() + +NEXT_UNESCAPED = ( + "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" + "&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000" +) + +NEXT_UNESCAPED2 = ( + "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" + "&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000" +) +NEXT_UNESCAPED3 = ( + "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" + "&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000" +) + +ENTRIES_XML = """ + + + + + Example Member + 2017-10-24T15:00:00Z + image1 + A1 + + a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... + + + + + Example Member + + + + + + + Example Member + + + + + + + Example Member2 + image4 + 2017-10-24T15:10:00Z + + + + + + Example Member + video4 + 2017-10-24T15:20:00Z + + + + /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000 + + +""".strip() + + +ENTRIES_XML2 = """ + + + + + Example Member + 2019-10-24T15:00:00Z + image10 + A1 + + b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... + + + + + + /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000 + + +""".strip() + +# This example isn't in the documentation, but shows how updates work +ENTRIES_XML3 = """ + + + + + + + + /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000 + + +""".strip() + +ENTRIES_XML4 = """ + + + + + + TX Example + 2019-11-25T15:10:00Z + willdelete + + + +""".strip() + +ENTRIES_LARGE_FINGERPRINTS = """ + + + + + + +""".strip() + +AFFIRMATIVE_FEEDBACK_XML = """ + + + + + + +""".strip() + +NEGATIVE_FEEDBACK_XML = """ + + + + + 01234567-abcd-0123-4567-012345678900 + + + +""".strip() + +UPDATE_FEEDBACK_RESULT_XML = """ + + + + +""".strip() diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 89968543a..ab776cf3e 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -1,7 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. from unittest.mock import Mock -import urllib.parse import typing as t import pytest import requests @@ -11,166 +10,17 @@ NCMECHashAPI, NCMECEnvironment, ) - -STATUS_XML = """ - - - 127.0.0.1 - testington - Sir Testington - -""".strip() - -NEXT_UNESCAPED = ( - "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" - "&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000" -) - -NEXT_UNESCAPED2 = ( - "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" - "&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000" +from threatexchange.exchanges.clients.ncmec.tests.data import ( + ENTRIES_LARGE_FINGERPRINTS, + ENTRIES_XML, + ENTRIES_XML2, + ENTRIES_XML3, + ENTRIES_XML4, + NEXT_UNESCAPED, + NEXT_UNESCAPED2, + NEXT_UNESCAPED3, + UPDATE_FEEDBACK_RESULT_XML, ) -NEXT_UNESCAPED3 = ( - "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" - "&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000" -) - -ENTRIES_XML = """ - - - - - Example Member - 2017-10-24T15:00:00Z - image1 - A1 - - a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... - - - - Example Member2 - image4 - 2017-10-24T15:10:00Z - - - - - - Example Member - video4 - 2017-10-24T15:20:00Z - - - - /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000 - - -""".strip() - - -ENTRIES_XML2 = """ - - - - - Example Member - 2019-10-24T15:00:00Z - image10 - A1 - - b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... - - - - - /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000 - - -""".strip() - -# This example isn't in the documentation, but shows how updates work -ENTRIES_XML3 = """ - - - - - - - - /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000 - - -""".strip() - -ENTRIES_XML4 = """ - - - - - - TX Example - 2019-11-25T15:10:00Z - willdelete - - - -""".strip() - -ENTRIES_LARGE_FINGERPRINTS = """ - - - - - - -""".strip() def mock_get_impl(url: str, **params): @@ -323,3 +173,22 @@ def test_large_fingerprint_entries(monkeypatch): assert len(update.fingerprints) == 1 assert update.fingerprints == {"md5": "facefacefacefacefacefacefaceface"} assert result.next == "" + + +def test_feedback_entries(monkeypatch): + api = NCMECHashAPI("fake_user", "fake_pass", NCMECEnvironment.test_Industry) + session = Mock( + strict_spec=["post", "__enter__", "__exit__"], + post=set_api_return(UPDATE_FEEDBACK_RESULT_XML), + __enter__=lambda _: session, + __exit__=lambda *args: None, + ) + monkeypatch.setattr(api, "_get_session", lambda: session) + + result = api.submit_feedback("image1", True) + + assert len(result.updates) == 1 + update = result.updates[0] + assert update.received == 1 + assert update.accepted == 1 + assert update.updated == 1 From 90aa0ed810d5a26497da3463525e3a39655d1263 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Wed, 25 Sep 2024 21:33:16 -0400 Subject: [PATCH 02/10] add NCMECFeedbackType, member_id, build xml payload --- .../exchanges/clients/ncmec/hash_api.py | 43 +++++++++++++++++-- .../exchanges/clients/ncmec/tests/data.py | 2 + .../clients/ncmec/tests/test_hash_api.py | 6 ++- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 3ffe5cd04..b23dbbb4a 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -27,7 +27,6 @@ _DEFAULT_ELE = ET.Element("") T = t.TypeVar("T") -FEEDBACK = t.List[t.Dict[str, str]] def nullthrows(v: t.Optional[T]) -> T: @@ -124,6 +123,19 @@ class NCMECEntryType(Enum): video = "video" +@unique +class NCMECFeedbackType(Enum): + md5 = "MD5" + sha1 = "SHA1" + pdna = "PDNA" + pdq = "PDQ" + netclean = "NETCLEAN" + Videntifier = "VIDENTIFIER" + tmk_pdqf = "TMK_PDQF" + ssvh_pdna = "SSVH_PDNA" + ssvh_safer_hash = "SSVH_SAFER_HASH" + + @dataclass class NCMECEntryUpdate: id: str @@ -132,7 +144,7 @@ class NCMECEntryUpdate: deleted: bool classification: t.Optional[str] fingerprints: t.Dict[str, str] - feedback: t.Optional[FEEDBACK] + feedback: t.List[t.Dict[str, str]] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -297,10 +309,11 @@ def __init__( self.username = username self.password = password self._base_url = environment.value + self.member_id = None def _get_session(self) -> requests.Session: """ - Custom requests sesson + Custom requests session Ideally, should be used within a context manager: ``` @@ -369,6 +382,7 @@ def status(self) -> StatusResult: """Query the status endpoint, which tells you who you are.""" response = self._get(NCMECEndpoint.status) member = _XMLWrapper(response)["member"] + self.member_id = member.int("id") return StatusResult(member.int("id"), member.text) def members(self) -> t.List[StatusResult]: @@ -433,9 +447,30 @@ def get_entries_iter( yield result # TODO: split into 2, submit upvote and downvote - def submit_feedback(self, entry_id: str, is_good: bool) -> GetEntriesResponse: + def submit_feedback( + self, + entry_id: str, + feedback_type: NCMECFeedbackType, + affirmative: bool, + reason_id: str = None, + ) -> GetEntriesResponse: + if not affirmative and not reason_id: + raise ValueError("Negative feedback must have a reason_id") + + # need member_id to submit feedback + if not self.member_id: + self.status() + # TODO # 1. Prepare the XML payload + root = ET.Element("feedbackSubmission") + root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2") + vote = ET.SubElement(root, "affirmative" if affirmative else "negative") + if not affirmative: + reasons = ET.SubElement(vote, "reasonIds") + guid = ET.SubElement(reasons, "guid") + guid.text = reason_id + # ET.dump(root) # 2. Send the POST request using _post # 3. Parse the response using GetEntriesResponse.from_xml return diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py index 81929b7ba..7664ade8a 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -1,3 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + STATUS_XML = """ diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index ab776cf3e..35750766a 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -7,6 +7,7 @@ from threatexchange.exchanges.clients.ncmec.hash_api import ( NCMECEntryType, NCMECEntryUpdate, + NCMECFeedbackType, NCMECHashAPI, NCMECEnvironment, ) @@ -185,7 +186,10 @@ def test_feedback_entries(monkeypatch): ) monkeypatch.setattr(api, "_get_session", lambda: session) - result = api.submit_feedback("image1", True) + result = api.submit_feedback("image1", NCMECFeedbackType.md5, True) + result = api.submit_feedback( + "image1", NCMECFeedbackType.md5, False, "01234567-abcd-0123-4567-012345678900" + ) assert len(result.updates) == 1 update = result.updates[0] From 1da801d559e8389b3ab9508153a95c5537a6c54e Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Sat, 28 Sep 2024 15:03:38 -0400 Subject: [PATCH 03/10] get feedback_reasons --- .../exchanges/clients/ncmec/hash_api.py | 110 ++++++++++++++++-- .../exchanges/clients/ncmec/tests/data.py | 16 +++ .../clients/ncmec/tests/test_hash_api.py | 22 +++- 3 files changed, 134 insertions(+), 14 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index b23dbbb4a..dbe0cb3fa 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -241,7 +241,7 @@ def estimated_entries_in_range(self) -> int: ) -# TODO: check http code until we have update response shape, we also might not care about it +# TODO: once we know the shape of response, finish this class @dataclass class UpdateEntryResponse: updates: t.List[NCMECEntryUpdate] @@ -258,11 +258,30 @@ def from_xml(cls, xml: _XMLWrapper, fallback_max_time: int) -> "GetEntriesRespon return cls(updates) +@dataclass +class GetFeedbackReasonsResponse: + reasons: t.List[t.Dict[str, str]] + + @classmethod + def from_xml(cls, xml: _XMLWrapper) -> "GetFeedbackReasonsResponse": + reasons = [] + for reason in xml.maybe("availableFeedbackReasons"): + reasons.append( + { + "guid": reason.str("guid"), + "name": reason.str("name"), + "type": reason.str("type"), + } + ) + return cls(reasons) + + @unique class NCMECEndpoint(Enum): status = "status" entries = "entries" members = "members" + feedback = "feedback" class NCMECEnvironment(Enum): @@ -304,12 +323,15 @@ def __init__( username: str, password: str, environment: NCMECEnvironment, + member_id: t.Optional[str] = None, + reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = None, ) -> None: assert is_valid_user_pass(username, password) self.username = username self.password = password self._base_url = environment.value - self.member_id = None + self.member_id = member_id + self.reasons_map = reasons_map or {} def _get_session(self) -> requests.Session: """ @@ -339,7 +361,9 @@ def _get_session(self) -> requests.Session: ) return session - def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Element: + def _get( + self, endpoint: NCMECEndpoint, *, path: str = "", next_: str = "", **params + ) -> ET.Element: """ Perform an HTTP GET request, and return the XML response payload. @@ -347,6 +371,8 @@ def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Elem """ url = "/".join((self._base_url, self.VERSION, endpoint.value)) + if path: + url = "/".join((url, path)) if next_: url = self._base_url + next_ params = {} @@ -372,17 +398,49 @@ def _post(self, endpoint: NCMECEndpoint, *, data=None) -> t.Any: No timeout or retry strategy. """ - url = "/".join((self._base_url, endpoint.value)) + url = "/".join((self._base_url, self.VERSION, endpoint.value)) with self._get_session() as session: response = session.post(url, data=data) response.raise_for_status() return response + def _put( + self, + endpoint: NCMECEndpoint, + *, + member_id: str = None, + entry_id: str = None, + feedback_type: NCMECFeedbackType = None, + data=None, + ) -> t.Any: + """ + Perform an HTTP PUT request, and return the XML response payload. + + No timeout or retry strategy. + """ + + url = "/".join((self._base_url, self.VERSION, endpoint.value)) + if feedback_type: + url = "/".join( + ( + self._base_url, + endpoint.value, + member_id, + entry_id, + feedback_type.value, + NCMECEndpoint.feedback.value, + ) + ) + with self._get_session() as session: + response = session.put(url, data=data) + response.raise_for_status() + return response + def status(self) -> StatusResult: """Query the status endpoint, which tells you who you are.""" response = self._get(NCMECEndpoint.status) member = _XMLWrapper(response)["member"] - self.member_id = member.int("id") + self.member_id = member.str("id") return StatusResult(member.int("id"), member.text) def members(self) -> t.List[StatusResult]: @@ -393,6 +451,16 @@ def members(self) -> t.List[StatusResult]: for member in _XMLWrapper(response) ] + def feedback_reasons(self) -> GetFeedbackReasonsResponse: + for feedbackType in NCMECFeedbackType: + resp = self._get( + NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" + ) + reasons = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)).reasons + self.reasons_map[feedbackType] = reasons + + return + def get_entries( self, *, @@ -446,7 +514,6 @@ def get_entries_iter( has_more = bool(next_) yield result - # TODO: split into 2, submit upvote and downvote def submit_feedback( self, entry_id: str, @@ -461,19 +528,40 @@ def submit_feedback( if not self.member_id: self.status() - # TODO - # 1. Prepare the XML payload + # need valid reasons to submit feedback + if not self.reasons_map: + self.feedback_reasons() + + # Prepare the XML payload root = ET.Element("feedbackSubmission") root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2") vote = ET.SubElement(root, "affirmative" if affirmative else "negative") + + valid_reason_ids = [ + reason["guid"] for reason in self.reasons_map[feedback_type.value] + ] if not affirmative: + if reason_id not in valid_reason_ids: + print( + "must choose from the following reasons: ", + self.reasons_map[feedback_type.value], + ) + raise ValueError("Invalid reason_id") reasons = ET.SubElement(vote, "reasonIds") guid = ET.SubElement(reasons, "guid") guid.text = reason_id # ET.dump(root) - # 2. Send the POST request using _post - # 3. Parse the response using GetEntriesResponse.from_xml - return + + resp = self._put( + NCMECEndpoint.entries, + member_id=self.member_id, + entry_id=entry_id, + feedback_type=feedback_type, + data=ET.tostring(root), + ) + + # TODO: parse response here once we know the shape using UpdateEntryResponse + return resp def _date_format(timestamp: int) -> str: diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py index 7664ade8a..fc66a5b3f 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -176,6 +176,22 @@ """.strip() +STATUS_XML = """ + + + 1.1.1.1 + test_user + test member + +""".strip() + +FEEDBACK_REASONS_XML = """ + + + + +""".strip() + AFFIRMATIVE_FEEDBACK_XML = """ diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 35750766a..3cbd1a94d 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -20,6 +20,7 @@ NEXT_UNESCAPED, NEXT_UNESCAPED2, NEXT_UNESCAPED3, + STATUS_XML, UPDATE_FEEDBACK_RESULT_XML, ) @@ -177,10 +178,25 @@ def test_large_fingerprint_entries(monkeypatch): def test_feedback_entries(monkeypatch): - api = NCMECHashAPI("fake_user", "fake_pass", NCMECEnvironment.test_Industry) + api = NCMECHashAPI( + "fake_user", + "fake_pass", + NCMECEnvironment.test_Industry, + "123", + { + NCMECFeedbackType.md5.value: [ + { + "guid": "01234567-abcd-0123-4567-012345678900", + "name": "Example Reason 1", + "type": "Sha1", + } + ] + }, + ) session = Mock( - strict_spec=["post", "__enter__", "__exit__"], - post=set_api_return(UPDATE_FEEDBACK_RESULT_XML), + strict_spec=["get", "put", "__enter__", "__exit__"], + get=set_api_return(STATUS_XML), + put=set_api_return(UPDATE_FEEDBACK_RESULT_XML), __enter__=lambda _: session, __exit__=lambda *args: None, ) From be4cea236062a27f56480c382993d589c898e90c Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Sat, 28 Sep 2024 15:18:14 -0400 Subject: [PATCH 04/10] update test check status code --- .../exchanges/clients/ncmec/tests/test_hash_api.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 3cbd1a94d..571b7702f 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -207,8 +207,4 @@ def test_feedback_entries(monkeypatch): "image1", NCMECFeedbackType.md5, False, "01234567-abcd-0123-4567-012345678900" ) - assert len(result.updates) == 1 - update = result.updates[0] - assert update.received == 1 - assert update.accepted == 1 - assert update.updated == 1 + assert result.status_code == 200 From a82dd7b936c63454ab1cdfa73259702752792973 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Sat, 28 Sep 2024 15:31:48 -0400 Subject: [PATCH 05/10] type fixes --- .../exchanges/clients/ncmec/hash_api.py | 24 ++++++++++--------- .../clients/ncmec/tests/test_hash_api.py | 7 +++--- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index dbe0cb3fa..f24c4b17c 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -144,7 +144,7 @@ class NCMECEntryUpdate: deleted: bool classification: t.Optional[str] fingerprints: t.Dict[str, str] - feedback: t.List[t.Dict[str, str]] + feedback: t.List[t.Dict[str, t.Any]] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -247,7 +247,9 @@ class UpdateEntryResponse: updates: t.List[NCMECEntryUpdate] @classmethod - def from_xml(cls, xml: _XMLWrapper, fallback_max_time: int) -> "GetEntriesResponse": + def from_xml( + cls, xml: _XMLWrapper, fallback_max_time: int + ) -> "UpdateEntryResponse": updates: t.List[NCMECEntryUpdate] = [] for content_xml in (xml.maybe("images"), xml.maybe("videos")): @@ -324,7 +326,7 @@ def __init__( password: str, environment: NCMECEnvironment, member_id: t.Optional[str] = None, - reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = None, + reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = {}, ) -> None: assert is_valid_user_pass(username, password) self.username = username @@ -408,9 +410,9 @@ def _put( self, endpoint: NCMECEndpoint, *, - member_id: str = None, - entry_id: str = None, - feedback_type: NCMECFeedbackType = None, + member_id: t.Optional[str] = None, + entry_id: t.Optional[str] = None, + feedback_type: t.Optional[NCMECFeedbackType] = None, data=None, ) -> t.Any: """ @@ -420,7 +422,7 @@ def _put( """ url = "/".join((self._base_url, self.VERSION, endpoint.value)) - if feedback_type: + if feedback_type and member_id and entry_id: url = "/".join( ( self._base_url, @@ -456,10 +458,10 @@ def feedback_reasons(self) -> GetFeedbackReasonsResponse: resp = self._get( NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" ) - reasons = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)).reasons - self.reasons_map[feedbackType] = reasons + reasonsResp = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)) + self.reasons_map[feedbackType.value] = reasonsResp.reasons - return + return reasonsResp def get_entries( self, @@ -519,7 +521,7 @@ def submit_feedback( entry_id: str, feedback_type: NCMECFeedbackType, affirmative: bool, - reason_id: str = None, + reason_id: t.Optional[str] = None, ) -> GetEntriesResponse: if not affirmative and not reason_id: raise ValueError("Negative feedback must have a reason_id") diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 571b7702f..ba946d504 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -182,8 +182,8 @@ def test_feedback_entries(monkeypatch): "fake_user", "fake_pass", NCMECEnvironment.test_Industry, - "123", - { + member_id="123", + reasons_map={ NCMECFeedbackType.md5.value: [ { "guid": "01234567-abcd-0123-4567-012345678900", @@ -194,8 +194,7 @@ def test_feedback_entries(monkeypatch): }, ) session = Mock( - strict_spec=["get", "put", "__enter__", "__exit__"], - get=set_api_return(STATUS_XML), + strict_spec=["put", "__enter__", "__exit__"], put=set_api_return(UPDATE_FEEDBACK_RESULT_XML), __enter__=lambda _: session, __exit__=lambda *args: None, From 6dee071c18f09b084a84863a7923a0665752dab3 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Mon, 30 Sep 2024 09:50:01 -0400 Subject: [PATCH 06/10] parse feedback better --- .../exchanges/clients/ncmec/hash_api.py | 55 +++++++++++++------ .../exchanges/clients/ncmec/tests/data.py | 16 +++--- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index f24c4b17c..380927089 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -130,7 +130,7 @@ class NCMECFeedbackType(Enum): pdna = "PDNA" pdq = "PDQ" netclean = "NETCLEAN" - Videntifier = "VIDENTIFIER" + videntifier = "VIDENTIFIER" tmk_pdqf = "TMK_PDQF" ssvh_pdna = "SSVH_PDNA" ssvh_safer_hash = "SSVH_SAFER_HASH" @@ -162,18 +162,36 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": fingerprints={ x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text }, - feedback=[ - { - "type": x.tag, # "affirmativeFeedback" or "negativeFeedback" - "members": x.maybe( - "members" # "timestamp", "member.id", "member.name" - ), - "reasons": x.maybe( - "reasons" - ), # "reason" with "guid", "name", "type" | "members" - } - for x in xml.maybe("feedback") - ], + feedback=( + [ + { + "sentiment": x.tag, # "affirmativeFeedback" or "negativeFeedback" + "type": x.str("type"), + "latest_feedback_time": x.str("lastUpdateTimestamp"), + "members": [ + {"id": m.str("id"), "name": m.text} + for m in x.maybe("members") + if m.has_text + ], + "reasons": [ + { + "guid": r.maybe("reason").str("guid"), + "name": r.maybe("reason").str("name"), + "type": r.maybe("reason").str("type"), + "members": [ + {"id": m.str("id"), "name": m.text} + for m in x.maybe("members") + ], + } + for r in x.maybe("reasons") + if r.maybe("reason") + ], + } + for x in xml.maybe("feedback") + ] + if xml.maybe("feedback").has_text + else [] + ), ) @@ -454,6 +472,7 @@ def members(self) -> t.List[StatusResult]: ] def feedback_reasons(self) -> GetFeedbackReasonsResponse: + """Get the possible negative feedback reasons for each feedback type""" for feedbackType in NCMECFeedbackType: resp = self._get( NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" @@ -530,8 +549,8 @@ def submit_feedback( if not self.member_id: self.status() - # need valid reasons to submit feedback - if not self.reasons_map: + # need valid reasons to submit negative feedback + if not affirmative and not self.reasons_map: self.feedback_reasons() # Prepare the XML payload @@ -539,10 +558,10 @@ def submit_feedback( root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2") vote = ET.SubElement(root, "affirmative" if affirmative else "negative") - valid_reason_ids = [ - reason["guid"] for reason in self.reasons_map[feedback_type.value] - ] if not affirmative: + valid_reason_ids = [ + reason["guid"] for reason in self.reasons_map[feedback_type.value] + ] if reason_id not in valid_reason_ids: print( "must choose from the following reasons: ", diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py index fc66a5b3f..21b7a9ca5 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -39,17 +39,17 @@ - - Example Member - - - - - Example Member - + + + + + + Example Member + + From cced9bf04f7b8ca3e33359408cca3d91853b6355 Mon Sep 17 00:00:00 2001 From: dcallies Date: Wed, 2 Oct 2024 17:31:43 +0100 Subject: [PATCH 07/10] checkpoint --- .../clients/fb_threatexchange/api.py | 2 +- .../exchanges/clients/ncmec/hash_api.py | 227 ++++++++++-------- .../clients/ncmec/tests/test_hash_api.py | 54 ++--- .../threatexchange/signal_type/signal_base.py | 1 - 4 files changed, 151 insertions(+), 133 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/fb_threatexchange/api.py b/python-threatexchange/threatexchange/exchanges/clients/fb_threatexchange/api.py index 4325a248c..250ecced4 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/fb_threatexchange/api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/fb_threatexchange/api.py @@ -15,7 +15,7 @@ import urllib.error import requests -from requests.packages.urllib3.util.retry import Retry +from urllib3.util.retry import Retry from threatexchange.exchanges.clients.utils.common import TimeoutHTTPAdapter diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 380927089..6be5e73e7 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -9,6 +9,7 @@ import xml.etree.ElementTree as ET from datetime import datetime, timezone +from copy import deepcopy from dataclasses import dataclass from enum import Enum, unique import logging @@ -113,18 +114,41 @@ def str(self, key: str) -> str: @dataclass class StatusResult: + """ + Represents a NCMEC member. + + While it does correspond to the /status endpoint, we should probably + rename it at this point. + """ + esp_id: int esp_name: str + @classmethod + def from_xml(cls, xml: _XMLWrapper) -> t.Self: + return cls(esp_id=xml.int("id"), esp_name=xml.text) + @unique class NCMECEntryType(Enum): + """Type of entry, as marked by xml""" + image = "image" video = "video" @unique -class NCMECFeedbackType(Enum): +class FingerprintType(Enum): + """ + The list of supported fingerprints, as of 10/2024. + + This also corresponds to the feedback types for the upvote/downvote API + + We are currently not parsing these in the returned entry to prevent + compatibility issues if NCMEC were to add more fingerprint types, and + returning them as simple strings. + """ + md5 = "MD5" sha1 = "SHA1" pdna = "PDNA" @@ -136,15 +160,67 @@ class NCMECFeedbackType(Enum): ssvh_safer_hash = "SSVH_SAFER_HASH" +@dataclass +class Feedback: + """Feedback on a single fingerprint in an entry""" + + # True = upvote | False = downvote + sentiment: bool + # The member giving the feedback + member: StatusResult + # For upvotes, the reason text + reason = "" + + @classmethod + def get_from_entry_feedback(cls, entry: _XMLWrapper) -> t.Dict[str, t.List[t.Self]]: + feedbacks = entry.maybe("feedback") + if not feedbacks: + return [] + ret: t.Dict[str, t.List[Feedback]] = {} + + for sentimentTag in feedbacks: + feedbacks = ret.setdefault(sentimentTag.str("type"), []) + if sentimentTag.tag == "affirmativeFeedback": + # Iterate over members + for m in sentimentTag.maybe("members"): + feedbacks.append(cls(StatusResult.from_xml(m), True)) + elif sentimentTag.tag == "negativeFeedback": + # Iterate over reasons + for r in sentimentTag.maybe("reasons"): + print(str(r)) + assert r.tag == "reason" + reason_name = r.str("name") + for m in r.maybe("members"): + feedbacks.append( + cls(False, StatusResult.from_xml(m), reason_name) + ) + else: + logging.warning( + "[ncmec] Ignoring unknown sentiment '%s'", sentimentTag.tag + ) + continue + return ret + + @dataclass class NCMECEntryUpdate: + # The entry id id: str + # The esp_id for the uploader member_id: int + # The entry or content type (e.g. image/video) entry_type: NCMECEntryType + # Whether or not this is a tombstone for a deleted entry deleted: bool + # The string classification of the entry classification: t.Optional[str] + # The hashes/fingerprints for this entry, Dict[type, value]. This is + # roughly equivalent to SignalType.get_name(): signal_value, but NCMEC + # chooses different names for these fingerprints: t.Dict[str, str] - feedback: t.List[t.Dict[str, t.Any]] + # The feedback (upvote/downvote) that other ESPs have given for this entry + # Keyed the same way as fingerprints + feedback: t.Dict[str, Feedback] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -162,36 +238,7 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": fingerprints={ x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text }, - feedback=( - [ - { - "sentiment": x.tag, # "affirmativeFeedback" or "negativeFeedback" - "type": x.str("type"), - "latest_feedback_time": x.str("lastUpdateTimestamp"), - "members": [ - {"id": m.str("id"), "name": m.text} - for m in x.maybe("members") - if m.has_text - ], - "reasons": [ - { - "guid": r.maybe("reason").str("guid"), - "name": r.maybe("reason").str("name"), - "type": r.maybe("reason").str("type"), - "members": [ - {"id": m.str("id"), "name": m.text} - for m in x.maybe("members") - ], - } - for r in x.maybe("reasons") - if r.maybe("reason") - ], - } - for x in xml.maybe("feedback") - ] - if xml.maybe("feedback").has_text - else [] - ), + feedback=Feedback.get_from_entry_feedback(xml), ) @@ -278,24 +325,6 @@ def from_xml( return cls(updates) -@dataclass -class GetFeedbackReasonsResponse: - reasons: t.List[t.Dict[str, str]] - - @classmethod - def from_xml(cls, xml: _XMLWrapper) -> "GetFeedbackReasonsResponse": - reasons = [] - for reason in xml.maybe("availableFeedbackReasons"): - reasons.append( - { - "guid": reason.str("guid"), - "name": reason.str("name"), - "type": reason.str("type"), - } - ) - return cls(reasons) - - @unique class NCMECEndpoint(Enum): status = "status" @@ -343,15 +372,14 @@ def __init__( username: str, password: str, environment: NCMECEnvironment, - member_id: t.Optional[str] = None, - reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = {}, ) -> None: assert is_valid_user_pass(username, password) self.username = username self.password = password self._base_url = environment.value - self.member_id = member_id - self.reasons_map = reasons_map or {} + self._my_esp: t.Optional[StatusResult] = None + # type -> name -> guid + self._feedback_reason_map: t.Dict[FingerprintType, t.Dict[str, str]] = {} def _get_session(self) -> requests.Session: """ @@ -428,9 +456,9 @@ def _put( self, endpoint: NCMECEndpoint, *, - member_id: t.Optional[str] = None, + member_id: t.Optional[int] = None, entry_id: t.Optional[str] = None, - feedback_type: t.Optional[NCMECFeedbackType] = None, + feedback_type: t.Optional[FingerprintType] = None, data=None, ) -> t.Any: """ @@ -445,7 +473,7 @@ def _put( ( self._base_url, endpoint.value, - member_id, + str(member_id), entry_id, feedback_type.value, NCMECEndpoint.feedback.value, @@ -459,28 +487,28 @@ def _put( def status(self) -> StatusResult: """Query the status endpoint, which tells you who you are.""" response = self._get(NCMECEndpoint.status) - member = _XMLWrapper(response)["member"] - self.member_id = member.str("id") - return StatusResult(member.int("id"), member.text) + ret = StatusResult.from_xml(_XMLWrapper(response)["member"]) + self._my_esp = deepcopy(ret) + return ret def members(self) -> t.List[StatusResult]: """Query the members endpoint, which gives you a list of esps""" response = self._get(NCMECEndpoint.members) - return [ - StatusResult(member.int("id"), member.text) - for member in _XMLWrapper(response) - ] + return [StatusResult.from_xml(member) for member in _XMLWrapper(response)] - def feedback_reasons(self) -> GetFeedbackReasonsResponse: + def feedback_reasons(self, fingerprint_type: FingerprintType) -> t.Dict[str, str]: """Get the possible negative feedback reasons for each feedback type""" - for feedbackType in NCMECFeedbackType: - resp = self._get( - NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" - ) - reasonsResp = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)) - self.reasons_map[feedbackType.value] = reasonsResp.reasons - - return reasonsResp + ret = {} + # Could parallelize this + xml = _XMLWrapper( + self._get(NCMECEndpoint.feedback, path=f"{fingerprint_type.value}/reasons") + ) + ret = { + reason.str("guid"): reason.str("name") + for reason in xml["availableFeedbackReasons"] + } + self._feedback_reason_map[fingerprint_type] = deepcopy(ret) + return ret def get_entries( self, @@ -538,20 +566,15 @@ def get_entries_iter( def submit_feedback( self, entry_id: str, - feedback_type: NCMECFeedbackType, + fingerprint_type: FingerprintType, affirmative: bool, - reason_id: t.Optional[str] = None, - ) -> GetEntriesResponse: - if not affirmative and not reason_id: - raise ValueError("Negative feedback must have a reason_id") + negative_reason_guid: t.Optional[str] = None, + ) -> None: # need member_id to submit feedback - if not self.member_id: - self.status() - - # need valid reasons to submit negative feedback - if not affirmative and not self.reasons_map: - self.feedback_reasons() + my_esp = self._my_esp + if not self._my_esp: + my_esp = self.status() # Prepare the XML payload root = ET.Element("feedbackSubmission") @@ -559,31 +582,35 @@ def submit_feedback( vote = ET.SubElement(root, "affirmative" if affirmative else "negative") if not affirmative: - valid_reason_ids = [ - reason["guid"] for reason in self.reasons_map[feedback_type.value] - ] - if reason_id not in valid_reason_ids: - print( - "must choose from the following reasons: ", - self.reasons_map[feedback_type.value], + if not negative_reason_guid: + # We need a reason ID, but there may be only one choice + # so we can just use that one + if fingerprint_type not in self._feedback_reason_map: + self.feedback_reasons(fingerprint_type) + feedback_options = self._feedback_reason_map[fingerprint_type] + if not feedback_options: + raise Exception( + "No feedback options for this type? Try reaching out to NCMEC" + ) + if len(feedback_options) == 1: + # Only one choice + negative_reason_guid = next(feedback_options.keys()) + if not negative_reason_guid: + raise Exception( + f"Need to pick a feedback reason. Options: {feedback_options}" ) - raise ValueError("Invalid reason_id") reasons = ET.SubElement(vote, "reasonIds") guid = ET.SubElement(reasons, "guid") - guid.text = reason_id - # ET.dump(root) + guid.text = negative_reason_guid - resp = self._put( + self._put( NCMECEndpoint.entries, - member_id=self.member_id, + member_id=my_esp.esp_id, entry_id=entry_id, - feedback_type=feedback_type, + feedback_type=fingerprint_type, data=ET.tostring(root), ) - # TODO: parse response here once we know the shape using UpdateEntryResponse - return resp - def _date_format(timestamp: int) -> str: """ISO 8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ""" diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index ba946d504..a12330091 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -7,9 +7,10 @@ from threatexchange.exchanges.clients.ncmec.hash_api import ( NCMECEntryType, NCMECEntryUpdate, - NCMECFeedbackType, + FingerprintType, NCMECHashAPI, NCMECEnvironment, + StatusResult, ) from threatexchange.exchanges.clients.ncmec.tests.data import ( ENTRIES_LARGE_FINGERPRINTS, @@ -29,10 +30,12 @@ def mock_get_impl(url: str, **params): content = ENTRIES_XML if url.endswith(NEXT_UNESCAPED): content = ENTRIES_XML2 - if url.endswith(NEXT_UNESCAPED2): + elif url.endswith(NEXT_UNESCAPED2): content = ENTRIES_XML3 - if url.endswith(NEXT_UNESCAPED3): + elif url.endswith(NEXT_UNESCAPED3): content = ENTRIES_XML4 + elif url.endswith("/status"): + content = STATUS_XML # Void your warantee by messing with requests state resp = requests.Response() resp._content = content.encode() @@ -68,6 +71,7 @@ def api(monkeypatch: pytest.MonkeyPatch): session = Mock( strict_spec=["get", "__enter__", "__exit__"], get=mock_get_impl, + _put=Mock(), __enter__=lambda _: session, __exit__=lambda *args: None, ) @@ -126,6 +130,14 @@ def assert_fifth_entry(entry: NCMECEntryUpdate) -> None: } +def test_mocked_status(api: NCMECHashAPI): + assert api._my_esp is None + result = api.status() + assert result.esp_id == 1 + assert result.esp_name == "test member" + assert result == api._my_esp + + def test_mocked_get_hashes(api: NCMECHashAPI): result = api.get_entries() @@ -177,33 +189,13 @@ def test_large_fingerprint_entries(monkeypatch): assert result.next == "" -def test_feedback_entries(monkeypatch): - api = NCMECHashAPI( - "fake_user", - "fake_pass", - NCMECEnvironment.test_Industry, - member_id="123", - reasons_map={ - NCMECFeedbackType.md5.value: [ - { - "guid": "01234567-abcd-0123-4567-012345678900", - "name": "Example Reason 1", - "type": "Sha1", - } - ] - }, - ) - session = Mock( - strict_spec=["put", "__enter__", "__exit__"], - put=set_api_return(UPDATE_FEEDBACK_RESULT_XML), - __enter__=lambda _: session, - __exit__=lambda *args: None, - ) - monkeypatch.setattr(api, "_get_session", lambda: session) +def test_feedback_entries(api: NCMECHashAPI): + # We'll mock that we've already read our own ESP - result = api.submit_feedback("image1", NCMECFeedbackType.md5, True) - result = api.submit_feedback( - "image1", NCMECFeedbackType.md5, False, "01234567-abcd-0123-4567-012345678900" + api.submit_feedback("image1", FingerprintType.md5, True) + api.submit_feedback( + "image1", + FingerprintType.md5, + False, + "01234567-abcd-0123-4567-012345678900", ) - - assert result.status_code == 200 diff --git a/python-threatexchange/threatexchange/signal_type/signal_base.py b/python-threatexchange/threatexchange/signal_type/signal_base.py index 3e889efa5..393cf6339 100644 --- a/python-threatexchange/threatexchange/signal_type/signal_base.py +++ b/python-threatexchange/threatexchange/signal_type/signal_base.py @@ -6,7 +6,6 @@ import abc import pathlib -import random import typing as t from threatexchange import common From e1376336c8ceb5cf6dddf0b47ae76865103e8a99 Mon Sep 17 00:00:00 2001 From: dcallies Date: Thu, 3 Oct 2024 13:49:00 +0100 Subject: [PATCH 08/10] aaaaa --- .../exchanges/clients/ncmec/hash_api.py | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 6be5e73e7..0a0e7de54 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -125,7 +125,7 @@ class StatusResult: esp_name: str @classmethod - def from_xml(cls, xml: _XMLWrapper) -> t.Self: + def from_xml(cls, xml: _XMLWrapper) -> "StatusResult": return cls(esp_id=xml.int("id"), esp_name=xml.text) @@ -169,28 +169,44 @@ class Feedback: # The member giving the feedback member: StatusResult # For upvotes, the reason text - reason = "" + reason: str = "" @classmethod - def get_from_entry_feedback(cls, entry: _XMLWrapper) -> t.Dict[str, t.List[t.Self]]: - feedbacks = entry.maybe("feedback") - if not feedbacks: - return [] + def get_from_entry_feedback( + cls, entry: _XMLWrapper + ) -> t.Dict[str, list["Feedback"]]: + feedbacks_xml = entry.maybe("feedback") + if not feedbacks_xml: + return {} ret: t.Dict[str, t.List[Feedback]] = {} - for sentimentTag in feedbacks: + for sentimentTag in feedbacks_xml: feedbacks = ret.setdefault(sentimentTag.str("type"), []) if sentimentTag.tag == "affirmativeFeedback": # Iterate over members for m in sentimentTag.maybe("members"): - feedbacks.append(cls(StatusResult.from_xml(m), True)) + feedbacks.append(cls(True, StatusResult.from_xml(m))) elif sentimentTag.tag == "negativeFeedback": - # Iterate over reasons - for r in sentimentTag.maybe("reasons"): - print(str(r)) - assert r.tag == "reason" - reason_name = r.str("name") - for m in r.maybe("members"): + # It's impossible to tell from the public documentation how + # to parse this correctly because it's ambigous how it's + # formatted. + reason_block = list(sentimentTag.maybe("reasons")) + for i in range(0, len(reason_block), 2): + if i + 1 >= len(reason_block): + logging.warning("[ncmec] reason block has odd number of blocks") + continue + reason = reason_block[i] + members = reason_block[i + 1] + if reason.tag != "reason" or members.tag != "members": + logging.warning( + "[ncmec] reason block malformed: reason:%s remembers:%s", + reason.tag, + members.tag, + ) + continue + + reason_name = reason.str("name") + for m in members: feedbacks.append( cls(False, StatusResult.from_xml(m), reason_name) ) @@ -220,7 +236,7 @@ class NCMECEntryUpdate: fingerprints: t.Dict[str, str] # The feedback (upvote/downvote) that other ESPs have given for this entry # Keyed the same way as fingerprints - feedback: t.Dict[str, Feedback] + feedback: t.Dict[str, list[Feedback]] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -573,7 +589,7 @@ def submit_feedback( # need member_id to submit feedback my_esp = self._my_esp - if not self._my_esp: + if my_esp is None: my_esp = self.status() # Prepare the XML payload @@ -594,7 +610,7 @@ def submit_feedback( ) if len(feedback_options) == 1: # Only one choice - negative_reason_guid = next(feedback_options.keys()) + negative_reason_guid = next(iter(feedback_options.keys())) if not negative_reason_guid: raise Exception( f"Need to pick a feedback reason. Options: {feedback_options}" From 2e2a8d6719d762788a1c3028145b5bdbc519a8cc Mon Sep 17 00:00:00 2001 From: dcallies Date: Thu, 3 Oct 2024 14:22:04 +0100 Subject: [PATCH 09/10] how do types work --- .../threatexchange/exchanges/clients/ncmec/hash_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 0a0e7de54..9907d21d7 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -174,7 +174,7 @@ class Feedback: @classmethod def get_from_entry_feedback( cls, entry: _XMLWrapper - ) -> t.Dict[str, list["Feedback"]]: + ) -> t.Dict[str, t.List["Feedback"]]: feedbacks_xml = entry.maybe("feedback") if not feedbacks_xml: return {} From 201fb641e00ddb2a91d1dc77f257b2613866d703 Mon Sep 17 00:00:00 2001 From: dcallies Date: Thu, 3 Oct 2024 14:35:36 +0100 Subject: [PATCH 10/10] how do types work x2 --- .../threatexchange/exchanges/clients/ncmec/hash_api.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 9907d21d7..e534fdd08 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -236,7 +236,7 @@ class NCMECEntryUpdate: fingerprints: t.Dict[str, str] # The feedback (upvote/downvote) that other ESPs have given for this entry # Keyed the same way as fingerprints - feedback: t.Dict[str, list[Feedback]] + feedback: t.Dict[str, t.List[Feedback]] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -513,9 +513,11 @@ def members(self) -> t.List[StatusResult]: return [StatusResult.from_xml(member) for member in _XMLWrapper(response)] def feedback_reasons(self, fingerprint_type: FingerprintType) -> t.Dict[str, str]: - """Get the possible negative feedback reasons for each feedback type""" - ret = {} - # Could parallelize this + """ + Get the possible negative feedback reasons for this type + + According to NCMEC documentation, the GUIDs + """ xml = _XMLWrapper( self._get(NCMECEndpoint.feedback, path=f"{fingerprint_type.value}/reasons") )