Skip to content

Commit

Permalink
hrm this is confugin
Browse files Browse the repository at this point in the history
  • Loading branch information
stringertheory committed Jan 31, 2024
1 parent ada004f commit 9baa1af
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 63 deletions.
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tests/cassettes
114 changes: 67 additions & 47 deletions clean_links/unshorten.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import contextlib
import logging
import warnings
from typing import Generator, Union

Expand Down Expand Up @@ -33,62 +32,83 @@ def disable_ssl_warnings() -> Generator:
yield None


def get_last_url_from_exception(exc: Exception) -> Union[str, None]:
result = None

def send(
session: requests.Session,
prepped: requests.PreparedRequest,
history: dict,
verify: bool,
timeout: float,
) -> Union[requests.PreparedRequest, None]:
history["requests"].append(prepped)
try:
if exc.response and exc.response.url:
result = exc.response.url
elif exc.request:
result = exc.request.url
except Exception as exc:
logging.exception("exception occurred while getting last url")
response = session.send(
prepped, allow_redirects=False, verify=verify, timeout=timeout
)
except requests.exceptions.RequestException as exc:
exc.history = history
raise
else:
history["responses"].append(response)

return response.next


def request_redirect_chain(
    session: requests.Session,
    url: str,
    verify: bool,
    timeout: float,
    headers: dict,
    method: str = "HEAD",
) -> dict:
    """Follow the redirect chain starting at ``url`` one hop at a time.

    Each hop is issued through ``send``, which records the prepared
    request and the response in ``history`` and returns the next prepared
    request (``response.next``), or a falsy value once the chain ends.

    Args:
        session: Session used to prepare and send every request.
        url: Starting URL.
        verify: Passed through to ``send`` for each hop.
        timeout: Passed through to ``send`` for each hop.
        headers: Headers for the initial request.
        method: HTTP method for the initial request.

    Returns:
        A dict with two lists, ``"requests"`` (prepared requests, in the
        order they were sent) and ``"responses"`` (responses received).

    Raises:
        requests.exceptions.TooManyRedirects: If more than
            ``session.max_redirects`` redirects would be followed. The
            partial ``history`` is attached to the exception as
            ``exc.history``, the same way ``send`` does for its failures.
        requests.exceptions.RequestException: Anything re-raised by
            ``send``.
    """
    history: dict = {
        "requests": [],
        "responses": [],
    }

    # prepare initial request
    request = requests.Request(method, url, headers=headers)
    prepped = session.prepare_request(request)

    # send and follow the redirect chain, filling in the history
    next_prepped = send(session, prepped, history, verify, timeout)
    while next_prepped:
        # Every response so far was a redirect (otherwise the loop would
        # have ended), so the number of responses is the number of
        # redirects followed once this hop is sent. Each hop is sent on
        # its own, so nothing else bounds a redirect cycle.
        if len(history["responses"]) > session.max_redirects:
            too_many = requests.exceptions.TooManyRedirects(
                f"Exceeded {session.max_redirects} redirects.",
                response=history["responses"][-1],
            )
            too_many.history = history
            raise too_many

        # Follow the redirect target, not the original request again.
        next_prepped = send(session, next_prepped, history, verify, timeout)

    return history

return result

def format_exception(exc: Union[Exception, None]) -> Union[str, None]:
    """Render ``exc`` as ``"ClassName: message"``; ``None`` stays ``None``."""
    return None if exc is None else f"{type(exc).__name__}: {exc}"


def unshorten_url(
url: str, timeout: int = 9, verify: bool = False, headers: dict = HEADERS
url: str, timeout: float = 9, verify: bool = False, headers: dict = HEADERS
) -> dict:
with requests.Session() as session, disable_ssl_warnings():
exception = None
try:
response = session.head(
url,
allow_redirects=True,
timeout=timeout,
headers=headers,
verify=verify,
history = request_redirect_chain(
session, url, verify, timeout, headers, "HEAD"
)
except requests.exceptions.MissingSchema:
raise
except requests.exceptions.InvalidURL:
raise
except requests.exceptions.InvalidSchema as exc:
msg = str(exc)
if msg.startswith("No connection adapters were found"):
resolved = msg[39:-1]
return {
"url": url,
"resolved": resolved,
"status": None,
"exception": f"{type(exc).__name__}: {exc}",
}
else:
raise
except requests.exceptions.RequestException as exc:
return {
"url": url,
"resolved": get_last_url_from_exception(exc),
"status": None,
"exception": f"{type(exc).__name__}: {exc}",
}
else:
return {
"url": url,
"resolved": response.url,
"status": response.status_code,
"exception": None,
}
exception = exc
history = getattr(exc, "history", {})
if not history or not history["responses"]:
raise

response = history["responses"][-1]
return {
"url": url,
"resolved": response.url,
"status": response.status_code,
"exception": format_exception(exception),
"request_history": [r.url for r in history["requests"]],
"response_history": [r.status_code for r in history["responses"]],
}


def main() -> None:
Expand Down
77 changes: 61 additions & 16 deletions tests/test_unshorten.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@
from clean_links.unshorten import unshorten_url


def test_missing_schema():
    """A string with no URL scheme makes ``unshorten_url`` raise."""
    not_a_url = "Ceci n'est pas une URL"
    expected_error = requests.exceptions.MissingSchema
    with pytest.raises(expected_error):
        unshorten_url(not_a_url)


def test_not_an_address():
    """A well-formed URL that cannot be reached raises ``ConnectionError``."""
    # NOTE: this test carries no ``vcr`` marker, unlike the tests below it.
    unreachable = "https://www.definitely-not-a-website.boogety"
    expected_error = requests.exceptions.ConnectionError
    with pytest.raises(expected_error):
        unshorten_url(unreachable)


# cassettes/{module_name}/test_single.yaml will be used
@pytest.mark.vcr
def test_unchanged():
Expand All @@ -14,65 +26,98 @@ def test_unchanged():
"resolved": "https://example.com/",
"status": 200,
"exception": None,
"request_history": ["https://example.com/"],
"response_history": [200],
}


@pytest.mark.vcr
def test_single_redirect():
url = "https://trib.al/5m7fAg3"
result = unshorten_url(url)
resolved = "https://www.bloomberg.com/news/articles/2024-01-24/cryptocurrency-ai-electricity-demand-seen-doubling-in-three-years?cmpid%3D=socialflow-twitter-tech&utm_content=tech&utm_medium=social&utm_campaign=socialflow-organic&utm_source=twitter"
assert result == {
"url": url,
"resolved": "https://www.bloomberg.com/news/articles/2024-01-24/cryptocurrency-ai-electricity-demand-seen-doubling-in-three-years?cmpid%3D=socialflow-twitter-tech&utm_content=tech&utm_medium=social&utm_campaign=socialflow-organic&utm_source=twitter",
"resolved": resolved,
"status": 200,
"exception": None,
"request_history": [url, resolved],
"response_history": [301, 200],
}


@pytest.mark.vcr
def test_multiple_redirect():
url = "https://hubs.la/Q01HRjhm0"
result = unshorten_url(url)
resolved = "https://app.east.mentorspaces.com/#!/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D"
assert result == {
"url": url,
"resolved": "https://app.east.mentorspaces.com/#!/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D",
"resolved": resolved,
"status": 200,
"exception": None,
"request_history": [
url,
"https://mentorspaces.app.link/nsbe?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094",
"https://app.east.mentorspaces.com/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D",
"http://app.east.mentorspaces.com/#!/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D",
resolved,
],
"response_history": [301, 307, 301, 301, 200],
}


@pytest.mark.vcr
def test_expired_certificate_ignore():
url = "https://expired.badssl.com/"
result = unshorten_url(url, verify=False)
resolved = "https://expired.badssl.com/"
assert result == {
"url": url,
"resolved": "https://expired.badssl.com/",
"resolved": resolved,
"status": 200,
"exception": None,
"request_history": [url],
"response_history": [200],
}


@pytest.mark.vcr
def test_resolve_to_mailto():
url = "https://tinyurl.com/NewwAlemAndKibrom"
result = unshorten_url(url)
resolved = "https://tinyurl.com/NewwAlemAndKibrom"
assert result["url"] == url
assert (
result["resolved"]
== "mailto:[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected]?subject=URGENT%20CALL%20TO%20ACTION%3A%20Save%20Alem%20and%20Kibrom's%20lives&body=To%20Whom%20it%20May%20Concern%3A%0A%0AAlem%20Tesfay%20Abraham%20and%20Kibrom%20Adhanom%20Okbazghi%20are%20two%20Eritrean%20asylum-seekers%20who%20have%20been%20detained%20without%20charge%20in%20Egypt%20since%202012%20and%202014%2C%20respectively.%20They%20now%20are%20facing%20deportation%20to%20Eritrea%20without%20ever%20receiving%20the%20opportunity%20to%20register%20as%20refugees%20with%20UNHCR%20in%20Egypt.%20On%209%20September%2C%20they%20were%20taken%20from%20prison%20to%20a%20hospital%20in%20Cairo%20to%20take%20PCR%20tests%20and%20were%20informed%20by%20a%20prison%20official%20that%20they%20would%20be%20deported%20to%20Eritrea%20on%20the%20oncoming%20days.%0A%0AForcibly%20returning%20Alem%20and%20Kibrom%20to%20Eritrea%2C%20where%20they%20fled%20indefinite%20military%20conscription%20and%20where%20they%20would%20face%20persecution%2C%20is%20a%20grave%20breach%20of%20international%20law.%20Eritrean%20asylum-seekers%20who%20are%20forcibly%20returned%20to%20Eritrea%20risk%20arbitrary%20arrest%2C%20forced%20disappearance%20and%20indefinite%20detention%20without%20charges.%20As%20widely%20documented%20by%20many%20NGOs%20as%20well%20as%20the%20UN%20Human%20Rights%20
Council%2C%20citizens%20in%20Eritrea%20are%20held%20in%20prisons%20incommunicado%2C%20in%20unsanitary%20living%20conditions%2C%20where%20torture%20and%20other%20ill%20treatments%20are%20taking%20place%20to%20present.%0A%0AForcing%20Alem%20and%20Kibrom%20back%20to%20the%20nation%20they%20are%20seeking%20asylum%20from%20violates%20the%201951%20Convention%20and%201967%20Protocol%2C%20two%20International%20Laws%20Egypt%20has%20agreed%20to.%20They%20deserve%20the%20right%20to%20be%20resettled%20by%20will%2C%20to%20a%20country%20willing%20to%20accept%20them.%20We%20urge%20you%2C%20the%20Egyptian%20authorities%2C%20and%20all%20other%20relevant%20bodies%2C%20to%20help%20stop%20the%20forced%20repatriation%20of%20Alem%20and%20Kibrom%20and%20protect%20them%20from%20persecution%20and%20grant%20them%20their%20long-awaited%20freedom.%20%0A%0A%23JusticeforAlemAndKibrom%0A%0ASincerely%2C"
)
assert result["status"] is None
assert (
result["exception"]
== 'InvalidSchema: No connection adapters were found for "mailto:[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected]?subject=URGENT%20CALL%20TO%20ACTION%3A%20Save%20Alem%20and%20Kibrom\'s%20lives&body=To%20Whom%20it%20May%20Concern%3A%0A%0AAlem%20Tesfay%20Abraham%20and%20Kibrom%20Adhanom%20Okbazghi%20are%20two%20Eritrean%20asylum-seekers%20who%20have%20been%20detained%20without%20charge%20in%20Egypt%20since%202012%20and%202014%2C%20respectively.%20They%20now%20are%20facing%20deportation%20to%20Eritrea%20without%20ever%20receiving%20the%20opportunity%20to%20register%20as%20refugees%20with%20UNHCR%20in%20Egypt.%20On%209%20September%2C%20they%20were%20taken%20from%20prison%20to%20a%20hospital%20in%20Cairo%20to%20take%20PCR%20tests%20and%20were%20informed%20by%20a%20prison%20official%20that%20they%20would%20be%20deported%20to%20Eritrea%20on%20the%20oncoming%20days.%0A%0AForcibly%20returning%20Alem%20and%20Kibrom%20to%20Eritrea%2C%20where%20they%20fled%20indefinite%20military%20conscription%20and%20where%20they%20would%20face%20persecution%2C%20is%20a%20grave%20breach%20of%20international%20law.%20Eritrean%20asylum-seekers%20who%20are%20forcibly%20returned%20to%20Eritrea%20risk%20arbitrary%20arrest%2C%20forced%20disappearance%20and%20indefinite%20detention%20without%20charges.%20As%20widely%20documented%20by%20many
%20NGOs%20as%20well%20as%20the%20UN%20Human%20Rights%20Council%2C%20citizens%20in%20Eritrea%20are%20held%20in%20prisons%20incommunicado%2C%20in%20unsanitary%20living%20conditions%2C%20where%20torture%20and%20other%20ill%20treatments%20are%20taking%20place%20to%20present.%0A%0AForcing%20Alem%20and%20Kibrom%20back%20to%20the%20nation%20they%20are%20seeking%20asylum%20from%20violates%20the%201951%20Convention%20and%201967%20Protocol%2C%20two%20International%20Laws%20Egypt%20has%20agreed%20to.%20They%20deserve%20the%20right%20to%20be%20resettled%20by%20will%2C%20to%20a%20country%20willing%20to%20accept%20them.%20We%20urge%20you%2C%20the%20Egyptian%20authorities%2C%20and%20all%20other%20relevant%20bodies%2C%20to%20help%20stop%20the%20forced%20repatriation%20of%20Alem%20and%20Kibrom%20and%20protect%20them%20from%20persecution%20and%20grant%20them%20their%20long-awaited%20freedom.%20%0A%0A%23JusticeforAlemAndKibrom%0A%0ASincerely%2C"'
)
assert result["resolved"] == resolved
assert result["status"] == 301
assert result["exception"].startswith("InvalidSchema: No connection adap")
assert result["request_history"][0] == url
assert result["request_history"][1].startswith("mailto:[email protected]")
assert result["response_history"] == [301]


def test_missing_schema():
url = "I AM NOT AN URL"
with pytest.raises(requests.exceptions.MissingSchema):
unshorten_url(url)
@pytest.mark.vcr
def test_invalid_url_in_redirect_chain():
    """Pin the result when a redirect points at an invalid URL.

    Open question: should this raise, as it would if the starting URL
    were invalid, or report the last valid URL in the redirect chain?
    The expectation below pins the latter.
    """
    start = "https://ctt.ec/5kum7+"
    last_valid = "https://clicktotweet.com/5kum7+"
    expected = {
        "url": start,
        "resolved": last_valid,
        "status": 302,
        "exception": "InvalidURL: No host specified.",
        "request_history": [start, last_valid, "http://"],
        "response_history": [301, 302],
    }
    assert unshorten_url(start) == expected


# def test_expired_certificate_verify():
Expand Down

0 comments on commit 9baa1af

Please sign in to comment.