omg what a pain

stringertheory · Jan 29, 2024 · 0af498f · 0af498f
1 parent 7d8d077
commit 0af498f
Show file tree

Hide file tree

Showing 9 changed files with 2,867 additions and 3 deletions.
diff --git a/Makefile b/Makefile
@@ -12,7 +12,7 @@ check: ## Run code quality tools.
 	@echo "🚀 Linting code: Running pre-commit"
 	@poetry run pre-commit run -a
 	@echo "🚀 Static type checking: Running mypy"
-	@poetry run mypy
+	@poetry run mypy --disable-error-code attr-defined
 
 .PHONY: test
 test: ## Test the code with pytest

diff --git a/README.md b/README.md
@@ -0,0 +1,50 @@
+# clean-links
+
+[![Release](https://img.shields.io/github/v/release/stringertheory/clean-links)](https://img.shields.io/github/v/release/stringertheory/clean-links)
+[![Build status](https://img.shields.io/github/actions/workflow/status/stringertheory/clean-links/main.yml?branch=main)](https://github.com/stringertheory/clean-links/actions/workflows/main.yml?query=branch%3Amain)
+[![codecov](https://codecov.io/gh/stringertheory/clean-links/branch/main/graph/badge.svg)](https://codecov.io/gh/stringertheory/clean-links)
+[![Commit activity](https://img.shields.io/github/commit-activity/m/stringertheory/clean-links)](https://img.shields.io/github/commit-activity/m/stringertheory/clean-links)
+[![License](https://img.shields.io/github/license/stringertheory/clean-links)](https://img.shields.io/github/license/stringertheory/clean-links)
+
+Tools for cleaning up links.
+
+- **GitHub repository**: <https://github.com/stringertheory/clean-links/>
+- **Documentation**: <https://stringertheory.github.io/clean-links/>
+
+## Getting started with your project
+
+First, create a repository on GitHub with the same name as this project, and then run the following commands:
+
+```bash
+git init -b main
+git add .
+git commit -m "init commit"
+git remote add origin git@github.com:stringertheory/clean-links.git
+git push -u origin main
+```
+
+Finally, install the environment and the pre-commit hooks with
+
+```bash
+make install
+```
+
+You are now ready to start development on your project!
+The CI/CD pipeline will be triggered when you open a pull request, merge to main, or when you create a new release.
+
+To finalize the set-up for publishing to PyPI or Artifactory, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/publishing/#set-up-for-pypi).
+For activating the automatic documentation with MkDocs, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/mkdocs/#enabling-the-documentation-on-github).
+To enable the code coverage reports, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/codecov/).
+
+## Releasing a new version
+
+- Create an API Token on [PyPI](https://pypi.org/).
+- Add the API Token to your project's secrets with the name `PYPI_TOKEN` by visiting [this page](https://github.com/stringertheory/clean-links/settings/secrets/actions/new).
+- Create a [new release](https://github.com/stringertheory/clean-links/releases/new) on GitHub.
+- Create a new tag in the form `*.*.*`.
+
+For more details, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/cicd/#how-to-trigger-a-release).
+
+---
+
+Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry).
diff --git a/clean_links/clean.py b/clean_links/clean.py
@@ -0,0 +1,101 @@
+import logging
+import re
+from urllib.parse import parse_qs, urlencode, urlsplit
+
+from clean_links.config import read_config
+from clean_links.unshorten import unshorten_url
+
+# Rule set returned by read_config(), loaded once when this module is
+# imported. clear_url iterates over its "providers" mapping.
+clear_urls_rules = read_config()
+
+
+def query_string(url: str, rules: list) -> str:
+    """Return the path of ``url`` plus its query string minus unwanted keys.
+
+    Args:
+        url: URL (or URL-like string) whose query string is filtered.
+        rules: Regular expressions. A query key is dropped when a rule
+            matches the whole key, ignoring case.
+
+    Returns:
+        ``path?query`` built from the surviving parameters, or only the
+        path when none survive. Scheme, host and fragment are not part of
+        the result.
+    """
+    split = urlsplit(url)
+    # parse_qs groups repeated keys and drops parameters with blank values.
+    params = parse_qs(split.query)
+
+    kept = {
+        key: values
+        for key, values in params.items()
+        # An empty key (as in "?=value") is always dropped. fullmatch
+        # anchors the rule as a whole, so an alternation such as "foo|bar"
+        # cannot match a mere prefix or suffix of the key.
+        if key
+        and not any(
+            re.fullmatch(rule, key, flags=re.IGNORECASE) for rule in rules
+        )
+    }
+
+    params_string = urlencode(kept, doseq=True)
+    if params_string:
+        return split.path + "?" + params_string
+    return split.path
+
+
+def match_provider(provider: str, url: str, rules: dict) -> bool:
+    """Tell whether a provider's rules apply to ``url``.
+
+    Args:
+        provider: Provider name, used only in log messages.
+        url: URL to test.
+        rules: Provider entry holding a ``"urlPattern"`` regex and an
+            optional ``"exceptions"`` list of regexes.
+
+    Returns:
+        True when ``url`` matches ``"urlPattern"`` and none of the
+        exception patterns, False otherwise.
+    """
+    if not re.match(rules["urlPattern"], url):
+        # The provider does not apply, so its exceptions are irrelevant.
+        return False
+
+    for exception_pattern in rules.get("exceptions") or ():
+        try:
+            if re.match(exception_pattern, url):
+                return False
+        except Exception:
+            # Best effort: a broken exception pattern is logged and
+            # skipped rather than aborting the whole clean-up.
+            logging.exception(
+                "something's wrong with regex %r for provider %r.",
+                exception_pattern,
+                provider,
+            )
+    return True
+
+
+def clear_url(
+    url: str, keep_query: bool = True, keep_fragment: bool = True
+) -> str:
+    """Apply every matching provider's rules from the loaded config to ``url``.
+
+    For each provider in ``clear_urls_rules["providers"]`` that matches, the
+    provider's ``"rawRules"`` regexes are removed from the URL, then the URL
+    is rebuilt as ``scheme://netloc`` + path + filtered query + filtered
+    fragment.
+
+    Args:
+        url: URL to clean.
+        keep_query: When True, keep the query string minus keys matched by
+            the provider's ``"rules"``; when False, drop the query string.
+        keep_fragment: When True, keep the fragment, filtering any
+            ``?key=value`` part inside it the same way; when False, drop it.
+
+    Returns:
+        The cleaned URL, or ``url`` unchanged when no provider matches.
+    """
+    for provider_name, rules in clear_urls_rules["providers"].items():
+        if not match_provider(provider_name, url, rules):
+            continue
+
+        for rule in rules.get("rawRules") or ():
+            url = re.sub(rule, "", url, flags=re.IGNORECASE)
+
+        param_rules = rules.get("rules") or []
+        split = urlsplit(url)
+        if keep_query:
+            relative = query_string(url, param_rules)
+        else:
+            relative = split.path
+
+        if keep_fragment:
+            # Only the part after "?" is parsed as a query string. Feeding
+            # the whole fragment to a URL parser would misread text such as
+            # "section:2" as scheme + path and drop "section:".
+            fragment, marker, fragment_query = split.fragment.partition("?")
+            if marker:
+                fragment += query_string("?" + fragment_query, param_rules)
+            if fragment:
+                relative += "#" + fragment
+
+        url = f"{split.scheme}://{split.netloc}{relative}"
+
+    return url
+
+
+def main() -> None:
+    """Demo: resolve one sample short link and print it cleaned.
+
+    Prints the sample URL, the ``"resolved"`` entry of the mapping returned
+    by ``unshorten_url`` (empty string when absent), and the result of
+    ``clear_url`` on that value, each followed by a blank line except the
+    last.
+    """
+    # Other sample links that can be swapped in for manual testing:
+    #   https://www.amazon.com/Kobo-Glare-Free-Touchscreen-ComfortLight-Adjustable/dp/B0BCXLQNCC/ref=pd_ci_mcx_mh_mcx_views_0?pd_rd_w=Dx5dF&content-id=amzn1.sym.225b4624-972d-4629-9040-f1bf9923dd95%3Aamzn1.symc.40e6a10e-cbc4-4fa5-81e3-4435ff64d03b&pf_rd_p=225b4624-972d-4629-9040-f1bf9923dd95&pf_rd_r=A7JSDJGYR33BN5GRCV7V&pd_rd_wg=xW6Yf&pd_rd_r=4b8a3532-9e28-4857-a929-5e572d2c765f&pd_rd_i=B0BCXLQNCC
+    #   https://tinyurl.com/yc2ft9m5
+    #   https://bit.ly/3C4WXQ9
+    #   https://tinyurl.com/NewwAlemAndKibrom
+    #   https://hubs.la/Q01HRjhm0
+    #   https://buff.ly/3Omwkwd
+    #   https://bit.ly/48RtRlw
+    #   https://srv.buysellads.com/ads/long/x/TCHU7KSHTTTTTTH6NPRNPTTTTTTFNZMBKWTTTTTTA4RZC7VTTTTTTBZI5HINWLB6G3DIEMS4PABU5AIEQQY6BADG2HUT
+    #   https://buff.ly/2RjYjMt
+    url = "https://trib.al/5m7fAg3"
+
+    print(url)
+    print()
+    resolved = unshorten_url(url).get("resolved", "")
+    print(resolved)
+    print()
+    # Pass keep_query=False / keep_fragment=False to drop those parts.
+    print(clear_url(resolved))
+
+# Run the demo only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()