Skip to content

Commit

Permalink
omg what a pain
Browse files Browse the repository at this point in the history
  • Loading branch information
stringertheory committed Jan 29, 2024
1 parent 7d8d077 commit 0af498f
Show file tree
Hide file tree
Showing 9 changed files with 2,867 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ check: ## Run code quality tools.
@echo "🚀 Linting code: Running pre-commit"
@poetry run pre-commit run -a
@echo "🚀 Static type checking: Running mypy"
@poetry run mypy
@poetry run mypy --disable-error-code attr-defined

.PHONY: test
test: ## Test the code with pytest
Expand Down
50 changes: 50 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# clean-links

[![Release](https://img.shields.io/github/v/release/stringertheory/clean-links)](https://img.shields.io/github/v/release/stringertheory/clean-links)
[![Build status](https://img.shields.io/github/actions/workflow/status/stringertheory/clean-links/main.yml?branch=main)](https://github.com/stringertheory/clean-links/actions/workflows/main.yml?query=branch%3Amain)
[![codecov](https://codecov.io/gh/stringertheory/clean-links/branch/main/graph/badge.svg)](https://codecov.io/gh/stringertheory/clean-links)
[![Commit activity](https://img.shields.io/github/commit-activity/m/stringertheory/clean-links)](https://img.shields.io/github/commit-activity/m/stringertheory/clean-links)
[![License](https://img.shields.io/github/license/stringertheory/clean-links)](https://img.shields.io/github/license/stringertheory/clean-links)

Tools for cleaning up links

- **GitHub repository**: <https://github.com/stringertheory/clean-links/>
- **Documentation**: <https://stringertheory.github.io/clean-links/>

## Getting started with your project

First, create a repository on GitHub with the same name as this project, and then run the following commands:

```bash
git init -b main
git add .
git commit -m "init commit"
git remote add origin git@github.com:stringertheory/clean-links.git
git push -u origin main
```

Finally, install the environment and the pre-commit hooks with

```bash
make install
```

You are now ready to start development on your project!
The CI/CD pipeline will be triggered when you open a pull request, merge to main, or when you create a new release.

To finalize the set-up for publishing to PyPI or Artifactory, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/publishing/#set-up-for-pypi).
For activating the automatic documentation with MkDocs, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/mkdocs/#enabling-the-documentation-on-github).
To enable the code coverage reports, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/codecov/).

## Releasing a new version

- Create an API Token on [PyPI](https://pypi.org/).
- Add the API Token to your project's secrets with the name `PYPI_TOKEN` by visiting [this page](https://github.com/stringertheory/clean-links/settings/secrets/actions/new).
- Create a [new release](https://github.com/stringertheory/clean-links/releases/new) on GitHub.
- Create a new tag in the form `*.*.*`.

For more details, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/cicd/#how-to-trigger-a-release).

---

Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry).
101 changes: 101 additions & 0 deletions clean_links/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import logging
import re
from urllib.parse import parse_qs, urlencode, urlsplit

from clean_links.config import read_config
from clean_links.unshorten import unshorten_url

# Rule set loaded once, at import time, via read_config(); clear_url() reads
# its "providers" mapping.
# NOTE(review): presumably the ClearURLs rule catalogue -- confirm in config.
clear_urls_rules = read_config()


def query_string(url: str, rules: list) -> str:
    """Return the path of ``url`` followed by its filtered query string.

    The query string is parsed, every parameter whose *entire* name matches
    one of ``rules`` (case-insensitively) is removed, and the remaining
    parameters are re-encoded and appended to the path. Scheme, host and
    fragment are not part of the result.

    Note that ``parse_qs`` is called with its defaults, so parameters with
    blank values (``?flag=``) are not carried over.

    Args:
        url: URL (or bare ``path?query`` string) to filter.
        rules: Regular-expression patterns naming the parameters to remove.

    Returns:
        ``path?query`` if any parameter survives, otherwise just ``path``.

    Raises:
        re.error: If a rule that gets evaluated is not a valid pattern.
    """
    split = urlsplit(url)
    params = parse_qs(split.query)

    def is_tracking(key: str) -> bool:
        # fullmatch() anchors the pattern as a whole. Building
        # "^" + rule + "$" by hand mis-anchors rules that contain a
        # top-level alternation ("a|b" became "^a|b$", which also matched
        # any key merely starting with "a").
        return any(
            re.fullmatch(rule, key, flags=re.IGNORECASE) for rule in rules
        )

    # Parameters with an empty name ("?=1") are always discarded.
    kept = {
        key: values
        for key, values in params.items()
        if key and not is_tracking(key)
    }

    params_string = urlencode(kept, doseq=True)
    if params_string:
        return split.path + "?" + params_string
    return split.path


def match_provider(provider: str, url: str, rules: dict) -> bool:
    """Tell whether a provider's rules should be applied to ``url``.

    A provider applies when ``url`` matches its ``"urlPattern"`` and none of
    its ``"exceptions"`` patterns. Patterns are tested with ``re.match``,
    i.e. from the start of the URL. An exception pattern that cannot be
    evaluated is logged and skipped.

    Args:
        provider: Provider name, used only in the log message.
        url: URL to test.
        rules: Provider entry holding ``"urlPattern"`` and ``"exceptions"``.

    Returns:
        ``True`` if the provider applies to ``url``, ``False`` otherwise.
    """
    url_matches = re.match(rules["urlPattern"], url) is not None

    excepted = False
    for pattern in rules["exceptions"]:
        try:
            excepted = re.match(pattern, url) is not None
        except Exception:
            logging.exception(
                f"something's wrong with regex {pattern!r} "
                f"for provider {provider!r}."
            )
        if excepted:
            # One matching exception is enough; stop scanning.
            break

    return url_matches and not excepted


def clear_url(
    url: str, keep_query: bool = True, keep_fragment: bool = True
) -> str:
    """Strip tracking data from ``url`` using every matching provider.

    Providers from ``clear_urls_rules["providers"]`` are tried in order. For
    each one that matches, its ``"rawRules"`` are removed from the URL text,
    then the URL is rebuilt from scheme, host, the filtered path/query and
    the filtered fragment. Later providers see the already-cleaned URL. A
    URL that no provider matches is returned unchanged.

    Args:
        url: URL to clean.
        keep_query: Keep the (filtered) query string; if false, drop it.
        keep_fragment: Keep the (filtered) fragment; if false, drop it.

    Returns:
        The cleaned URL.
    """
    for name, provider in clear_urls_rules["providers"].items():
        if not match_provider(name, url, provider):
            continue

        for raw_rule in provider["rawRules"]:
            url = re.sub(raw_rule, "", url, flags=re.IGNORECASE)

        parts = urlsplit(url)
        relative = (
            query_string(url, provider["rules"]) if keep_query else parts.path
        )

        if keep_fragment:
            # The fragment is run through the same filter as a URL would be,
            # so "#path?key=value" style fragments get cleaned as well.
            cleaned_fragment = query_string(parts.fragment, provider["rules"])
            if cleaned_fragment:
                relative = relative + "#" + cleaned_fragment

        url = f"{parts.scheme}://{parts.netloc}{relative}"

    return url


def main() -> None:
    """Manual smoke test: unshorten a hard-coded link and print it cleaned.

    Prints the original URL, the ``"resolved"`` entry of whatever
    ``unshorten_url`` returns (empty string if absent), and the result of
    ``clear_url`` on that resolved URL.
    """
    # Long product link kept as a sample; it is overwritten just below.
    url = "https://www.amazon.com/Kobo-Glare-Free-Touchscreen-ComfortLight-Adjustable/dp/B0BCXLQNCC/ref=pd_ci_mcx_mh_mcx_views_0?pd_rd_w=Dx5dF&content-id=amzn1.sym.225b4624-972d-4629-9040-f1bf9923dd95%3Aamzn1.symc.40e6a10e-cbc4-4fa5-81e3-4435ff64d03b&pf_rd_p=225b4624-972d-4629-9040-f1bf9923dd95&pf_rd_r=A7JSDJGYR33BN5GRCV7V&pd_rd_wg=xW6Yf&pd_rd_r=4b8a3532-9e28-4857-a929-5e572d2c765f&pd_rd_i=B0BCXLQNCC"

    url = "https://trib.al/5m7fAg3"
    # Other short links to swap in when testing by hand:
    #   "https://tinyurl.com/yc2ft9m5"
    #   "https://bit.ly/3C4WXQ9"
    #   'https://tinyurl.com/NewwAlemAndKibrom'
    #   "https://hubs.la/Q01HRjhm0"
    #   "https://buff.ly/3Omwkwd"
    #   "https://bit.ly/48RtRlw"
    #   "https://srv.buysellads.com/ads/long/x/TCHU7KSHTTTTTTH6NPRNPTTTTTTFNZMBKWTTTTTTA4RZC7VTTTTTTBZI5HINWLB6G3DIEMS4PABU5AIEQQY6BADG2HUT"
    #   "https://buff.ly/2RjYjMt"

    print(url, end="\n\n")

    resolved_url = unshorten_url(url).get("resolved", "")
    print(resolved_url, end="\n\n")

    # Add keep_query=False, keep_fragment=False to drop query and fragment.
    cleaned_url = clear_url(resolved_url)
    print(cleaned_url)
# print(url)
# original, resolved, status = resolve_url(url, 10)
# print(original)
# print(resolved)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()
Loading

0 comments on commit 0af498f

Please sign in to comment.