-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #45 from dayyass/develop
release v0.1.1
- Loading branch information
Showing
20 changed files
with
422 additions
and
124 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[run] | ||
branch = True | ||
source = text_clf | ||
|
||
[report] | ||
exclude_lines = | ||
pragma: no cover | ||
if self\.debug | ||
raise AssertionError | ||
raise NotImplementedError | ||
if __name__ == .__main__.: | ||
|
||
omit = | ||
text_clf/__main__.py | ||
|
||
show_missing = True | ||
ignore_errors = False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# This workflow will install Python dependencies and run codecov
# https://github.com/codecov/codecov-action#example-workflowyml-with-codecov-action

name: codecov

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
    # Variables named in `env_vars` below; without them the upload is tagged with empty values.
    env:
      OS: ${{ matrix.os }}
      PYTHON: '3.7'
    steps:
      # Pinned to a release tag (same as the linter/tests workflows) instead of the moving master ref.
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is always read as a string, never as a YAML float.
          python-version: '3.7'
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest pytest-cov
      - name: Generate coverage report
        run: |
          pytest --cov=./ --cov-report=xml
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v1
        with:
          flags: unittests
          env_vars: OS,PYTHON
          fail_ci_if_error: true
          verbose: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# This workflow will install Python dependencies and run linter | ||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions | ||
|
||
name: linter | ||
|
||
on: | ||
push: | ||
branches: [main, develop] | ||
pull_request: | ||
branches: [main, develop] | ||
|
||
jobs: | ||
build: | ||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
matrix: | ||
os: [ubuntu-latest] | ||
steps: | ||
- uses: actions/checkout@v2 | ||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: 3.7 | ||
- name: Install dependencies | ||
run: | | ||
pip install --upgrade pip | ||
pip install isort black flake8 types-PyYAML mypy | ||
- name: Code format check with isort | ||
run: | | ||
isort --check-only --profile black . | ||
- name: Code format check with black | ||
run: | | ||
black --check . | ||
- name: Lint with flake8 | ||
run: | | ||
# stop the build if there are Python syntax errors or undefined names | ||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||
- name: Type check with mypy | ||
run: mypy --ignore-missing-imports . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# This workflow will install Python dependencies and run tests with a variety of Python versions | ||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions | ||
|
||
name: tests | ||
|
||
on: | ||
push: | ||
branches: [main, develop] | ||
pull_request: | ||
branches: [main, develop] | ||
|
||
jobs: | ||
build: | ||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
matrix: | ||
python-version: ['3.6', '3.7', '3.8', '3.9'] | ||
os: [ubuntu-latest, macOS-latest, windows-latest] | ||
steps: | ||
- uses: actions/checkout@v2 | ||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: ${{ matrix.python-version }} | ||
- name: Install dependencies | ||
run: | | ||
pip install --upgrade pip | ||
pip install -r requirements.txt | ||
- name: Tests | ||
run: | | ||
python -m unittest discover |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Image with text-classification-baseline installed from PyPI; config.yaml and
# data/train.csv, data/valid.csv are copied into /workdir.
FROM python:3.7-slim-buster
# MAINTAINER is deprecated; LABEL carries the same information.
LABEL maintainer="Dani El-Ayyass <[email protected]>"

WORKDIR /workdir

# Install before copying config/data so that edits to those files reuse the cached pip layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir text-classification-baseline

COPY config.yaml ./
COPY data/train.csv data/valid.csv data/

CMD ["bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
codecov: | ||
require_ci_to_pass: yes | ||
|
||
ignore: | ||
- "text_clf/__main__.py" | ||
|
||
coverage: | ||
status: | ||
project: | ||
default: false | ||
source: | ||
paths: | ||
- "text_clf/" | ||
target: 90% | ||
patch: off |
Empty file.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import os | ||
|
||
import pandas as pd | ||
from sklearn.datasets import fetch_20newsgroups | ||
from sklearn.utils import Bunch | ||
|
||
|
||
def make_df_from_bunch(bunch: Bunch) -> pd.DataFrame:
    """
    Convert a 20newsgroups bunch into a pd.DataFrame.

    The result has four columns: "text" and "target" taken from the bunch,
    "target_name" (the entry of bunch.target_names indexed by the target) and
    "target_name_short" (the part of the target name before the first dot).

    :param Bunch bunch: 20newsgroups bunch.
    :return: 20newsgroups DataFrame.
    :rtype: pd.DataFrame
    """

    def full_name(label_id):
        # Look up the label's name in the bunch's target_names.
        return bunch.target_names[label_id]

    def top_level(name):
        # Keep only the part before the first dot.
        return name.split(".")[0]

    columns = {"text": bunch.data, "target": bunch.target}
    frame = pd.DataFrame(columns)

    frame["target_name"] = frame["target"].map(full_name)
    frame["target_name_short"] = frame["target_name"].map(top_level)

    return frame
|
||
|
||
def load_20newsgroups() -> None:
    """
    Fetch the 20newsgroups dataset and save it as CSV files.

    The "train" subset is written to data/train.csv and the "test" subset
    to data/valid.csv (both without the index column); the data directory
    is created when it does not exist yet.
    """

    # Fetch both subsets first, then convert them, in train -> test order.
    bunches = [fetch_20newsgroups(subset=subset) for subset in ("train", "test")]
    train_frame, valid_frame = [make_df_from_bunch(bunch) for bunch in bunches]

    os.makedirs("data", exist_ok=True)

    outputs = (("data/train.csv", train_frame), ("data/valid.csv", valid_frame))
    for csv_path, frame in outputs:
        frame.to_csv(csv_path, index=False)


# Run the download only when executed as a script, not on import.
if __name__ == "__main__":
    load_20newsgroups()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
pandas==1.3.1 | ||
PyYAML==5.4.1 | ||
scikit-learn==0.24.2 | ||
pandas>=1.1.5 | ||
PyYAML>=5.4.1 | ||
scikit-learn>=0.24.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[metadata] | ||
name = text-classification-baseline | ||
version = 0.1.0 | ||
version = 0.1.1 | ||
author = Dani El-Ayyass | ||
author_email = [email protected] | ||
description = TF-IDF + LogReg baseline for text classification | ||
|
@@ -16,12 +16,12 @@ classifiers = | |
|
||
[options] | ||
packages = find: | ||
python_requires = >=3.7 | ||
python_requires = >=3.6 | ||
install_requires = | ||
pandas >= 1.3.1 | ||
pandas >= 1.1.5 | ||
PyYAML >= 5.4.1 | ||
scikit-learn >= 0.24.2 | ||
|
||
[options.entry_points] | ||
console_scripts = | ||
text-clf = text_clf.__main__:main | ||
text-clf-train = text_clf.__main__:main |
Empty file.
Oops, something went wrong.