From e79506d4a9b996f1eec0a55e2cc0770ddca1a49f Mon Sep 17 00:00:00 2001 From: Akshay Karle <1443108+akshaykarle@users.noreply.github.com> Date: Thu, 30 May 2024 16:37:17 +0100 Subject: [PATCH] add a language parameter and move test files in a sample_data dir --- src/app.py | 3 ++- .../csv_analyzer_engine_test.py | 2 +- tests/app_test.py | 20 +++++++++++++++++-- tests/sample_data/invalid.csv | 3 +++ .../sample_data.csv | 0 5 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 tests/sample_data/invalid.csv rename tests/{analyzer_engine => sample_data}/sample_data.csv (100%) diff --git a/src/app.py b/src/app.py index c94c97d..08a91d9 100644 --- a/src/app.py +++ b/src/app.py @@ -37,6 +37,7 @@ def analyze() -> Tuple[str, int]: """Execute the analyzer function.""" try: file = request.files['file'] + language = request.form['language'] if file.filename == '': return jsonify({'error': 'No selected file'}), 400 @@ -46,7 +47,7 @@ def analyze() -> Tuple[str, int]: analyzer_results = self.engine.analyze_csv( csv_full_path=filepath, - language="en" + language=language ) self.logger.debug(f"Analyzed file with results: {analyzer_results}") os.remove(filepath) diff --git a/tests/analyzer_engine/csv_analyzer_engine_test.py b/tests/analyzer_engine/csv_analyzer_engine_test.py index e45ef83..72d5f62 100644 --- a/tests/analyzer_engine/csv_analyzer_engine_test.py +++ b/tests/analyzer_engine/csv_analyzer_engine_test.py @@ -8,7 +8,7 @@ def test_csv_analyzer_engine_anonymizer(): nlp_engine = FlairNLPEngine("flair/ner-english-large") csv_analyzer = CSVAnalyzerEngine(nlp_engine) from presidio_anonymizer import BatchAnonymizerEngine - analyzer_results = csv_analyzer.analyze_csv('./sample_data.csv', language="en") + analyzer_results = csv_analyzer.analyze_csv('../sample_data/sample_data.csv', language="en") anonymizer = BatchAnonymizerEngine() anonymized_results = anonymizer.anonymize_dict(analyzer_results) diff --git a/tests/app_test.py b/tests/app_test.py index 7101b09..55fdd97 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -22,12 +22,28 @@ def test_health(client): response = client.get("/health") assert response.status_code == 200 +def test_analyze_non_existent(client): + response = client.post("/analyze", data={ + "language": "en", + }) + + assert response.status_code == 500 + + +def test_analyze_invalid_csv(client): + response = client.post("/analyze", data={ + "file": open('./tests/sample_data/invalid.csv', 'rb'), + }) + + assert response.status_code == 500 + -def test_analyze_csv_file(client): +def test_analyze_pii_csv(client): expected_response_id = {'value': ['1', '2', '3'], 'recognizer_results': [[], [], []]} response = client.post("/analyze", data={ - "file": open('./tests/analyzer_engine/sample_data.csv', 'rb'), + "file": open('./tests/sample_data/sample_data.csv', 'rb'), + "language": "en", }) assert response.status_code == 200 diff --git a/tests/sample_data/invalid.csv b/tests/sample_data/invalid.csv new file mode 100644 index 0000000..51e7b04 --- /dev/null +++ b/tests/sample_data/invalid.csv @@ -0,0 +1,3 @@ +{ + "hello": "json" +} diff --git a/tests/analyzer_engine/sample_data.csv b/tests/sample_data/sample_data.csv similarity index 100% rename from tests/analyzer_engine/sample_data.csv rename to tests/sample_data/sample_data.csv