diff --git a/tests/unit/vertex_rag/test_rag_data.py b/tests/unit/vertex_rag/test_rag_data.py index 2b789d6513..2648e530ec 100644 --- a/tests/unit/vertex_rag/test_rag_data.py +++ b/tests/unit/vertex_rag/test_rag_data.py @@ -453,6 +453,17 @@ def test_prepare_import_files_request_slack_source(self): ) import_files_request_eq(request, tc.TEST_IMPORT_REQUEST_SLACK_SOURCE) + def test_prepare_import_slack_request_advanced_pdf_parsing_enabled_error(self): + with pytest.raises(ValueError) as e: + rag.import_files( + corpus_name=tc.TEST_RAG_CORPUS_RESOURCE_NAME, + source=tc.TEST_SLACK_SOURCE, + chunk_size=tc.TEST_CHUNK_SIZE, + chunk_overlap=tc.TEST_CHUNK_OVERLAP, + use_advanced_pdf_parsing=True, + ) + e.match("use_advanced_pdf_parsing is not supported for Slack or Jira") + def test_prepare_import_files_request_jira_source(self): request = prepare_import_files_request( corpus_name=tc.TEST_RAG_CORPUS_RESOURCE_NAME, @@ -462,6 +473,17 @@ def test_prepare_import_files_request_jira_source(self): ) import_files_request_eq(request, tc.TEST_IMPORT_REQUEST_JIRA_SOURCE) + def test_prepare_import_jira_request_advanced_pdf_parsing_enabled_error(self): + with pytest.raises(ValueError) as e: + rag.import_files( + corpus_name=tc.TEST_RAG_CORPUS_RESOURCE_NAME, + source=tc.TEST_JIRA_SOURCE, + chunk_size=tc.TEST_CHUNK_SIZE, + chunk_overlap=tc.TEST_CHUNK_OVERLAP, + use_advanced_pdf_parsing=True, + ) + e.match("use_advanced_pdf_parsing is not supported for Slack or Jira") + def test_set_embedding_model_config_set_both_error(self): embedding_model_config = rag.EmbeddingModelConfig( publisher_model="whatever", diff --git a/vertexai/preview/rag/rag_data.py b/vertexai/preview/rag/rag_data.py index 32da89c0a3..fcbb05380c 100644 --- a/vertexai/preview/rag/rag_data.py +++ b/vertexai/preview/rag/rag_data.py @@ -374,6 +374,8 @@ def import_files( raise ValueError("Only one of source or paths must be passed in at a time") if source is None and paths is None: raise ValueError("One of source or paths must be passed in") + if use_advanced_pdf_parsing and source is not None: + raise ValueError("use_advanced_pdf_parsing is not supported for Slack or Jira") corpus_name = _gapic_utils.get_corpus_name(corpus_name) request = _gapic_utils.prepare_import_files_request( corpus_name=corpus_name, @@ -493,6 +495,8 @@ async def import_files_async( raise ValueError("Only one of source or paths must be passed in at a time") if source is None and paths is None: raise ValueError("One of source or paths must be passed in") + if use_advanced_pdf_parsing and source is not None: + raise ValueError("use_advanced_pdf_parsing is not supported for Slack or Jira") corpus_name = _gapic_utils.get_corpus_name(corpus_name) request = _gapic_utils.prepare_import_files_request( corpus_name=corpus_name,