From eba1d3443e81aa6cd1b3ef41048ee612c4df1230 Mon Sep 17 00:00:00 2001 From: woranov Date: Tue, 17 Sep 2024 16:47:35 +0300 Subject: [PATCH] Fix regex matching being used in PartOfSpeech representation model (#2138) --- bertopic/representation/_pos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bertopic/representation/_pos.py b/bertopic/representation/_pos.py index 3ac2815f..11d8e558 100644 --- a/bertopic/representation/_pos.py +++ b/bertopic/representation/_pos.py @@ -120,7 +120,7 @@ def extract_topics( candidate_documents = [] for keyword in keywords: selection = documents.loc[documents.Topic == topic, :] - selection = selection.loc[selection.Document.str.contains(keyword), "Document"] + selection = selection.loc[selection.Document.str.contains(keyword, regex=False), "Document"] if len(selection) > 0: for document in selection[:2]: candidate_documents.append(document)