From 39bbfdb8298b5faa32e4bc052080d240f6140bea Mon Sep 17 00:00:00 2001
From: Heinz-Alexander Fuetterer <35225576+afuetterer@users.noreply.github.com>
Date: Mon, 1 Jul 2024 14:03:28 +0000
Subject: [PATCH] chore: remove obsolete flake8 config and update line length (#2066)

---
 .flake8                                       |   2 -
 bertopic/_bertopic.py                         | 751 +++++-------
 bertopic/_save_utils.py                       |  44 +-
 bertopic/_utils.py                            |  29 +-
 bertopic/backend/_flair.py                    |   4 +-
 bertopic/backend/_gensim.py                   |   4 +-
 bertopic/backend/_hftransformers.py           |  14 +-
 bertopic/backend/_multimodal.py               |  23 +-
 bertopic/backend/_openai.py                   |   8 +-
 bertopic/backend/_use.py                      |   5 +-
 bertopic/backend/_utils.py                    |  16 +-
 bertopic/cluster/_utils.py                    |   4 +-
 .../plotting/_approximate_distribution.py     |   4 +-
 bertopic/plotting/_barchart.py                |  26 +-
 bertopic/plotting/_datamap.py                 |  25 +-
 bertopic/plotting/_distribution.py            |  10 +-
 bertopic/plotting/_documents.py               |  33 +-
 bertopic/plotting/_heatmap.py                 |  40 +-
 bertopic/plotting/_hierarchical_documents.py  |  63 +-
 bertopic/plotting/_hierarchy.py               |  77 +-
 bertopic/plotting/_term_rank.py               |  12 +-
 bertopic/plotting/_topics.py                  |  48 +-
 bertopic/plotting/_topics_over_time.py        |  28 +-
 bertopic/plotting/_topics_per_class.py        |  24 +-
 bertopic/representation/__init__.py           |   4 +-
 bertopic/representation/_cohere.py            |   9 +-
 bertopic/representation/_keybert.py           |  40 +-
 bertopic/representation/_langchain.py         |  19 +-
 bertopic/representation/_llamacpp.py          |  18 +-
 bertopic/representation/_mmr.py               |  20 +-
 bertopic/representation/_openai.py            |  19 +-
 bertopic/representation/_pos.py               |  27 +-
 bertopic/representation/_textgeneration.py    |  16 +-
 bertopic/representation/_utils.py             |   4 +-
 bertopic/representation/_visual.py            |  42 +-
 bertopic/representation/_zeroshot.py          |  20 +-
 bertopic/vectorizers/_online_cv.py            |   8 +-
 pyproject.toml                                |   2 +-
 tests/conftest.py                             |  22 +-
 tests/test_bertopic.py                        |  16 +-
 tests/test_plotting/test_approximate.py       |  21 +-
 tests/test_plotting/test_documents.py         |   4 +-
 tests/test_plotting/test_dynamic.py           |   5 +-
 tests/test_plotting/test_term_rank.py         |   4 +-
 tests/test_reduction/test_merge.py            |   8 +-
 .../test_representations.py                   |   4 +-
 tests/test_sub_models/test_cluster.py         |  24 +-
 tests/test_sub_models/test_embeddings.py      |   4 +-
 tests/test_utils.py                           |  36 +-
 tests/test_variations/test_class.py           |   8 +-
 tests/test_variations/test_hierarchy.py       |   8 +-
 tests/test_vectorizers/test_ctfidf.py         |  16 +-
 52 files changed, 425 insertions(+), 1297 deletions(-)
 delete mode 100644 .flake8

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 01f47754..00000000
--- a/.flake8
+++ /dev/null
@@ -1,2 +0,0 @@
-[flake8]
-max-line-length = 160
diff --git a/bertopic/_bertopic.py b/bertopic/_bertopic.py
index 5682f40e..7ef1efbb 100644
--- a/bertopic/_bertopic.py
+++ b/bertopic/_bertopic.py
@@ -221,8 +221,7 @@ def __init__(
         # Topic-based parameters
         if top_n_words > 100:
             logger.warning(
-                "Note that extracting more than 100 words from a sparse "
-                "can slow down computation quite a bit."
+                "Note that extracting more than 100 words from a sparse can slow down computation quite a bit."
) self.top_n_words = top_n_words @@ -241,9 +240,7 @@ def __init__( # Vectorizer self.n_gram_range = n_gram_range - self.vectorizer_model = vectorizer_model or CountVectorizer( - ngram_range=self.n_gram_range - ) + self.vectorizer_model = vectorizer_model or CountVectorizer(ngram_range=self.n_gram_range) self.ctfidf_model = ctfidf_model or ClassTfidfTransformer() # Representation model @@ -364,9 +361,7 @@ def fit( topic_model = BERTopic().fit(docs, embeddings) ``` """ - self.fit_transform( - documents=documents, embeddings=embeddings, y=y, images=images - ) + self.fit_transform(documents=documents, embeddings=embeddings, y=y, images=images) return self def fit_transform( @@ -427,16 +422,12 @@ def fit_transform( check_embeddings_shape(embeddings, documents) doc_ids = range(len(documents)) if documents is not None else range(len(images)) - documents = pd.DataFrame( - {"Document": documents, "ID": doc_ids, "Topic": None, "Image": images} - ) + documents = pd.DataFrame({"Document": documents, "ID": doc_ids, "Topic": None, "Image": images}) # Extract embeddings if embeddings is None: logger.info("Embedding - Transforming documents to embeddings.") - self.embedding_model = select_backend( - self.embedding_model, language=self.language, verbose=self.verbose - ) + self.embedding_model = select_backend(self.embedding_model, language=self.language, verbose=self.verbose) embeddings = self._extract_embeddings( documents.Document.values.tolist(), images=images, @@ -446,9 +437,7 @@ def fit_transform( logger.info("Embedding - Completed \u2713") else: if self.embedding_model is not None: - self.embedding_model = select_backend( - self.embedding_model, language=self.language - ) + self.embedding_model = select_backend(self.embedding_model, language=self.language) # Guided Topic Modeling if self.seed_topic_list is not None and self.embedding_model is not None: @@ -459,17 +448,15 @@ def fit_transform( # Zero-shot Topic Modeling if self._is_zeroshot(): - documents, embeddings, assigned_documents, assigned_embeddings = ( - self._zeroshot_topic_modeling(documents, embeddings) + documents, embeddings, assigned_documents, assigned_embeddings = self._zeroshot_topic_modeling( + documents, embeddings ) # Filter UMAP embeddings to only non-assigned embeddings to be used for clustering umap_embeddings = self.umap_model.transform(embeddings) if len(documents) > 0: # No zero-shot topics matched # Cluster reduced embeddings - documents, probabilities = self._cluster_embeddings( - umap_embeddings, documents, y=y - ) + documents, probabilities = self._cluster_embeddings(umap_embeddings, documents, y=y) if self._is_zeroshot() and len(assigned_documents) > 0: documents, embeddings = self._combine_zeroshot_topics( documents, embeddings, assigned_documents, assigned_embeddings @@ -526,9 +513,7 @@ def fit_transform( ] # Resulting output - self.probabilities_ = self._map_probabilities( - probabilities, original_topics=True - ) + self.probabilities_ = self._map_probabilities(probabilities, original_topics=True) predictions = documents.Topic.to_list() return predictions, self.probabilities_ @@ -588,9 +573,7 @@ def transform( documents = [documents] if embeddings is None: - embeddings = self._extract_embeddings( - documents, images=images, method="document", verbose=self.verbose - ) + embeddings = self._extract_embeddings(documents, images=images, method="document", verbose=self.verbose) # Check if an embedding model was found if embeddings is None: @@ -602,9 +585,7 @@ def transform( # Transform without hdbscan_model and umap_model 
using only cosine similarity elif type(self.hdbscan_model) == BaseCluster: - logger.info( - "Predicting topic assignments through cosine similarity of topic and document embeddings." - ) + logger.info("Predicting topic assignments through cosine similarity of topic and document embeddings.") sim_matrix = cosine_similarity(embeddings, np.array(self.topic_embeddings_)) predictions = np.argmax(sim_matrix, axis=1) - self._outliers @@ -628,12 +609,8 @@ def transform( # Calculate probabilities if self.calculate_probabilities: - logger.info( - "Probabilities - Start calculation of probabilities with HDBSCAN" - ) - probabilities = hdbscan_delegator( - self.hdbscan_model, "membership_vector", umap_embeddings - ) + logger.info("Probabilities - Start calculation of probabilities with HDBSCAN") + probabilities = hdbscan_delegator(self.hdbscan_model, "membership_vector", umap_embeddings) logger.info("Probabilities - Completed \u2713") else: predictions = self.hdbscan_model.predict(umap_embeddings) @@ -712,16 +689,13 @@ def partial_fit( check_embeddings_shape(embeddings, documents) if not hasattr(self.hdbscan_model, "partial_fit"): raise ValueError( - "In order to use `.partial_fit`, the cluster model should have " - "a `.partial_fit` function." + "In order to use `.partial_fit`, the cluster model should have " "a `.partial_fit` function." ) # Prepare documents if isinstance(documents, str): documents = [documents] - documents = pd.DataFrame( - {"Document": documents, "ID": range(len(documents)), "Topic": None} - ) + documents = pd.DataFrame({"Document": documents, "ID": range(len(documents)), "Topic": None}) # Extract embeddings if embeddings is None: @@ -746,9 +720,7 @@ def partial_fit( umap_embeddings = self._reduce_dimensionality(embeddings, y, partial_fit=True) # Cluster reduced embeddings - documents, self.probabilities_ = self._cluster_embeddings( - umap_embeddings, documents, partial_fit=True - ) + documents, self.probabilities_ = self._cluster_embeddings(umap_embeddings, documents, partial_fit=True) topics = documents.Topic.to_list() # Map and find new topics @@ -756,10 +728,7 @@ def partial_fit( self.topic_mapper_ = TopicMapper(topics) mappings = self.topic_mapper_.get_mappings() new_topics = set(topics).difference(set(mappings.keys())) - new_topic_ids = { - topic: max(mappings.values()) + index + 1 - for index, topic in enumerate(new_topics) - } + new_topic_ids = {topic: max(mappings.values()) + index + 1 for index, topic in enumerate(new_topics)} self.topic_mapper_.add_new_topics(new_topic_ids) updated_mappings = self.topic_mapper_.get_mappings() updated_topics = [updated_mappings[topic] for topic in topics] @@ -767,25 +736,19 @@ def partial_fit( # Add missing topics (topics that were originally created but are now missing) if self.topic_representations_: - missing_topics = set(self.topic_representations_.keys()).difference( - set(updated_topics) - ) + missing_topics = set(self.topic_representations_.keys()).difference(set(updated_topics)) for missing_topic in missing_topics: documents.loc[len(documents), :] = [" ", len(documents), missing_topic] else: missing_topics = {} # Prepare documents - documents_per_topic = documents.sort_values("Topic").groupby( - ["Topic"], as_index=False - ) + documents_per_topic = documents.sort_values("Topic").groupby(["Topic"], as_index=False) updated_topics = documents_per_topic.first().Topic.astype(int) documents_per_topic = documents_per_topic.agg({"Document": " ".join}) # Update topic representations - self.c_tf_idf_, updated_words = self._c_tf_idf( - 
documents_per_topic, partial_fit=True - ) + self.c_tf_idf_, updated_words = self._c_tf_idf(documents_per_topic, partial_fit=True) self.topic_representations_ = self._extract_words_per_topic( updated_words, documents, self.c_tf_idf_, calculate_aspects=False ) @@ -801,10 +764,7 @@ def partial_fit( sizes = documents.groupby(["Topic"], as_index=False).count() for _, row in sizes.iterrows(): topic = int(row.Topic) - if ( - self.topic_sizes_.get(topic) is not None - and topic not in missing_topics - ): + if self.topic_sizes_.get(topic) is not None and topic not in missing_topics: self.topic_sizes_[topic] += int(row.Document) elif self.topic_sizes_.get(topic) is None: self.topic_sizes_[topic] = int(row.Document) @@ -879,9 +839,7 @@ def topics_over_time( check_is_fitted(self) check_documents_type(docs) selected_topics = topics if topics else self.topics_ - documents = pd.DataFrame( - {"Document": docs, "Topic": selected_topics, "Timestamps": timestamps} - ) + documents = pd.DataFrame({"Document": docs, "Topic": selected_topics, "Timestamps": timestamps}) global_c_tf_idf = normalize(self.c_tf_idf_, axis=1, norm="l1", copy=False) all_topics = sorted(list(documents.Topic.unique())) @@ -930,9 +888,7 @@ def topics_over_time( list(set(previous_topics).intersection(set(current_topics))) # noqa: F821 ) - current_overlap_idx = [ - current_topics.index(topic) for topic in overlapping_topics - ] + current_overlap_idx = [current_topics.index(topic) for topic in overlapping_topics] previous_overlap_idx = [ previous_topics.index(topic) # noqa: F821 for topic in overlapping_topics @@ -940,8 +896,7 @@ def topics_over_time( c_tf_idf.tolil()[current_overlap_idx] = ( ( - c_tf_idf[current_overlap_idx] - + previous_c_tf_idf[previous_overlap_idx] # noqa: F821 + c_tf_idf[current_overlap_idx] + previous_c_tf_idf[previous_overlap_idx] # noqa: F821 ) / 2.0 ).tolil() @@ -949,16 +904,11 @@ def topics_over_time( # Fine-tune the timestamp c-TF-IDF representation based on the global c-TF-IDF representation # by simply taking the average of the two if global_tuning: - selected_topics = [ - all_topics_indices[topic] - for topic in documents_per_topic.Topic.values - ] + selected_topics = [all_topics_indices[topic] for topic in documents_per_topic.Topic.values] c_tf_idf = (global_c_tf_idf[selected_topics] + c_tf_idf) / 2.0 # Extract the words per topic - words_per_topic = self._extract_words_per_topic( - words, selection, c_tf_idf, calculate_aspects=False - ) + words_per_topic = self._extract_words_per_topic(words, selection, c_tf_idf, calculate_aspects=False) topic_frequency = pd.Series( documents_per_topic.Timestamps.values, index=documents_per_topic.Topic ).to_dict() @@ -979,9 +929,7 @@ def topics_over_time( previous_topics = sorted(list(documents_per_topic.Topic.values)) # noqa: F841 previous_c_tf_idf = c_tf_idf.copy() # noqa: F841 - return pd.DataFrame( - topics_over_time, columns=["Topic", "Words", "Frequency", "Timestamp"] - ) + return pd.DataFrame(topics_over_time, columns=["Topic", "Words", "Frequency", "Timestamp"]) def topics_per_class( self, @@ -1023,9 +971,7 @@ def topics_per_class( ``` """ check_documents_type(docs) - documents = pd.DataFrame( - {"Document": docs, "Topic": self.topics_, "Class": classes} - ) + documents = pd.DataFrame({"Document": docs, "Topic": self.topics_, "Class": classes}) global_c_tf_idf = normalize(self.c_tf_idf_, axis=1, norm="l1", copy=False) # For each unique timestamp, create topic representations @@ -1042,18 +988,11 @@ def topics_per_class( # by simply taking the average of the two if 
global_tuning: c_tf_idf = normalize(c_tf_idf, axis=1, norm="l1", copy=False) - c_tf_idf = ( - global_c_tf_idf[documents_per_topic.Topic.values + self._outliers] - + c_tf_idf - ) / 2.0 + c_tf_idf = (global_c_tf_idf[documents_per_topic.Topic.values + self._outliers] + c_tf_idf) / 2.0 # Extract the words per topic - words_per_topic = self._extract_words_per_topic( - words, selection, c_tf_idf, calculate_aspects=False - ) - topic_frequency = pd.Series( - documents_per_topic.Class.values, index=documents_per_topic.Topic - ).to_dict() + words_per_topic = self._extract_words_per_topic(words, selection, c_tf_idf, calculate_aspects=False) + topic_frequency = pd.Series(documents_per_topic.Class.values, index=documents_per_topic.Topic).to_dict() # Fill dataframe with results topics_at_class = [ @@ -1067,9 +1006,7 @@ def topics_per_class( ] topics_per_class.extend(topics_at_class) - topics_per_class = pd.DataFrame( - topics_per_class, columns=["Topic", "Words", "Frequency", "Class"] - ) + topics_per_class = pd.DataFrame(topics_per_class, columns=["Topic", "Words", "Frequency", "Class"]) return topics_per_class @@ -1138,9 +1075,9 @@ def hierarchical_topics( linkage_function = lambda x: sch.linkage(x, "ward", optimal_ordering=True) # Calculate distance - embeddings = select_topic_representation( - self.c_tf_idf_, self.topic_embeddings_, use_ctfidf - )[0][self._outliers :] + embeddings = select_topic_representation(self.c_tf_idf_, self.topic_embeddings_, use_ctfidf)[0][ + self._outliers : + ] X = distance_function(embeddings) X = validate_distance_matrix(X, embeddings.shape[0]) @@ -1153,15 +1090,9 @@ def hierarchical_topics( Z[:, 2] = get_unique_distances(Z[:, 2]) # Calculate basic bag-of-words to be iteratively merged later - documents = pd.DataFrame( - {"Document": docs, "ID": range(len(docs)), "Topic": self.topics_} - ) - documents_per_topic = documents.groupby(["Topic"], as_index=False).agg( - {"Document": " ".join} - ) - documents_per_topic = documents_per_topic.loc[ - documents_per_topic.Topic != -1, : - ] + documents = pd.DataFrame({"Document": docs, "ID": range(len(docs)), "Topic": self.topics_}) + documents_per_topic = documents.groupby(["Topic"], as_index=False).agg({"Document": " ".join}) + documents_per_topic = documents_per_topic.loc[documents_per_topic.Topic != -1, :] clean_documents = self._preprocess_text(documents_per_topic.Document.values) # Scikit-Learn Deprecation: get_feature_names is deprecated in 1.0 @@ -1187,9 +1118,7 @@ def hierarchical_topics( ) for index in tqdm(range(len(Z))): # Find clustered documents - clusters = ( - sch.fcluster(Z, t=Z[index][2], criterion="distance") - self._outliers - ) + clusters = sch.fcluster(Z, t=Z[index][2], criterion="distance") - self._outliers nr_clusters = len(clusters) # Extract first topic we find to get the set of topics in a merged topic @@ -1200,18 +1129,14 @@ def hierarchical_topics( topic = int(val) else: val = Z[int(val - len(clusters))][0] - clustered_topics = [ - i for i, x in enumerate(clusters) if x == clusters[topic] - ] + clustered_topics = [i for i, x in enumerate(clusters) if x == clusters[topic]] # Group bow per cluster, calculate c-TF-IDF and extract words grouped = csr_matrix(bow[clustered_topics].sum(axis=0)) c_tf_idf = self.ctfidf_model.transform(grouped) selection = documents.loc[documents.Topic.isin(clustered_topics), :] selection.Topic = 0 - words_per_topic = self._extract_words_per_topic( - words, selection, c_tf_idf, calculate_aspects=False - ) + words_per_topic = self._extract_words_per_topic(words, selection, 
c_tf_idf, calculate_aspects=False) # Extract parent's name and ID parent_id = index + len(clusters) @@ -1398,9 +1323,7 @@ def approximate_distribution( t = math.ceil(window / stride) - 1 for i in range(math.ceil(window / stride) - 1): padded.append(tokenset[: window - ((t - i) * stride)]) - padded_ids.append( - list(range(0, window - ((t - i) * stride))) - ) + padded_ids.append(list(range(0, window - ((t - i) * stride)))) token_sets = padded + token_sets token_sets_ids = padded_ids + token_sets_ids @@ -1413,20 +1336,14 @@ def approximate_distribution( # Calculate similarity between embeddings of token sets and the topics if use_embedding_model: - embeddings = self._extract_embeddings( - all_sentences, method="document", verbose=True - ) - similarity = cosine_similarity( - embeddings, self.topic_embeddings_[self._outliers :] - ) + embeddings = self._extract_embeddings(all_sentences, method="document", verbose=True) + similarity = cosine_similarity(embeddings, self.topic_embeddings_[self._outliers :]) # Calculate similarity between c-TF-IDF of token sets and the topics else: bow_doc = self.vectorizer_model.transform(all_sentences) c_tf_idf_doc = self.ctfidf_model.transform(bow_doc) - similarity = cosine_similarity( - c_tf_idf_doc, self.c_tf_idf_[self._outliers :] - ) + similarity = cosine_similarity(c_tf_idf_doc, self.c_tf_idf_[self._outliers :]) # Only keep similarities that exceed the minimum similarity[similarity < min_similarity] = 0 @@ -1445,9 +1362,7 @@ def approximate_distribution( # Assign topics to individual tokens token_id = [i for i in range(len(token))] token_val = {index: [] for index in token_id} - for sim, token_set in zip( - similarity[start:end], all_token_sets_ids[start:end] - ): + for sim, token_set in zip(similarity[start:end], all_token_sets_ids[start:end]): for token in token_set: if token in token_val: token_val[token].append(sim) @@ -1477,9 +1392,7 @@ def approximate_distribution( end = end + 1 group = similarity[start:end].sum(axis=0) topic_distribution.append(group) - topic_distribution = normalize( - np.array(topic_distribution), norm="l1", axis=1 - ) + topic_distribution = normalize(np.array(topic_distribution), norm="l1", axis=1) topic_token_distribution = None # Combine results @@ -1493,9 +1406,7 @@ def approximate_distribution( return topic_distributions, topic_token_distributions - def find_topics( - self, search_term: str = None, image: str = None, top_n: int = 5 - ) -> Tuple[List[int], List[float]]: + def find_topics(self, search_term: str = None, image: str = None, top_n: int = 5) -> Tuple[List[int], List[float]]: """Find topics most similar to a search_term. Creates an embedding for a search query and compares that with @@ -1529,25 +1440,19 @@ def find_topics( search_term consists of a phrase or multiple words. """ if self.embedding_model is None: - raise Exception( - "This method can only be used if you did not use custom embeddings." 
- ) + raise Exception("This method can only be used if you did not use custom embeddings.") topic_list = list(self.topic_representations_.keys()) topic_list.sort() # Extract search_term embeddings and compare with topic embeddings if search_term is not None: - search_embedding = self._extract_embeddings( - [search_term], method="word", verbose=False - ).flatten() + search_embedding = self._extract_embeddings([search_term], method="word", verbose=False).flatten() elif image is not None: search_embedding = self._extract_embeddings( [None], images=[image], method="document", verbose=False ).flatten() - sims = cosine_similarity( - search_embedding.reshape(1, -1), self.topic_embeddings_ - ).flatten() + sims = cosine_similarity(search_embedding.reshape(1, -1), self.topic_embeddings_).flatten() # Extract topics most similar to search_term ids = np.argsort(sims)[-top_n:] @@ -1623,13 +1528,10 @@ def update_topics( if top_n_words > 100: logger.warning( - "Note that extracting more than 100 words from a sparse " - "can slow down computation quite a bit." + "Note that extracting more than 100 words from a sparse " "can slow down computation quite a bit." ) self.top_n_words = top_n_words - self.vectorizer_model = vectorizer_model or CountVectorizer( - ngram_range=n_gram_range - ) + self.vectorizer_model = vectorizer_model or CountVectorizer(ngram_range=n_gram_range) self.ctfidf_model = ctfidf_model or ClassTfidfTransformer() self.representation_model = representation_model @@ -1644,12 +1546,8 @@ def update_topics( "c-TF-IDF embeddings instead of centroid embeddings." ) - documents = pd.DataFrame( - {"Document": docs, "Topic": topics, "ID": range(len(docs)), "Image": images} - ) - documents_per_topic = documents.groupby(["Topic"], as_index=False).agg( - {"Document": " ".join} - ) + documents = pd.DataFrame({"Document": docs, "Topic": topics, "ID": range(len(docs)), "Image": images}) + documents_per_topic = documents.groupby(["Topic"], as_index=False).agg({"Document": " ".join}) # Update topic sizes and assignments self._update_topic_size(documents) @@ -1697,9 +1595,7 @@ def get_topics(self, full: bool = False) -> Mapping[str, Tuple[str, float]]: else: return self.topic_representations_ - def get_topic( - self, topic: int, full: bool = False - ) -> Union[Mapping[str, Tuple[str, float]], bool]: + def get_topic(self, topic: int, full: bool = False) -> Union[Mapping[str, Tuple[str, float]], bool]: """Return top n words for a specific topic and their c-TF-IDF scores. 
Arguments: @@ -1719,10 +1615,7 @@ def get_topic( if topic in self.topic_representations_: if full: representations = {"Main": self.topic_representations_[topic]} - aspects = { - aspect: representations[topic] - for aspect, representations in self.topic_aspects_.items() - } + aspects = {aspect: representations[topic] for aspect, representations in self.topic_aspects_.items()} representations.update(aspects) return representations else: @@ -1746,25 +1639,17 @@ def get_topic_info(self, topic: int = None) -> pd.DataFrame: """ check_is_fitted(self) - info = pd.DataFrame( - self.topic_sizes_.items(), columns=["Topic", "Count"] - ).sort_values("Topic") + info = pd.DataFrame(self.topic_sizes_.items(), columns=["Topic", "Count"]).sort_values("Topic") info["Name"] = info.Topic.map(self.topic_labels_) # Custom label if self.custom_labels_ is not None: if len(self.custom_labels_) == len(info): - labels = { - topic - self._outliers: label - for topic, label in enumerate(self.custom_labels_) - } + labels = {topic - self._outliers: label for topic, label in enumerate(self.custom_labels_)} info["CustomName"] = info["Topic"].map(labels) # Main Keywords - values = { - topic: list(list(zip(*values))[0]) - for topic, values in self.topic_representations_.items() - } + values = {topic: list(list(zip(*values))[0]) for topic, values in self.topic_representations_.items()} info["Representation"] = info["Topic"].map(values) # Extract all topic aspects @@ -1774,24 +1659,16 @@ def get_topic_info(self, topic: int = None) -> pd.DataFrame: if isinstance(list(values.values())[-1][0], tuple) or isinstance( list(values.values())[-1][0], list ): - values = { - topic: list(list(zip(*value))[0]) - for topic, value in values.items() - } + values = {topic: list(list(zip(*value))[0]) for topic, value in values.items()} elif isinstance(list(values.values())[-1][0], str): - values = { - topic: " ".join(value).strip() - for topic, value in values.items() - } + values = {topic: " ".join(value).strip() for topic, value in values.items()} info[aspect] = info["Topic"].map(values) # Representative Docs / Images if self.representative_docs_ is not None: info["Representative_Docs"] = info["Topic"].map(self.representative_docs_) if self.representative_images_ is not None: - info["Representative_Images"] = info["Topic"].map( - self.representative_images_ - ) + info["Representative_Images"] = info["Topic"].map(self.representative_images_) # Select specific topic to return if topic is not None: @@ -1826,9 +1703,9 @@ def get_topic_freq(self, topic: int = None) -> Union[pd.DataFrame, int]: if isinstance(topic, int): return self.topic_sizes_[topic] else: - return pd.DataFrame( - self.topic_sizes_.items(), columns=["Topic", "Count"] - ).sort_values("Count", ascending=False) + return pd.DataFrame(self.topic_sizes_.items(), columns=["Topic", "Count"]).sort_values( + "Count", ascending=False + ) def get_document_info( self, @@ -1899,10 +1776,7 @@ def get_document_info( document_info = pd.merge(document_info, topic_info, on="Topic", how="left") # Add top n words - top_n_words = { - topic: " - ".join(list(zip(*self.get_topic(topic)))[0]) - for topic in set(self.topics_) - } + top_n_words = {topic: " - ".join(list(zip(*self.get_topic(topic)))[0]) for topic in set(self.topics_)} document_info["Top_n_words"] = document_info.Topic.map(top_n_words) # Add flat probabilities @@ -1916,15 +1790,9 @@ def get_document_info( ] # Add representative document labels - repr_docs = [ - repr_doc - for repr_docs in self.representative_docs_.values() - for repr_doc in 
repr_docs - ] + repr_docs = [repr_doc for repr_docs in self.representative_docs_.values() for repr_doc in repr_docs] document_info["Representative_document"] = False - document_info.loc[ - document_info.Document.isin(repr_docs), "Representative_document" - ] = True + document_info.loc[document_info.Document.isin(repr_docs), "Representative_document"] = True # Add custom meta data provided by the user if metadata is not None: @@ -2028,12 +1896,8 @@ def get_topic_tree( max_original_topic = hier_topics.Parent_ID.astype(int).min() - 1 # Extract mapping from ID to name - topic_to_name = dict( - zip(hier_topics.Child_Left_ID, hier_topics.Child_Left_Name) - ) - topic_to_name.update( - dict(zip(hier_topics.Child_Right_ID, hier_topics.Child_Right_Name)) - ) + topic_to_name = dict(zip(hier_topics.Child_Left_ID, hier_topics.Child_Left_Name)) + topic_to_name.update(dict(zip(hier_topics.Child_Right_ID, hier_topics.Child_Right_Name))) topic_to_name = {topic: name[:100] for topic, name in topic_to_name.items()} # Create tree @@ -2051,8 +1915,7 @@ def get_tree(start, tree): def _tree(to_print, start, parent, tree, grandpa=None, indent=""): # Get distance between merged topics distance = hier_topics.loc[ - (hier_topics.Child_Left_ID == parent) - | (hier_topics.Child_Right_ID == parent), + (hier_topics.Child_Left_ID == parent) | (hier_topics.Child_Right_ID == parent), "Distance", ] distance = distance.values[0] if len(distance) > 0 else 10 @@ -2064,12 +1927,7 @@ def _tree(to_print, start, parent, tree, grandpa=None, indent=""): if int(parent) <= max_original_topic: # Do not append topic ID if they are not merged if distance < max_distance: - to_print += ( - "■──" - + topic_to_name[parent] - + f" ── Topic: {parent}" - + "\n" - ) + to_print += "■──" + topic_to_name[parent] + f" ── Topic: {parent}" + "\n" else: to_print += "O \n" else: @@ -2080,15 +1938,11 @@ def _tree(to_print, start, parent, tree, grandpa=None, indent=""): for child in tree[parent][:-1]: to_print += indent + "├" + "─" - to_print = _tree( - to_print, start, child, tree, parent, indent + "│" + " " * width - ) + to_print = _tree(to_print, start, child, tree, parent, indent + "│" + " " * width) child = tree[parent][-1] to_print += indent + "└" + "─" - to_print = _tree( - to_print, start, child, tree, parent, indent + " " * (width + 1) - ) + to_print = _tree(to_print, start, child, tree, parent, indent + " " * (width + 1)) return to_print @@ -2099,9 +1953,7 @@ def _tree(to_print, start, parent, tree, grandpa=None, indent=""): start = str(hier_topics.Parent_ID.astype(int).max()) return get_tree(start, tree) - def set_topic_labels( - self, topic_labels: Union[List[str], Mapping[int, str]] - ) -> None: + def set_topic_labels(self, topic_labels: Union[List[str], Mapping[int, str]]) -> None: """Set custom topic labels in your fitted BERTopic model. 
Arguments: @@ -2145,17 +1997,12 @@ def set_topic_labels( if isinstance(topic_labels, dict): if self.custom_labels_ is not None: - original_labels = { - topic: label - for topic, label in zip(unique_topics, self.custom_labels_) - } + original_labels = {topic: label for topic, label in zip(unique_topics, self.custom_labels_)} else: info = self.get_topic_info() original_labels = dict(zip(info.Topic, info.Name)) custom_labels = [ - topic_labels.get(topic) - if topic_labels.get(topic) - else original_labels[topic] + topic_labels.get(topic) if topic_labels.get(topic) else original_labels[topic] for topic in unique_topics ] @@ -2164,8 +2011,7 @@ def set_topic_labels( custom_labels = topic_labels else: raise ValueError( - "Make sure that `topic_labels` contains the same number " - "of labels as there are topics." + "Make sure that `topic_labels` contains the same number " "of labels as there are topics." ) self.custom_labels_ = custom_labels @@ -2283,8 +2129,7 @@ def merge_topics( mapping[topic] = topic_group[0] else: raise ValueError( - "Make sure that `topics_to_merge` is either" - "a list of topics or a list of list of topics." + "Make sure that `topics_to_merge` is either" "a list of topics or a list of list of topics." ) # Track mappings and sizes of topics for merging topic embeddings @@ -2472,9 +2317,7 @@ def reduce_outliers( # Check correct use of parameters if strategy.lower() == "probabilities" and probabilities is None: - raise ValueError( - "Make sure to pass in `probabilities` in order to use the probabilities strategy" - ) + raise ValueError("Make sure to pass in `probabilities` in order to use the probabilities strategy") # Reduce outliers by extracting most likely topics through the topic-term probability matrix if strategy.lower() == "probabilities": @@ -2490,12 +2333,8 @@ def reduce_outliers( topic_distr, _ = self.approximate_distribution( outlier_docs, min_similarity=threshold, **distributions_params ) - outlier_topics = iter( - [np.argmax(prob) if sum(prob) > 0 else -1 for prob in topic_distr] - ) - new_topics = [ - topic if topic != -1 else next(outlier_topics) for topic in topics - ] + outlier_topics = iter([np.argmax(prob) if sum(prob) > 0 else -1 for prob in topic_distr]) + new_topics = [topic if topic != -1 else next(outlier_topics) for topic in topics] # Reduce outliers by finding the most similar c-TF-IDF representations elif strategy.lower() == "c-tf-idf": @@ -2505,18 +2344,12 @@ def reduce_outliers( # Calculate c-TF-IDF of outlier documents with all topics bow_doc = self.vectorizer_model.transform(outlier_docs) c_tf_idf_doc = self.ctfidf_model.transform(bow_doc) - similarity = cosine_similarity( - c_tf_idf_doc, self.c_tf_idf_[self._outliers :] - ) + similarity = cosine_similarity(c_tf_idf_doc, self.c_tf_idf_[self._outliers :]) # Update topics similarity[similarity < threshold] = 0 - outlier_topics = iter( - [np.argmax(sim) if sum(sim) > 0 else -1 for sim in similarity] - ) - new_topics = [ - topic if topic != -1 else next(outlier_topics) for topic in topics - ] + outlier_topics = iter([np.argmax(sim) if sum(sim) > 0 else -1 for sim in similarity]) + new_topics = [topic if topic != -1 else next(outlier_topics) for topic in topics] # Reduce outliers by finding the most similar topic embeddings elif strategy.lower() == "embeddings": @@ -2533,28 +2366,18 @@ def reduce_outliers( # Extract or calculate embeddings for outlier documents if embeddings is not None: - outlier_embeddings = np.array( - [embeddings[index] for index in outlier_ids] - ) + outlier_embeddings = 
np.array([embeddings[index] for index in outlier_ids]) elif images is not None: outlier_images = [images[index] for index in outlier_ids] - outlier_embeddings = self.embedding_model.embed_images( - outlier_images, verbose=self.verbose - ) + outlier_embeddings = self.embedding_model.embed_images(outlier_images, verbose=self.verbose) else: outlier_embeddings = self.embedding_model.embed_documents(outlier_docs) - similarity = cosine_similarity( - outlier_embeddings, self.topic_embeddings_[self._outliers :] - ) + similarity = cosine_similarity(outlier_embeddings, self.topic_embeddings_[self._outliers :]) # Update topics similarity[similarity < threshold] = 0 - outlier_topics = iter( - [np.argmax(sim) if sum(sim) > 0 else -1 for sim in similarity] - ) - new_topics = [ - topic if topic != -1 else next(outlier_topics) for topic in topics - ] + outlier_topics = iter([np.argmax(sim) if sum(sim) > 0 else -1 for sim in similarity]) + new_topics = [topic if topic != -1 else next(outlier_topics) for topic in topics] return new_topics @@ -3507,9 +3330,7 @@ def save( ) # Minimal - save_utils.save_hf( - model=self, save_directory=save_directory, serialization=serialization - ) + save_utils.save_hf(model=self, save_directory=save_directory, serialization=serialization) save_utils.save_topics(model=self, path=save_directory / "topics.json") save_utils.save_images(model=self, path=save_directory / "images") save_utils.save_config( @@ -3525,9 +3346,7 @@ def save( save_directory=save_directory, serialization=serialization, ) - save_utils.save_ctfidf_config( - model=self, path=save_directory / "ctfidf_config.json" - ) + save_utils.save_ctfidf_config(model=self, path=save_directory / "ctfidf_config.json") @classmethod def load(cls, path: str, embedding_model=None): @@ -3557,22 +3376,16 @@ def load(cls, path: str, embedding_model=None): with open(file_or_dir, "rb") as file: if embedding_model: topic_model = joblib.load(file) - topic_model.embedding_model = select_backend( - embedding_model, verbose=topic_model.verbose - ) + topic_model.embedding_model = select_backend(embedding_model, verbose=topic_model.verbose) else: topic_model = joblib.load(file) return topic_model # Load from directory or HF if file_or_dir.is_dir(): - topics, params, tensors, ctfidf_tensors, ctfidf_config, images = ( - save_utils.load_local_files(file_or_dir) - ) + topics, params, tensors, ctfidf_tensors, ctfidf_config, images = save_utils.load_local_files(file_or_dir) elif "/" in str(path): - topics, params, tensors, ctfidf_tensors, ctfidf_config, images = ( - save_utils.load_files_from_hf(path) - ) + topics, params, tensors, ctfidf_tensors, ctfidf_config, images = save_utils.load_files_from_hf(path) else: raise ValueError("Make sure to either pass a valid directory or HF model.") topic_model = _create_model_from_files( @@ -3587,9 +3400,7 @@ def load(cls, path: str, embedding_model=None): # Replace embedding model if one is specifically chosen if embedding_model is not None: - topic_model.embedding_model = select_backend( - embedding_model, verbose=topic_model.verbose - ) + topic_model.embedding_model = select_backend(embedding_model, verbose=topic_model.verbose) return topic_model @@ -3645,9 +3456,7 @@ def merge_models(cls, models, min_similarity: float = 0.7, embedding_model=None) all_topics, all_params, all_tensors = [], [], [] for index, model in enumerate(models): model.save(tmpdir, serialization="pytorch") - topics, params, tensors, _, _, _ = save_utils.load_local_files( - Path(tmpdir) - ) + topics, params, tensors, _, _, _ = 
save_utils.load_local_files(Path(tmpdir)) all_topics.append(topics) all_params.append(params) all_tensors.append(np.array(tensors["topic_embeddings"])) @@ -3666,11 +3475,7 @@ def merge_models(cls, models, min_similarity: float = 0.7, embedding_model=None) # Extract new topics new_topics = sorted( - [ - index - selected_topics["_outliers"] - for index, sim in enumerate(sims) - if sim < min_similarity - ] + [index - selected_topics["_outliers"] for index, sim in enumerate(sims) if sim < min_similarity] ) max_topic = max(set(merged_topics["topics"])) @@ -3680,12 +3485,10 @@ def merge_models(cls, models, min_similarity: float = 0.7, embedding_model=None) if new_topic != -1: max_topic += 1 new_topics_dict[new_topic] = max_topic - merged_topics["topic_representations"][str(max_topic)] = ( - selected_topics["topic_representations"][str(new_topic)] - ) - merged_topics["topic_labels"][str(max_topic)] = selected_topics[ - "topic_labels" - ][str(new_topic)] + merged_topics["topic_representations"][str(max_topic)] = selected_topics["topic_representations"][ + str(new_topic) + ] + merged_topics["topic_labels"][str(max_topic)] = selected_topics["topic_labels"][str(new_topic)] # Add new aspects if selected_topics["topic_aspects"]: @@ -3698,27 +3501,19 @@ def merge_models(cls, models, min_similarity: float = 0.7, embedding_model=None) # If the original model does not have topic aspects but the to be added model does if not merged_topics.get("topic_aspects"): - merged_topics["topic_aspects"] = selected_topics[ - "topic_aspects" - ] + merged_topics["topic_aspects"] = selected_topics["topic_aspects"] # If they both contain topic aspects, add to the existing set of aspects else: - for aspect, values in selected_topics[ - "topic_aspects" - ].items(): - merged_topics["topic_aspects"][aspect][ - str(max_topic) - ] = values[str(new_topic)] + for aspect, values in selected_topics["topic_aspects"].items(): + merged_topics["topic_aspects"][aspect][str(max_topic)] = values[str(new_topic)] # Add new embeddings new_tensors = tensors[new_topic + selected_topics["_outliers"]] merged_tensors = np.vstack([merged_tensors, new_tensors]) # Topic Mapper - merged_topics["topic_mapper"] = TopicMapper( - list(range(-1, max_topic + 1, 1)) - ).mappings_ + merged_topics["topic_mapper"] = TopicMapper(list(range(-1, max_topic + 1, 1))).mappings_ # Find similar topics and re-assign those from the new models sims_idx = np.argmax(sim_matrix, axis=1) @@ -3749,13 +3544,8 @@ def merge_models(cls, models, min_similarity: float = 0.7, embedding_model=None) # Replace embedding model if one is specifically chosen verbose = any([model.verbose for model in models]) - if ( - embedding_model is not None - and type(merged_model.embedding_model) == BaseEmbedder - ): - merged_model.embedding_model = select_backend( - embedding_model, verbose=verbose - ) + if embedding_model is not None and type(merged_model.embedding_model) == BaseEmbedder: + merged_model.embedding_model = select_backend(embedding_model, verbose=verbose) return merged_model def push_to_hf_hub( @@ -3874,17 +3664,11 @@ def _extract_embeddings( documents = [documents] if images is not None and hasattr(self.embedding_model, "embed_images"): - embeddings = self.embedding_model.embed( - documents=documents, images=images, verbose=verbose - ) + embeddings = self.embedding_model.embed(documents=documents, images=images, verbose=verbose) elif method == "word": - embeddings = self.embedding_model.embed_words( - words=documents, verbose=verbose - ) + embeddings = 
self.embedding_model.embed_words(words=documents, verbose=verbose) elif method == "document": - embeddings = self.embedding_model.embed_documents( - documents, verbose=verbose - ) + embeddings = self.embedding_model.embed_documents(documents, verbose=verbose) elif documents[0] is None and images is None: raise ValueError( "Make sure to use an embedding model that can either embed documents" @@ -3897,9 +3681,7 @@ def _extract_embeddings( ) return embeddings - def _images_to_text( - self, documents: pd.DataFrame, embeddings: np.ndarray - ) -> pd.DataFrame: + def _images_to_text(self, documents: pd.DataFrame, embeddings: np.ndarray) -> pd.DataFrame: """Convert images to text.""" logger.info("Images - Converting images to text. This might take a while.") if isinstance(self.representation_model, dict): @@ -3912,19 +3694,14 @@ def _images_to_text( documents = tuner.image_to_text(documents, embeddings) elif isinstance(self.representation_model, BaseRepresentation): if getattr(self.representation_model, "image_to_text_model", False): - documents = self.representation_model.image_to_text( - documents, embeddings - ) + documents = self.representation_model.image_to_text(documents, embeddings) logger.info("Images - Completed \u2713") return documents def _map_predictions(self, predictions: List[int]) -> List[int]: """Map predictions to the correct topics if topics were reduced.""" mappings = self.topic_mapper_.get_mappings(original_topics=True) - mapped_predictions = [ - mappings[prediction] if prediction in mappings else -1 - for prediction in predictions - ] + mapped_predictions = [mappings[prediction] if prediction in mappings else -1 for prediction in predictions] return mapped_predictions def _reduce_dimensionality( @@ -4008,12 +3785,8 @@ def _cluster_embeddings( if hasattr(self.hdbscan_model, "probabilities_"): probabilities = self.hdbscan_model.probabilities_ - if self.calculate_probabilities and is_supported_hdbscan( - self.hdbscan_model - ): - probabilities = hdbscan_delegator( - self.hdbscan_model, "all_points_membership_vectors" - ) + if self.calculate_probabilities and is_supported_hdbscan(self.hdbscan_model): + probabilities = hdbscan_delegator(self.hdbscan_model, "all_points_membership_vectors") if not partial_fit: self.topic_mapper_ = TopicMapper(self.topics_) @@ -4037,23 +3810,15 @@ def _zeroshot_topic_modeling( documents: The leftover documents that were not assigned to any topic embeddings: The leftover embeddings that were not assigned to any topic """ - logger.info( - "Zeroshot Step 1 - Finding documents that could be assigned to either one of the zero-shot topics" - ) + logger.info("Zeroshot Step 1 - Finding documents that could be assigned to either one of the zero-shot topics") # Similarity between document and zero-shot topic embeddings zeroshot_embeddings = self._extract_embeddings(self.zeroshot_topic_list) cosine_similarities = cosine_similarity(embeddings, zeroshot_embeddings) assignment = np.argmax(cosine_similarities, 1) assignment_vals = np.max(cosine_similarities, 1) - assigned_ids = [ - index - for index, value in enumerate(assignment_vals) - if value >= self.zeroshot_min_similarity - ] + assigned_ids = [index for index, value in enumerate(assignment_vals) if value >= self.zeroshot_min_similarity] non_assigned_ids = [ - index - for index, value in enumerate(assignment_vals) - if value < self.zeroshot_min_similarity + index for index, value in enumerate(assignment_vals) if value < self.zeroshot_min_similarity ] # Assign topics @@ -4117,32 +3882,22 @@ def 
_combine_zeroshot_topics( documents: DataFrame with all the original documents with their topic assignments embeddings: np.ndarray of embeddings aligned with the documents """ - logger.info( - "Zeroshot Step 2 - Combining topics from zero-shot topic modeling with topics from clustering..." - ) + logger.info("Zeroshot Step 2 - Combining topics from zero-shot topic modeling with topics from clustering...") # Combine Zero-shot topics with topics from clustering zeroshot_topic_idx_to_topic_id = { zeroshot_topic_id: new_topic_id - for new_topic_id, zeroshot_topic_id in enumerate( - set(assigned_documents.Topic) - ) + for new_topic_id, zeroshot_topic_id in enumerate(set(assigned_documents.Topic)) } self._topic_id_to_zeroshot_topic_idx = { new_topic_id: zeroshot_topic_id - for new_topic_id, zeroshot_topic_id in enumerate( - set(assigned_documents.Topic) - ) + for new_topic_id, zeroshot_topic_id in enumerate(set(assigned_documents.Topic)) } - assigned_documents.Topic = assigned_documents.Topic.map( - zeroshot_topic_idx_to_topic_id - ) + assigned_documents.Topic = assigned_documents.Topic.map(zeroshot_topic_idx_to_topic_id) num_zeroshot_topics = len(zeroshot_topic_idx_to_topic_id) # Insert zeroshot topics between outlier cluster and other clusters documents.Topic = documents.Topic.apply( - lambda topic_id: topic_id + num_zeroshot_topics - if topic_id != -1 - else topic_id + lambda topic_id: topic_id + num_zeroshot_topics if topic_id != -1 else topic_id ) # Combine the clustered documents/embeddings with assigned documents/embeddings in the original order @@ -4159,9 +3914,7 @@ def _combine_zeroshot_topics( logger.info("Zeroshot Step 2 - Completed \u2713") return documents, embeddings - def _guided_topic_modeling( - self, embeddings: np.ndarray - ) -> Tuple[List[int], np.array]: + def _guided_topic_modeling(self, embeddings: np.ndarray) -> Tuple[List[int], np.array]: """Apply Guided Topic Modeling. We transform the seeded topics to embeddings using the @@ -4185,12 +3938,8 @@ def _guided_topic_modeling( logger.info("Guided - Find embeddings highly related to seeded topics.") # Create embeddings from the seeded topics seed_topic_list = [" ".join(seed_topic) for seed_topic in self.seed_topic_list] - seed_topic_embeddings = self._extract_embeddings( - seed_topic_list, verbose=self.verbose - ) - seed_topic_embeddings = np.vstack( - [seed_topic_embeddings, embeddings.mean(axis=0)] - ) + seed_topic_embeddings = self._extract_embeddings(seed_topic_list, verbose=self.verbose) + seed_topic_embeddings = np.vstack([seed_topic_embeddings, embeddings.mean(axis=0)]) # Label documents that are most similar to one of the seeded topics sim_matrix = cosine_similarity(embeddings, seed_topic_embeddings) @@ -4201,9 +3950,7 @@ def _guided_topic_modeling( # embedding of the seeded topic to force the documents in a cluster for seed_topic in range(len(seed_topic_list)): indices = [index for index, topic in enumerate(y) if topic == seed_topic] - embeddings[indices] = np.average( - [embeddings[indices], seed_topic_embeddings[seed_topic]], weights=[3, 1] - ) + embeddings[indices] = np.average([embeddings[indices], seed_topic_embeddings[seed_topic]], weights=[3, 1]) logger.info("Guided - Completed \u2713") return y, embeddings @@ -4226,17 +3973,11 @@ def _extract_topics( c_tf_idf: The resulting matrix giving a value (importance score) for each word per topic """ if verbose: - logger.info( - "Representation - Extracting topics from clusters using representation models." 
- ) - documents_per_topic = documents.groupby(["Topic"], as_index=False).agg( - {"Document": " ".join} - ) + logger.info("Representation - Extracting topics from clusters using representation models.") + documents_per_topic = documents.groupby(["Topic"], as_index=False).agg({"Document": " ".join}) self.c_tf_idf_, words = self._c_tf_idf(documents_per_topic) self.topic_representations_ = self._extract_words_per_topic(words, documents) - self._create_topic_vectors( - documents=documents, embeddings=embeddings, mappings=mappings - ) + self._create_topic_vectors(documents=documents, embeddings=embeddings, mappings=mappings) if verbose: logger.info("Representation - Completed \u2713") @@ -4310,11 +4051,7 @@ def _extract_representative_docs( selected_docs_ids = selection.index.tolist() # Calculate similarity - nr_docs = ( - nr_repr_docs - if len(selected_docs) > nr_repr_docs - else len(selected_docs) - ) + nr_docs = nr_repr_docs if len(selected_docs) > nr_repr_docs else len(selected_docs) bow = self.vectorizer_model.transform(selected_docs) ctfidf = self.ctfidf_model.transform(bow) sim_matrix = cosine_similarity(ctfidf, c_tf_idf[index]) @@ -4331,28 +4068,14 @@ def _extract_representative_docs( # Extract top n most representative documents else: - indices = np.argpartition(sim_matrix.reshape(1, -1)[0], -nr_docs)[ - -nr_docs: - ] + indices = np.argpartition(sim_matrix.reshape(1, -1)[0], -nr_docs)[-nr_docs:] docs = [selected_docs[index] for index in indices] - doc_ids = [ - selected_docs_ids[index] - for index, doc in enumerate(selected_docs) - if doc in docs - ] + doc_ids = [selected_docs_ids[index] for index, doc in enumerate(selected_docs) if doc in docs] repr_docs_ids.append(doc_ids) repr_docs.extend(docs) - repr_docs_indices.append( - [ - repr_docs_indices[-1][-1] + i + 1 if index != 0 else i - for i in range(nr_docs) - ] - ) - repr_docs_mappings = { - topic: repr_docs[i[0] : i[-1] + 1] - for topic, i in zip(topics.keys(), repr_docs_indices) - } + repr_docs_indices.append([repr_docs_indices[-1][-1] + i + 1 if index != 0 else i for i in range(nr_docs)]) + repr_docs_mappings = {topic: repr_docs[i[0] : i[-1] + 1] for topic, i in zip(topics.keys(), repr_docs_indices)} return repr_docs_mappings, repr_docs, repr_docs_indices, repr_docs_ids @@ -4393,30 +4116,22 @@ def _create_topic_vectors( topic_ids = topics_from["topics_from"] topic_sizes = topics_from["topic_sizes"] if topic_ids: - embds = np.array(self.topic_embeddings_)[ - np.array(topic_ids) + self._outliers - ] + embds = np.array(self.topic_embeddings_)[np.array(topic_ids) + self._outliers] topic_embedding = np.average(embds, axis=0, weights=topic_sizes) topic_embeddings_dict[topic_to] = topic_embedding # Re-order topic embeddings topics_to_map = { - topic_mapping[0]: topic_mapping[1] - for topic_mapping in np.array(self.topic_mapper_.mappings_)[:, -2:] + topic_mapping[0]: topic_mapping[1] for topic_mapping in np.array(self.topic_mapper_.mappings_)[:, -2:] } topic_embeddings = {} for topic, embds in topic_embeddings_dict.items(): topic_embeddings[topics_to_map[topic]] = embds unique_topics = sorted(list(topic_embeddings.keys())) - self.topic_embeddings_ = np.array( - [topic_embeddings[topic] for topic in unique_topics] - ) + self.topic_embeddings_ = np.array([topic_embeddings[topic] for topic in unique_topics]) # Topic embeddings based on keyword representations - elif ( - self.embedding_model is not None - and type(self.embedding_model) is not BaseEmbedder - ): + elif self.embedding_model is not None and type(self.embedding_model) is not 
BaseEmbedder: topic_list = list(self.topic_representations_.keys()) topic_list.sort() @@ -4428,9 +4143,7 @@ def _create_topic_vectors( # Extract embeddings for all words in all topics topic_words = [self.get_topic(topic) for topic in topic_list] topic_words = [word[0] for topic in topic_words for word in topic] - word_embeddings = self._extract_embeddings( - topic_words, method="word", verbose=False - ) + word_embeddings = self._extract_embeddings(topic_words, method="word", verbose=False) # Take the weighted average of word embeddings in a topic based on their c-TF-IDF value # The embeddings var is a single numpy matrix and therefore slicing is necessary to @@ -4488,33 +4201,16 @@ def _c_tf_idf( if self.ctfidf_model.seed_words and self.seed_topic_list: seed_topic_list = [seed for seeds in self.seed_topic_list for seed in seeds] multiplier = np.array( - [ - self.ctfidf_model.seed_multiplier - if word in self.ctfidf_model.seed_words - else 1 - for word in words - ] - ) - multiplier = np.array( - [ - 1.2 if word in seed_topic_list else value - for value, word in zip(multiplier, words) - ] + [self.ctfidf_model.seed_multiplier if word in self.ctfidf_model.seed_words else 1 for word in words] ) + multiplier = np.array([1.2 if word in seed_topic_list else value for value, word in zip(multiplier, words)]) elif self.ctfidf_model.seed_words: multiplier = np.array( - [ - self.ctfidf_model.seed_multiplier - if word in self.ctfidf_model.seed_words - else 1 - for word in words - ] + [self.ctfidf_model.seed_multiplier if word in self.ctfidf_model.seed_words else 1 for word in words] ) elif self.seed_topic_list: seed_topic_list = [seed for seeds in self.seed_topic_list for seed in seeds] - multiplier = np.array( - [1.2 if word in seed_topic_list else 1 for word in words] - ) + multiplier = np.array([1.2 if word in seed_topic_list else 1 for word in words]) if fit: self.ctfidf_model = self.ctfidf_model.fit(X, multiplier=multiplier) @@ -4572,9 +4268,7 @@ def _extract_words_per_topic( # Get top 30 words per topic based on c-TF-IDF score base_topics = { label: [ - (words[word_index], score) - if word_index is not None and score > 0 - else ("", 0.00001) + (words[word_index], score) if word_index is not None and score > 0 else ("", 0.00001) for word_index, score in zip(indices[index][::-1], scores[index][::-1]) ] for index, label in enumerate(labels) @@ -4584,40 +4278,27 @@ def _extract_words_per_topic( topics = base_topics.copy() if not self.representation_model: # Default representation: c_tf_idf + top_n_words - topics = { - label: values[: self.top_n_words] for label, values in topics.items() - } + topics = {label: values[: self.top_n_words] for label, values in topics.items()} elif isinstance(self.representation_model, list): for tuner in self.representation_model: topics = tuner.extract_topics(self, documents, c_tf_idf, topics) elif isinstance(self.representation_model, BaseRepresentation): - topics = self.representation_model.extract_topics( - self, documents, c_tf_idf, topics - ) + topics = self.representation_model.extract_topics(self, documents, c_tf_idf, topics) elif isinstance(self.representation_model, dict): if self.representation_model.get("Main"): main_model = self.representation_model["Main"] if isinstance(main_model, BaseRepresentation): - topics = main_model.extract_topics( - self, documents, c_tf_idf, topics - ) + topics = main_model.extract_topics(self, documents, c_tf_idf, topics) elif isinstance(main_model, list): for tuner in main_model: topics = tuner.extract_topics(self, documents, 
c_tf_idf, topics) else: - raise TypeError( - f"unsupported type {type(main_model).__name__} for representation_model['Main']" - ) + raise TypeError(f"unsupported type {type(main_model).__name__} for representation_model['Main']") else: # Default representation: c_tf_idf + top_n_words - topics = { - label: values[: self.top_n_words] - for label, values in topics.items() - } + topics = {label: values[: self.top_n_words] for label, values in topics.items()} else: - raise TypeError( - f"unsupported type {type(self.representation_model).__name__} for representation_model" - ) + raise TypeError(f"unsupported type {type(self.representation_model).__name__} for representation_model") # Extract additional topic aspects if calculate_aspects and isinstance(self.representation_model, dict): @@ -4626,19 +4307,12 @@ def _extract_words_per_topic( aspects = base_topics.copy() if not aspect_model: # Default representation: c_tf_idf + top_n_words - aspects = { - label: values[: self.top_n_words] - for label, values in aspects.items() - } + aspects = {label: values[: self.top_n_words] for label, values in aspects.items()} if isinstance(aspect_model, list): for tuner in aspect_model: - aspects = tuner.extract_topics( - self, documents, c_tf_idf, aspects - ) + aspects = tuner.extract_topics(self, documents, c_tf_idf, aspects) elif isinstance(aspect_model, BaseRepresentation): - aspects = aspect_model.extract_topics( - self, documents, c_tf_idf, aspects - ) + aspects = aspect_model.extract_topics(self, documents, c_tf_idf, aspects) else: raise TypeError( f"unsupported type {type(aspect_model).__name__} for representation_model[{repr(aspect)}]" @@ -4647,9 +4321,7 @@ def _extract_words_per_topic( return topics - def _reduce_topics( - self, documents: pd.DataFrame, use_ctfidf: bool = False - ) -> pd.DataFrame: + def _reduce_topics(self, documents: pd.DataFrame, use_ctfidf: bool = False) -> pd.DataFrame: """Reduce topics to self.nr_topics. Arguments: @@ -4676,9 +4348,7 @@ def _reduce_topics( ) return documents - def _reduce_to_n_topics( - self, documents: pd.DataFrame, use_ctfidf: bool = False - ) -> pd.DataFrame: + def _reduce_to_n_topics(self, documents: pd.DataFrame, use_ctfidf: bool = False) -> pd.DataFrame: """Reduce topics to self.nr_topics. 
Arguments: @@ -4700,9 +4370,7 @@ def _reduce_to_n_topics( # Cluster the topic embeddings using AgglomerativeClustering if version.parse(sklearn_version) >= version.parse("1.4.0"): - cluster = AgglomerativeClustering( - self.nr_topics - self._outliers, metric="precomputed", linkage="average" - ) + cluster = AgglomerativeClustering(self.nr_topics - self._outliers, metric="precomputed", linkage="average") else: cluster = AgglomerativeClustering( self.nr_topics - self._outliers, @@ -4713,9 +4381,7 @@ def _reduce_to_n_topics( new_topics = [cluster.labels_[topic] if topic != -1 else -1 for topic in topics] # Track mappings and sizes of topics for merging topic embeddings - mapped_topics = { - from_topic: to_topic for from_topic, to_topic in zip(topics, new_topics) - } + mapped_topics = {from_topic: to_topic for from_topic, to_topic in zip(topics, new_topics)} basic_mappings = defaultdict(list) for key, val in sorted(mapped_topics.items()): basic_mappings[val].append(key) @@ -4742,8 +4408,7 @@ def _reduce_to_n_topics( if self._is_zeroshot(): new_topic_id_to_zeroshot_topic_idx = {} topics_to_map = { - topic_mapping[0]: topic_mapping[1] - for topic_mapping in np.array(self.topic_mapper_.mappings_)[:, -2:] + topic_mapping[0]: topic_mapping[1] for topic_mapping in np.array(self.topic_mapper_.mappings_)[:, -2:] } for topic_to, topics_from in basic_mappings.items(): @@ -4753,9 +4418,7 @@ def _reduce_to_n_topics( # which of the original topics are zero-shot zeroshot_topic_ids = [ - topic_id - for topic_id in topics_from - if topic_id in self._topic_id_to_zeroshot_topic_idx + topic_id for topic_id in topics_from if topic_id in self._topic_id_to_zeroshot_topic_idx ] if len(zeroshot_topic_ids) == 0: continue @@ -4763,9 +4426,7 @@ def _reduce_to_n_topics( # If any of the original topics are zero-shot, take the best fitting zero-shot label # if the cosine similarity with the new topic exceeds the zero-shot threshold zeroshot_labels = [ - self.zeroshot_topic_list[ - self._topic_id_to_zeroshot_topic_idx[topic_id] - ] + self.zeroshot_topic_list[self._topic_id_to_zeroshot_topic_idx[topic_id]] for topic_id in zeroshot_topic_ids ] zeroshot_embeddings = self._extract_embeddings(zeroshot_labels) @@ -4775,18 +4436,14 @@ def _reduce_to_n_topics( best_zeroshot_topic_idx = np.argmax(cosine_similarities) best_cosine_similarity = cosine_similarities[best_zeroshot_topic_idx] if best_cosine_similarity >= self.zeroshot_min_similarity: - new_topic_id_to_zeroshot_topic_idx[topic_to] = zeroshot_topic_ids[ - best_zeroshot_topic_idx - ] + new_topic_id_to_zeroshot_topic_idx[topic_to] = zeroshot_topic_ids[best_zeroshot_topic_idx] self._topic_id_to_zeroshot_topic_idx = new_topic_id_to_zeroshot_topic_idx self._update_topic_size(documents) return documents - def _auto_reduce_topics( - self, documents: pd.DataFrame, use_ctfidf: bool = False - ) -> pd.DataFrame: + def _auto_reduce_topics(self, documents: pd.DataFrame, use_ctfidf: bool = False) -> pd.DataFrame: """Reduce the number of topics automatically using HDBSCAN. 
Arguments: @@ -4819,13 +4476,8 @@ def _auto_reduce_topics( for index, prediction in enumerate(predictions) if prediction != -1 } - documents.Topic = ( - documents.Topic.map(mapped_topics).fillna(documents.Topic).astype(int) - ) - mapped_topics = { - from_topic: to_topic - for from_topic, to_topic in zip(topics, documents.Topic.tolist()) - } + documents.Topic = documents.Topic.map(mapped_topics).fillna(documents.Topic).astype(int) + mapped_topics = {from_topic: to_topic for from_topic, to_topic in zip(topics, documents.Topic.tolist())} # Track mappings and sizes of topics for merging topic embeddings mappings = defaultdict(list) @@ -4873,17 +4525,13 @@ def _sort_mappings_by_frequency(self, documents: pd.DataFrame) -> pd.DataFrame: self._update_topic_size(documents) # Map topics based on frequency - df = pd.DataFrame( - self.topic_sizes_.items(), columns=["Old_Topic", "Size"] - ).sort_values("Size", ascending=False) + df = pd.DataFrame(self.topic_sizes_.items(), columns=["Old_Topic", "Size"]).sort_values("Size", ascending=False) df = df[df.Old_Topic != -1] sorted_topics = {**{-1: -1}, **dict(zip(df.Old_Topic, range(len(df))))} self.topic_mapper_.add_mappings(sorted_topics) # Map documents - documents.Topic = ( - documents.Topic.map(sorted_topics).fillna(documents.Topic).astype(int) - ) + documents.Topic = documents.Topic.map(sorted_topics).fillna(documents.Topic).astype(int) self._update_topic_size(documents) return documents @@ -4918,9 +4566,7 @@ def _map_probabilities( ) for from_topic, to_topic in mappings.items(): if to_topic != -1 and from_topic != -1: - mapped_probabilities[:, to_topic] += probabilities[ - :, from_topic - ] + mapped_probabilities[:, to_topic] += probabilities[:, from_topic] return mapped_probabilities @@ -4936,12 +4582,8 @@ def _preprocess_text(self, documents: np.ndarray) -> List[str]: cleaned_documents = [doc.replace("\n", " ") for doc in documents] cleaned_documents = [doc.replace("\t", " ") for doc in cleaned_documents] if self.language == "english": - cleaned_documents = [ - re.sub(r"[^A-Za-z0-9 ]+", "", doc) for doc in cleaned_documents - ] - cleaned_documents = [ - doc if doc != "" else "emptydoc" for doc in cleaned_documents - ] + cleaned_documents = [re.sub(r"[^A-Za-z0-9 ]+", "", doc) for doc in cleaned_documents] + cleaned_documents = [doc if doc != "" else "emptydoc" for doc in cleaned_documents] return cleaned_documents @staticmethod @@ -4961,13 +4603,8 @@ def _top_n_idx_sparse(matrix: csr_matrix, n: int) -> np.ndarray: indices = [] for le, ri in zip(matrix.indptr[:-1], matrix.indptr[1:]): n_row_pick = min(n, ri - le) - values = matrix.indices[ - le + np.argpartition(matrix.data[le:ri], -n_row_pick)[-n_row_pick:] - ] - values = [ - values[index] if len(values) >= index + 1 else None - for index in range(n) - ] + values = matrix.indices[le + np.argpartition(matrix.data[le:ri], -n_row_pick)[-n_row_pick:]] + values = [values[index] if len(values) >= index + 1 else None for index in range(n)] indices.append(values) return np.array(indices) @@ -4984,9 +4621,7 @@ def _top_n_values_sparse(matrix: csr_matrix, indices: np.ndarray) -> np.ndarray: """ top_values = [] for row, values in enumerate(indices): - scores = np.array( - [matrix[row, value] if value is not None else 0 for value in values] - ) + scores = np.array([matrix[row, value] if value is not None else 0 for value in values]) top_values.append(scores) return np.array(top_values) @@ -4999,11 +4634,7 @@ def _get_param_names(cls): """ init_signature = inspect.signature(cls.__init__) parameters = sorted( - [ - 
p.name - for p in init_signature.parameters.values() - if p.name != "self" and p.kind != p.VAR_KEYWORD - ] + [p.name for p in init_signature.parameters.values() if p.name != "self" and p.kind != p.VAR_KEYWORD] ) return parameters @@ -5173,22 +4804,16 @@ def _create_model_from_files( **params, ) topic_model.topic_embeddings_ = tensors["topic_embeddings"].numpy() - topic_model.topic_representations_ = { - int(key): val for key, val in topics["topic_representations"].items() - } + topic_model.topic_representations_ = {int(key): val for key, val in topics["topic_representations"].items()} topic_model.topics_ = topics["topics"] - topic_model.topic_sizes_ = { - int(key): val for key, val in topics["topic_sizes"].items() - } + topic_model.topic_sizes_ = {int(key): val for key, val in topics["topic_sizes"].items()} topic_model.custom_labels_ = topics["custom_labels"] if topics.get("topic_aspects"): topic_aspects = {} for aspect, values in topics["topic_aspects"].items(): if aspect != "Visual_Aspect": - topic_aspects[aspect] = { - int(topic): value for topic, value in values.items() - } + topic_aspects[aspect] = {int(topic): value for topic, value in values.items()} topic_model.topic_aspects_ = topic_aspects if images is not None: @@ -5209,20 +4834,12 @@ def _create_model_from_files( ) # CountVectorizer - topic_model.vectorizer_model = CountVectorizer( - **ctfidf_config["vectorizer_model"]["params"] - ) - topic_model.vectorizer_model.vocabulary_ = ctfidf_config["vectorizer_model"][ - "vocab" - ] + topic_model.vectorizer_model = CountVectorizer(**ctfidf_config["vectorizer_model"]["params"]) + topic_model.vectorizer_model.vocabulary_ = ctfidf_config["vectorizer_model"]["vocab"] # ClassTfidfTransformer - topic_model.ctfidf_model.reduce_frequent_words = ctfidf_config["ctfidf_model"][ - "reduce_frequent_words" - ] - topic_model.ctfidf_model.bm25_weighting = ctfidf_config["ctfidf_model"][ - "bm25_weighting" - ] + topic_model.ctfidf_model.reduce_frequent_words = ctfidf_config["ctfidf_model"]["reduce_frequent_words"] + topic_model.ctfidf_model.bm25_weighting = ctfidf_config["ctfidf_model"]["bm25_weighting"] idf = ctfidf_tensors["diag"].numpy() topic_model.ctfidf_model._idf_diag = sp.diags( idf, offsets=0, shape=(len(idf), len(idf)), format="csr", dtype=np.float64 diff --git a/bertopic/_save_utils.py b/bertopic/_save_utils.py index a01ba691..845e0f75 100644 --- a/bertopic/_save_utils.py +++ b/bertopic/_save_utils.py @@ -135,9 +135,7 @@ def push_to_hf_hub( save_ctfidf: Whether to save c-TF-IDF information """ if not _has_hf_hub: - raise ValueError( - "Make sure you have the huggingface hub installed via `pip install --upgrade huggingface_hub`" - ) + raise ValueError("Make sure you have the huggingface hub installed via `pip install --upgrade huggingface_hub`") # Create repo if it doesn't exist yet and infer complete repo_id repo_url = create_repo(repo_id, token=token, private=private, exist_ok=True) @@ -156,9 +154,7 @@ def push_to_hf_hub( # Add README if it does not exist try: - get_hf_file_metadata( - hf_hub_url(repo_id=repo_id, filename="README.md", revision=revision) - ) + get_hf_file_metadata(hf_hub_url(repo_id=repo_id, filename="README.md", revision=revision)) except: # noqa: E722 if model_card: readme_text = generate_readme(model, repo_id) @@ -241,13 +237,9 @@ def load_files_from_hf(path): # c-TF-IDF try: - ctfidf_config = load_cfg_from_json( - hf_hub_download(path, CTFIDF_CFG_NAME, revision=None) - ) + ctfidf_config = load_cfg_from_json(hf_hub_download(path, CTFIDF_CFG_NAME, revision=None)) try: - 
ctfidf_tensors = hf_hub_download( - path, CTFIDF_SAFE_WEIGHTS_NAME, revision=None - ) + ctfidf_tensors = hf_hub_download(path, CTFIDF_SAFE_WEIGHTS_NAME, revision=None) ctfidf_tensors = load_safetensors(ctfidf_tensors) except: # noqa: E722 ctfidf_tensors = hf_hub_download(path, CTFIDF_WEIGHTS_NAME, revision=None) @@ -268,9 +260,7 @@ def load_files_from_hf(path): topic_list = list(topics["topic_representations"].keys()) images = {} for topic in topic_list: - image = Image.open( - hf_hub_download(path, f"images/{topic}.jpg", revision=None) - ) + image = Image.open(hf_hub_download(path, f"images/{topic}.jpg", revision=None)) images[int(topic)] = image return topics, params, tensors, ctfidf_tensors, ctfidf_config, images @@ -283,11 +273,7 @@ def generate_readme(model, repo_id: str): # Get Statistics model_name = repo_id.split("/")[-1] - params = { - param: value - for param, value in model.get_params().items() - if "model" not in param - } + params = {param: value for param, value in model.get_params().items() if "model" not in param} params = "\n".join([f"* {param}: {value}" for param, value in params.items()]) topics = sorted(list(set(model.topics_))) nr_topics = str(len(set(model.topics_))) @@ -298,23 +284,15 @@ def generate_readme(model, repo_id: str): nr_documents = "" # Topic information - topic_keywords = [ - " - ".join(list(zip(*model.get_topic(topic)))[0][:5]) for topic in topics - ] + topic_keywords = [" - ".join(list(zip(*model.get_topic(topic)))[0][:5]) for topic in topics] topic_freq = [model.get_topic_freq(topic) for topic in topics] - topic_labels = ( - model.custom_labels_ - if model.custom_labels_ - else [model.topic_labels_[topic] for topic in topics] - ) + topic_labels = model.custom_labels_ if model.custom_labels_ else [model.topic_labels_[topic] for topic in topics] topics = [ f"| {topic} | {topic_keywords[index]} | {topic_freq[topic]} | {topic_labels[index]} | \n" for index, topic in enumerate(topics) ] topics = topic_table_head + "".join(topics) - frameworks = "\n".join( - [f"* {param}: {value}" for param, value in get_package_versions().items()] - ) + frameworks = "\n".join([f"* {param}: {value}" for param, value in get_package_versions().items()]) # Fill Statistics into model card model_card = model_card.replace("{MODEL_NAME}", model_name) @@ -330,9 +308,7 @@ def generate_readme(model, repo_id: str): if not has_visual_aspect: model_card = model_card.replace("{PIPELINE_TAG}", "text-classification") else: - model_card = model_card.replace( - "pipeline_tag: {PIPELINE_TAG}\n", "" - ) # TODO add proper tag for this instance + model_card = model_card.replace("pipeline_tag: {PIPELINE_TAG}\n", "") # TODO add proper tag for this instance return model_card diff --git a/bertopic/_utils.py b/bertopic/_utils.py index 0695b7cf..6c859041 100644 --- a/bertopic/_utils.py +++ b/bertopic/_utils.py @@ -45,20 +45,14 @@ def check_documents_type(documents): if not any([isinstance(doc, str) for doc in documents]): raise TypeError("Make sure that the iterable only contains strings.") else: - raise TypeError( - "Make sure that the documents variable is an iterable containing strings only." 
-        )
+        raise TypeError("Make sure that the documents variable is an iterable containing strings only.")
 
 
 def check_embeddings_shape(embeddings, docs):
     """Check if the embeddings have the correct shape."""
     if embeddings is not None:
-        if not any(
-            [isinstance(embeddings, np.ndarray), isinstance(embeddings, csr_matrix)]
-        ):
-            raise ValueError(
-                "Make sure to input embeddings as a numpy array or scipy.sparse.csr.csr_matrix. "
-            )
+        if not any([isinstance(embeddings, np.ndarray), isinstance(embeddings, csr_matrix)]):
+            raise ValueError("Make sure to input embeddings as a numpy array or scipy.sparse.csr.csr_matrix. ")
         else:
             if embeddings.shape[0] != len(docs):
                 raise ValueError(
@@ -137,16 +131,11 @@ def validate_distance_matrix(X, n_samples):
         # check it has correct size
         n = s[0]
         if n != (n_samples * (n_samples - 1) / 2):
-            raise ValueError(
-                "The condensed distance matrix must have " "shape (n*(n-1)/2,)."
-            )
+            raise ValueError("The condensed distance matrix must have " "shape (n*(n-1)/2,).")
     elif len(s) == 2:
         # check it has correct size
         if (s[0] != n_samples) or (s[1] != n_samples):
-            raise ValueError(
-                "The distance matrix must be of shape "
-                "(n, n) where n is the number of samples."
-            )
+            raise ValueError("The distance matrix must be of shape " "(n, n) where n is the number of samples.")
         # force zero diagonal and convert to condensed
         np.fill_diagonal(X, 0)
         X = squareform(X)
@@ -182,15 +171,11 @@ def get_unique_distances(dists: np.array, noise_max=1e-7) -> np.array:
     for i in range(dists.shape[0] - 1):
         if dists[i] == dists[i + 1]:
             # returns the next unique distance or the current distance with the added noise
-            next_unique_dist = next(
-                (d for d in dists[i + 1 :] if d != dists[i]), dists[i] + noise_max
-            )
+            next_unique_dist = next((d for d in dists[i + 1 :] if d != dists[i]), dists[i] + noise_max)
 
             # the noise can never be large then the difference between the next unique distance and the current one
             curr_max_noise = min(noise_max, next_unique_dist - dists_cp[i])
-            dists_cp[i + 1] = np.random.uniform(
-                low=dists_cp[i] + curr_max_noise / 2, high=dists_cp[i] + curr_max_noise
-            )
+            dists_cp[i + 1] = np.random.uniform(low=dists_cp[i] + curr_max_noise / 2, high=dists_cp[i] + curr_max_noise)
 
     return dists_cp
diff --git a/bertopic/backend/_flair.py b/bertopic/backend/_flair.py
index 2abeec49..f6e27fea 100644
--- a/bertopic/backend/_flair.py
+++ b/bertopic/backend/_flair.py
@@ -67,9 +67,7 @@ def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
         embeddings = []
         for document in tqdm(documents, disable=not verbose):
             try:
-                sentence = (
-                    Sentence(document) if document else Sentence("an empty document")
-                )
+                sentence = Sentence(document) if document else Sentence("an empty document")
                 self.embedding_model.embed(sentence)
             except RuntimeError:
                 sentence = Sentence("an empty document")
diff --git a/bertopic/backend/_gensim.py b/bertopic/backend/_gensim.py
index 3727e04d..d76fff17 100644
--- a/bertopic/backend/_gensim.py
+++ b/bertopic/backend/_gensim.py
@@ -48,9 +48,7 @@ def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
             Document/words embeddings with shape (n, m) with `n` documents/words
             that each have an embeddings size of `m`
         """
-        vector_shape = self.embedding_model.get_vector(
-            list(self.embedding_model.index_to_key)[0]
-        ).shape[0]
+        vector_shape = self.embedding_model.get_vector(list(self.embedding_model.index_to_key)[0]).shape[0]
         empty_vector = np.zeros(vector_shape)
 
         # Extract word embeddings and pool to document-level
diff --git a/bertopic/backend/_hftransformers.py b/bertopic/backend/_hftransformers.py
index 8de9cc2a..344412e9 100644
--- a/bertopic/backend/_hftransformers.py
+++ b/bertopic/backend/_hftransformers.py
@@ -58,9 +58,7 @@ def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
         embeddings = []
         for document, features in tqdm(
-            zip(
-                documents, self.embedding_model(dataset, truncation=True, padding=True)
-            ),
+            zip(documents, self.embedding_model(dataset, truncation=True, padding=True)),
             total=len(dataset),
             disable=not verbose,
         ):
@@ -79,12 +77,10 @@ def _embed(self, document: str, features: np.ndarray) -> np.ndarray:
         https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2#usage-huggingface-transformers
         """
         token_embeddings = np.array(features)
-        attention_mask = self.embedding_model.tokenizer(
-            document, truncation=True, padding=True, return_tensors="np"
-        )["attention_mask"]
-        input_mask_expanded = np.broadcast_to(
-            np.expand_dims(attention_mask, -1), token_embeddings.shape
-        )
+        attention_mask = self.embedding_model.tokenizer(document, truncation=True, padding=True, return_tensors="np")[
+            "attention_mask"
+        ]
+        input_mask_expanded = np.broadcast_to(np.expand_dims(attention_mask, -1), token_embeddings.shape)
         sum_embeddings = np.sum(token_embeddings * input_mask_expanded, 1)
         sum_mask = np.clip(
             input_mask_expanded.sum(1),
diff --git a/bertopic/backend/_multimodal.py b/bertopic/backend/_multimodal.py
index 846efc41..e1aac8d3 100644
--- a/bertopic/backend/_multimodal.py
+++ b/bertopic/backend/_multimodal.py
@@ -84,9 +84,7 @@ def __init__(
         except:  # noqa: E722
             self.tokenizer = None
 
-    def embed(
-        self, documents: List[str], images: List[str] = None, verbose: bool = False
-    ) -> np.ndarray:
+    def embed(self, documents: List[str], images: List[str] = None, verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words or images into an
         n-dimensional matrix of embeddings.
@@ -124,9 +122,7 @@ def embed(
         elif image_embeddings is not None:
             return image_embeddings
 
-    def embed_documents(
-        self, documents: List[str], verbose: bool = False
-    ) -> np.ndarray:
+    def embed_documents(self, documents: List[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
@@ -139,9 +135,7 @@ def embed_documents( that each have an embeddings size of `m` """ truncated_docs = [self._truncate_document(doc) for doc in documents] - embeddings = self.embedding_model.encode( - truncated_docs, show_progress_bar=verbose - ) + embeddings = self.embedding_model.encode(truncated_docs, show_progress_bar=verbose) return embeddings def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray: @@ -170,15 +164,12 @@ def embed_images(self, images, verbose): end_index = (i * self.batch_size) + self.batch_size images_to_embed = [ - Image.open(image) if isinstance(image, str) else image - for image in images[start_index:end_index] + Image.open(image) if isinstance(image, str) else image for image in images[start_index:end_index] ] if self.image_model is not None: img_emb = self.image_model.encode(images_to_embed) else: - img_emb = self.embedding_model.encode( - images_to_embed, show_progress_bar=False - ) + img_emb = self.embedding_model.encode(images_to_embed, show_progress_bar=False) embeddings.extend(img_emb.tolist()) # Close images @@ -191,9 +182,7 @@ def embed_images(self, images, verbose): if self.image_model is not None: embeddings = self.image_model.encode(images_to_embed) else: - embeddings = self.embedding_model.encode( - images_to_embed, show_progress_bar=False - ) + embeddings = self.embedding_model.encode(images_to_embed, show_progress_bar=False) return embeddings def _truncate_document(self, document): diff --git a/bertopic/backend/_openai.py b/bertopic/backend/_openai.py index 19d18268..7a4cc6b3 100644 --- a/bertopic/backend/_openai.py +++ b/bertopic/backend/_openai.py @@ -70,9 +70,7 @@ def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray: if self.batch_size is not None: embeddings = [] for batch in tqdm(self._chunks(prepared_documents), disable=not verbose): - response = self.client.embeddings.create( - input=batch, **self.generator_kwargs - ) + response = self.client.embeddings.create(input=batch, **self.generator_kwargs) embeddings.extend([r.embedding for r in response.data]) # Delay subsequent calls @@ -81,9 +79,7 @@ def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray: # Extract embeddings all at once else: - response = self.client.embeddings.create( - input=prepared_documents, **self.generator_kwargs - ) + response = self.client.embeddings.create(input=prepared_documents, **self.generator_kwargs) embeddings = [r.embedding for r in response.data] return np.array(embeddings) diff --git a/bertopic/backend/_use.py b/bertopic/backend/_use.py index c33c76fc..a17a87d1 100644 --- a/bertopic/backend/_use.py +++ b/bertopic/backend/_use.py @@ -50,9 +50,6 @@ def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray: that each have an embeddings size of `m` """ embeddings = np.array( - [ - self.embedding_model([doc]).cpu().numpy()[0] - for doc in tqdm(documents, disable=not verbose) - ] + [self.embedding_model([doc]).cpu().numpy()[0] for doc in tqdm(documents, disable=not verbose)] ) return embeddings diff --git a/bertopic/backend/_utils.py b/bertopic/backend/_utils.py index 7c78d32e..4190bd4e 100644 --- a/bertopic/backend/_utils.py +++ b/bertopic/backend/_utils.py @@ -68,9 +68,7 @@ ] -def select_backend( - embedding_model, language: str = None, verbose: bool = False -) -> BaseEmbedder: +def select_backend(embedding_model, language: str = None, verbose: bool = False) -> BaseEmbedder: """Select an embedding model based on language or a specific provided model. 
When selecting a language, we choose all-MiniLM-L6-v2 for English and paraphrase-multilingual-MiniLM-L12-v2 for all other languages as it support 100+ languages. @@ -115,9 +113,7 @@ def select_backend( return USEBackend(embedding_model) # Sentence Transformer embeddings - if "sentence_transformers" in str(type(embedding_model)) or isinstance( - embedding_model, str - ): + if "sentence_transformers" in str(type(embedding_model)) or isinstance(embedding_model, str): from ._sentencetransformers import SentenceTransformerBackend return SentenceTransformerBackend(embedding_model) @@ -134,13 +130,9 @@ def select_backend( from ._sentencetransformers import SentenceTransformerBackend if language.lower() in ["English", "english", "en"]: - return SentenceTransformerBackend( - "sentence-transformers/all-MiniLM-L6-v2" - ) + return SentenceTransformerBackend("sentence-transformers/all-MiniLM-L6-v2") elif language.lower() in languages or language == "multilingual": - return SentenceTransformerBackend( - "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" - ) + return SentenceTransformerBackend("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") else: raise ValueError( f"{language} is currently not supported. However, you can " diff --git a/bertopic/cluster/_utils.py b/bertopic/cluster/_utils.py index 82f243c6..375a15b3 100644 --- a/bertopic/cluster/_utils.py +++ b/bertopic/cluster/_utils.py @@ -25,9 +25,7 @@ def hdbscan_delegator(model, func: str, embeddings: np.ndarray = None): if "cuml" in str_type_model and "hdbscan" in str_type_model: from cuml.cluster import hdbscan as cuml_hdbscan - predictions, probabilities = cuml_hdbscan.approximate_predict( - model, embeddings - ) + predictions, probabilities = cuml_hdbscan.approximate_predict(model, embeddings) return predictions, probabilities predictions = model.predict(embeddings) diff --git a/bertopic/plotting/_approximate_distribution.py b/bertopic/plotting/_approximate_distribution.py index a6380273..d5c0bd60 100644 --- a/bertopic/plotting/_approximate_distribution.py +++ b/bertopic/plotting/_approximate_distribution.py @@ -86,9 +86,7 @@ def text_color(val): def highligh_color(data, color="white"): attr = "background-color: {}".format(color) - return pd.DataFrame( - np.where(data == 0, attr, ""), index=data.index, columns=data.columns - ) + return pd.DataFrame(np.where(data == 0, attr, ""), index=data.index, columns=data.columns) if len(df) == 0: return df diff --git a/bertopic/plotting/_barchart.py b/bertopic/plotting/_barchart.py index 417e2c0f..a6e614cb 100644 --- a/bertopic/plotting/_barchart.py +++ b/bertopic/plotting/_barchart.py @@ -52,9 +52,7 @@ def visualize_barchart( """ - colors = itertools.cycle( - ["#D55E00", "#0072B2", "#CC79A7", "#E69F00", "#56B4E9", "#009E73", "#F0E442"] - ) + colors = itertools.cycle(["#D55E00", "#0072B2", "#CC79A7", "#E69F00", "#56B4E9", "#009E73", "#F0E442"]) # Select topics based on top_n and topics args freq_df = topic_model.get_topic_freq() @@ -68,21 +66,11 @@ def visualize_barchart( # Initialize figure if isinstance(custom_labels, str): - subplot_titles = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in topics - ] - subplot_titles = [ - "_".join([label[0] for label in labels[:4]]) for labels in subplot_titles - ] - subplot_titles = [ - label if len(label) < 30 else label[:27] + "..." 
for label in subplot_titles - ] + subplot_titles = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in topics] + subplot_titles = ["_".join([label[0] for label in labels[:4]]) for labels in subplot_titles] + subplot_titles = [label if len(label) < 30 else label[:27] + "..." for label in subplot_titles] elif topic_model.custom_labels_ is not None and custom_labels: - subplot_titles = [ - topic_model.custom_labels_[topic + topic_model._outliers] - for topic in topics - ] + subplot_titles = [topic_model.custom_labels_[topic + topic_model._outliers] for topic in topics] else: subplot_titles = [f"Topic {topic}" for topic in topics] columns = 4 @@ -100,9 +88,7 @@ def visualize_barchart( row = 1 column = 1 for topic in topics: - words = [word + " " for word, _ in topic_model.get_topic(topic)][:n_words][ - ::-1 - ] + words = [word + " " for word, _ in topic_model.get_topic(topic)][:n_words][::-1] scores = [score for _, score in topic_model.get_topic(topic)][:n_words][::-1] fig.add_trace( diff --git a/bertopic/plotting/_datamap.py b/bertopic/plotting/_datamap.py index a793e4fc..a0e02c18 100644 --- a/bertopic/plotting/_datamap.py +++ b/bertopic/plotting/_datamap.py @@ -106,17 +106,13 @@ def visualize_document_datamap( # Extract embeddings if not already done if embeddings is None and reduced_embeddings is None: - embeddings_to_reduce = topic_model._extract_embeddings( - df.doc.to_list(), method="document" - ) + embeddings_to_reduce = topic_model._extract_embeddings(df.doc.to_list(), method="document") else: embeddings_to_reduce = embeddings # Reduce input embeddings if reduced_embeddings is None: - umap_model = UMAP( - n_neighbors=15, n_components=2, min_dist=0.15, metric="cosine" - ).fit(embeddings_to_reduce) + umap_model = UMAP(n_neighbors=15, n_components=2, min_dist=0.15, metric="cosine").fit(embeddings_to_reduce) embeddings_2d = umap_model.embedding_ else: embeddings_2d = reduced_embeddings @@ -125,27 +121,18 @@ def visualize_document_datamap( # Prepare text and names if isinstance(custom_labels, str): - names = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in unique_topics - ] + names = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in unique_topics] names = [" ".join([label[0] for label in labels[:4]]) for labels in names] names = [label if len(label) < 30 else label[:27] + "..." 
for label in names] elif topic_model.custom_labels_ is not None and custom_labels: - names = [ - topic_model.custom_labels_[topic + topic_model._outliers] - for topic in unique_topics - ] + names = [topic_model.custom_labels_[topic + topic_model._outliers] for topic in unique_topics] else: names = [ - f"Topic-{topic}: " - + " ".join([word for word, value in topic_model.get_topic(topic)][:3]) + f"Topic-{topic}: " + " ".join([word for word, value in topic_model.get_topic(topic)][:3]) for topic in unique_topics ] - topic_name_mapping = { - topic_num: topic_name for topic_num, topic_name in zip(unique_topics, names) - } + topic_name_mapping = {topic_num: topic_name for topic_num, topic_name in zip(unique_topics, names)} topic_name_mapping[-1] = "Unlabelled" # If a set of topics is chosen, set everything else to "Unlabelled" diff --git a/bertopic/plotting/_distribution.py b/bertopic/plotting/_distribution.py index d04d140b..c04a851b 100644 --- a/bertopic/plotting/_distribution.py +++ b/bertopic/plotting/_distribution.py @@ -60,17 +60,11 @@ def visualize_distribution( # Create labels if isinstance(custom_labels, str): - labels = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in labels_idx - ] + labels = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in labels_idx] labels = ["_".join([label[0] for label in l[:4]]) for l in labels] # noqa: E741 labels = [label if len(label) < 30 else label[:27] + "..." for label in labels] elif topic_model.custom_labels_ is not None and custom_labels: - labels = [ - topic_model.custom_labels_[idx + topic_model._outliers] - for idx in labels_idx - ] + labels = [topic_model.custom_labels_[idx + topic_model._outliers] for idx in labels_idx] else: labels = [] for idx in labels_idx: diff --git a/bertopic/plotting/_documents.py b/bertopic/plotting/_documents.py index 0c5287b4..e1a3f1d3 100644 --- a/bertopic/plotting/_documents.py +++ b/bertopic/plotting/_documents.py @@ -109,24 +109,18 @@ def visualize_documents( # Extract embeddings if not already done if sample is None: if embeddings is None and reduced_embeddings is None: - embeddings_to_reduce = topic_model._extract_embeddings( - df.doc.to_list(), method="document" - ) + embeddings_to_reduce = topic_model._extract_embeddings(df.doc.to_list(), method="document") else: embeddings_to_reduce = embeddings else: if embeddings is not None: embeddings_to_reduce = embeddings[indices] elif embeddings is None and reduced_embeddings is None: - embeddings_to_reduce = topic_model._extract_embeddings( - df.doc.to_list(), method="document" - ) + embeddings_to_reduce = topic_model._extract_embeddings(df.doc.to_list(), method="document") # Reduce input embeddings if reduced_embeddings is None: - umap_model = UMAP( - n_neighbors=10, n_components=2, min_dist=0.0, metric="cosine" - ).fit(embeddings_to_reduce) + umap_model = UMAP(n_neighbors=10, n_components=2, min_dist=0.0, metric="cosine").fit(embeddings_to_reduce) embeddings_2d = umap_model.embedding_ elif sample is not None and reduced_embeddings is not None: embeddings_2d = reduced_embeddings[indices] @@ -143,21 +137,14 @@ def visualize_documents( # Prepare text and names if isinstance(custom_labels, str): - names = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in unique_topics - ] + names = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in unique_topics] names = ["_".join([label[0] for label in labels[:4]]) for labels in names] 
names = [label if len(label) < 30 else label[:27] + "..." for label in names] elif topic_model.custom_labels_ is not None and custom_labels: - names = [ - topic_model.custom_labels_[topic + topic_model._outliers] - for topic in unique_topics - ] + names = [topic_model.custom_labels_[topic + topic_model._outliers] for topic in unique_topics] else: names = [ - f"{topic}_" - + "_".join([word for word, value in topic_model.get_topic(topic)][:3]) + f"{topic}_" + "_".join([word for word, value in topic_model.get_topic(topic)][:3]) for topic in unique_topics ] @@ -248,12 +235,8 @@ def visualize_documents( y1=sum(y_range) / 2, line=dict(color="#9E9E9E", width=2), ) - fig.add_annotation( - x=x_range[0], y=sum(y_range) / 2, text="D1", showarrow=False, yshift=10 - ) - fig.add_annotation( - y=y_range[1], x=sum(x_range) / 2, text="D2", showarrow=False, xshift=10 - ) + fig.add_annotation(x=x_range[0], y=sum(y_range) / 2, text="D1", showarrow=False, yshift=10) + fig.add_annotation(y=y_range[1], x=sum(x_range) / 2, text="D2", showarrow=False, xshift=10) # Stylize layout fig.update_layout( diff --git a/bertopic/plotting/_heatmap.py b/bertopic/plotting/_heatmap.py index ad9f0664..9e51f13e 100644 --- a/bertopic/plotting/_heatmap.py +++ b/bertopic/plotting/_heatmap.py @@ -59,9 +59,9 @@ def visualize_heatmap( """ - embeddings = select_topic_representation( - topic_model.c_tf_idf_, topic_model.topic_embeddings_, use_ctfidf - )[0][topic_model._outliers :] + embeddings = select_topic_representation(topic_model.c_tf_idf_, topic_model.topic_embeddings_, use_ctfidf)[0][ + topic_model._outliers : + ] # Select topics based on top_n and topics args freq_df = topic_model.get_topic_freq() @@ -77,10 +77,7 @@ def visualize_heatmap( sorted_topics = topics if n_clusters: if n_clusters >= len(set(topics)): - raise ValueError( - "Make sure to set `n_clusters` lower than " - "the total number of unique topics." - ) + raise ValueError("Make sure to set `n_clusters` lower than " "the total number of unique topics.") distance_matrix = cosine_similarity(embeddings[topics]) Z = linkage(distance_matrix, "ward") @@ -101,31 +98,16 @@ def visualize_heatmap( # Create labels if isinstance(custom_labels, str): new_labels = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in sorted_topics - ] - new_labels = [ - "_".join([label[0] for label in labels[:4]]) for labels in new_labels - ] - new_labels = [ - label if len(label) < 30 else label[:27] + "..." for label in new_labels + [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in sorted_topics ] + new_labels = ["_".join([label[0] for label in labels[:4]]) for labels in new_labels] + new_labels = [label if len(label) < 30 else label[:27] + "..." for label in new_labels] elif topic_model.custom_labels_ is not None and custom_labels: - new_labels = [ - topic_model.custom_labels_[topic + topic_model._outliers] - for topic in sorted_topics - ] + new_labels = [topic_model.custom_labels_[topic + topic_model._outliers] for topic in sorted_topics] else: - new_labels = [ - [[str(topic), None]] + topic_model.get_topic(topic) - for topic in sorted_topics - ] - new_labels = [ - "_".join([label[0] for label in labels[:4]]) for labels in new_labels - ] - new_labels = [ - label if len(label) < 30 else label[:27] + "..." 
for label in new_labels - ] + new_labels = [[[str(topic), None]] + topic_model.get_topic(topic) for topic in sorted_topics] + new_labels = ["_".join([label[0] for label in labels[:4]]) for labels in new_labels] + new_labels = [label if len(label) < 30 else label[:27] + "..." for label in new_labels] fig = px.imshow( distance_matrix, diff --git a/bertopic/plotting/_hierarchical_documents.py b/bertopic/plotting/_hierarchical_documents.py index 5501c8b7..2da9c83b 100644 --- a/bertopic/plotting/_hierarchical_documents.py +++ b/bertopic/plotting/_hierarchical_documents.py @@ -133,24 +133,18 @@ def visualize_hierarchical_documents( # Extract embeddings if not already done if sample is None: if embeddings is None and reduced_embeddings is None: - embeddings_to_reduce = topic_model._extract_embeddings( - df.doc.to_list(), method="document" - ) + embeddings_to_reduce = topic_model._extract_embeddings(df.doc.to_list(), method="document") else: embeddings_to_reduce = embeddings else: if embeddings is not None: embeddings_to_reduce = embeddings[indices] elif embeddings is None and reduced_embeddings is None: - embeddings_to_reduce = topic_model._extract_embeddings( - df.doc.to_list(), method="document" - ) + embeddings_to_reduce = topic_model._extract_embeddings(df.doc.to_list(), method="document") # Reduce input embeddings if reduced_embeddings is None: - umap_model = UMAP( - n_neighbors=10, n_components=2, min_dist=0.0, metric="cosine" - ).fit(embeddings_to_reduce) + umap_model = UMAP(n_neighbors=10, n_components=2, min_dist=0.0, metric="cosine").fit(embeddings_to_reduce) embeddings_2d = umap_model.embedding_ elif sample is not None and reduced_embeddings is not None: embeddings_2d = reduced_embeddings[indices] @@ -179,8 +173,7 @@ def visualize_hierarchical_documents( max_distances = [distances[i] for i in log_indices] elif level_scale == "lin" or level_scale == "linear": max_distances = [ - distances[indices[-1]] - for indices in np.array_split(range(len(hierarchical_topics)), nr_levels) + distances[indices[-1]] for indices in np.array_split(range(len(hierarchical_topics)), nr_levels) ][::-1] else: raise ValueError("level_scale needs to be one of 'log' or 'linear'") @@ -188,9 +181,7 @@ def visualize_hierarchical_documents( for index, max_distance in enumerate(max_distances): # Get topics below `max_distance` mapping = {topic: topic for topic in df.topic.unique()} - selection = hierarchical_topics.loc[ - hierarchical_topics.Distance <= max_distance, : - ] + selection = hierarchical_topics.loc[hierarchical_topics.Distance <= max_distance, :] selection.Parent_ID = selection.Parent_ID.astype(int) selection = selection.sort_values("Parent_ID") @@ -219,18 +210,12 @@ def visualize_hierarchical_documents( if topic_model.get_topic(topic): if isinstance(custom_labels, str): trace_name = f"{topic}_" + "_".join( - list(zip(*topic_model.topic_aspects_[custom_labels][topic]))[0][ - :3 - ] + list(zip(*topic_model.topic_aspects_[custom_labels][topic]))[0][:3] ) elif topic_model.custom_labels_ is not None and custom_labels: - trace_name = topic_model.custom_labels_[ - topic + topic_model._outliers - ] + trace_name = topic_model.custom_labels_[topic + topic_model._outliers] else: - trace_name = f"{topic}_" + "_".join( - [word[:20] for word, _ in topic_model.get_topic(topic)][:3] - ) + trace_name = f"{topic}_" + "_".join([word[:20] for word, _ in topic_model.get_topic(topic)][:3]) topic_names[topic] = { "trace_name": trace_name[:40], "plot_text": trace_name[:40], @@ -239,9 +224,7 @@ def 
visualize_hierarchical_documents( else: trace_name = ( f"{topic}_" - + hierarchical_topics.loc[ - hierarchical_topics.Parent_ID == str(topic), "Parent_Name" - ].values[0] + + hierarchical_topics.loc[hierarchical_topics.Parent_ID == str(topic), "Parent_Name"].values[0] ) plot_text = "_".join([name[:20] for name in trace_name.split("_")[:3]]) topic_names[topic] = { @@ -264,9 +247,7 @@ def visualize_hierarchical_documents( mode="markers+text", name="other", hoverinfo="text", - hovertext=df.loc[(df[f"level_{level+1}"] == -1), "doc"] - if not hide_document_hover - else None, + hovertext=df.loc[(df[f"level_{level+1}"] == -1), "doc"] if not hide_document_hover else None, showlegend=False, marker=dict(color="#CFD8DC", size=5, opacity=0.5), ) @@ -275,20 +256,14 @@ def visualize_hierarchical_documents( # Selected topics if topics: selection = df.loc[(df.topic.isin(topics)), :] - unique_topics = sorted( - [int(topic) for topic in selection[f"level_{level+1}"].unique()] - ) + unique_topics = sorted([int(topic) for topic in selection[f"level_{level+1}"].unique()]) else: - unique_topics = sorted( - [int(topic) for topic in df[f"level_{level+1}"].unique()] - ) + unique_topics = sorted([int(topic) for topic in df[f"level_{level+1}"].unique()]) for topic in unique_topics: if topic != -1: if topics: - selection = df.loc[ - (df[f"level_{level+1}"] == topic) & (df.topic.isin(topics)), : - ] + selection = df.loc[(df[f"level_{level+1}"] == topic) & (df.topic.isin(topics)), :] else: selection = df.loc[df[f"level_{level+1}"] == topic, :] @@ -297,9 +272,7 @@ def visualize_hierarchical_documents( selection["text"] = "" selection.loc[len(selection) - 1, "x"] = selection.x.mean() selection.loc[len(selection) - 1, "y"] = selection.y.mean() - selection.loc[len(selection) - 1, "text"] = topic_names[int(topic)][ - "plot_text" - ] + selection.loc[len(selection) - 1, "text"] = topic_names[int(topic)]["plot_text"] traces.append( go.Scattergl( @@ -373,12 +346,8 @@ def visualize_hierarchical_documents( y1=sum(y_range) / 2, line=dict(color="#9E9E9E", width=2), ) - fig.add_annotation( - x=x_range[0], y=sum(y_range) / 2, text="D1", showarrow=False, yshift=10 - ) - fig.add_annotation( - y=y_range[1], x=sum(x_range) / 2, text="D2", showarrow=False, xshift=10 - ) + fig.add_annotation(x=x_range[0], y=sum(y_range) / 2, text="D1", showarrow=False, yshift=10) + fig.add_annotation(y=y_range[1], x=sum(x_range) / 2, text="D2", showarrow=False, xshift=10) # Stylize layout fig.update_layout( diff --git a/bertopic/plotting/_hierarchy.py b/bertopic/plotting/_hierarchy.py index 6faa1bc4..2e6e6b23 100644 --- a/bertopic/plotting/_hierarchy.py +++ b/bertopic/plotting/_hierarchy.py @@ -123,9 +123,9 @@ def visualize_hierarchy( indices = np.array([all_topics.index(topic) for topic in topics]) # Select topic embeddings - embeddings = select_topic_representation( - topic_model.c_tf_idf_, topic_model.topic_embeddings_, use_ctfidf - )[0][indices] + embeddings = select_topic_representation(topic_model.c_tf_idf_, topic_model.topic_embeddings_, use_ctfidf)[0][ + indices + ] # Annotations if hierarchical_topics is not None and len(topics) == len(freq_df.Topic.to_list()): @@ -142,9 +142,7 @@ def visualize_hierarchy( annotations = None # wrap distance function to validate input and return a condensed distance matrix - distance_function_viz = lambda x: validate_distance_matrix( - distance_function(x), embeddings.shape[0] - ) + distance_function_viz = lambda x: validate_distance_matrix(distance_function(x), embeddings.shape[0]) # Create dendogram fig = 
ff.create_dendrogram( embeddings, @@ -159,31 +157,20 @@ def visualize_hierarchy( axis = "yaxis" if orientation == "left" else "xaxis" if isinstance(custom_labels, str): new_labels = [ - [[str(x), None]] + topic_model.topic_aspects_[custom_labels][x] - for x in fig.layout[axis]["ticktext"] - ] - new_labels = [ - "_".join([label[0] for label in labels[:4]]) for labels in new_labels - ] - new_labels = [ - label if len(label) < 30 else label[:27] + "..." for label in new_labels + [[str(x), None]] + topic_model.topic_aspects_[custom_labels][x] for x in fig.layout[axis]["ticktext"] ] + new_labels = ["_".join([label[0] for label in labels[:4]]) for labels in new_labels] + new_labels = [label if len(label) < 30 else label[:27] + "..." for label in new_labels] elif topic_model.custom_labels_ is not None and custom_labels: new_labels = [ - topic_model.custom_labels_[topics[int(x)] + topic_model._outliers] - for x in fig.layout[axis]["ticktext"] + topic_model.custom_labels_[topics[int(x)] + topic_model._outliers] for x in fig.layout[axis]["ticktext"] ] else: new_labels = [ - [[str(topics[int(x)]), None]] + topic_model.get_topic(topics[int(x)]) - for x in fig.layout[axis]["ticktext"] - ] - new_labels = [ - "_".join([label[0] for label in labels[:4]]) for labels in new_labels - ] - new_labels = [ - label if len(label) < 30 else label[:27] + "..." for label in new_labels + [[str(topics[int(x)]), None]] + topic_model.get_topic(topics[int(x)]) for x in fig.layout[axis]["ticktext"] ] + new_labels = ["_".join([label[0] for label in labels[:4]]) for labels in new_labels] + new_labels = [label if len(label) < 30 else label[:27] + "..." for label in new_labels] # Stylize layout fig.update_layout( @@ -222,21 +209,9 @@ def visualize_hierarchy( if hierarchical_topics is not None: for index in [0, 3]: axis = "x" if orientation == "left" else "y" - xs = [ - data["x"][index] - for data in fig.data - if (data["text"] and data[axis][index] > 0) - ] - ys = [ - data["y"][index] - for data in fig.data - if (data["text"] and data[axis][index] > 0) - ] - hovertext = [ - data["text"][index] - for data in fig.data - if (data["text"] and data[axis][index] > 0) - ] + xs = [data["x"][index] for data in fig.data if (data["text"] and data[axis][index] > 0)] + ys = [data["y"][index] for data in fig.data if (data["text"] and data[axis][index] > 0)] + hovertext = [data["text"][index] for data in fig.data if (data["text"] and data[axis][index] > 0)] fig.add_trace( go.Scatter( @@ -322,18 +297,12 @@ def _get_annotations( if len(fst_topic) == 1: if isinstance(custom_labels, str): fst_name = f"{fst_topic[0]}_" + "_".join( - list(zip(*topic_model.topic_aspects_[custom_labels][fst_topic[0]]))[ - 0 - ][:3] + list(zip(*topic_model.topic_aspects_[custom_labels][fst_topic[0]]))[0][:3] ) elif topic_model.custom_labels_ is not None and custom_labels: - fst_name = topic_model.custom_labels_[ - fst_topic[0] + topic_model._outliers - ] + fst_name = topic_model.custom_labels_[fst_topic[0] + topic_model._outliers] else: - fst_name = "_".join( - [word for word, _ in topic_model.get_topic(fst_topic[0])][:5] - ) + fst_name = "_".join([word for word, _ in topic_model.get_topic(fst_topic[0])][:5]) else: for key, value in parent_topic.items(): if set(value) == set(fst_topic): @@ -342,18 +311,12 @@ def _get_annotations( if len(scnd_topic) == 1: if isinstance(custom_labels, str): scnd_name = f"{scnd_topic[0]}_" + "_".join( - list( - zip(*topic_model.topic_aspects_[custom_labels][scnd_topic[0]]) - )[0][:3] + 
list(zip(*topic_model.topic_aspects_[custom_labels][scnd_topic[0]]))[0][:3] ) elif topic_model.custom_labels_ is not None and custom_labels: - scnd_name = topic_model.custom_labels_[ - scnd_topic[0] + topic_model._outliers - ] + scnd_name = topic_model.custom_labels_[scnd_topic[0] + topic_model._outliers] else: - scnd_name = "_".join( - [word for word, _ in topic_model.get_topic(scnd_topic[0])][:5] - ) + scnd_name = "_".join([word for word, _ in topic_model.get_topic(scnd_topic[0])][:5]) else: for key, value in parent_topic.items(): if set(value) == set(scnd_topic): diff --git a/bertopic/plotting/_term_rank.py b/bertopic/plotting/_term_rank.py index 5dc98a23..4043692b 100644 --- a/bertopic/plotting/_term_rank.py +++ b/bertopic/plotting/_term_rank.py @@ -69,9 +69,7 @@ def visualize_term_rank( topic_words = [topic_model.get_topic(topic) for topic in topic_ids] values = np.array([[value[1] for value in values] for values in topic_words]) - indices = np.array( - [[value + 1 for value in range(len(values))] for values in topic_words] - ) + indices = np.array([[value + 1 for value in range(len(values))] for values in topic_words]) # Create figure lines = [] @@ -79,15 +77,11 @@ def visualize_term_rank( if not any(y > 1.5): # labels if isinstance(custom_labels, str): - label = f"{topic}_" + "_".join( - list(zip(*topic_model.topic_aspects_[custom_labels][topic]))[0][:3] - ) + label = f"{topic}_" + "_".join(list(zip(*topic_model.topic_aspects_[custom_labels][topic]))[0][:3]) elif topic_model.custom_labels_ is not None and custom_labels: label = topic_model.custom_labels_[topic + topic_model._outliers] else: - label = f"Topic {topic}:" + "_".join( - [word[0] for word in topic_model.get_topic(topic)] - ) + label = f"Topic {topic}:" + "_".join([word[0] for word in topic_model.get_topic(topic)]) label = label[:50] # line parameters diff --git a/bertopic/plotting/_topics.py b/bertopic/plotting/_topics.py index 8a14a34d..2e477d05 100644 --- a/bertopic/plotting/_topics.py +++ b/bertopic/plotting/_topics.py @@ -65,22 +65,13 @@ def visualize_topics( topic_list = sorted(topics) frequencies = [topic_model.topic_sizes_[topic] for topic in topic_list] if isinstance(custom_labels, str): - words = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in topic_list - ] + words = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in topic_list] words = ["_".join([label[0] for label in labels[:4]]) for labels in words] words = [label if len(label) < 30 else label[:27] + "..." 
for label in words] elif custom_labels and topic_model.custom_labels_ is not None: - words = [ - topic_model.custom_labels_[topic + topic_model._outliers] - for topic in topic_list - ] + words = [topic_model.custom_labels_[topic + topic_model._outliers] for topic in topic_list] else: - words = [ - " | ".join([word[0] for word in topic_model.get_topic(topic)[:5]]) - for topic in topic_list - ] + words = [" | ".join([word[0] for word in topic_model.get_topic(topic)[:5]]) for topic in topic_list] # Embed c-TF-IDF into 2D all_topics = sorted(list(topic_model.get_topics().keys())) @@ -96,13 +87,9 @@ def visualize_topics( if c_tfidf_used: embeddings = MinMaxScaler().fit_transform(embeddings) - embeddings = UMAP( - n_neighbors=2, n_components=2, metric="hellinger", random_state=42 - ).fit_transform(embeddings) + embeddings = UMAP(n_neighbors=2, n_components=2, metric="hellinger", random_state=42).fit_transform(embeddings) else: - embeddings = UMAP( - n_neighbors=2, n_components=2, metric="cosine", random_state=42 - ).fit_transform(embeddings) + embeddings = UMAP(n_neighbors=2, n_components=2, metric="cosine", random_state=42).fit_transform(embeddings) # Visualize with plotly df = pd.DataFrame( @@ -117,18 +104,14 @@ def visualize_topics( return _plotly_topic_visualization(df, topic_list, title, width, height) -def _plotly_topic_visualization( - df: pd.DataFrame, topic_list: List[str], title: str, width: int, height: int -): +def _plotly_topic_visualization(df: pd.DataFrame, topic_list: List[str], title: str, width: int, height: int): """Create plotly-based visualization of topics with a slider for topic selection.""" def get_color(topic_selected): if topic_selected == -1: marker_color = ["#B0BEC5" for _ in topic_list] else: - marker_color = [ - "red" if topic == topic_selected else "#B0BEC5" for topic in topic_list - ] + marker_color = ["red" if topic == topic_selected else "#B0BEC5" for topic in topic_list] return [{"marker.color": [marker_color]}] # Prepare figure range @@ -152,9 +135,7 @@ def get_color(topic_selected): labels={"x": "", "y": ""}, hover_data={"Topic": True, "Words": True, "Size": True, "x": False, "y": False}, ) - fig.update_traces( - marker=dict(color="#B0BEC5", line=dict(width=2, color="DarkSlateGrey")) - ) + fig.update_traces(marker=dict(color="#B0BEC5", line=dict(width=2, color="DarkSlateGrey"))) # Update hover order fig.update_traces( @@ -168,10 +149,7 @@ def get_color(topic_selected): ) # Create a slider for topic selection - steps = [ - dict(label=f"Topic {topic}", method="update", args=get_color(topic)) - for topic in topic_list - ] + steps = [dict(label=f"Topic {topic}", method="update", args=get_color(topic)) for topic in topic_list] sliders = [dict(active=0, pad={"t": 50}, steps=steps)] # Stylize layout @@ -213,12 +191,8 @@ def get_color(topic_selected): y1=sum(y_range) / 2, line=dict(color="#9E9E9E", width=2), ) - fig.add_annotation( - x=x_range[0], y=sum(y_range) / 2, text="D1", showarrow=False, yshift=10 - ) - fig.add_annotation( - y=y_range[1], x=sum(x_range) / 2, text="D2", showarrow=False, xshift=10 - ) + fig.add_annotation(x=x_range[0], y=sum(y_range) / 2, text="D1", showarrow=False, yshift=10) + fig.add_annotation(y=y_range[1], x=sum(x_range) / 2, text="D2", showarrow=False, xshift=10) fig.data = fig.data[::-1] return fig diff --git a/bertopic/plotting/_topics_over_time.py b/bertopic/plotting/_topics_over_time.py index 625a8cce..b8254421 100644 --- a/bertopic/plotting/_topics_over_time.py +++ b/bertopic/plotting/_topics_over_time.py @@ -73,34 +73,20 @@ def 
visualize_topics_over_time( # Prepare data if isinstance(custom_labels, str): - topic_names = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in topics - ] - topic_names = [ - "_".join([label[0] for label in labels[:4]]) for labels in topic_names - ] - topic_names = [ - label if len(label) < 30 else label[:27] + "..." for label in topic_names - ] - topic_names = { - key: topic_names[index] - for index, key in enumerate(topic_model.topic_labels_.keys()) - } + topic_names = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in topics] + topic_names = ["_".join([label[0] for label in labels[:4]]) for labels in topic_names] + topic_names = [label if len(label) < 30 else label[:27] + "..." for label in topic_names] + topic_names = {key: topic_names[index] for index, key in enumerate(topic_model.topic_labels_.keys())} elif topic_model.custom_labels_ is not None and custom_labels: topic_names = { - key: topic_model.custom_labels_[key + topic_model._outliers] - for key, _ in topic_model.topic_labels_.items() + key: topic_model.custom_labels_[key + topic_model._outliers] for key, _ in topic_model.topic_labels_.items() } else: topic_names = { - key: value[:40] + "..." if len(value) > 40 else value - for key, value in topic_model.topic_labels_.items() + key: value[:40] + "..." if len(value) > 40 else value for key, value in topic_model.topic_labels_.items() } topics_over_time["Name"] = topics_over_time.Topic.map(topic_names) - data = topics_over_time.loc[ - topics_over_time.Topic.isin(selected_topics), : - ].sort_values(["Topic", "Timestamp"]) + data = topics_over_time.loc[topics_over_time.Topic.isin(selected_topics), :].sort_values(["Topic", "Timestamp"]) # Add traces fig = go.Figure() diff --git a/bertopic/plotting/_topics_per_class.py b/bertopic/plotting/_topics_per_class.py index 5bb8cef4..cdf02ebb 100644 --- a/bertopic/plotting/_topics_per_class.py +++ b/bertopic/plotting/_topics_per_class.py @@ -73,29 +73,17 @@ def visualize_topics_per_class( # Prepare data if isinstance(custom_labels, str): - topic_names = [ - [[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] - for topic in topics - ] - topic_names = [ - "_".join([label[0] for label in labels[:4]]) for labels in topic_names - ] - topic_names = [ - label if len(label) < 30 else label[:27] + "..." for label in topic_names - ] - topic_names = { - key: topic_names[index] - for index, key in enumerate(topic_model.topic_labels_.keys()) - } + topic_names = [[[str(topic), None]] + topic_model.topic_aspects_[custom_labels][topic] for topic in topics] + topic_names = ["_".join([label[0] for label in labels[:4]]) for labels in topic_names] + topic_names = [label if len(label) < 30 else label[:27] + "..." for label in topic_names] + topic_names = {key: topic_names[index] for index, key in enumerate(topic_model.topic_labels_.keys())} elif topic_model.custom_labels_ is not None and custom_labels: topic_names = { - key: topic_model.custom_labels_[key + topic_model._outliers] - for key, _ in topic_model.topic_labels_.items() + key: topic_model.custom_labels_[key + topic_model._outliers] for key, _ in topic_model.topic_labels_.items() } else: topic_names = { - key: value[:40] + "..." if len(value) > 40 else value - for key, value in topic_model.topic_labels_.items() + key: value[:40] + "..." 
if len(value) > 40 else value for key, value in topic_model.topic_labels_.items() } topics_per_class["Name"] = topics_per_class.Topic.map(topic_names) data = topics_per_class.loc[topics_per_class.Topic.isin(selected_topics), :] diff --git a/bertopic/representation/__init__.py b/bertopic/representation/__init__.py index 3c18305f..da0c6365 100644 --- a/bertopic/representation/__init__.py +++ b/bertopic/representation/__init__.py @@ -24,9 +24,7 @@ from bertopic.representation._zeroshot import ZeroShotClassification except ModuleNotFoundError: msg = "`pip install bertopic` without `--no-deps` \n\n" - ZeroShotClassification = NotInstalled( - "ZeroShotClassification", "transformers", custom_msg=msg - ) + ZeroShotClassification = NotInstalled("ZeroShotClassification", "transformers", custom_msg=msg) # OpenAI Generator try: diff --git a/bertopic/representation/_cohere.py b/bertopic/representation/_cohere.py index 64511daf..8ca31c8f 100644 --- a/bertopic/representation/_cohere.py +++ b/bertopic/representation/_cohere.py @@ -151,13 +151,8 @@ def extract_topics( # Generate using Cohere's Language Model updated_topics = {} - for topic, docs in tqdm( - repr_docs_mappings.items(), disable=not topic_model.verbose - ): - truncated_docs = [ - truncate_document(topic_model, self.doc_length, self.tokenizer, doc) - for doc in docs - ] + for topic, docs in tqdm(repr_docs_mappings.items(), disable=not topic_model.verbose): + truncated_docs = [truncate_document(topic_model, self.doc_length, self.tokenizer, doc) for doc in docs] prompt = self._create_prompt(truncated_docs, topic, topics) self.prompts_.append(prompt) diff --git a/bertopic/representation/_keybert.py b/bertopic/representation/_keybert.py index 7d9d19e2..f91c01cc 100644 --- a/bertopic/representation/_keybert.py +++ b/bertopic/representation/_keybert.py @@ -84,10 +84,8 @@ def extract_topics( updated_topics: Updated topic representations """ # We extract the top n representative documents per class - _, representative_docs, repr_doc_indices, _ = ( - topic_model._extract_representative_docs( - c_tf_idf, documents, topics, self.nr_samples, self.nr_repr_docs - ) + _, representative_docs, repr_doc_indices, _ = topic_model._extract_representative_docs( + c_tf_idf, documents, topics, self.nr_samples, self.nr_repr_docs ) # We extract the top n words per class @@ -95,9 +93,7 @@ def extract_topics( # We calculate the similarity between word and document embeddings and create # topic embeddings from the representative document embeddings - sim_matrix, words = self._extract_embeddings( - topic_model, topics, representative_docs, repr_doc_indices - ) + sim_matrix, words = self._extract_embeddings(topic_model, topics, representative_docs, repr_doc_indices) # Find the best matching words based on the similarity matrix for each topic updated_topics = self._extract_top_words(words, topics, sim_matrix) @@ -139,17 +135,12 @@ def _extract_candidate_words( # Get top 30 words per topic based on c-TF-IDF score topics = { label: [ - (words[word_index], score) - if word_index is not None and score > 0 - else ("", 0.00001) + (words[word_index], score) if word_index is not None and score > 0 else ("", 0.00001) for word_index, score in zip(indices[index][::-1], scores[index][::-1]) ] for index, label in enumerate(labels) } - topics = { - label: list(zip(*values[: self.nr_candidate_words]))[0] - for label, values in topics.items() - } + topics = {label: list(zip(*values[: self.nr_candidate_words]))[0] for label, values in topics.items()} return topics @@ -177,18 +168,12 @@ def 
_extract_embeddings( vocab: The complete vocabulary of input documents """ # Calculate representative docs embeddings and create topic embeddings - repr_embeddings = topic_model._extract_embeddings( - representative_docs, method="document", verbose=False - ) - topic_embeddings = [ - np.mean(repr_embeddings[i[0] : i[-1] + 1], axis=0) for i in repr_doc_indices - ] + repr_embeddings = topic_model._extract_embeddings(representative_docs, method="document", verbose=False) + topic_embeddings = [np.mean(repr_embeddings[i[0] : i[-1] + 1], axis=0) for i in repr_doc_indices] # Calculate word embeddings and extract best matching with updated topic_embeddings vocab = list(set([word for words in topics.values() for word in words])) - word_embeddings = topic_model._extract_embeddings( - vocab, method="document", verbose=False - ) + word_embeddings = topic_model._extract_embeddings(vocab, method="document", verbose=False) sim = cosine_similarity(topic_embeddings, word_embeddings) return sim, vocab @@ -216,14 +201,9 @@ def _extract_top_words( for i, topic in enumerate(labels): indices = [vocab.index(word) for word in topics[topic]] values = sim[:, indices][i] - word_indices = [ - indices[index] for index in np.argsort(values)[-self.top_n_words :] - ] + word_indices = [indices[index] for index in np.argsort(values)[-self.top_n_words :]] updated_topics[topic] = [ - (vocab[index], val) - for val, index in zip( - np.sort(values)[-self.top_n_words :], word_indices - ) + (vocab[index], val) for val, index in zip(np.sort(values)[-self.top_n_words :], word_indices) ][::-1] return updated_topics diff --git a/bertopic/representation/_langchain.py b/bertopic/representation/_langchain.py index ad92aef1..df5c4839 100644 --- a/bertopic/representation/_langchain.py +++ b/bertopic/representation/_langchain.py @@ -180,11 +180,7 @@ def extract_topics( # Generate label using langchain's batch functionality chain_docs: List[List[Document]] = [ [ - Document( - page_content=truncate_document( - topic_model, self.doc_length, self.tokenizer, doc - ) - ) + Document(page_content=truncate_document(topic_model, self.doc_length, self.tokenizer, doc)) for doc in docs ] for docs in repr_docs_mappings.values() @@ -199,16 +195,10 @@ def extract_topics( prompt = self.prompt.replace("[KEYWORDS]", ", ".join(keywords)) prompts.append(prompt) - inputs = [ - {"input_documents": docs, "question": prompt} - for docs, prompt in zip(chain_docs, prompts) - ] + inputs = [{"input_documents": docs, "question": prompt} for docs, prompt in zip(chain_docs, prompts)] else: - inputs = [ - {"input_documents": docs, "question": self.prompt} - for docs in chain_docs - ] + inputs = [{"input_documents": docs, "question": self.prompt} for docs in chain_docs] # `self.chain` must return a dict with an `output_text` key # same output key as the `StuffDocumentsChain` returned by `load_qa_chain` @@ -216,8 +206,7 @@ def extract_topics( labels = [output["output_text"].strip() for output in outputs] updated_topics = { - topic: [(label, 1)] + [("", 0) for _ in range(9)] - for topic, label in zip(repr_docs_mappings.keys(), labels) + topic: [(label, 1)] + [("", 0) for _ in range(9)] for topic, label in zip(repr_docs_mappings.keys(), labels) } return updated_topics diff --git a/bertopic/representation/_llamacpp.py b/bertopic/representation/_llamacpp.py index fa573463..83b18952 100644 --- a/bertopic/representation/_llamacpp.py +++ b/bertopic/representation/_llamacpp.py @@ -143,28 +143,18 @@ def extract_topics( ) updated_topics = {} - for topic, docs in tqdm( - 
repr_docs_mappings.items(), disable=not topic_model.verbose - ): + for topic, docs in tqdm(repr_docs_mappings.items(), disable=not topic_model.verbose): # Prepare prompt - truncated_docs = [ - truncate_document(topic_model, self.doc_length, self.tokenizer, doc) - for doc in docs - ] + truncated_docs = [truncate_document(topic_model, self.doc_length, self.tokenizer, doc) for doc in docs] prompt = self._create_prompt(truncated_docs, topic, topics) self.prompts_.append(prompt) # Extract result from generator and use that as label topic_description = self.model(prompt, **self.pipeline_kwargs)["choices"] - topic_description = [ - (description["text"].replace(prompt, ""), 1) - for description in topic_description - ] + topic_description = [(description["text"].replace(prompt, ""), 1) for description in topic_description] if len(topic_description) < 10: - topic_description += [ - ("", 0) for _ in range(10 - len(topic_description)) - ] + topic_description += [("", 0) for _ in range(10 - len(topic_description))] updated_topics[topic] = topic_description diff --git a/bertopic/representation/_mmr.py b/bertopic/representation/_mmr.py index 07a8dd13..b3b1b232 100644 --- a/bertopic/representation/_mmr.py +++ b/bertopic/representation/_mmr.py @@ -68,12 +68,10 @@ def extract_topics( updated_topics = {} for topic, topic_words in topics.items(): words = [word[0] for word in topic_words] - word_embeddings = topic_model._extract_embeddings( - words, method="word", verbose=False + word_embeddings = topic_model._extract_embeddings(words, method="word", verbose=False) + topic_embedding = topic_model._extract_embeddings(" ".join(words), method="word", verbose=False).reshape( + 1, -1 ) - topic_embedding = topic_model._extract_embeddings( - " ".join(words), method="word", verbose=False - ).reshape(1, -1) topic_words = mmr( topic_embedding, word_embeddings, @@ -81,9 +79,7 @@ def extract_topics( self.diversity, self.top_n_words, ) - updated_topics[topic] = [ - (word, value) for word, value in topics[topic] if word in topic_words - ] + updated_topics[topic] = [(word, value) for word, value in topics[topic] if word in topic_words] return updated_topics @@ -119,14 +115,10 @@ def mmr( # Extract similarities within candidates and # between candidates and selected keywords/phrases candidate_similarities = word_doc_similarity[candidates_idx, :] - target_similarities = np.max( - word_similarity[candidates_idx][:, keywords_idx], axis=1 - ) + target_similarities = np.max(word_similarity[candidates_idx][:, keywords_idx], axis=1) # Calculate MMR - mmr = ( - 1 - diversity - ) * candidate_similarities - diversity * target_similarities.reshape(-1, 1) + mmr = (1 - diversity) * candidate_similarities - diversity * target_similarities.reshape(-1, 1) mmr_idx = candidates_idx[np.argmax(mmr)] # Update keywords & candidates diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py index 35bdf1da..8fd25a1b 100644 --- a/bertopic/representation/_openai.py +++ b/bertopic/representation/_openai.py @@ -205,13 +205,8 @@ def extract_topics( # Generate using OpenAI's Language Model updated_topics = {} - for topic, docs in tqdm( - repr_docs_mappings.items(), disable=not topic_model.verbose - ): - truncated_docs = [ - truncate_document(topic_model, self.doc_length, self.tokenizer, doc) - for doc in docs - ] + for topic, docs in tqdm(repr_docs_mappings.items(), disable=not topic_model.verbose): + truncated_docs = [truncate_document(topic_model, self.doc_length, self.tokenizer, doc) for doc in docs] prompt = 
self._create_prompt(truncated_docs, topic, topics) self.prompts_.append(prompt) @@ -237,11 +232,7 @@ def extract_topics( # Check whether content was actually generated # Addresses #1570 for potential issues with OpenAI's content filter if hasattr(response.choices[0].message, "content"): - label = ( - response.choices[0] - .message.content.strip() - .replace("topic: ", "") - ) + label = response.choices[0].message.content.strip().replace("topic: ", "") else: label = "No label returned" else: @@ -253,9 +244,7 @@ def extract_topics( **self.generator_kwargs, ) else: - response = self.client.completions.create( - model=self.model, prompt=prompt, **self.generator_kwargs - ) + response = self.client.completions.create(model=self.model, prompt=prompt, **self.generator_kwargs) label = response.choices[0].text.strip() updated_topics[topic] = [(label, 1)] diff --git a/bertopic/representation/_pos.py b/bertopic/representation/_pos.py index 08139b53..3ac2815f 100644 --- a/bertopic/representation/_pos.py +++ b/bertopic/representation/_pos.py @@ -120,9 +120,7 @@ def extract_topics( candidate_documents = [] for keyword in keywords: selection = documents.loc[documents.Topic == topic, :] - selection = selection.loc[ - selection.Document.str.contains(keyword), "Document" - ] + selection = selection.loc[selection.Document.str.contains(keyword), "Document"] if len(selection) > 0: for document in selection[:2]: candidate_documents.append(document) @@ -150,27 +148,14 @@ def extract_topics( for topic, candidate_keywords in candidate_topics.items(): word_indices = np.sort( - [ - words_lookup.get(keyword) - for keyword in candidate_keywords - if keyword in words_lookup - ] + [words_lookup.get(keyword) for keyword in candidate_keywords if keyword in words_lookup] ) vals = topic_model.c_tf_idf_[:, word_indices][topic + topic_model._outliers] - indices = np.argsort(np.array(vals.todense().reshape(1, -1))[0])[ - -self.top_n_words : - ][::-1] - vals = np.sort(np.array(vals.todense().reshape(1, -1))[0])[ - -self.top_n_words : - ][::-1] - topic_words = [ - (words[word_indices[index]], val) for index, val in zip(indices, vals) - ] + indices = np.argsort(np.array(vals.todense().reshape(1, -1))[0])[-self.top_n_words :][::-1] + vals = np.sort(np.array(vals.todense().reshape(1, -1))[0])[-self.top_n_words :][::-1] + topic_words = [(words[word_indices[index]], val) for index, val in zip(indices, vals)] updated_topics[topic] = topic_words if len(updated_topics[topic]) < self.top_n_words: - updated_topics[topic] += [ - ("", 0) - for _ in range(self.top_n_words - len(updated_topics[topic])) - ] + updated_topics[topic] += [("", 0) for _ in range(self.top_n_words - len(updated_topics[topic]))] return updated_topics diff --git a/bertopic/representation/_textgeneration.py b/bertopic/representation/_textgeneration.py index 3bc3853a..b028e575 100644 --- a/bertopic/representation/_textgeneration.py +++ b/bertopic/representation/_textgeneration.py @@ -142,15 +142,10 @@ def extract_topics( repr_docs_mappings = {topic: None for topic in topics.keys()} updated_topics = {} - for topic, docs in tqdm( - repr_docs_mappings.items(), disable=not topic_model.verbose - ): + for topic, docs in tqdm(repr_docs_mappings.items(), disable=not topic_model.verbose): # Prepare prompt truncated_docs = ( - [ - truncate_document(topic_model, self.doc_length, self.tokenizer, doc) - for doc in docs - ] + [truncate_document(topic_model, self.doc_length, self.tokenizer, doc) for doc in docs] if docs is not None else docs ) @@ -160,14 +155,11 @@ def extract_topics( # 
Extract result from generator and use that as label topic_description = self.model(prompt, **self.pipeline_kwargs) topic_description = [ - (description["generated_text"].replace(prompt, ""), 1) - for description in topic_description + (description["generated_text"].replace(prompt, ""), 1) for description in topic_description ] if len(topic_description) < 10: - topic_description += [ - ("", 0) for _ in range(10 - len(topic_description)) - ] + topic_description += [("", 0) for _ in range(10 - len(topic_description))] updated_topics[topic] = topic_description diff --git a/bertopic/representation/_utils.py b/bertopic/representation/_utils.py index 00f157a5..2a99fd1f 100644 --- a/bertopic/representation/_utils.py +++ b/bertopic/representation/_utils.py @@ -85,9 +85,7 @@ def wrapper(*args, **kwargs): # Check if max retries has been reached if num_retries > max_retries: - raise Exception( - f"Maximum number of retries ({max_retries}) exceeded." - ) + raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") # Increment the delay delay *= exponential_base * (1 + jitter * random.random()) diff --git a/bertopic/representation/_visual.py b/bertopic/representation/_visual.py index 897d7c9d..07968596 100644 --- a/bertopic/representation/_visual.py +++ b/bertopic/representation/_visual.py @@ -63,9 +63,7 @@ def __init__( if isinstance(image_to_text_model, Pipeline): self.image_to_text_model = image_to_text_model elif isinstance(image_to_text_model, str): - self.image_to_text_model = pipeline( - "image-to-text", model=image_to_text_model - ) + self.image_to_text_model = pipeline("image-to-text", model=image_to_text_model) elif image_to_text_model is None: self.image_to_text_model = None else: @@ -109,23 +107,17 @@ def extract_topics( for topic in tqdm(unique_topics): # Get and order represetnative images sliced_examplars = repr_docs_ids[topic + topic_model._outliers] - sliced_examplars = [ - sliced_examplars[i : i + 3] for i in range(0, len(sliced_examplars), 3) - ] + sliced_examplars = [sliced_examplars[i : i + 3] for i in range(0, len(sliced_examplars), 3)] images_to_combine = [ [ - Image.open(images[index]) - if isinstance(images[index], str) - else images[index] + Image.open(images[index]) if isinstance(images[index], str) else images[index] for index in sub_indices ] for sub_indices in sliced_examplars ] # Concatenate representative images - representative_image = get_concat_tile_resize( - images_to_combine, self.image_height, self.image_squares - ) + representative_image = get_concat_tile_resize(images_to_combine, self.image_height, self.image_squares) representative_images[topic] = representative_image # Make sure to properly close images @@ -136,9 +128,7 @@ def extract_topics( return representative_images - def _convert_image_to_text( - self, images: List[str], verbose: bool = False - ) -> List[str]: + def _convert_image_to_text(self, images: List[str], verbose: bool = False) -> List[str]: """Convert a list of images to captions. 
Arguments: @@ -163,9 +153,7 @@ def _convert_image_to_text( return documents - def image_to_text( - self, documents: pd.DataFrame, embeddings: np.ndarray - ) -> pd.DataFrame: + def image_to_text(self, documents: pd.DataFrame, embeddings: np.ndarray) -> pd.DataFrame: """Convert images to text.""" # Create image topic embeddings topics = documents.Topic.values.tolist() @@ -193,10 +181,7 @@ def image_to_text( current_id = 0 for topic, image_ids in tqdm(image_centroids.items()): selected_images = [ - Image.open(images[index]) - if isinstance(images[index], str) - else images[index] - for index in image_ids + Image.open(images[index]) if isinstance(images[index], str) else images[index] for index in image_ids ] text = self._convert_image_to_text(selected_images) @@ -243,10 +228,7 @@ def get_concat_v_multi_resize(im_list): """Code adapted from: https://note.nkmk.me/en/python-pillow-concat-images/.""" min_width = min(im.width for im in im_list) min_width = max(im.width for im in im_list) - im_list_resize = [ - im.resize((min_width, int(im.height * min_width / im.width)), resample=0) - for im in im_list - ] + im_list_resize = [im.resize((min_width, int(im.height * min_width / im.width)), resample=0) for im in im_list] total_height = sum(im.height for im in im_list_resize) dst = Image.new("RGB", (min_width, total_height), (255, 255, 255)) pos_y = 0 @@ -264,9 +246,7 @@ def get_concat_tile_resize(im_list_2d, image_height=600, image_squares=False): if image_squares: width = int(image_height / 3) height = int(image_height / 3) - images = [ - [image.resize((width, height)) for image in images] for images in im_list_2d - ] + images = [[image.resize((width, height)) for image in images] for images in im_list_2d] # Resize images based on minimum size else: @@ -280,9 +260,7 @@ def get_concat_tile_resize(im_list_2d, image_height=600, image_squares=False): resample=0, ) elif img.width > img.height: - images[i][j] = img.resize( - (min_width, int(img.height * min_width / img.width)), resample=0 - ) + images[i][j] = img.resize((min_width, int(img.height * min_width / img.width)), resample=0) else: images[i][j] = img.resize((min_width, min_width)) diff --git a/bertopic/representation/_zeroshot.py b/bertopic/representation/_zeroshot.py index 7dff499b..5f67de9a 100644 --- a/bertopic/representation/_zeroshot.py +++ b/bertopic/representation/_zeroshot.py @@ -75,12 +75,8 @@ def extract_topics( updated_topics: Updated topic representations """ # Classify topics - topic_descriptions = [ - " ".join(list(zip(*topics[topic]))[0]) for topic in topics.keys() - ] - classifications = self.model( - topic_descriptions, self.candidate_topics, **self.pipeline_kwargs - ) + topic_descriptions = [" ".join(list(zip(*topics[topic]))[0]) for topic in topics.keys()] + classifications = self.model(topic_descriptions, self.candidate_topics, **self.pipeline_kwargs) # Extract labels updated_topics = {} @@ -90,25 +86,19 @@ def extract_topics( # Multi-label assignment if self.pipeline_kwargs.get("multi_label"): topic_description = [] - for label, score in zip( - classification["labels"], classification["scores"] - ): + for label, score in zip(classification["labels"], classification["scores"]): if score > self.min_prob: topic_description.append((label, score)) # Single label assignment elif classification["scores"][0] > self.min_prob: - topic_description = [ - (classification["labels"][0], classification["scores"][0]) - ] + topic_description = [(classification["labels"][0], classification["scores"][0])] # Make sure that 10 items are returned if 
len(topic_description) == 0: topic_description = topics[topic] elif len(topic_description) < 10: - topic_description += [ - ("", 0) for _ in range(10 - len(topic_description)) - ] + topic_description += [("", 0) for _ in range(10 - len(topic_description))] updated_topics[topic] = topic_description return updated_topics diff --git a/bertopic/vectorizers/_online_cv.py b/bertopic/vectorizers/_online_cv.py index fedb363c..27387fa2 100644 --- a/bertopic/vectorizers/_online_cv.py +++ b/bertopic/vectorizers/_online_cv.py @@ -121,15 +121,11 @@ def update_bow(self, raw_documents: List[str]) -> csr_matrix: X = self.transform(raw_documents) # Add empty columns if new words are found - columns = csr_matrix( - (self.X_.shape[0], X.shape[1] - self.X_.shape[1]), dtype=int - ) + columns = csr_matrix((self.X_.shape[0], X.shape[1] - self.X_.shape[1]), dtype=int) self.X_ = sparse.hstack([self.X_, columns]) # Add empty rows if new topics are found - rows = csr_matrix( - (X.shape[0] - self.X_.shape[0], self.X_.shape[1]), dtype=int - ) + rows = csr_matrix((X.shape[0] - self.X_.shape[0], self.X_.shape[1]), dtype=int) self.X_ = sparse.vstack([self.X_, rows]) # Decay of BoW matrix diff --git a/pyproject.toml b/pyproject.toml index d0c1abfe..2dce9bc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,7 +98,7 @@ include = ["bertopic*"] exclude = ["tests"] [tool.ruff] -target-version = "py38" +line-length = 120 [tool.ruff.lint] select = [ diff --git a/tests/conftest.py b/tests/conftest.py index 95bcf738..3d8d49db 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -27,17 +27,15 @@ def document_embeddings(documents, embedding_model): @pytest.fixture(scope="session") def reduced_embeddings(document_embeddings): - reduced_embeddings = UMAP( - n_neighbors=10, n_components=2, min_dist=0.0, metric="cosine" - ).fit_transform(document_embeddings) + reduced_embeddings = UMAP(n_neighbors=10, n_components=2, min_dist=0.0, metric="cosine").fit_transform( + document_embeddings + ) return reduced_embeddings @pytest.fixture(scope="session") def documents(): - newsgroup_docs = fetch_20newsgroups( - subset="all", remove=("headers", "footers", "quotes") - )["data"][:1000] + newsgroup_docs = fetch_20newsgroups(subset="all", remove=("headers", "footers", "quotes"))["data"][:1000] return newsgroup_docs @@ -74,9 +72,7 @@ def zeroshot_topic_model(documents, document_embeddings, embedding_model): @pytest.fixture(scope="session") def custom_topic_model(documents, document_embeddings, embedding_model): - umap_model = UMAP( - n_neighbors=15, n_components=6, min_dist=0.0, metric="cosine", random_state=42 - ) + umap_model = UMAP(n_neighbors=15, n_components=6, min_dist=0.0, metric="cosine", random_state=42) hdbscan_model = HDBSCAN( min_cluster_size=3, metric="euclidean", @@ -94,9 +90,7 @@ def custom_topic_model(documents, document_embeddings, embedding_model): @pytest.fixture(scope="session") def representation_topic_model(documents, document_embeddings, embedding_model): - umap_model = UMAP( - n_neighbors=15, n_components=6, min_dist=0.0, metric="cosine", random_state=42 - ) + umap_model = UMAP(n_neighbors=15, n_components=6, min_dist=0.0, metric="cosine", random_state=42) hdbscan_model = HDBSCAN( min_cluster_size=3, metric="euclidean", @@ -177,9 +171,7 @@ def online_topic_model(documents, document_embeddings, embedding_model): topics = [] for index in range(0, len(documents), 50): - model.partial_fit( - documents[index : index + 50], document_embeddings[index : index + 50] - ) + model.partial_fit(documents[index : index + 50], 
document_embeddings[index : index + 50]) topics.extend(model.topics_) model.topics_ = topics return model diff --git a/tests/test_bertopic.py b/tests/test_bertopic.py index 73614e1b..3bcc6cbb 100644 --- a/tests/test_bertopic.py +++ b/tests/test_bertopic.py @@ -75,13 +75,9 @@ def test_full_model(model, documents, request): # Test zero-shot topic modeling if topic_model._is_zeroshot(): if topic_model._outliers: - assert set(topic_model.topic_labels_.keys()) == set( - range(-1, len(topic_model.topic_labels_) - 1) - ) + assert set(topic_model.topic_labels_.keys()) == set(range(-1, len(topic_model.topic_labels_) - 1)) else: - assert set(topic_model.topic_labels_.keys()) == set( - range(len(topic_model.topic_labels_)) - ) + assert set(topic_model.topic_labels_.keys()) == set(range(len(topic_model.topic_labels_))) # Test topics over time timestamps = [i % 10 for i in range(len(documents))] @@ -130,9 +126,7 @@ def test_full_model(model, documents, request): assert topic != original_topic # Test updating topic labels - topic_labels = topic_model.generate_topic_labels( - nr_words=3, topic_prefix=False, word_length=10, separator=", " - ) + topic_labels = topic_model.generate_topic_labels(nr_words=3, topic_prefix=False, word_length=10, separator=", ") assert len(topic_labels) == len(set(topic_model.topics_)) # Test setting topic labels @@ -148,9 +142,7 @@ def test_full_model(model, documents, request): # Test reduction of outliers if -1 in topics: new_topics = topic_model.reduce_outliers(documents, topics, threshold=0.0) - nr_outliers_topic_model = sum( - [1 for topic in topic_model.topics_ if topic == -1] - ) + nr_outliers_topic_model = sum([1 for topic in topic_model.topics_ if topic == -1]) nr_outliers_new_topics = sum([1 for topic in new_topics if topic == -1]) if topic_model._outliers == 1: diff --git a/tests/test_plotting/test_approximate.py b/tests/test_plotting/test_approximate.py index 2de86848..1b0a78eb 100644 --- a/tests/test_plotting/test_approximate.py +++ b/tests/test_plotting/test_approximate.py @@ -18,28 +18,17 @@ def test_approximate_distribution(batch_size, padding, model, documents, request topic_model = copy.deepcopy(request.getfixturevalue(model)) # Calculate only on a document-level based on tokensets - topic_distr, _ = topic_model.approximate_distribution( - documents, padding=padding, batch_size=batch_size - ) - assert ( - topic_distr.shape[1] == len(topic_model.topic_labels_) - topic_model._outliers - ) + topic_distr, _ = topic_model.approximate_distribution(documents, padding=padding, batch_size=batch_size) + assert topic_distr.shape[1] == len(topic_model.topic_labels_) - topic_model._outliers # Use the distribution visualization for i in range(3): topic_model.visualize_distribution(topic_distr[i]) # Calculate distribution on a token-level - topic_distr, topic_token_distr = topic_model.approximate_distribution( - documents[:100], calculate_tokens=True - ) - assert ( - topic_distr.shape[1] == len(topic_model.topic_labels_) - topic_model._outliers - ) + topic_distr, topic_token_distr = topic_model.approximate_distribution(documents[:100], calculate_tokens=True) + assert topic_distr.shape[1] == len(topic_model.topic_labels_) - topic_model._outliers assert len(topic_token_distr) == len(documents[:100]) for token_distr in topic_token_distr: - assert ( - token_distr.shape[1] - == len(topic_model.topic_labels_) - topic_model._outliers - ) + assert token_distr.shape[1] == len(topic_model.topic_labels_) - topic_model._outliers diff --git a/tests/test_plotting/test_documents.py 
b/tests/test_plotting/test_documents.py index 81acbe4c..8d94767b 100644 --- a/tests/test_plotting/test_documents.py +++ b/tests/test_plotting/test_documents.py @@ -17,8 +17,6 @@ def test_documents(model, reduced_embeddings, documents, request): topics = set(topic_model.topics_) if -1 in topics: topics.remove(-1) - fig = topic_model.visualize_documents( - documents, embeddings=reduced_embeddings, hide_document_hover=True - ) + fig = topic_model.visualize_documents(documents, embeddings=reduced_embeddings, hide_document_hover=True) fig_topics = [int(data["name"].split("_")[0]) for data in fig.to_dict()["data"][1:]] assert set(fig_topics) == topics diff --git a/tests/test_plotting/test_dynamic.py b/tests/test_plotting/test_dynamic.py index 361702b1..6551da52 100644 --- a/tests/test_plotting/test_dynamic.py +++ b/tests/test_plotting/test_dynamic.py @@ -19,7 +19,4 @@ def test_dynamic(model, documents, request): topics_over_time = topic_model.topics_over_time(documents, timestamps) fig = topic_model.visualize_topics_over_time(topics_over_time) - assert ( - len(fig.to_dict()["data"]) - == len(set(topic_model.topics_)) - topic_model._outliers - ) + assert len(fig.to_dict()["data"]) == len(set(topic_model.topics_)) - topic_model._outliers diff --git a/tests/test_plotting/test_term_rank.py b/tests/test_plotting/test_term_rank.py index 318d7d3c..67015d05 100644 --- a/tests/test_plotting/test_term_rank.py +++ b/tests/test_plotting/test_term_rank.py @@ -2,9 +2,7 @@ import pytest -@pytest.mark.parametrize( - "model", [("kmeans_pca_topic_model"), ("base_topic_model"), ("custom_topic_model")] -) +@pytest.mark.parametrize("model", [("kmeans_pca_topic_model"), ("base_topic_model"), ("custom_topic_model")]) def test_term_rank(model, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) topic_model.visualize_term_rank() diff --git a/tests/test_reduction/test_merge.py b/tests/test_reduction/test_merge.py index b69ee3cd..67bf9934 100644 --- a/tests/test_reduction/test_merge.py +++ b/tests/test_reduction/test_merge.py @@ -19,9 +19,7 @@ def test_merge(model, documents, request): topics_to_merge = [1, 2] topic_model.merge_topics(documents, topics_to_merge) - mappings = topic_model.topic_mapper_.get_mappings( - list(topic_model.hdbscan_model.labels_) - ) + mappings = topic_model.topic_mapper_.get_mappings(list(topic_model.hdbscan_model.labels_)) mapped_labels = [mappings[label] for label in topic_model.hdbscan_model.labels_] assert nr_topics == len(set(topic_model.topics_)) + 1 @@ -33,9 +31,7 @@ def test_merge(model, documents, request): topics_to_merge = [1, 2] topic_model.merge_topics(documents, topics_to_merge) - mappings = topic_model.topic_mapper_.get_mappings( - list(topic_model.hdbscan_model.labels_) - ) + mappings = topic_model.topic_mapper_.get_mappings(list(topic_model.hdbscan_model.labels_)) mapped_labels = [mappings[label] for label in topic_model.hdbscan_model.labels_] assert nr_topics == len(set(topic_model.topics_)) + 2 diff --git a/tests/test_representation/test_representations.py b/tests/test_representation/test_representations.py index 98b8f4dd..7c819964 100644 --- a/tests/test_representation/test_representations.py +++ b/tests/test_representation/test_representations.py @@ -151,9 +151,7 @@ def test_topic_reduction_edge_cases(model, documents, request): topic_model.nr_topics = 100 nr_topics = 5 topics = np.random.randint(-1, nr_topics - 1, len(documents)) - old_documents = pd.DataFrame( - {"Document": documents, "ID": range(len(documents)), "Topic": topics} - ) + old_documents = 
pd.DataFrame({"Document": documents, "ID": range(len(documents)), "Topic": topics}) topic_model._update_topic_size(old_documents) topic_model._extract_topics(old_documents) old_freq = topic_model.get_topic_freq() diff --git a/tests/test_sub_models/test_cluster.py b/tests/test_sub_models/test_cluster.py index 6115d08e..265f6f78 100644 --- a/tests/test_sub_models/test_cluster.py +++ b/tests/test_sub_models/test_cluster.py @@ -21,13 +21,9 @@ ], ) def test_hdbscan_cluster_embeddings(cluster_model, samples, features, centers): - embeddings, _ = make_blobs( - n_samples=samples, centers=centers, n_features=features, random_state=42 - ) + embeddings, _ = make_blobs(n_samples=samples, centers=centers, n_features=features, random_state=42) documents = [str(i + 1) for i in range(embeddings.shape[0])] - old_df = pd.DataFrame( - {"Document": documents, "ID": range(len(documents)), "Topic": None} - ) + old_df = pd.DataFrame({"Document": documents, "ID": range(len(documents)), "Topic": None}) if cluster_model == "kmeans": cluster_model = KMeans(n_clusters=centers) @@ -44,9 +40,7 @@ def test_hdbscan_cluster_embeddings(cluster_model, samples, features, centers): assert len(new_df.Topic.unique()) == centers assert "Topic" in new_df.columns - pd.testing.assert_frame_equal( - old_df.drop("Topic", axis=1), new_df.drop("Topic", axis=1) - ) + pd.testing.assert_frame_equal(old_df.drop("Topic", axis=1), new_df.drop("Topic", axis=1)) @pytest.mark.parametrize("cluster_model", ["hdbscan", "kmeans"]) @@ -62,13 +56,9 @@ def test_hdbscan_cluster_embeddings(cluster_model, samples, features, centers): ], ) def test_custom_hdbscan_cluster_embeddings(cluster_model, samples, features, centers): - embeddings, _ = make_blobs( - n_samples=samples, centers=centers, n_features=features, random_state=42 - ) + embeddings, _ = make_blobs(n_samples=samples, centers=centers, n_features=features, random_state=42) documents = [str(i + 1) for i in range(embeddings.shape[0])] - old_df = pd.DataFrame( - {"Document": documents, "ID": range(len(documents)), "Topic": None} - ) + old_df = pd.DataFrame({"Document": documents, "ID": range(len(documents)), "Topic": None}) if cluster_model == "kmeans": cluster_model = KMeans(n_clusters=centers) else: @@ -84,6 +74,4 @@ def test_custom_hdbscan_cluster_embeddings(cluster_model, samples, features, cen assert len(new_df.Topic.unique()) == centers assert "Topic" in new_df.columns - pd.testing.assert_frame_equal( - old_df.drop("Topic", axis=1), new_df.drop("Topic", axis=1) - ) + pd.testing.assert_frame_equal(old_df.drop("Topic", axis=1), new_df.drop("Topic", axis=1)) diff --git a/tests/test_sub_models/test_embeddings.py b/tests/test_sub_models/test_embeddings.py index 22f53539..75735607 100644 --- a/tests/test_sub_models/test_embeddings.py +++ b/tests/test_sub_models/test_embeddings.py @@ -19,9 +19,7 @@ def test_extract_embeddings(model, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) single_embedding = topic_model._extract_embeddings("a document") - multiple_embeddings = topic_model._extract_embeddings( - ["something different", "another document"] - ) + multiple_embeddings = topic_model._extract_embeddings(["something different", "another document"]) sim_matrix = cosine_similarity(single_embedding, multiple_embeddings)[0] assert single_embedding.shape[0] == 1 diff --git a/tests/test_utils.py b/tests/test_utils.py index 2974b1b6..90876e76 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -41,15 +41,9 @@ def test_check_embeddings_shape(): def 
test_make_unique_distances(): def check_dists(dists: List[float], noise_max: float): - unique_dists = get_unique_distances( - np.array(dists, dtype=float), noise_max=noise_max - ) - assert len(unique_dists) == len( - dists - ), "The number of elements must be the same" - assert len(dists) == len( - np.unique(unique_dists) - ), "The distances must be unique" + unique_dists = get_unique_distances(np.array(dists, dtype=float), noise_max=noise_max) + assert len(unique_dists) == len(dists), "The number of elements must be the same" + assert len(dists) == len(np.unique(unique_dists)), "The distances must be unique" check_dists([0, 0, 0.5, 0.75, 1, 1], noise_max=1e-7) @@ -69,44 +63,32 @@ def test_select_topic_representation(): topic_embeddings = np.array([[2, 2, 2]]) # Use topic embeddings - repr_, ctfidf_used = select_topic_representation( - ctfidf_embeddings, topic_embeddings, use_ctfidf=False - ) + repr_, ctfidf_used = select_topic_representation(ctfidf_embeddings, topic_embeddings, use_ctfidf=False) np.testing.assert_array_equal(topic_embeddings, repr_) assert not ctfidf_used # Fallback to c-TF-IDF - repr_, ctfidf_used = select_topic_representation( - ctfidf_embeddings, None, use_ctfidf=False - ) + repr_, ctfidf_used = select_topic_representation(ctfidf_embeddings, None, use_ctfidf=False) np.testing.assert_array_equal(ctfidf_embeddings, repr_) assert ctfidf_used # Use c-TF-IDF - repr_, ctfidf_used = select_topic_representation( - ctfidf_embeddings, topic_embeddings, use_ctfidf=True - ) + repr_, ctfidf_used = select_topic_representation(ctfidf_embeddings, topic_embeddings, use_ctfidf=True) np.testing.assert_array_equal(ctfidf_embeddings, repr_) assert ctfidf_used # Fallback to topic embeddings - repr_, ctfidf_used = select_topic_representation( - None, topic_embeddings, use_ctfidf=True - ) + repr_, ctfidf_used = select_topic_representation(None, topic_embeddings, use_ctfidf=True) np.testing.assert_array_equal(topic_embeddings, repr_) assert not ctfidf_used # `scipy.sparse.csr_matrix` can be used as c-TF-IDF embeddings np.testing.assert_array_equal( ctfidf_embeddings, - select_topic_representation( - ctfidf_embeddings_sparse, None, use_ctfidf=True, output_ndarray=True - )[0], + select_topic_representation(ctfidf_embeddings_sparse, None, use_ctfidf=True, output_ndarray=True)[0], ) # check that `csr_matrix` is not casted to `np.ndarray` when `ctfidf_as_ndarray` is False - repr_ = select_topic_representation( - ctfidf_embeddings_sparse, None, output_ndarray=False - )[0] + repr_ = select_topic_representation(ctfidf_embeddings_sparse, None, output_ndarray=False)[0] assert isinstance(repr_, csr_matrix) diff --git a/tests/test_variations/test_class.py b/tests/test_variations/test_class.py index a94c108d..5c969b51 100644 --- a/tests/test_variations/test_class.py +++ b/tests/test_variations/test_class.py @@ -18,12 +18,8 @@ ) def test_class(model, documents, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) - topics_per_class_global = topic_model.topics_per_class( - documents, classes=classes, global_tuning=True - ) - topics_per_class_local = topic_model.topics_per_class( - documents, classes=classes, global_tuning=False - ) + topics_per_class_global = topic_model.topics_per_class(documents, classes=classes, global_tuning=True) + topics_per_class_local = topic_model.topics_per_class(documents, classes=classes, global_tuning=False) assert topics_per_class_global.Frequency.sum() == len(documents) assert topics_per_class_local.Frequency.sum() == len(documents) diff --git 
a/tests/test_variations/test_hierarchy.py b/tests/test_variations/test_hierarchy.py index cdfdaf8d..1ac7091d 100644 --- a/tests/test_variations/test_hierarchy.py +++ b/tests/test_variations/test_hierarchy.py @@ -36,9 +36,7 @@ def test_hierarchy(model, documents, request): def test_linkage(model, documents, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) linkage_function = lambda x: sch.linkage(x, "single", optimal_ordering=True) - hierarchical_topics = topic_model.hierarchical_topics( - documents, linkage_function=linkage_function - ) + hierarchical_topics = topic_model.hierarchical_topics(documents, linkage_function=linkage_function) merged_topics = set([v for vals in hierarchical_topics.Topics.values for v in vals]) tree = topic_model.get_topic_tree(hierarchical_topics) @@ -61,9 +59,7 @@ def test_linkage(model, documents, request): def test_tree(model, documents, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) linkage_function = lambda x: sch.linkage(x, "single", optimal_ordering=True) - hierarchical_topics = topic_model.hierarchical_topics( - documents, linkage_function=linkage_function - ) + hierarchical_topics = topic_model.hierarchical_topics(documents, linkage_function=linkage_function) merged_topics = set([v for vals in hierarchical_topics.Topics.values for v in vals]) tree = topic_model.get_topic_tree(hierarchical_topics) diff --git a/tests/test_vectorizers/test_ctfidf.py b/tests/test_vectorizers/test_ctfidf.py index a6cedccd..5d2626b6 100644 --- a/tests/test_vectorizers/test_ctfidf.py +++ b/tests/test_vectorizers/test_ctfidf.py @@ -23,12 +23,8 @@ def test_ctfidf(model, documents, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) topics = topic_model.topics_ - documents = pd.DataFrame( - {"Document": documents, "ID": range(len(documents)), "Topic": topics} - ) - documents_per_topic = documents.groupby(["Topic"], as_index=False).agg( - {"Document": " ".join} - ) + documents = pd.DataFrame({"Document": documents, "ID": range(len(documents)), "Topic": topics}) + documents_per_topic = documents.groupby(["Topic"], as_index=False).agg({"Document": " ".join}) documents = topic_model._preprocess_text(documents_per_topic.Document.values) count = topic_model.vectorizer_model.fit(documents) @@ -74,12 +70,8 @@ def test_ctfidf_custom_cv(model, documents, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) topic_model.vectorizer_model = cv topics = topic_model.topics_ - documents = pd.DataFrame( - {"Document": documents, "ID": range(len(documents)), "Topic": topics} - ) - documents_per_topic = documents.groupby(["Topic"], as_index=False).agg( - {"Document": " ".join} - ) + documents = pd.DataFrame({"Document": documents, "ID": range(len(documents)), "Topic": topics}) + documents_per_topic = documents.groupby(["Topic"], as_index=False).agg({"Document": " ".join}) documents = topic_model._preprocess_text(documents_per_topic.Document.values) count = topic_model.vectorizer_model.fit(documents)