From 4d77dceabe97b05c6e79b288b94b2dbf6baa385f Mon Sep 17 00:00:00 2001
From: Christophe Bornet <cbornet@hotmail.com>
Date: Mon, 22 Jul 2024 10:33:02 +0200
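Subject: [PATCH] Bump versions again to get LangChain extractors

Bump langchain 0.2.7 -> 0.2.10, langchain-core 0.2.12 -> 0.2.22 and
langchain-community 0.2.7 -> 0.2.9 in libs/langchain and in the
llamaindex e2e-tests pyproject.

LinkExtractor, LinkExtractorAdapter and the GLiNER, hierarchy, HTML and
Keybert link extractors are now imported from
langchain_community.graph_vectorstores.extractors, so the corresponding
ragstack_langchain.graph_store.extractors modules are reduced to
re-exports of those names.

The hierarchy and HTML link extractor unit tests are deleted; the HTML
fixture pages still needed by test_link_extractor_transformer.py are
inlined in that file.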
---
 libs/e2e-tests/pyproject.llamaindex.toml      |   6 +-
 libs/langchain/pyproject.toml                 |   6 +-
 .../graph_store/cassandra.py                  |   1 -
 .../extractors/gliner_link_extractor.py       |  66 ++-------
 .../extractors/hierarchy_link_extractor.py    |  70 ++--------
 .../extractors/html_link_extractor.py         | 128 ++----------------
 .../extractors/keybert_link_extractor.py      |  72 ++--------
 .../graph_store/extractors/link_extractor.py  |  40 +-----
 .../extractors/link_extractor_adapter.py      |  28 +---
 .../extractors/link_extractor_transformer.py  |   3 +-
 .../unit_tests/test_gliner_link_extractor.py  |   2 +-
 .../test_hierarchy_link_extractor.py          |  83 ------------
 .../unit_tests/test_html_link_extractor.py    | 106 ---------------
 .../unit_tests/test_keybert_link_extractor.py |   2 +-
 .../test_link_extractor_transformer.py        |  39 ++++--
 15 files changed, 80 insertions(+), 572 deletions(-)
 delete mode 100644 libs/langchain/tests/unit_tests/test_hierarchy_link_extractor.py
 delete mode 100644 libs/langchain/tests/unit_tests/test_html_link_extractor.py

diff --git a/libs/e2e-tests/pyproject.llamaindex.toml b/libs/e2e-tests/pyproject.llamaindex.toml
index 5729a3266..939dd1422 100644
--- a/libs/e2e-tests/pyproject.llamaindex.toml
+++ b/libs/e2e-tests/pyproject.llamaindex.toml
@@ -42,9 +42,9 @@ llama-index-multi-modal-llms-gemini = { git = "https://github.com/run-llama/llam
 
 llama-parse = { git = "https://github.com/run-llama/llama_parse.git", branch = "main" }
 
-langchain = "0.2.7"
-langchain-core = "0.2.12"
-langchain-community = "0.2.7"
+langchain = "0.2.10"
+langchain-core = "0.2.22"
+langchain-community = "0.2.9"
 langchain-astradb = "0.3.3"
 langchain-openai = "0.1.8"
 langchain-google-genai = { version = "1.0.6" }
diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml
index d9c72a43a..56e37f827 100644
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@@ -18,9 +18,9 @@ ragstack-ai-colbert = { version = "1.0.5", optional = true }
 ragstack-ai-knowledge-store = { version = "0.1.0", optional = true }
 
 # langchain
-langchain = "0.2.7"
-langchain-core = "0.2.12"
-langchain-community = "0.2.7"
+langchain = "0.2.10"
+langchain-core = "0.2.22"
+langchain-community = "0.2.9"
 langchain-astradb = "0.3.3"
 langchain-openai = "0.1.8"
 langchain-google-genai = { version = "1.0.6", optional = true }
diff --git a/libs/langchain/ragstack_langchain/graph_store/cassandra.py b/libs/langchain/ragstack_langchain/graph_store/cassandra.py
index 9e11e2148..1a73456be 100644
--- a/libs/langchain/ragstack_langchain/graph_store/cassandra.py
+++ b/libs/langchain/ragstack_langchain/graph_store/cassandra.py
@@ -2,7 +2,6 @@
     CassandraGraphVectorStore as CassandraGraphStore,
 )
 
-
 __all__ = [
     "CassandraGraphStore",
 ]
diff --git a/libs/langchain/ragstack_langchain/graph_store/extractors/gliner_link_extractor.py b/libs/langchain/ragstack_langchain/graph_store/extractors/gliner_link_extractor.py
index fe980053f..380a90c55 100644
--- a/libs/langchain/ragstack_langchain/graph_store/extractors/gliner_link_extractor.py
+++ b/libs/langchain/ragstack_langchain/graph_store/extractors/gliner_link_extractor.py
@@ -1,57 +1,9 @@
-from typing import Any, Dict, Iterable, List, Optional, Set
-
-from langchain_core.graph_vectorstores import Link
-
-from ragstack_langchain.graph_store.extractors.link_extractor import LinkExtractor
-
-# TypeAlias is not available in Python 2.9, we can't use that or the newer `type`.
-GLiNERInput = str
-
-
-class GLiNERLinkExtractor(LinkExtractor[GLiNERInput]):
-    def __init__(
-        self,
-        labels: List[str],
-        *,
-        kind: str = "entity",
-        model: str = "urchade/gliner_mediumv2.1",
-        extract_kwargs: Optional[Dict[str, Any]] = None,
-    ):
-        """Extract keywords using GLiNER.
-
-        Args:
-            kind: Kind of links to produce with this extractor.
-            labels: List of kinds of entities to extract.
-            model: GLiNER model to use.
-            extract_kwargs: Keyword arguments to pass to GLiNER.
-        """
-        try:
-            from gliner import GLiNER
-
-            self._model = GLiNER.from_pretrained(model)
-
-        except ImportError:
-            raise ImportError(
-                "gliner is required for GLiNERLinkExtractor. "
-                "Please install it with `pip install gliner`."
-            ) from None
-
-        self._labels = labels
-        self._kind = kind
-        self._extract_kwargs = extract_kwargs or {}
-
-    def extract_one(self, input: GLiNERInput) -> Set[Link]:  # noqa: A002
-        return next(self.extract_many([input]))
-
-    def extract_many(
-        self,
-        inputs: Iterable[GLiNERInput],
-    ) -> Iterable[Set[Link]]:
-        strs = [i if isinstance(i, str) else i.page_content for i in inputs]
-        for entities in self._model.batch_predict_entities(
-            strs, self._labels, **self._extract_kwargs
-        ):
-            yield {
-                Link.bidir(kind=f"{self._kind}:{e['label']}", tag=e["text"])
-                for e in entities
-            }
+from langchain_community.graph_vectorstores.extractors import (
+    GLiNERInput,
+    GLiNERLinkExtractor,
+)
+
+__all__ = [
+    "GLiNERInput",
+    "GLiNERLinkExtractor",
+]
diff --git a/libs/langchain/ragstack_langchain/graph_store/extractors/hierarchy_link_extractor.py b/libs/langchain/ragstack_langchain/graph_store/extractors/hierarchy_link_extractor.py
index 077564d9c..9cabd0af2 100644
--- a/libs/langchain/ragstack_langchain/graph_store/extractors/hierarchy_link_extractor.py
+++ b/libs/langchain/ragstack_langchain/graph_store/extractors/hierarchy_link_extractor.py
@@ -1,61 +1,9 @@
-from typing import Callable, List, Set
-
-from langchain_core.documents import Document
-from langchain_core.graph_vectorstores import Link
-
-from .link_extractor import LinkExtractor
-from .link_extractor_adapter import LinkExtractorAdapter
-
-# TypeAlias is not available in Python 2.9, we can't use that or the newer `type`.
-HierarchyInput = List[str]
-
-
-class HierarchyLinkExtractor(LinkExtractor[HierarchyInput]):
-    def __init__(
-        self,
-        kind: str = "hierarchy",
-        up_links: bool = True,
-        down_links: bool = False,
-        sibling_links: bool = False,
-    ):
-        """Extract links from a document hierarchy.
-
-        Args:
-            kind: Kind of links to produce with this extractor.
-            up_links: Link from a section to it's parent.
-            down_links: Link from a section to it's children.
-            sibling_links: Link from a section to other sections with the same parent.
-        """
-        self._kind = kind
-        self._up_links = up_links
-        self._down_links = down_links
-        self._sibling_links = sibling_links
-
-    def as_document_extractor(
-        self, hierarchy: Callable[[Document], HierarchyInput]
-    ) -> LinkExtractor[Document]:
-        return LinkExtractorAdapter(underlying=self, transform=hierarchy)
-
-    def extract_one(
-        self,
-        input: HierarchyInput,  # noqa: A002
-    ) -> Set[Link]:
-        this_path = "/".join(input)
-        parent_path = None
-
-        links = set()
-        if self._up_links:
-            links.add(Link.incoming(kind=self._kind, tag=f"up:{this_path}"))
-        if self._down_links:
-            links.add(Link.outgoing(kind=self._kind, tag=f"down:{this_path}"))
-
-        if len(input) >= 1:
-            parent_path = "/".join(input[0:-1])
-            if self._up_links and len(input) > 1:
-                links.add(Link.outgoing(kind=self._kind, tag=f"up:{parent_path}"))
-            if self._down_links and len(input) > 1:
-                links.add(Link.incoming(kind=self._kind, tag=f"down:{parent_path}"))
-            if self._sibling_links:
-                links.add(Link.bidir(kind=self._kind, tag=f"sib:{parent_path}"))
-
-        return links
+from langchain_community.graph_vectorstores.extractors import (
+    HierarchyInput,
+    HierarchyLinkExtractor,
+)
+
+__all__ = [
+    "HierarchyInput",
+    "HierarchyLinkExtractor",
+]
diff --git a/libs/langchain/ragstack_langchain/graph_store/extractors/html_link_extractor.py b/libs/langchain/ragstack_langchain/graph_store/extractors/html_link_extractor.py
index c19180b76..d51add4c5 100644
--- a/libs/langchain/ragstack_langchain/graph_store/extractors/html_link_extractor.py
+++ b/libs/langchain/ragstack_langchain/graph_store/extractors/html_link_extractor.py
@@ -1,119 +1,9 @@
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Set, Union
-from urllib.parse import urldefrag, urljoin, urlparse
-
-from langchain_core.documents import Document
-from langchain_core.graph_vectorstores import Link
-
-from .link_extractor import LinkExtractor
-from .link_extractor_adapter import LinkExtractorAdapter
-
-if TYPE_CHECKING:
-    from bs4 import BeautifulSoup
-
-
-def _parse_url(link, page_url, drop_fragments: bool = True):
-    href = link.get("href")
-    if href is None:
-        return None
-    url = urlparse(href)
-    if url.scheme not in ["http", "https", ""]:
-        return None
-
-    # Join the HREF with the page_url to convert relative paths to absolute.
-    url = urljoin(page_url, href)
-
-    # Fragments would be useful if we chunked a page based on section.
-    # Then, each chunk would have a different URL based on the fragment.
-    # Since we aren't doing that yet, they just "break" links. So, drop
-    # the fragment.
-    if drop_fragments:
-        return urldefrag(url).url
-    return url
-
-
-def _parse_hrefs(
-    soup: "BeautifulSoup", url: str, drop_fragments: bool = True
-) -> Set[str]:
-    links = soup.find_all("a")
-    links = {
-        _parse_url(link, page_url=url, drop_fragments=drop_fragments) for link in links
-    }
-
-    # Remove entries for any 'a' tag that failed to parse (didn't have href,
-    # or invalid domain, etc.)
-    links.discard(None)
-
-    # Remove self links.
-    links.discard(url)
-
-    return links
-
-
-@dataclass
-class HtmlInput:
-    content: Union[str, "BeautifulSoup"]
-    base_url: str
-
-
-class HtmlLinkExtractor(LinkExtractor[HtmlInput]):
-    def __init__(self, *, kind: str = "hyperlink", drop_fragments: bool = True):
-        """Extract hyperlinks from HTML content.
-
-        Expects the input to be an HTML string or a `BeautifulSoup` object.
-
-        Args:
-            kind: The kind of edge to extract. Defaults to "hyperlink".
-            drop_fragments: Whether fragments in URLs and links shoud be
-                dropped. Defaults to `True`.
-        """
-        try:
-            import bs4  # noqa:F401
-        except ImportError as e:
-            raise ImportError(
-                "BeautifulSoup4 is required for HtmlLinkExtractor. "
-                "Please install it with `pip install beautifulsoup4`."
-            ) from e
-
-        self._kind = kind
-        self.drop_fragments = drop_fragments
-
-    def as_document_extractor(
-        self, url_metadata_key: str = "source"
-    ) -> LinkExtractor[Document]:
-        """Return a LinkExtractor that applies to documents.
-
-        NOTE: Since the HtmlLinkExtractor parses HTML, if you use with other similar
-        link extractors it may be more efficient to call the link extractors directly
-        on the parsed BeautifulSoup object.
-
-        Args:
-            url_metadata_key: The name of the filed in document metadata with the URL of
-                the document.
-        """
-        return LinkExtractorAdapter(
-            underlying=self,
-            transform=lambda doc: HtmlInput(
-                doc.page_content, doc.metadata[url_metadata_key]
-            ),
-        )
-
-    def extract_one(
-        self,
-        input: HtmlInput,  # noqa: A002
-    ) -> Set[Link]:
-        content = input.content
-        if isinstance(content, str):
-            from bs4 import BeautifulSoup
-
-            content = BeautifulSoup(content, "html.parser")
-
-        base_url = input.base_url
-        if self.drop_fragments:
-            base_url = urldefrag(base_url).url
-
-        hrefs = _parse_hrefs(content, base_url, self.drop_fragments)
-
-        links = {Link.outgoing(kind=self._kind, tag=url) for url in hrefs}
-        links.add(Link.incoming(kind=self._kind, tag=base_url))
-        return links
+from langchain_community.graph_vectorstores.extractors import (
+    HtmlInput,
+    HtmlLinkExtractor,
+)
+
+__all__ = [
+    "HtmlInput",
+    "HtmlLinkExtractor",
+]
diff --git a/libs/langchain/ragstack_langchain/graph_store/extractors/keybert_link_extractor.py b/libs/langchain/ragstack_langchain/graph_store/extractors/keybert_link_extractor.py
index 6f67e867f..725d46991 100644
--- a/libs/langchain/ragstack_langchain/graph_store/extractors/keybert_link_extractor.py
+++ b/libs/langchain/ragstack_langchain/graph_store/extractors/keybert_link_extractor.py
@@ -1,63 +1,9 @@
-from typing import Any, Dict, Iterable, Optional, Set, Union
-
-from langchain_core.documents import Document
-from langchain_core.graph_vectorstores import Link
-
-from ragstack_langchain.graph_store.extractors.link_extractor import LinkExtractor
-
-# TypeAlias is not available in Python 2.9, we can't use that or the newer `type`.
-KeybertInput = Union[str, Document]
-
-
-class KeybertLinkExtractor(LinkExtractor[KeybertInput]):
-    def __init__(
-        self,
-        *,
-        kind: str = "kw",
-        embedding_model: str = "all-MiniLM-L6-v2",
-        extract_keywords_kwargs: Optional[Dict[str, Any]] = None,
-    ):
-        """Extract keywords using Keybert.
-
-        Args:
-            kind: Kind of links to produce with this extractor.
-            embedding_model: Name of the embedding model to use with Keybert.
-            extract_keywords_kwargs: Keyword arguments to pass to Keybert's
-                `extract_keywords` method.
-        """
-        try:
-            import keybert
-
-            self._kw_model = keybert.KeyBERT(model=embedding_model)
-        except ImportError:
-            raise ImportError(
-                "keybert is required for KeybertLinkExtractor. "
-                "Please install it with `pip install keybert`."
-            ) from None
-
-        self._kind = kind
-        self._extract_keywords_kwargs = extract_keywords_kwargs or {}
-
-    def extract_one(self, input: KeybertInput) -> Set[Link]:  # noqa: A002
-        keywords = self._kw_model.extract_keywords(
-            input if isinstance(input, str) else input.page_content,
-            **self._extract_keywords_kwargs,
-        )
-        return {Link.bidir(kind=self._kind, tag=kw[0]) for kw in keywords}
-
-    def extract_many(
-        self,
-        inputs: Iterable[KeybertInput],
-    ) -> Iterable[Set[Link]]:
-        if len(inputs) == 1:
-            # Even though we pass a list, if it contains one item, keybert will
-            # flatten it. This means it's easier to just call the special case
-            # for one item.
-            yield self.extract_one(inputs[0])
-        elif len(inputs) > 1:
-            strs = [i if isinstance(i, str) else i.page_content for i in inputs]
-            extracted = self._kw_model.extract_keywords(
-                strs, **self._extract_keywords_kwargs
-            )
-            for keywords in extracted:
-                yield {Link.bidir(kind=self._kind, tag=kw[0]) for kw in keywords}
+from langchain_community.graph_vectorstores.extractors import (
+    KeybertInput,
+    KeybertLinkExtractor,
+)
+
+__all__ = [
+    "KeybertInput",
+    "KeybertLinkExtractor",
+]
diff --git a/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor.py b/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor.py
index f4c3aa34d..380b14993 100644
--- a/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor.py
+++ b/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor.py
@@ -1,37 +1,5 @@
-from __future__ import annotations
+from langchain_community.graph_vectorstores.extractors import LinkExtractor
 
-from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Generic, Iterable, Set, TypeVar
-
-if TYPE_CHECKING:
-    from langchain_core.graph_vectorstores import Link
-
-InputT = TypeVar("InputT")
-
-METADATA_LINKS_KEY = "links"
-
-
-class LinkExtractor(ABC, Generic[InputT]):
-    """Interface for extracting links (incoming, outgoing, bidirectional)."""
-
-    @abstractmethod
-    def extract_one(self, input: InputT) -> set[Link]:  # noqa: A002
-        """Add edges from each `input` to the corresponding documents.
-
-        Args:
-            input: The input content to extract edges from.
-
-        Returns:
-            Set of links extracted from the input.
-        """
-
-    def extract_many(self, inputs: Iterable[InputT]) -> Iterable[Set[Link]]:
-        """Add edges from each `input` to the corresponding documents.
-
-        Args:
-            inputs: The input content to extract edges from.
-
-        Returns:
-            Iterable over the set of links extracted from the input.
-        """
-        return map(self.extract_one, inputs)
+__all__ = [
+    "LinkExtractor",
+]
diff --git a/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_adapter.py b/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_adapter.py
index d4fd7a35d..6a6d0f801 100644
--- a/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_adapter.py
+++ b/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_adapter.py
@@ -1,25 +1,5 @@
-from typing import Callable, Iterable, Set, TypeVar
+from langchain_community.graph_vectorstores.extractors import LinkExtractorAdapter
 
-from langchain_core.graph_vectorstores import Link
-
-from ragstack_langchain.graph_store.extractors.link_extractor import LinkExtractor
-
-InputT = TypeVar("InputT")
-UnderlyingInputT = TypeVar("UnderlyingInputT")
-
-
-class LinkExtractorAdapter(LinkExtractor[InputT]):
-    def __init__(
-        self,
-        underlying: LinkExtractor[UnderlyingInputT],
-        transform: Callable[[InputT], UnderlyingInputT],
-    ) -> None:
-        self._underlying = underlying
-        self._transform = transform
-
-    def extract_one(self, input: InputT) -> Set[Link]:  # noqa: A002
-        return self.extract_one(self._transform(input))
-
-    def extract_many(self, inputs: Iterable[InputT]) -> Iterable[Set[Link]]:
-        underlying_inputs = map(self._transform, inputs)
-        return self._underlying.extract_many(underlying_inputs)
+__all__ = [
+    "LinkExtractorAdapter",
+]
diff --git a/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_transformer.py b/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_transformer.py
index 534f7fb69..2fb27e9c7 100644
--- a/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_transformer.py
+++ b/libs/langchain/ragstack_langchain/graph_store/extractors/link_extractor_transformer.py
@@ -1,11 +1,10 @@
 from typing import Iterable, Sequence
 
+from langchain_community.graph_vectorstores.extractors import LinkExtractor
 from langchain_core.documents import Document
 from langchain_core.documents.transformers import BaseDocumentTransformer
 from langchain_core.graph_vectorstores.links import add_links
 
-from ragstack_langchain.graph_store.extractors.link_extractor import LinkExtractor
-
 
 class LinkExtractorTransformer(BaseDocumentTransformer):
     def __init__(self, link_extractors: Iterable[LinkExtractor[Document]]):
diff --git a/libs/langchain/tests/unit_tests/test_gliner_link_extractor.py b/libs/langchain/tests/unit_tests/test_gliner_link_extractor.py
index d521f1fcb..f3cd52122 100644
--- a/libs/langchain/tests/unit_tests/test_gliner_link_extractor.py
+++ b/libs/langchain/tests/unit_tests/test_gliner_link_extractor.py
@@ -1,5 +1,5 @@
+from langchain_community.graph_vectorstores.extractors import GLiNERLinkExtractor
 from langchain_core.graph_vectorstores import Link
-from ragstack_langchain.graph_store.extractors import GLiNERLinkExtractor
 
 PAGE_1 = """
 Cristiano Ronaldo dos Santos Aveiro (Portuguese pronunciation: [kɾiʃ'tjɐnu
diff --git a/libs/langchain/tests/unit_tests/test_hierarchy_link_extractor.py b/libs/langchain/tests/unit_tests/test_hierarchy_link_extractor.py
deleted file mode 100644
index 52494480d..000000000
--- a/libs/langchain/tests/unit_tests/test_hierarchy_link_extractor.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from langchain_core.graph_vectorstores import Link
-from ragstack_langchain.graph_store.extractors import HierarchyLinkExtractor
-
-PATH_1 = ["Root", "H1", "h2"]
-
-PATH_2 = ["Root", "H1"]
-
-PATH_3 = ["Root"]
-
-
-def test_up_only():
-    extractor = HierarchyLinkExtractor()
-
-    assert extractor.extract_one(PATH_1) == {
-        # Path1 links up to Root/H1
-        Link.outgoing(kind="hierarchy", tag="up:Root/H1"),
-        # Path1 is linked to by stuff under Root/H1/h2
-        Link.incoming(kind="hierarchy", tag="up:Root/H1/h2"),
-    }
-
-    assert extractor.extract_one(PATH_2) == {
-        # Path2 links up to Root
-        Link.outgoing(kind="hierarchy", tag="up:Root"),
-        # Path2 is linked to by stuff under Root/H1/h2
-        Link.incoming(kind="hierarchy", tag="up:Root/H1"),
-    }
-
-    assert extractor.extract_one(PATH_3) == {
-        # Path3 is linked to by stuff under Root
-        Link.incoming(kind="hierarchy", tag="up:Root"),
-    }
-
-
-def test_up_and_down():
-    extractor = HierarchyLinkExtractor(down_links=True)
-
-    assert extractor.extract_one(PATH_1) == {
-        # Path1 links up to Root/H1
-        Link.outgoing(kind="hierarchy", tag="up:Root/H1"),
-        # Path1 is linked to by stuff under Root/H1/h2
-        Link.incoming(kind="hierarchy", tag="up:Root/H1/h2"),
-        # Path1 links down to things under Root/H1/h2.
-        Link.outgoing(kind="hierarchy", tag="down:Root/H1/h2"),
-        # Path1 is linked down to by Root/H1
-        Link.incoming(kind="hierarchy", tag="down:Root/H1"),
-    }
-
-    assert extractor.extract_one(PATH_2) == {
-        # Path2 links up to Root
-        Link.outgoing(kind="hierarchy", tag="up:Root"),
-        # Path2 is linked to by stuff under Root/H1/h2
-        Link.incoming(kind="hierarchy", tag="up:Root/H1"),
-        # Path2 links down to things under Root/H1.
-        Link.outgoing(kind="hierarchy", tag="down:Root/H1"),
-        # Path2 is linked down to by Root
-        Link.incoming(kind="hierarchy", tag="down:Root"),
-    }
-
-    assert extractor.extract_one(PATH_3) == {
-        # Path3 is linked to by stuff under Root
-        Link.incoming(kind="hierarchy", tag="up:Root"),
-        # Path3 links down to things under Root/H1.
-        Link.outgoing(kind="hierarchy", tag="down:Root"),
-    }
-
-
-def test_sibling():
-    extractor = HierarchyLinkExtractor(sibling_links=True, up_links=False)
-
-    assert extractor.extract_one(PATH_1) == {
-        # Path1 links with anything else in Root/H1
-        Link.bidir(kind="hierarchy", tag="sib:Root/H1"),
-    }
-
-    assert extractor.extract_one(PATH_2) == {
-        # Path2 links with anything else in Root
-        Link.bidir(kind="hierarchy", tag="sib:Root"),
-    }
-
-    assert extractor.extract_one(PATH_3) == {
-        # Path3 links with anything else at the top level
-        Link.bidir(kind="hierarchy", tag="sib:"),
-    }
diff --git a/libs/langchain/tests/unit_tests/test_html_link_extractor.py b/libs/langchain/tests/unit_tests/test_html_link_extractor.py
deleted file mode 100644
index 0b008e228..000000000
--- a/libs/langchain/tests/unit_tests/test_html_link_extractor.py
+++ /dev/null
@@ -1,106 +0,0 @@
-from bs4 import BeautifulSoup
-from langchain_core.graph_vectorstores import Link
-from ragstack_langchain.graph_store.extractors import HtmlInput, HtmlLinkExtractor
-
-PAGE_1 = """
-<html>
-<body>
-Hello.
-<a href="relative">Relative</a>
-<a href="/relative-base">Relative base.</a>
-<a href="http://cnn.com">Aboslute</a>
-<a href="//same.foo">Test</a>
-</body>
-</html>
-"""
-
-PAGE_2 = """
-<html>
-<body>
-Hello.
-<a href="/bar/#fragment">Relative</a>
-</html>
-"""
-
-
-def test_one_from_str():
-    extractor = HtmlLinkExtractor()
-
-    results = extractor.extract_one(HtmlInput(PAGE_1, base_url="https://foo.com/bar/"))
-    assert results == {
-        Link.incoming(kind="hyperlink", tag="https://foo.com/bar/"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/bar/relative"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/relative-base"),
-        Link.outgoing(kind="hyperlink", tag="http://cnn.com"),
-        Link.outgoing(kind="hyperlink", tag="https://same.foo"),
-    }
-
-    results = extractor.extract_one(HtmlInput(PAGE_1, base_url="http://foo.com/bar/"))
-    assert results == {
-        Link.incoming(kind="hyperlink", tag="http://foo.com/bar/"),
-        Link.outgoing(kind="hyperlink", tag="http://foo.com/bar/relative"),
-        Link.outgoing(kind="hyperlink", tag="http://foo.com/relative-base"),
-        Link.outgoing(kind="hyperlink", tag="http://cnn.com"),
-        Link.outgoing(kind="hyperlink", tag="http://same.foo"),
-    }
-
-
-def test_one_from_beautiful_soup():
-    extractor = HtmlLinkExtractor()
-    soup = BeautifulSoup(PAGE_1, "html.parser")
-    results = extractor.extract_one(HtmlInput(soup, base_url="https://foo.com/bar/"))
-    assert results == {
-        Link.incoming(kind="hyperlink", tag="https://foo.com/bar/"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/bar/relative"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/relative-base"),
-        Link.outgoing(kind="hyperlink", tag="http://cnn.com"),
-        Link.outgoing(kind="hyperlink", tag="https://same.foo"),
-    }
-
-
-def test_drop_fragments():
-    extractor = HtmlLinkExtractor(drop_fragments=True)
-    results = extractor.extract_one(
-        HtmlInput(PAGE_2, base_url="https://foo.com/baz/#fragment")
-    )
-
-    assert results == {
-        Link.incoming(kind="hyperlink", tag="https://foo.com/baz/"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/bar/"),
-    }
-
-
-def test_include_fragments():
-    extractor = HtmlLinkExtractor(drop_fragments=False)
-    results = extractor.extract_one(
-        HtmlInput(PAGE_2, base_url="https://foo.com/baz/#fragment")
-    )
-
-    assert results == {
-        Link.incoming(kind="hyperlink", tag="https://foo.com/baz/#fragment"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/bar/#fragment"),
-    }
-
-
-def test_batch_from_str():
-    extractor = HtmlLinkExtractor()
-    results = list(
-        extractor.extract_many(
-            [
-                HtmlInput(PAGE_1, base_url="https://foo.com/bar/"),
-                HtmlInput(PAGE_2, base_url="https://foo.com/baz/"),
-            ]
-        )
-    )
-
-    assert results[0] == {
-        Link.incoming(kind="hyperlink", tag="https://foo.com/bar/"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/bar/relative"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/relative-base"),
-        Link.outgoing(kind="hyperlink", tag="http://cnn.com"),
-        Link.outgoing(kind="hyperlink", tag="https://same.foo"),
-    }
-    assert results[1] == {
-        Link.incoming(kind="hyperlink", tag="https://foo.com/baz/"),
-        Link.outgoing(kind="hyperlink", tag="https://foo.com/bar/"),
-    }
diff --git a/libs/langchain/tests/unit_tests/test_keybert_link_extractor.py b/libs/langchain/tests/unit_tests/test_keybert_link_extractor.py
index b58fdb275..947c1f117 100644
--- a/libs/langchain/tests/unit_tests/test_keybert_link_extractor.py
+++ b/libs/langchain/tests/unit_tests/test_keybert_link_extractor.py
@@ -1,5 +1,5 @@
+from langchain_community.graph_vectorstores.extractors import KeybertLinkExtractor
 from langchain_core.graph_vectorstores import Link
-from ragstack_langchain.graph_store.extractors import KeybertLinkExtractor
 
 PAGE_1 = """
 Supervised learning is the machine learning task of learning a function that
diff --git a/libs/langchain/tests/unit_tests/test_link_extractor_transformer.py b/libs/langchain/tests/unit_tests/test_link_extractor_transformer.py
index 5bcc0e6dc..9902c65b7 100644
--- a/libs/langchain/tests/unit_tests/test_link_extractor_transformer.py
+++ b/libs/langchain/tests/unit_tests/test_link_extractor_transformer.py
@@ -1,22 +1,37 @@
-from langchain_core.documents import Document
-from langchain_core.graph_vectorstores.links import Link, get_links
-from ragstack_langchain.graph_store.extractors import (
-    HtmlLinkExtractor,
-    LinkExtractorTransformer,
-)
-from ragstack_langchain.graph_store.extractors.gliner_link_extractor import (
+from langchain_community.graph_vectorstores.extractors import (
     GLiNERLinkExtractor,
-)
-from ragstack_langchain.graph_store.extractors.keybert_link_extractor import (
+    HtmlLinkExtractor,
     KeybertLinkExtractor,
 )
+from langchain_core.documents import Document
+from langchain_core.graph_vectorstores.links import Link, get_links
+from ragstack_langchain.graph_store.extractors import LinkExtractorTransformer
 
 from . import (
     test_gliner_link_extractor,
-    test_html_link_extractor,
     test_keybert_link_extractor,
 )
 
+PAGE_1 = """
+<html>
+<body>
+Hello.
+<a href="relative">Relative</a>
+<a href="/relative-base">Relative base.</a>
+<a href="http://cnn.com">Aboslute</a>
+<a href="//same.foo">Test</a>
+</body>
+</html>
+"""
+
+PAGE_2 = """
+<html>
+<body>
+Hello.
+<a href="/bar/#fragment">Relative</a>
+</html>
+"""
+
 
 def test_html_extractor():
     transformer = LinkExtractorTransformer(
@@ -25,13 +40,13 @@ def test_html_extractor():
         ]
     )
     doc1 = Document(
-        page_content=test_html_link_extractor.PAGE_1,
+        page_content=PAGE_1,
         metadata={
             "source": "https://foo.com/bar/",
         },
     )
     doc2 = Document(
-        page_content=test_html_link_extractor.PAGE_2,
+        page_content=PAGE_2,
         metadata={
             "source": "https://foo.com/baz/",
         },