diff --git a/poetry.lock b/poetry.lock index 58749d4..eae01b5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -805,6 +805,17 @@ files = [ [package.dependencies] jsonpointer = ">=1.9" +[[package]] +name = "jsonpath-python" +version = "1.0.6" +description = "A more powerful JSONPath implementation in modern python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"}, + {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, +] + [[package]] name = "jsonpointer" version = "2.4" @@ -1475,6 +1486,20 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "python-iso639" version = "2023.6.15" @@ -2106,13 +2131,13 @@ typing-extensions = ">=3.7.4" [[package]] name = "unstructured" -version = "0.11.2" +version = "0.11.8" description = "A library that prepares raw documents for downstream ML tasks." optional = false python-versions = ">=3.7.0" files = [ - {file = "unstructured-0.11.2-py3-none-any.whl", hash = "sha256:4c77696f159afb1ac8d0806ec4e22d473dc7440b775728709a535f660fb2cd13"}, - {file = "unstructured-0.11.2.tar.gz", hash = "sha256:68292dfd1616a3e428ac314d6efd4e473c9363c1b2b0ae799031916eee3cc0a4"}, + {file = "unstructured-0.11.8-py3-none-any.whl", hash = "sha256:71e8d135a723d8c692a0a43683e3dca4a9b3e0fd8a1d255f57689591ed3860c0"}, + {file = "unstructured-0.11.8.tar.gz", hash = "sha256:dba8b2d54fdc781acef6c9590510e1f55e7467abaaf00403031331903d415f07"}, ] [package.dependencies] @@ -2132,16 +2157,18 @@ rapidfuzz = "*" requests = "*" tabulate = "*" typing-extensions = "*" +unstructured-client = "*" wrapt = "*" [package.extras] airtable = ["pyairtable"] -all-docs = ["markdown", "msg-parser", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.0)", "python-pptx (<=0.6.23)", "unstructured-inference (==0.7.15)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +all-docs = ["markdown", "msg-parser", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.0)", "python-pptx (<=0.6.23)", "unstructured-inference (==0.7.18)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] azure = ["adlfs", "fsspec (==2023.9.1)"] azure-cognitive-search = ["azure-search-documents"] bedrock = ["boto3", "langchain"] biomed = ["bs4"] box = ["boxfs", "fsspec (==2023.9.1)"] +chroma = ["chromadb"] confluence = ["atlassian-python-api"] csv = ["pandas"] delta-table = ["deltalake", "fsspec (==2023.9.1)"] @@ -2158,9 +2185,9 @@ gitlab = ["python-gitlab"] google-drive = ["google-api-python-client"] hubspot = ["hubspot-api-client", "urllib3 (>=1.26.17)"] huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"] -image = ["onnx", "pdf2image", "pdfminer.six", "pikepdf", "pypdf", "unstructured-inference (==0.7.15)", "unstructured.pytesseract (>=0.3.12)"] +image = ["onnx", "pdf2image", "pdfminer.six", "pikepdf", "pypdf", "unstructured-inference (==0.7.18)", "unstructured.pytesseract (>=0.3.12)"] jira = ["atlassian-python-api"] -local-inference = ["markdown", "msg-parser", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.0)", "python-pptx (<=0.6.23)", "unstructured-inference (==0.7.15)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +local-inference = ["markdown", "msg-parser", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.0)", "python-pptx (<=0.6.23)", "unstructured-inference (==0.7.18)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] md = ["markdown"] mongodb = ["pymongo"] msg = ["msg-parser"] @@ -2171,21 +2198,54 @@ openai = ["langchain", "openai", "tiktoken"] org = ["pypandoc"] outlook = ["Office365-REST-Python-Client (<2.4.3)", "msal"] paddleocr = ["unstructured.paddleocr (==2.6.1.3)"] -pdf = ["onnx", "pdf2image", "pdfminer.six", "pikepdf", "pypdf", "unstructured-inference (==0.7.15)", "unstructured.pytesseract (>=0.3.12)"] +pdf = ["onnx", "pdf2image", "pdfminer.six", "pikepdf", "pypdf", "unstructured-inference (==0.7.18)", "unstructured.pytesseract (>=0.3.12)"] pinecone = ["pinecone-client"] ppt = ["python-pptx (<=0.6.23)"] pptx = ["python-pptx (<=0.6.23)"] +qdrant = ["qdrant-client"] reddit = ["praw"] rst = ["pypandoc"] rtf = ["pypandoc"] s3 = ["fsspec (==2023.9.1)", "s3fs"] salesforce = ["simple-salesforce"] +sftp = ["fsspec", "paramiko"] sharepoint = ["Office365-REST-Python-Client (<2.4.3)", "msal"] slack = ["slack-sdk"] tsv = ["pandas"] +weaviate = ["weaviate-client"] wikipedia = ["wikipedia"] xlsx = ["networkx", "openpyxl", "pandas", "xlrd"] +[[package]] +name = "unstructured-client" +version = "0.15.2" +description = "Python Client SDK for Unstructured API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "unstructured-client-0.15.2.tar.gz", hash = "sha256:47a80d90abf214c0b695812ec5ef982d08b8ed7ffa84bc76cd208f8ddec9ba4c"}, + {file = "unstructured_client-0.15.2-py3-none-any.whl", hash = "sha256:0476ecc86a26709c768c4b03116e446646b192942e5dc6620a0e3845e0e28efc"}, +] + +[package.dependencies] +certifi = ">=2023.7.22" +charset-normalizer = ">=3.2.0" +dataclasses-json = ">=0.6.1" +idna = ">=3.4" +jsonpath-python = ">=1.0.6" +marshmallow = ">=3.19.0" +mypy-extensions = ">=1.0.0" +packaging = ">=23.1" +python-dateutil = ">=2.8.2" +requests = ">=2.31.0" +six = ">=1.16.0" +typing-extensions = ">=4.7.1" +typing-inspect = ">=0.9.0" +urllib3 = ">=1.26.18" + +[package.extras] +dev = ["pylint (==2.16.2)"] + [[package]] name = "urllib3" version = "2.0.7"