Build production-ready RAG pipelines using LangChain’s Docling integration with document-native chunking.

Overview

This example demonstrates:
  • Using DoclingLoader with LangChain
  • Document chunking with HybridChunker
  • Vector storage with Milvus
  • Retrieval-augmented generation with HuggingFace models
  • Document-native grounding with metadata

Quick Start

1. Install Dependencies: install LangChain with Docling integration and the required packages.
2. Load Documents: use DoclingLoader to convert and chunk documents.
3. Create Vector Store: embed and store document chunks in Milvus.
4. Build RAG Chain: create a retrieval-augmented generation pipeline.

Installation

pip install langchain-docling langchain-core langchain-huggingface \
    langchain-milvus langchain python-dotenv

Configuration

import os
from pathlib import Path
from tempfile import mkdtemp
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_docling.loader import ExportType

load_dotenv()

# Configuration
HF_TOKEN = os.getenv("HF_TOKEN")  # Optional, for increased quota
FILE_PATH = ["https://arxiv.org/pdf/2408.09869"]  # Docling Technical Report
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
GEN_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
EXPORT_TYPE = ExportType.DOC_CHUNKS  # or ExportType.MARKDOWN
QUESTION = "Which are the main AI models in Docling?"
TOP_K = 3
MILVUS_URI = str(Path(mkdtemp()) / "docling.db")

PROMPT = PromptTemplate.from_template(
    "Context information is below.\n"
    "---------------------\n{context}\n---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\nQuery: {input}\nAnswer:\n"
)

Document Loading

from langchain_docling import DoclingLoader
from docling.chunking import HybridChunker

# Load and chunk documents
loader = DoclingLoader(
    file_path=FILE_PATH,
    export_type=EXPORT_TYPE,
    chunker=HybridChunker(tokenizer=EMBED_MODEL_ID),
)

docs = loader.load()
splits = docs  # With ExportType.DOC_CHUNKS, each loaded document is already one chunk
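
If you set EXPORT_TYPE = ExportType.MARKDOWN instead, the loader returns one Markdown document per input, and you split it yourself before embedding. A minimal sketch using LangChain's MarkdownHeaderTextSplitter (the header label names are illustrative):

from langchain_text_splitters import MarkdownHeaderTextSplitter

# Split each Markdown export on its section headings
splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[
        ("#", "Header_1"),
        ("##", "Header_2"),
        ("###", "Header_3"),
    ],
)
splits = [split for doc in docs for split in splitter.split_text(doc.page_content)]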

Vector Store and Embeddings

from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_milvus import Milvus

# Create embeddings
embedding = HuggingFaceEmbeddings(model_name=EMBED_MODEL_ID)

# Create vector store
vectorstore = Milvus.from_documents(
    documents=splits,
    embedding=embedding,
    collection_name="docling_demo",
    connection_args={"uri": MILVUS_URI},
    index_params={"index_type": "FLAT"},
    drop_old=True,
)
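
Before wiring up the full chain, it is worth sanity-checking retrieval on its own. A quick sketch using the store's similarity search:

# Sanity check: fetch the top-K chunks for the question directly
for doc in vectorstore.similarity_search(QUESTION, k=TOP_K):
    print(doc.page_content[:100], "...")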

RAG Pipeline

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_huggingface import HuggingFaceEndpoint

# Create retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

# Create LLM
llm = HuggingFaceEndpoint(
    repo_id=GEN_MODEL_ID,
    huggingfacehub_api_token=HF_TOKEN,
)

# Create RAG chain
question_answer_chain = create_stuff_documents_chain(llm, PROMPT)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Query
resp_dict = rag_chain.invoke({"input": QUESTION})

print(f"Question: {resp_dict['input']}")
print(f"Answer: {resp_dict['answer'][:200]}...")

# Show sources
for i, doc in enumerate(resp_dict["context"]):
    print(f"\nSource {i + 1}:")
    print(f"  Text: {doc.page_content[:200]}...")
    print(f"  Metadata: {doc.metadata.get('dl_meta', {}).get('headings', [])}")

Document-Native Grounding

When using ExportType.DOC_CHUNKS, sources include rich metadata:

# Access chunk metadata
for doc in resp_dict["context"]:
    dl_meta = doc.metadata.get('dl_meta', {})
    
    # Document origin
    origin = dl_meta.get('origin', {})
    print(f"File: {origin.get('filename')}")
    
    # Section headings
    headings = dl_meta.get('headings', [])
    print(f"Section: {' > '.join(headings)}")
    
    # Document items with provenance
    doc_items = dl_meta.get('doc_items', [])
    for item in doc_items:
        prov = item.get('prov', [])
        if prov:
            print(f"Page: {prov[0].get('page_no')}")
            bbox = prov[0].get('bbox', {})
            print(f"Bbox: [{bbox.get('l')}, {bbox.get('t')}, "
                  f"{bbox.get('r')}, {bbox.get('b')}]")

Complete Example

import os
from pathlib import Path
from tempfile import mkdtemp

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import PromptTemplate
from langchain_docling import DoclingLoader
from langchain_docling.loader import ExportType
from langchain_huggingface import HuggingFaceEndpoint
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_milvus import Milvus

from docling.chunking import HybridChunker

# Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
FILE_PATH = ["https://arxiv.org/pdf/2408.09869"]
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
GEN_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
QUESTION = "Which are the main AI models in Docling?"
TOP_K = 3

# Load documents with chunking
loader = DoclingLoader(
    file_path=FILE_PATH,
    export_type=ExportType.DOC_CHUNKS,
    chunker=HybridChunker(tokenizer=EMBED_MODEL_ID),
)
docs = loader.load()

# Create vector store
embedding = HuggingFaceEmbeddings(model_name=EMBED_MODEL_ID)
vectorstore = Milvus.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="docling_demo",
    connection_args={"uri": str(Path(mkdtemp()) / "docling.db")},
    index_params={"index_type": "FLAT"},
    drop_old=True,
)

# Create RAG chain
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
llm = HuggingFaceEndpoint(
    repo_id=GEN_MODEL_ID,
    huggingfacehub_api_token=HF_TOKEN,
)

prompt = PromptTemplate.from_template(
    "Context: {context}\nQuestion: {input}\nAnswer:"
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Query
result = rag_chain.invoke({"input": QUESTION})
print(f"Q: {QUESTION}")
print(f"A: {result['answer']}")

Export Types

  • ExportType.DOC_CHUNKS (recommended): Each chunk is a separate LangChain document with rich metadata including page numbers, bounding boxes, and section headings
  • ExportType.MARKDOWN: Each input document becomes a single LangChain document in Markdown format; split it yourself before embedding (see the splitter sketch under Document Loading)

Tech Stack

Component      Technology                             Execution
Embedding      Hugging Face / Sentence Transformers   Local
Vector store   Milvus                                 Local
Gen AI         Hugging Face Inference API             Remote
For best conversion speed, use GPU acceleration when available (e.g., Colab GPU runtime).
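
A sketch of routing PDF conversion through CUDA, assuming a docling version that exposes AcceleratorOptions and a DoclingLoader that accepts a custom converter:

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    AcceleratorDevice,
    AcceleratorOptions,
    PdfPipelineOptions,
)
from docling.document_converter import DocumentConverter, PdfFormatOption

# Point the PDF pipeline at the GPU
pipeline_options = PdfPipelineOptions(
    accelerator_options=AcceleratorOptions(device=AcceleratorDevice.CUDA),
)
converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)}
)

loader = DoclingLoader(
    file_path=FILE_PATH,
    converter=converter,  # assumption: your langchain-docling version accepts this
    export_type=EXPORT_TYPE,
    chunker=HybridChunker(tokenizer=EMBED_MODEL_ID),
)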
