
Overview

Vector stores hold embedded data and perform vector search. They enable semantic search over unstructured data by embedding content and retrieving the items most similar to a query.

VectorStore

Abstract base class for vector store implementations. Source: langchain_core.vectorstores.base:43

Properties

embeddings
Embeddings | None
The embedding model used by the vector store. Returns None if not implemented.

Document Management

add_texts

def add_texts(
    self,
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    *,
    ids: list[str] | None = None,
    **kwargs: Any
) -> list[str]
Add texts to the vector store.
texts
Iterable[str]
required
Texts to add to the vector store
metadatas
list[dict] | None
Optional metadata for each text. Must match length of texts.
ids
list[str] | None
Optional IDs for each text
**kwargs
Any
Vector store-specific parameters
return
list[str]
List of IDs of added texts
Raises:
  • ValueError: If the number of metadatas or IDs doesn't match the number of texts
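
Example (a minimal sketch; vectorstore stands for any concrete VectorStore instance, such as those built under Example Usage below):

ids = vectorstore.add_texts(
    texts=["alpha", "beta"],
    metadatas=[{"source": "a"}, {"source": "b"}],  # one dict per text
    ids=["id-1", "id-2"],  # optional; most stores generate IDs if omitted
)
print(ids)  # IDs the texts were stored under, here ["id-1", "id-2"]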

aadd_texts

async def aadd_texts(
    self,
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    *,
    ids: list[str] | None = None,
    **kwargs: Any
) -> list[str]
Async version of add_texts.

add_documents

def add_documents(
    self,
    documents: list[Document],
    **kwargs: Any
) -> list[str]
Add documents to the vector store.
documents
list[Document]
required
Documents to add. Text is extracted from each document's page_content field, metadata from its metadata field.
return
list[str]
List of IDs of added documents
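
Example (a sketch; vectorstore is an existing instance):

from langchain_core.documents import Document

docs = [
    Document(page_content="foo", metadata={"source": "a"}),
    Document(page_content="bar", metadata={"source": "b"}),
]
ids = vectorstore.add_documents(docs)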

aadd_documents

async def aadd_documents(
    self,
    documents: list[Document],
    **kwargs: Any
) -> list[str]
Async version of add_documents.

delete

def delete(
    self,
    ids: list[str] | None = None,
    **kwargs: Any
) -> bool | None
Delete documents by ID.
ids
list[str] | None
IDs to delete. If None, delete all.
return
bool | None
True if successful, False if failed, None if not implemented
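
Example (a sketch; return semantics vary by backing store):

result = vectorstore.delete(ids=["id-1", "id-2"])
if result is None:
    print("delete is not implemented for this store")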

adelete

async def adelete(
    self,
    ids: list[str] | None = None,
    **kwargs: Any
) -> bool | None
Async version of delete.

get_by_ids

def get_by_ids(
    self,
    ids: Sequence[str],
    /
) -> list[Document]
Get documents by their IDs.
ids
Sequence[str]
required
IDs to retrieve
return
list[Document]
Documents with matching IDs. May return fewer than requested if some IDs not found.
Note: Does not raise exceptions for missing IDs. Order not guaranteed to match input.
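
Example (a sketch illustrating the silent-miss behavior):

docs = vectorstore.get_by_ids(["id-1", "does-not-exist"])
# Missing IDs are skipped rather than raised, so the result can be
# shorter than the input and in a different order
print(len(docs))  # 1, assuming only "id-1" exists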

aget_by_ids

async def aget_by_ids(
    self,
    ids: Sequence[str],
    /
) -> list[Document]
Async version of get_by_ids.

Similarity Search

similarity_search

def similarity_search(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any
) -> list[Document]
Return documents most similar to the query. Must be implemented by subclasses.
query
str
required
Query text to search for
k
int
default:"4"
Number of documents to return
**kwargs
Any
Vector store-specific parameters (e.g., filter, namespace)
return
list[Document]
List of most similar documents

asimilarity_search

async def asimilarity_search(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any
) -> list[Document]
Async version of similarity_search.

similarity_search_with_score

def similarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any
) -> list[tuple[Document, float]]
Run similarity search with distance scores.
return
list[tuple[Document, float]]
List of (document, similarity_score) tuples

asimilarity_search_with_score

async def asimilarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any
) -> list[tuple[Document, float]]
Async version of similarity_search_with_score.

similarity_search_by_vector

def similarity_search_by_vector(
    self,
    embedding: list[float],
    k: int = 4,
    **kwargs: Any
) -> list[Document]
Search using an embedding vector directly.
embedding
list[float]
required
Embedding vector to search with
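
Example (a sketch; embeddings stands for the same Embeddings model used to build the store, so query vectors live in the same space):

query_vector = embeddings.embed_query("foo")  # embed the query yourself
docs = vectorstore.similarity_search_by_vector(query_vector, k=2)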

similarity_search_with_relevance_scores

def similarity_search_with_relevance_scores(
    self,
    query: str,
    k: int = 4,
    *,
    score_threshold: float | None = None,
    **kwargs: Any
) -> list[tuple[Document, float]]
Search with normalized relevance scores (0-1 scale, higher is better).
score_threshold
float | None
Minimum relevance score (0-1). Documents below this are filtered out.
return
list[tuple[Document, float]]
List of (document, relevance_score) tuples
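
Example (a sketch; unlike similarity_search_with_score, scores here are normalized to 0-1):

results = vectorstore.similarity_search_with_relevance_scores(
    "foo",
    k=4,
    score_threshold=0.8,  # drop anything scoring below 0.8
)
for doc, score in results:
    print(f"{score:.2f} {doc.page_content}")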

Maximum Marginal Relevance (MMR)

max_marginal_relevance_search

def max_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    **kwargs: Any
) -> list[Document]
Return diverse documents using Maximum Marginal Relevance.
k
int
default:"4"
Number of documents to return
fetch_k
int
default:"20"
Number of documents to fetch before applying MMR
lambda_mult
float
default:"0.5"
Diversity parameter (0-1):
  • 1: Maximum relevance (no diversity)
  • 0: Maximum diversity (no relevance)
return
list[Document]
Diverse set of relevant documents

amax_marginal_relevance_search

async def amax_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    **kwargs: Any
) -> list[Document]
Async version of max_marginal_relevance_search.

Unified Search Interface

search

def search(
    self,
    query: str,
    search_type: Literal["similarity", "mmr", "similarity_score_threshold"],
    **kwargs: Any
) -> list[Document]
Unified search interface supporting multiple search types.
search_type
str
required
Type of search:
  • 'similarity': Standard similarity search
  • 'mmr': Maximum marginal relevance
  • 'similarity_score_threshold': Similarity with score threshold
return
list[Document]
Search results
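
Example (a sketch; extra kwargs are forwarded to the underlying search method):

# Dispatches to max_marginal_relevance_search
docs = vectorstore.search("foo", search_type="mmr", k=2, fetch_k=10)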

asearch

async def asearch(
    self,
    query: str,
    search_type: Literal["similarity", "mmr", "similarity_score_threshold"],
    **kwargs: Any
) -> list[Document]
Async version of search.

Construction Methods

from_texts

@classmethod
def from_texts(
    cls,
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    **kwargs: Any
) -> VectorStore
Create a vector store from a list of texts.
texts
list[str]
required
Texts to add
embedding
Embeddings
required
Embedding model to use
metadatas
list[dict] | None
Metadata for each text
return
VectorStore
New vector store instance

afrom_texts

@classmethod
async def afrom_texts(
    cls,
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    **kwargs: Any
) -> VectorStore
Async version of from_texts.

from_documents

@classmethod
def from_documents(
    cls,
    documents: list[Document],
    embedding: Embeddings,
    **kwargs: Any
) -> VectorStore
Create a vector store from documents.
documents
list[Document]
required
Documents to add
embedding
Embeddings
required
Embedding model to use
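
Example (a sketch using InMemoryVectorStore, documented below; any concrete subclass works the same way):

from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

docs = [Document(page_content="foo", metadata={"source": "a"})]
vectorstore = InMemoryVectorStore.from_documents(docs, OpenAIEmbeddings())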

afrom_documents

@classmethod
async def afrom_documents(
    cls,
    documents: list[Document],
    embedding: Embeddings,
    **kwargs: Any
) -> VectorStore
Async version of from_documents.

Retriever Conversion

as_retriever

def as_retriever(
    self,
    *,
    search_type: Literal["similarity", "mmr", "similarity_score_threshold"] = "similarity",
    search_kwargs: dict[str, Any] | None = None,
    **kwargs: Any
) -> VectorStoreRetriever
Convert the vector store to a retriever.
search_type
str
default:"'similarity'"
Type of search to use
search_kwargs
dict | None
Keyword arguments for search method. Common keys:
  • k: Number of documents
  • score_threshold: Minimum score
  • fetch_k: For MMR
  • lambda_mult: For MMR
return
VectorStoreRetriever
Retriever wrapping the vector store

Example Usage

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

# Create from texts
vectorstore = FAISS.from_texts(
    texts=["foo", "bar", "baz"],
    embedding=OpenAIEmbeddings(),
    metadatas=[{"source": "a"}, {"source": "b"}, {"source": "c"}]
)

# Similarity search
docs = vectorstore.similarity_search("foo", k=2)

# Search with scores
docs_and_scores = vectorstore.similarity_search_with_score("foo", k=2)

# MMR for diverse results
docs = vectorstore.max_marginal_relevance_search(
    "foo",
    k=2,
    fetch_k=10,
    lambda_mult=0.5
)

# As retriever
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.8}
)
docs = retriever.invoke("foo")

InMemoryVectorStore

Simple in-memory vector store for testing and prototyping. Source: langchain_core.vectorstores.in_memory. Inherits: VectorStore.

Stores vectors in memory and uses numpy for similarity computation.

Example

from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

vectorstore = InMemoryVectorStore(embedding=OpenAIEmbeddings())

# Add documents
vectorstore.add_texts(
    texts=["LangChain is great", "Vector stores are useful"],
    metadatas=[{"source": "a"}, {"source": "b"}]
)

# Search
results = vectorstore.similarity_search("LangChain", k=1)

Relevance Score Functions

Vector stores provide static methods for converting distances to relevance scores:

_euclidean_relevance_score_fn

@staticmethod
def _euclidean_relevance_score_fn(distance: float) -> float
Convert Euclidean distance to relevance score (0-1).

_cosine_relevance_score_fn

@staticmethod
def _cosine_relevance_score_fn(distance: float) -> float
Convert cosine distance to relevance score (0-1).

_max_inner_product_relevance_score_fn

@staticmethod
def _max_inner_product_relevance_score_fn(distance: float) -> float
Convert max inner product distance to relevance score.
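
For intuition, a sketch of how such conversions are commonly defined (the exact defaults are implementation details that subclasses may override):

import math

def euclidean_relevance(distance: float) -> float:
    # Orthogonal unit vectors are sqrt(2) apart, used here as the scale
    return 1.0 - distance / math.sqrt(2)

def cosine_relevance(distance: float) -> float:
    # Cosine distance is 1 - cosine similarity, so this recovers similarity
    return 1.0 - distance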
