Skip to main content
pgvector is a PostgreSQL extension that adds vector similarity search capabilities. Store vectors alongside your relational data and perform fast similarity queries with SQL.

Installation

Enable the pgvector extension:
create extension vector;

Vector Types

Define vector columns with a specific dimension:
-- Create table with vector column
create table items (
  id bigserial primary key,
  embedding vector(1536)  -- 1536 dimensions
);

-- Insert vector
insert into items (embedding)
values ('[0.1, 0.2, 0.3, ...]'::vector);

-- Query vector
select * from items where id = 1;

Distance Operators

pgvector provides three distance operators:
-- Cosine distance (0 = identical, 2 = opposite)
select
  id,
  embedding <=> '[0.1, 0.2, 0.3]' as distance
from items
order by distance
limit 5;

Indexing

IVFFlat Index

Fast to build, good for most use cases:
-- Create IVFFlat index
create index on items 
using ivfflat (embedding vector_cosine_ops)
with (lists = 100);

-- Choose lists based on row count:
-- rows   | lists
-- -------+-------
-- 10K    | 30
-- 100K   | 100  
-- 1M     | 300
-- 10M    | 1000

HNSW Index

Better recall, slower to build:
-- Create HNSW index
create index on items 
using hnsw (embedding vector_cosine_ops)
with (m = 16, ef_construction = 64);

-- Parameters:
-- m: max connections per layer (higher = better recall, more memory)
-- ef_construction: size of dynamic candidate list (higher = better index quality)

Index for Different Distance Types

-- Cosine distance
create index on items using ivfflat (embedding vector_cosine_ops);

-- L2 distance  
create index on items using ivfflat (embedding vector_l2_ops);

-- Inner product
create index on items using ivfflat (embedding vector_ip_ops);

Functions

Vector Operations

-- Vector dimensions
select vector_dims(embedding) from items limit 1;

-- Vector norm (length)
select vector_norm(embedding) from items limit 1;

-- Add vectors
select embedding + '[0.1, 0.2, 0.3]'::vector from items;

-- Subtract vectors
select embedding - '[0.1, 0.2, 0.3]'::vector from items;

-- Multiply by scalar
select embedding * 2 from items;

Distance Functions

-- Cosine distance
select cosine_distance(embedding, '[0.1, 0.2]') from items;

-- L2 distance
select l2_distance(embedding, '[0.1, 0.2]') from items;

-- Inner product
select inner_product(embedding, '[0.1, 0.2]') from items;

Advanced Queries

Similarity Search with Threshold

select
  id,
  content,
  1 - (embedding <=> query_embedding) as similarity
from documents
where 1 - (embedding <=> query_embedding) > 0.8
order by embedding <=> query_embedding
limit 10;
-- Search within category
select *
from documents
where category = 'technical'
order by embedding <=> query_embedding
limit 10;

-- Search with date range
select *
from documents
where created_at > now() - interval '30 days'
order by embedding <=> query_embedding
limit 10;

Approximate Nearest Neighbors

-- Set ef_search for HNSW index (higher = better recall, slower)
set hnsw.ef_search = 100;

-- Perform search
select *
from items
order by embedding <=> query_embedding
limit 10;

Query Performance

Explain Plans

-- Check if index is used
explain (analyze, buffers)
select *
from items
order by embedding <=> query_embedding
limit 10;

-- Look for:
-- "Index Scan using items_embedding_idx"

Index Maintenance

-- Reindex if needed
reindex index items_embedding_idx;

-- Vacuum to reclaim space
vacuum analyze items;

Data Types and Limits

-- Maximum dimensions
create table items (
  embedding vector(16000)  -- Max 16,000 dimensions
);

-- Dimension must match
insert into items (embedding)
values ('[1,2,3]');  -- Error if column is vector(1536)

Migration Patterns

Add Vector Column to Existing Table

-- Add column
alter table articles
add column embedding vector(1536);

-- Backfill embeddings
update articles
set embedding = generate_embedding(content)
where embedding is null;

-- Create index
create index on articles 
using ivfflat (embedding vector_cosine_ops);

Change Vector Dimensions

-- Can't alter dimension directly
-- Create new column and migrate
alter table items
add column embedding_new vector(3072);

update items
set embedding_new = generate_new_embedding(content);

-- Drop old, rename new
alter table items drop column embedding;
alter table items rename column embedding_new to embedding;

Best Practices

Normalize Vectors: For cosine distance, normalize vectors to unit length for consistent results.
-- Normalize vectors
create function normalize_vector(vec vector)
returns vector as $$
  select (vec / vector_norm(vec))::vector
$$ language sql immutable;

insert into items (embedding)
values (normalize_vector('[1, 2, 3]'));
Index Size: Indexes can be large. Monitor disk usage and consider partitioning for large datasets.
Bulk Operations: Use copy or batch inserts for better performance when loading vectors.

Troubleshooting

-- Increase work_mem for index creation
set work_mem = '1GB';

-- Drop and recreate index
drop index items_embedding_idx;
create index on items using ivfflat (embedding vector_cosine_ops);

-- Check statistics
analyze items;
  • Use appropriate index type (IVFFlat vs HNSW)
  • Adjust index parameters (lists, m, ef_construction)
  • Add filters before similarity search
  • Consider materialized views for common queries
-- Reduce probes for IVFFlat
set ivfflat.probes = 10;

-- Reduce ef_search for HNSW  
set hnsw.ef_search = 40;

-- Increase maintenance_work_mem for index builds
set maintenance_work_mem = '2GB';

Next Steps

Similarity Search

Build semantic search with pgvector

Vector Embeddings

Generate and store embeddings

Database Functions

Create stored procedures with pgvector

AI Examples

Complete RAG application examples

Build docs developers (and LLMs) love