Overview
The Document Management module provides comprehensive document control with revision tracking, metadata management, AI-powered search, and collaborative features like PDF annotations.
Key Features
Version Control: Track document revisions with complete history
AI Search: Semantic search using Google Gemini embeddings
PDF Annotations: Add comments and pins directly on PDF pages
Metadata Management: Flexible custom metadata fields
Automated Processing: n8n integration for text extraction and analysis
Access Control: Fine-grained permissions and traceability
Data Model
Documento (Document)
class Documento(models.Model):
    """Controlled document: identification, classification, links and status.

    Uploaded files are not stored on this model; each file is a ``Revision``
    row that references the document through a foreign key.
    """

    ESTADOS = [
        ('BORRADOR', 'Draft'),
        ('EN_REVISION', 'In Review'),
        ('APROBADO', 'Approved'),
        ('OBSOLETO', 'Obsolete'),
    ]

    # Identification
    codigo = models.CharField(max_length=100, unique=True)
    titulo = models.CharField(max_length=500)
    descripcion = models.TextField(blank=True)

    # Classification
    tipo = models.ForeignKey('TipoDocumento', on_delete=models.PROTECT)
    # on_delete=SET_NULL needs a nullable column, otherwise Django's system
    # check fails with fields.E320; blank=True keeps forms consistent.
    disciplina = models.ForeignKey(
        'Disciplina', on_delete=models.SET_NULL, null=True, blank=True
    )

    # Relationships
    activo = models.ForeignKey(
        'activos.Activo', on_delete=models.SET_NULL, null=True, blank=True
    )
    ubicacion = models.ForeignKey(
        'activos.Ubicacion', on_delete=models.SET_NULL, null=True, blank=True
    )

    # Status
    # Default to the first workflow state so rows created without an explicit
    # estado (as upload_documento does) do not end up with an empty string.
    estado = models.CharField(max_length=20, choices=ESTADOS, default='BORRADOR')

    # Metadata
    creado_por = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, blank=True
    )
    fecha_creacion = models.DateTimeField(auto_now_add=True)
Revision (Document Revision)
class Revision(models.Model):
    """One uploaded file version of a ``Documento``, plus its AI-processing state."""

    # related_name matches the ``revisiones__...`` lookup used by the
    # keyword search in buscar_hibrido.
    documento = models.ForeignKey(
        'Documento', on_delete=models.CASCADE, related_name='revisiones'
    )
    numero_revision = models.CharField(max_length=20)

    # File storage (MinIO/S3)
    archivo = models.FileField(upload_to='documentos/')
    nombre_archivo = models.CharField(max_length=255)
    tamano = models.BigIntegerField()  # bytes

    # AI processing
    texto_extraido = models.TextField(blank=True)
    embedding = VectorField(dimensions=768, blank=True, null=True)
    procesado = models.BooleanField(default=False)

    # Tracking
    # on_delete=SET_NULL needs a nullable column, otherwise Django's system
    # check fails with fields.E320.
    subido_por = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, blank=True
    )
    fecha_subida = models.DateTimeField(auto_now_add=True)

    # Change description
    comentarios = models.TextField(blank=True)
class MetadatoConfig(models.Model):
    """Definition of a custom metadata field attached to a document type."""

    # Allowed value kinds for a metadata field.
    TIPOS = [
        ('TEXT', 'Text'),
        ('NUMBER', 'Number'),
        ('DATE', 'Date'),
        ('SELECT', 'Dropdown'),
    ]

    nombre = models.CharField(max_length=100)
    tipo = models.CharField(
        max_length=20,
        choices=TIPOS,
    )
    # Deleting the document type removes its field definitions too.
    tipo_documento = models.ForeignKey(
        'TipoDocumento',
        on_delete=models.CASCADE,
    )
    # NOTE(review): presumably marks the field as required; confirm against
    # the form/validation code, which is not shown here.
    obligatorio = models.BooleanField(default=False)
    # Only meaningful for the SELECT type.
    opciones = models.JSONField(
        blank=True,
        null=True,
    )
class MetadatoValor(models.Model):
    """Value of one ``MetadatoConfig`` field for one ``Documento``."""

    # Both links cascade: the value disappears with either its document or
    # its field definition.
    documento = models.ForeignKey(
        'Documento',
        on_delete=models.CASCADE,
    )
    metadato = models.ForeignKey(
        'MetadatoConfig',
        on_delete=models.CASCADE,
    )
    # Stored as text regardless of the configured field type.
    valor = models.TextField()
Document Upload and Processing
Upload Workflow
Upload File
# User uploads document file to MinIO storage
def upload_documento(request):
    """Create a ``Documento`` and its initial ``Revision`` from an uploaded file.

    Reads the file from ``request.FILES['archivo']`` and the ``titulo`` and
    ``tipo`` values from ``request.POST``.

    Returns:
        JsonResponse: status 201 with the new document code and revision id,
        or status 400 when the file, title or type is missing.

    NOTE(review): nothing in this view enqueues ``procesar_documento``;
    presumably that happens elsewhere (signal or caller) - confirm.
    """
    from django.db import transaction
    from django.http import JsonResponse

    archivo = request.FILES.get('archivo')
    titulo = request.POST.get('titulo')
    tipo_id = request.POST.get('tipo')
    if archivo is None or not titulo or not tipo_id:
        return JsonResponse(
            {'error': 'archivo, titulo and tipo are required'}, status=400
        )

    # Create both rows in one transaction so a failed revision insert does
    # not leave behind a document without any file.
    with transaction.atomic():
        documento = Documento.objects.create(
            codigo=generate_codigo(),
            titulo=titulo,
            tipo_id=tipo_id,
            creado_por=request.user,
        )
        revision = Revision.objects.create(
            documento=documento,
            numero_revision='0',
            archivo=archivo,
            # Both columns are required on Revision; take them from the upload.
            nombre_archivo=archivo.name,
            tamano=archivo.size,
            subido_por=request.user,
        )

    return JsonResponse(
        {'documento': documento.codigo, 'revision_id': revision.id},
        status=201,
    )
Trigger n8n Workflow
# Send webhook to n8n for processing
@shared_task
def procesar_documento(revision_id):
    """Ask the n8n workflow to process the file of the given revision.

    Posts the revision id, the file URL and a callback URL to
    ``settings.N8N_PROCESS_DOCUMENT_WEBHOOK_URL``.

    Args:
        revision_id: Primary key of the ``Revision`` to process.

    Raises:
        Revision.DoesNotExist: If no revision has that id.
        requests.RequestException: If the webhook cannot be reached, times
            out, or answers with an HTTP error status.
    """
    revision = Revision.objects.get(id=revision_id)

    # Trigger n8n workflow
    response = requests.post(
        settings.N8N_PROCESS_DOCUMENT_WEBHOOK_URL,
        json={
            'revision_id': revision.id,
            'archivo_url': revision.archivo.url,
            'callback_url': f'{settings.INTERNAL_SITE_URL}/documentos/callback/',
        },
        # requests has no default timeout; without one a stalled webhook
        # would block the worker indefinitely.
        timeout=30,
    )
    # Surface 4xx/5xx answers as a task failure instead of ignoring them.
    response.raise_for_status()
Extract Text (n8n)
n8n extracts text from PDF using OCR/PDF libraries
Generate Embeddings
# Create vector embeddings using Gemini API
@shared_task
def generar_embeddings(revision_id):
    """Compute and store the Gemini embedding of a revision's extracted text.

    On success the revision's ``embedding`` is set and ``procesado`` becomes
    True. A revision without extracted text is left untouched.

    Args:
        revision_id: Primary key of the ``Revision`` to embed.

    Raises:
        Revision.DoesNotExist: If no revision has that id.
    """
    revision = Revision.objects.get(id=revision_id)

    # Nothing to embed yet: do not call the API with empty content and do
    # not flag the revision as processed.
    if not revision.texto_extraido.strip():
        return

    # Generate embedding with Gemini
    import google.generativeai as genai

    genai.configure(api_key=settings.GEMINI_API_KEY)
    result = genai.embed_content(
        model="models/embedding-001",
        content=revision.texto_extraido,
    )

    # Store in pgvector
    revision.embedding = result['embedding']
    revision.procesado = True
    # Write only the two columns changed here so a concurrent update to
    # other fields of the same row is not overwritten.
    revision.save(update_fields=['embedding', 'procesado'])
Index for Search
Document is now searchable via text and semantic search
AI-Powered Search
Semantic Search
Find documents by meaning, not just keywords:
def buscar_semantico(request):
    """Return the revisions whose embedding is closest to the query text.

    Reads the search text from the ``q`` GET parameter, embeds it with
    Gemini and ranks processed revisions by cosine distance.

    Returns:
        JsonResponse: ``{'resultados': [...]}`` with up to 20 entries holding
        the document code, title and ``similarity`` (1 - cosine distance),
        or status 400 when ``q`` is missing or blank.
    """
    query = (request.GET.get('q') or '').strip()
    if not query:
        return JsonResponse({'error': 'q is required'}, status=400)

    # Generate query embedding
    import google.generativeai as genai

    genai.configure(api_key=settings.GEMINI_API_KEY)
    query_embedding = genai.embed_content(
        model="models/embedding-001",
        content=query,
    )['embedding']

    # Vector similarity search with pgvector
    from pgvector.django import CosineDistance

    resultados = (
        Revision.objects.filter(procesado=True)
        # The loop below reads r.documento; join it here to avoid one extra
        # query per result.
        .select_related('documento')
        .annotate(distancia=CosineDistance('embedding', query_embedding))
        .filter(distancia__lt=0.5)  # Similarity threshold
        .order_by('distancia')[:20]
    )

    return JsonResponse({
        'resultados': [
            {
                'documento': r.documento.codigo,
                'titulo': r.documento.titulo,
                'similarity': 1 - r.distancia,
            }
            for r in resultados
        ]
    })
Hybrid Search
Combine keyword and semantic search:
def buscar_hibrido(request):
    """Search documents by keyword and by meaning, and return merged results.

    Reads the search text from the ``q`` GET parameter. Keyword matching is a
    case-insensitive substring test on the document title, code and the
    extracted text of its revisions.

    Returns:
        JsonResponse: ``{'resultados': combined}`` where ``combined`` is
        whatever ``merge_results`` produces, or status 400 when ``q`` is
        missing or blank.
    """
    query = (request.GET.get('q') or '').strip()
    # Without this guard a missing parameter reaches ``icontains=None``,
    # which Django rejects with ValueError.
    if not query:
        return JsonResponse({'error': 'q is required'}, status=400)

    # Keyword search
    keyword_results = Documento.objects.filter(
        Q(titulo__icontains=query)
        | Q(codigo__icontains=query)
        | Q(revisiones__texto_extraido__icontains=query)
    ).distinct()  # the revision join can repeat a document

    # Semantic search
    semantic_results = buscar_semantico_interno(query)

    # Merge and rank results
    combined = merge_results(keyword_results, semantic_results)
    return JsonResponse({'resultados': combined})
PDF Annotations
class ComentarioDocumento(models.Model):
    """Comment pinned to a position on a PDF page, optionally a reply to another."""

    documento = models.ForeignKey(
        'Documento',
        on_delete=models.CASCADE,
    )
    revision = models.ForeignKey(
        'Revision',
        on_delete=models.CASCADE,
    )

    # Pin location: page number plus x/y expressed as a percentage (0-100).
    pagina = models.IntegerField()
    posicion_x = models.FloatField()
    posicion_y = models.FloatField()

    # Comment body, author and creation timestamp.
    texto = models.TextField()
    autor = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
    )
    fecha = models.DateTimeField(auto_now_add=True)

    # Threading: a reply points at its parent comment; replies are reachable
    # from the parent as ``respuestas`` and are deleted along with it.
    respuesta_a = models.ForeignKey(
        'self',
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='respuestas',
    )
Interactive PDF Viewer
Display PDF with clickable comment pins:
// Add comment pin to PDF
/**
 * Save a comment pinned at pixel position (x, y) on the given page.
 *
 * The position is converted to a percentage of the page size before it is
 * sent. Uses `documentoId`, `pageWidth`, `pageHeight`, `commentText` and
 * `csrfToken` from the enclosing scope.
 *
 * @param {number} pageNum - Page the pin belongs to.
 * @param {number} x - Horizontal pixel offset within the page.
 * @param {number} y - Vertical pixel offset within the page.
 * @returns {Promise<Response>} Resolves with the server response; rejects on
 *   a network error or a non-2xx status so callers can report the failure.
 */
function addComment(pageNum, x, y) {
  const comentario = {
    documento_id: documentoId,
    pagina: pageNum,
    posicion_x: (x / pageWidth) * 100,
    posicion_y: (y / pageHeight) * 100,
    texto: commentText
  };

  // Return the promise instead of dropping it, and treat HTTP errors
  // (e.g. a rejected CSRF token) as failures rather than ignoring them.
  return fetch('/documentos/comentarios/', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-CSRFToken': csrfToken
    },
    body: JSON.stringify(comentario)
  }).then((response) => {
    if (!response.ok) {
      throw new Error(`Failed to save comment (HTTP ${response.status})`);
    }
    return response;
  });
}
AI Chat Assistant
N8nChatHistory (Chat History)
class N8nChatHistory(models.Model):
    """One question/answer exchange of a user about a document."""

    # Entries are removed together with their document or their user.
    documento = models.ForeignKey(
        'Documento',
        on_delete=models.CASCADE,
    )
    usuario = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
    )

    # The exchange itself and when it was recorded.
    pregunta = models.TextField()
    respuesta = models.TextField()
    fecha = models.DateTimeField(auto_now_add=True)

    # NOTE(review): presumably groups the entries of one conversation;
    # confirm against the code that sets it, which is not shown here.
    session_id = models.CharField(max_length=100)
Chat Interface
Ask questions about document content:
def chat_documento(request, documento_id):
    """Answer a question about a document through the n8n chat workflow.

    Reads the question from the ``pregunta`` POST field, sends it with the
    first 2000 characters of the latest revision's extracted text to
    ``settings.N8N_CHAT_WEBHOOK_URL`` and stores the exchange in
    ``N8nChatHistory``.

    Args:
        request: The incoming HTTP request.
        documento_id: Primary key of the ``Documento`` being asked about.

    Returns:
        JsonResponse: ``{'respuesta': ...}`` on success, status 400 when the
        question is missing or blank, status 502 when the chat workflow
        fails or returns an unusable answer.

    Raises:
        Http404: If no document has that id.
    """
    from django.shortcuts import get_object_or_404

    documento = get_object_or_404(Documento, id=documento_id)

    pregunta = request.POST.get('pregunta')
    if not pregunta or not pregunta.strip():
        return JsonResponse({'error': 'pregunta is required'}, status=400)

    # NOTE(review): ultima_revision is not defined in the Documento snippet
    # shown in this file; presumably it is None when the document has no
    # revision yet - confirm. An empty context is sent in that case.
    revision = documento.ultima_revision
    contexto = revision.texto_extraido[:2000] if revision else ''

    # Send to n8n chat workflow
    try:
        response = requests.post(
            settings.N8N_CHAT_WEBHOOK_URL,
            json={
                'documento_id': documento.id,
                'pregunta': pregunta,
                'contexto': contexto,
            },
            # requests has no default timeout; bound the wait so a stalled
            # workflow cannot hold the request open forever.
            timeout=30,
        )
        response.raise_for_status()
        respuesta = response.json()['respuesta']
    except (requests.RequestException, KeyError, ValueError):
        # Unreachable workflow, HTTP error, invalid JSON or missing key.
        return JsonResponse({'error': 'chat service unavailable'}, status=502)

    # Save to history
    N8nChatHistory.objects.create(
        documento=documento,
        usuario=request.user,
        pregunta=pregunta,
        respuesta=respuesta,
    )
    return JsonResponse({'respuesta': respuesta})
API Endpoints
Document Search: Search with filters and AI
Upload & Processing: Upload files and track processing
Annotations: Manage PDF comments and pins
AI Chat: Chat with document content
Best Practices
Naming Convention: Use consistent document codes (e.g., DWG-ELEC-001 for electrical drawings)
Large Files: Files over 50MB may time out during processing - consider splitting them
Metadata Quality: Rich metadata improves search accuracy significantly
Assets: Link documents to equipment
Projects: Associate with projects
n8n Integration: Automation workflows