Qdrant 向量引擎¶
为什么要学 Qdrant¶
Qdrant 是一个高性能的开源向量搜索引擎,用 Rust 编写,以速度和可靠性著称。它支持精确的向量检索、丰富的过滤条件、有效载荷索引和分布式部署。对于需要在生产环境中处理大规模向量数据且对延迟敏感的应用(搜索、推荐、RAG),Qdrant 是性能最优的开源选择之一。
核心概念¶
| 概念 | 白话解释 | 用途 |
|---|---|---|
| Collection | 集合 | 存储向量点的容器 |
| Point | 点 | 一条记录(ID + 向量 + 有效载荷) |
| Payload | 有效载荷 | 附加在向量上的结构化数据 |
| Index | 索引 | 加速搜索的数据结构(HNSW) |
| Segment | 段 | 数据的物理存储单元 |
| Shard | 分片 | 分布式部署的数据分区 |
| Snapshot | 快照 | 数据备份 |
安装配置¶
Docker 部署(推荐)¶
Docker Compose¶
# Single-node Qdrant service with a named volume for persistent storage.
version: '3.8'

services:
  qdrant:
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"  # HTTP API
      - "6334:6334"  # gRPC
    volumes:
      - qdrant-data:/qdrant/storage
    environment:
      - QDRANT__SERVICE__GRPC_PORT=6334
      - QDRANT__SERVICE__HTTP_PORT=6333
      - QDRANT__STORAGE__STORAGE_PATH=/qdrant/storage
      # Placeholder: replace with a real secret before deploying.
      - QDRANT__SERVICE__API_KEY=your-api-key
    restart: unless-stopped

volumes:
  qdrant-data:
Python 客户端¶
本地内存模式(开发用)¶
from qdrant_client import QdrantClient

# The three constructors below are alternatives; pick the one that fits.

# In-memory mode (no server required)
client = QdrantClient(":memory:")

# Persist to a local directory
client = QdrantClient(path="./qdrant_local")

# Connect to a remote service.
# When api_key is given and https is left unset, the client defaults to
# HTTPS; the Docker service above speaks plain HTTP, so disable it explicitly.
client = QdrantClient(
    host="localhost",
    port=6333,
    api_key="your-api-key",
    https=False,
)
快速上手¶
创建集合¶
from qdrant_client import QdrantClient, models

client = QdrantClient("localhost", port=6333)

# Vector layout for the collection: 384 dimensions, cosine distance.
article_vectors = models.VectorParams(
    size=384,
    distance=models.Distance.COSINE,
)

# Create the collection
client.create_collection(
    collection_name="articles",
    vectors_config=article_vectors,
)
添加数据¶
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

documents = [
    {"title": "Python入门", "content": "Python是一种简洁的编程语言", "category": "python"},
    {"title": "Rust性能", "content": "Rust提供内存安全的高性能编程", "category": "rust"},
    {"title": "Go并发", "content": "Go语言以其简洁的并发模型著称", "category": "go"},
]

# Embed the "content" field of every document
vectors = model.encode([doc["content"] for doc in documents])

# Upload: the list position becomes the point ID and the whole document
# dict becomes the payload.
client.upsert(
    collection_name="articles",
    points=[
        models.PointStruct(id=idx, vector=vector.tolist(), payload=doc)
        for idx, (doc, vector) in enumerate(zip(documents, vectors))
    ],
)
搜索¶
# Semantic search: embed the query text and fetch the 3 closest points
query_vector = model.encode("高性能编程语言")
results = client.query_points(
    collection_name="articles",
    query=query_vector.tolist(),
    limit=3,
)

# Print the score and the stored title of each hit
for point in results.points:
    print(f"Score: {point.score:.4f} - {point.payload['title']}")
带过滤的搜索¶
# Only consider points whose payload "category" equals "python"
python_only = models.Filter(
    must=[
        models.FieldCondition(
            key="category",
            match=models.MatchValue(value="python"),
        ),
    ],
)

results = client.query_points(
    collection_name="articles",
    query=query_vector.tolist(),
    query_filter=python_only,
    limit=5,
)
进阶用法¶
复杂过滤¶
# Combined conditions.
# Named query_filter (not filter) so the built-in filter() is not shadowed.
query_filter = models.Filter(
    # must: year >= 2023
    must=[
        models.FieldCondition(key="year", range=models.Range(gte=2023)),
    ],
    # should: category is "python" or "rust"
    should=[
        models.FieldCondition(key="category", match=models.MatchValue(value="python")),
        models.FieldCondition(key="category", match=models.MatchValue(value="rust")),
    ],
    # must_not: status is "draft"
    must_not=[
        models.FieldCondition(key="status", match=models.MatchValue(value="draft")),
    ],
)
Payload 索引¶
# Create payload indexes to speed up filtering: one per (field, schema) pair
indexed_fields = (
    ("category", models.PayloadSchemaType.KEYWORD),
    ("year", models.PayloadSchemaType.INTEGER),
)
for field_name, field_schema in indexed_fields:
    client.create_payload_index(
        collection_name="articles",
        field_name=field_name,
        field_schema=field_schema,
    )
命名向量(多向量)¶
# One point can store several named vectors (e.g. a title vector and a
# content vector), each with its own size and distance.
named_vectors = {
    "title": models.VectorParams(size=384, distance=models.Distance.COSINE),
    "content": models.VectorParams(size=768, distance=models.Distance.COSINE),
}
client.create_collection(
    collection_name="multi_vec",
    vectors_config=named_vectors,
)

# Insert a point that supplies both named vectors.
# title_vec / content_vec are assumed to be computed beforehand by the caller.
multi_vec_point = models.PointStruct(
    id=1,
    vector={
        "title": title_vec.tolist(),
        "content": content_vec.tolist(),
    },
    payload={"title": "文章标题", "text": "文章内容"},
)
client.upsert(collection_name="multi_vec", points=[multi_vec_point])

# Choose which named vector the query is compared against
results = client.query_points(
    collection_name="multi_vec",
    query=query_vec.tolist(),
    using="content",
    limit=5,
)
量化压缩¶
# Scalar quantization (less memory, slight loss of precision)
int8_quantization = models.ScalarQuantization(
    scalar=models.ScalarQuantizationConfig(
        type=models.ScalarType.INT8,
        quantile=0.99,
        always_ram=True,
    ),
)

client.create_collection(
    collection_name="quantized",
    vectors_config=models.VectorParams(size=384, distance=models.Distance.COSINE),
    quantization_config=int8_quantization,
)
分布式部署¶
# docker-compose-cluster.yml
# Two-node cluster: node 1 starts the cluster, node 2 joins it.
version: '3.8'

services:
  qdrant-node-1:
    image: qdrant/qdrant:latest
    # --uri is the address at which other peers can reach this node.
    command: ./qdrant --uri http://qdrant-node-1:6335
    ports:
      - "6333:6333"
      - "6335:6335"  # P2P
    environment:
      - QDRANT__CLUSTER__ENABLED=true
      - QDRANT__CLUSTER__P2P__PORT=6335
    volumes:
      - node1-data:/qdrant/storage

  qdrant-node-2:
    image: qdrant/qdrant:latest
    # Joining an existing cluster is done with the --bootstrap CLI flag,
    # not with an environment variable.
    command: ./qdrant --bootstrap http://qdrant-node-1:6335
    depends_on:
      - qdrant-node-1
    ports:
      - "6336:6333"
      - "6337:6335"
    environment:
      - QDRANT__CLUSTER__ENABLED=true
      - QDRANT__CLUSTER__P2P__PORT=6335
    volumes:
      - node2-data:/qdrant/storage

# Named volumes referenced by the services must be declared here.
volumes:
  node1-data:
  node2-data:
快照与备份¶
# Create a snapshot
snapshot = client.create_snapshot(collection_name="articles")
print(f"Snapshot: {snapshot.name}")

# List existing snapshots
snapshots = client.list_snapshots(collection_name="articles")

# Restore (via the REST API); the snapshot location goes in the request body:
# PUT /collections/{collection_name}/snapshots/recover
# {"location": "<URL or file:// URI of the snapshot file>"}
常见问题¶
Q: Qdrant vs Chroma vs Weaviate?¶
- Qdrant:Rust 编写,性能最优,过滤能力强,适合大规模生产
- Chroma:Python 原生,最简单易用,适合原型和中小规模
- Weaviate:功能最丰富(混合搜索、生成式等),适合全栈 AI 平台
Q: 内存消耗大怎么办?¶
- 启用标量量化(向量内存占用约降至原来的 1/4)
- 配置 `on_disk` 存储向量
- 使用 `memmap` 将数据映射到磁盘
Q: 支持多少量级的数据?¶
单节点可处理数百万到千万级向量。通过分片和集群可扩展到更大规模。
Q: 如何集成到 LangChain?¶
from langchain_qdrant import QdrantVectorStore

# Where the vector store lives: server URL and target collection.
qdrant_target = {
    "url": "http://localhost:6333",
    "collection_name": "langchain_docs",
}

# documents / embeddings are assumed to be prepared beforehand by the caller.
vectorstore = QdrantVectorStore.from_documents(
    documents,
    embeddings,
    **qdrant_target,
)
参考资源¶
- GitHub:https://github.com/qdrant/qdrant
- 文档:https://qdrant.tech/documentation/
- API Reference:https://api.qdrant.tech/
- Python Client:https://github.com/qdrant/qdrant-client
- 教程:https://qdrant.tech/documentation/tutorials/
- Discord:https://discord.gg/qdrant