Adding a vector database like Pinecone, Weaviate, or Milvus can significantly improve performance and scalability when working with embeddings. These databases are optimized for large-scale vector storage and similarity searches, offering features like persistent storage, indexing, and distributed querying.
Here's how integrating a vector database can enhance performance in your current workflow:
- **Persistent Storage:** Embeddings live in the database instead of a local file (such as `embeddings.json`), so they survive restarts and never need to be recomputed.
- **Efficient Similarity Search:** The database's approximate nearest-neighbor index returns the closest vectors far faster than the brute-force comparison a local file forces you into (sketched just after this list).
- **Scalability:** The index can grow to millions of vectors without your application holding them all in memory.
- **Distributed Queries:** Searches run on the database's infrastructure, not inside your Python process.
- **Metadata Support:** Each vector can carry metadata, such as the original text chunk, so query results come back ready to use.
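For contrast, here is roughly what the local-file approach has to do on every query: load every stored vector and brute-force a cosine similarity against each one. This is the work the vector database's index takes over (a sketch with illustrative names, not code from the workflow itself):

```python
import numpy as np

def brute_force_top_k(query_vec, stored_vecs, k=3):
    """What a vector database replaces: compare the query against every stored vector."""
    stored = np.asarray(stored_vecs, dtype="float32")
    query = np.asarray(query_vec, dtype="float32")
    # Cosine similarity between the query and every stored row
    sims = stored @ query / (np.linalg.norm(stored, axis=1) * np.linalg.norm(query))
    # Indices of the k most similar chunks, best first
    return np.argsort(sims)[::-1][:k]
```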
The core workflow keeps the same three steps, now backed by the vector database:

1. **Load Knowledge Base and Create Embeddings:** chunk the text, embed each chunk, and upsert the vectors into the index.
2. **Search for Relevant Chunks:** embed the query and let the database return the nearest chunks.
3. **Generate the Final Answer:** pass the retrieved chunks plus the question to the chat model.
Pinecone is a managed vector database service, so there is no infrastructure of your own to run. Install the client library first:
```bash
pip install pinecone-client
```
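The script below reads its credentials from environment variables; a quick sanity check before running can save a confusing failure later (a minimal sketch using the same variable names as the script):

```python
import os

# Fail fast if any credential the script expects is missing.
for var in ("OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_ENV"):
    if not os.getenv(var):
        raise RuntimeError(f"Environment variable {var} is not set")
```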
```python
import os

import numpy as np
import pinecone
import requests


def initialize_pinecone(api_key, environment, index_name, dimension):
    """Initialize Pinecone and create an index if it doesn't exist."""
    pinecone.init(api_key=api_key, environment=environment)
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(name=index_name, dimension=dimension)
    return pinecone.Index(index_name)


def create_embedding(text, api_key):
    """Create an embedding using the OpenAI API."""
    api_url = "https://api.openai.com/v1/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "text-embedding-ada-002",
        "input": text
    }
    response = requests.post(api_url, headers=headers, json=payload, timeout=120)
    if response.status_code == 200:
        data = response.json()
        return np.array(data["data"][0]["embedding"], dtype="float32")
    print(f"Error in embedding creation: {response.status_code}, {response.text}")
    return None


def load_and_chunk_knowledge_base_with_overlap(file_path, chunk_size=50, overlap=10):
    """Load and chunk the knowledge base into word chunks with overlap."""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split the text into words
    words = content.split()

    # Create chunks with overlap
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = ' '.join(words[i:i + chunk_size])
        if chunk.strip():  # Ignore empty chunks
            chunks.append(chunk)
    return chunks


def index_embeddings(chunks, pinecone_index, api_key):
    """Create embeddings for chunks and store them in Pinecone."""
    print("Indexing embeddings in Pinecone...")
    for idx, chunk in enumerate(chunks):
        embedding = create_embedding(chunk, api_key)
        if embedding is not None:
            # Upsert the embedding with the chunk text as metadata
            pinecone_index.upsert([(f"chunk-{idx}", embedding.tolist(), {"text": chunk})])


def query_relevant_chunks(query, pinecone_index, api_key, top_k=3):
    """Query Pinecone for the most relevant chunks."""
    query_embedding = create_embedding(query, api_key)
    if query_embedding is not None:
        results = pinecone_index.query(
            vector=query_embedding.tolist(), top_k=top_k, include_metadata=True
        )
        return [match["metadata"]["text"] for match in results["matches"]]
    return []


def generate_chat_response(conversation_history, api_key):
    """Generate a chat response using the OpenAI API."""
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": conversation_history,
        "temperature": 0.7
    }
    response = requests.post(api_url, headers=headers, json=payload, timeout=120)
    if response.status_code == 200:
        data = response.json()
        if "choices" in data:
            return data["choices"][0]["message"]["content"]
    print(f"Error in chat response: {response.status_code}, {response.text}")
    return None


if __name__ == "__main__":
    # Configuration
    knowledge_base_file = "kn/kn.txt2"
    api_key = os.getenv("OPENAI_API_KEY")
    pinecone_api_key = os.getenv("PINECONE_API_KEY")
    pinecone_env = os.getenv("PINECONE_ENV")
    index_name = "knowledge-base"

    # Initialize Pinecone (1536 is the output dimension of text-embedding-ada-002)
    pinecone_index = initialize_pinecone(pinecone_api_key, pinecone_env, index_name, dimension=1536)

    # Load and chunk knowledge base
    chunks = load_and_chunk_knowledge_base_with_overlap(knowledge_base_file, chunk_size=50, overlap=10)

    # Index embeddings (comment this out if already indexed)
    index_embeddings(chunks, pinecone_index, api_key)

    # Query example
    query = "What is the key topic in this text?"
    relevant_chunks = query_relevant_chunks(query, pinecone_index, api_key, top_k=3)

    # Prepare conversation history: retrieved chunks as context, then the user question
    conversation_history = [{"role": "system", "content": chunk} for chunk in relevant_chunks]
    conversation_history.append({"role": "user", "content": query})

    # Generate chat response
    response = generate_chat_response(conversation_history, api_key)
    print("\nFinal Answer:")
    print(response)
```
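One caveat: the code above targets the legacy `pinecone-client` package (`pinecone.init` / `pinecone.Index`). Newer releases of the SDK (the `pinecone` package, v3 and later) replace the module-level init with a client object; if you install the current package, initialization looks roughly like this (a sketch, check the Pinecone docs for the spec options that match your account):

```python
import os
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create the index on first run (cloud/region here are illustrative values)
if "knowledge-base" not in pc.list_indexes().names():
    pc.create_index(
        name="knowledge-base",
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
pinecone_index = pc.Index("knowledge-base")
```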
Key changes compared with the local `embeddings.json` approach:

- **Integration with Pinecone:** `initialize_pinecone` connects to the service and creates the index on first run.
- **Efficient Embedding Storage:** `index_embeddings` upserts each chunk's vector straight into the index instead of writing a local file; a batched variant is sketched right after this list.
- **Metadata Handling:** the original chunk text rides along as metadata with each vector, so query results already contain the text to feed the chat model.
- **Query Performance:** `query_relevant_chunks` lets Pinecone perform the similarity search and only returns the `top_k` best matches.
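The script upserts one vector per call, which is easy to read but chatty over the network. If indexing feels slow, a batched variant along these lines usually helps, since `upsert` accepts a list of vectors (hypothetical `index_embeddings_batched`, reusing `create_embedding` from the script above):

```python
def index_embeddings_batched(chunks, pinecone_index, api_key, batch_size=100):
    """Batched variant of index_embeddings: fewer round trips to Pinecone."""
    vectors = []
    for idx, chunk in enumerate(chunks):
        embedding = create_embedding(chunk, api_key)
        if embedding is not None:
            vectors.append((f"chunk-{idx}", embedding.tolist(), {"text": chunk}))
        # Flush once the batch is full to keep each request small
        if len(vectors) >= batch_size:
            pinecone_index.upsert(vectors)
            vectors = []
    if vectors:  # flush the remainder
        pinecone_index.upsert(vectors)
```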
How the script runs end to end:

- **Initialization:** connect to Pinecone and create the `knowledge-base` index (dimension 1536, matching `text-embedding-ada-002`) if it doesn't exist.
- **Indexing:** chunk the knowledge base with overlap, embed each chunk, and upsert the vectors; skip this step on later runs once the index is populated (one way to automate that check is sketched after this list).
- **Querying:** embed the question and retrieve the `top_k` most similar chunks along with their stored text.
- **Response Generation:** build the conversation from the retrieved chunks and the question, then call the chat completions endpoint.
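Rather than commenting out the indexing call by hand on later runs, one option is to ask the index whether it already holds vectors. A rough sketch (it assumes the stats response exposes a `total_vector_count` field, which may differ across client versions):

```python
def index_is_populated(pinecone_index):
    """Return True if the index already contains vectors, so indexing can be skipped."""
    stats = pinecone_index.describe_index_stats()
    # Assumption: the stats response supports dict-style access to total_vector_count
    return stats["total_vector_count"] > 0

# In the __main__ block:
# if not index_is_populated(pinecone_index):
#     index_embeddings(chunks, pinecone_index, api_key)
```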
Why this setup is worth it:

- **Persistent Storage:** embeddings are indexed once and reused across runs; nothing is recomputed on startup.
- **Metadata Integration:** storing the chunk text next to its vector removes the need for a separate lookup table, and extra metadata fields enable filtered queries (sketched after this list).
- **Cloud-Based Scalability:** Pinecone handles index size, sharding, and query load, so the knowledge base can grow well beyond what fits in local memory.
- **Ease of Use:** as a managed service, there is no vector-search infrastructure for you to deploy or operate.
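For example, if each chunk were upserted with an extra field such as `source` (an illustration; the script above only stores `text`), Pinecone's `filter` parameter can narrow the similarity search to matching chunks:

```python
def query_with_filter(pinecone_index, query_embedding, source, top_k=3):
    """Hypothetical helper: similarity search restricted by a metadata field."""
    results = pinecone_index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
        filter={"source": {"$eq": source}},  # assumes chunks carry a "source" metadata field
    )
    return [match["metadata"]["text"] for match in results["matches"]]
```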
This updated solution improves performance, simplifies scalability, and supports large knowledge bases efficiently.