Adding a vector database like Pinecone, Weaviate, or Milvus can significantly improve performance and scalability when working with embeddings. These databases are optimized for large-scale vector storage and similarity searches, offering features like persistent storage, indexing, and distributed querying.
Here's how integrating a vector database can enhance performance in your current workflow:
- **Persistent Storage:** Embeddings live in the database instead of a local file (such as `embeddings.json`), so they survive restarts and never need to be recomputed.
- **Efficient Similarity Search:** The database's approximate nearest-neighbor index returns the closest vectors far faster than the brute-force comparison a local file forces you into (sketched just after this list).
- **Scalability:** The index can grow to millions of vectors without your application holding them all in memory.
- **Distributed Queries:** Searches run on the database's infrastructure, not inside your Python process.
- **Metadata Support:** Each vector can carry metadata, such as the original text chunk, so query results come back ready to use.
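For contrast, here is roughly what the local-file approach has to do on every query: load every stored vector and brute-force a cosine similarity against each one. This is the work the vector database's index takes over (a sketch with illustrative names, not code from the workflow itself):

```python
import numpy as np

def brute_force_top_k(query_vec, stored_vecs, k=3):
    """What a vector database replaces: compare the query against every stored vector."""
    stored = np.asarray(stored_vecs, dtype="float32")
    query = np.asarray(query_vec, dtype="float32")
    # Cosine similarity between the query and every stored row
    sims = stored @ query / (np.linalg.norm(stored, axis=1) * np.linalg.norm(query))
    # Indices of the k most similar chunks, best first
    return np.argsort(sims)[::-1][:k]
```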
The core workflow keeps the same three steps, now backed by the vector database:

1. **Load Knowledge Base and Create Embeddings:** chunk the text, embed each chunk, and upsert the vectors into the index.
2. **Search for Relevant Chunks:** embed the query and let the database return the nearest chunks.
3. **Generate the Final Answer:** pass the retrieved chunks plus the question to the chat model.
Pinecone is a managed vector database service, so there is no infrastructure of your own to run. Install the client library first:
```bash
pip install pinecone-client
```
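The script below reads its credentials from environment variables; a quick sanity check before running can save a confusing failure later (a minimal sketch using the same variable names as the script):

```python
import os

# Fail fast if any credential the script expects is missing.
for var in ("OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_ENV"):
    if not os.getenv(var):
        raise RuntimeError(f"Environment variable {var} is not set")
```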
```python
import os

import numpy as np
import pinecone
import requests


def initialize_pinecone(api_key, environment, index_name, dimension):
    """Initialize Pinecone and create an index if it doesn't exist."""
    pinecone.init(api_key=api_key, environment=environment)
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(name=index_name, dimension=dimension)
    return pinecone.Index(index_name)


def create_embedding(text, api_key):
    """Create an embedding using the OpenAI API."""
    api_url = "https://api.openai.com/v1/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "text-embedding-ada-002",
        "input": text
    }
    response = requests.post(api_url, headers=headers, json=payload, timeout=120)
    if response.status_code == 200:
        data = response.json()
        return np.array(data["data"][0]["embedding"], dtype="float32")
    print(f"Error in embedding creation: {response.status_code}, {response.text}")
    return None


def load_and_chunk_knowledge_base_with_overlap(file_path, chunk_size=50, overlap=10):
    """Load and chunk the knowledge base into word chunks with overlap."""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split the text into words
    words = content.split()

    # Create chunks with overlap
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = ' '.join(words[i:i + chunk_size])
        if chunk.strip():  # Ignore empty chunks
            chunks.append(chunk)
    return chunks


def index_embeddings(chunks, pinecone_index, api_key):
    """Create embeddings for chunks and store them in Pinecone."""
    print("Indexing embeddings in Pinecone...")
    for idx, chunk in enumerate(chunks):
        embedding = create_embedding(chunk, api_key)
        if embedding is not None:
            # Upsert the embedding with the chunk text as metadata
            pinecone_index.upsert([(f"chunk-{idx}", embedding.tolist(), {"text": chunk})])


def query_relevant_chunks(query, pinecone_index, api_key, top_k=3):
    """Query Pinecone for the most relevant chunks."""
    query_embedding = create_embedding(query, api_key)
    if query_embedding is not None:
        results = pinecone_index.query(
            vector=query_embedding.tolist(), top_k=top_k, include_metadata=True
        )
        return [match["metadata"]["text"] for match in results["matches"]]
    return []


def generate_chat_response(conversation_history, api_key):
    """Generate a chat response using the OpenAI API."""
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": conversation_history,
        "temperature": 0.7
    }
    response = requests.post(api_url, headers=headers, json=payload, timeout=120)
    if response.status_code == 200:
        data = response.json()
        if "choices" in data:
            return data["choices"][0]["message"]["content"]
    print(f"Error in chat response: {response.status_code}, {response.text}")
    return None


if __name__ == "__main__":
    # Configuration
    knowledge_base_file = "kn/kn.txt2"
    api_key = os.getenv("OPENAI_API_KEY")
    pinecone_api_key = os.getenv("PINECONE_API_KEY")
    pinecone_env = os.getenv("PINECONE_ENV")
    index_name = "knowledge-base"

    # Initialize Pinecone (1536 is the output dimension of text-embedding-ada-002)
    pinecone_index = initialize_pinecone(pinecone_api_key, pinecone_env, index_name, dimension=1536)

    # Load and chunk knowledge base
    chunks = load_and_chunk_knowledge_base_with_overlap(knowledge_base_file, chunk_size=50, overlap=10)

    # Index embeddings (comment this out if already indexed)
    index_embeddings(chunks, pinecone_index, api_key)

    # Query example
    query = "What is the key topic in this text?"
    relevant_chunks = query_relevant_chunks(query, pinecone_index, api_key, top_k=3)

    # Prepare conversation history: retrieved chunks as context, then the user question
    conversation_history = [{"role": "system", "content": chunk} for chunk in relevant_chunks]
    conversation_history.append({"role": "user", "content": query})

    # Generate chat response
    response = generate_chat_response(conversation_history, api_key)
    print("\nFinal Answer:")
    print(response)
```
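One caveat: the code above targets the legacy `pinecone-client` package (`pinecone.init` / `pinecone.Index`). Newer releases of the SDK (the `pinecone` package, v3 and later) replace the module-level init with a client object; if you install the current package, initialization looks roughly like this (a sketch, check the Pinecone docs for the spec options that match your account):

```python
import os
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create the index on first run (cloud/region here are illustrative values)
if "knowledge-base" not in pc.list_indexes().names():
    pc.create_index(
        name="knowledge-base",
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
pinecone_index = pc.Index("knowledge-base")
```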
Key changes compared with the local `embeddings.json` approach:

- **Integration with Pinecone:** `initialize_pinecone` connects to the service and creates the index on first run.
- **Efficient Embedding Storage:** `index_embeddings` upserts each chunk's vector straight into the index instead of writing a local file; a batched variant is sketched right after this list.
- **Metadata Handling:** the original chunk text rides along as metadata with each vector, so query results already contain the text to feed the chat model.
- **Query Performance:** `query_relevant_chunks` lets Pinecone perform the similarity search and only returns the `top_k` best matches.
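The script upserts one vector per call, which is easy to read but chatty over the network. If indexing feels slow, a batched variant along these lines usually helps, since `upsert` accepts a list of vectors (hypothetical `index_embeddings_batched`, reusing `create_embedding` from the script above):

```python
def index_embeddings_batched(chunks, pinecone_index, api_key, batch_size=100):
    """Batched variant of index_embeddings: fewer round trips to Pinecone."""
    vectors = []
    for idx, chunk in enumerate(chunks):
        embedding = create_embedding(chunk, api_key)
        if embedding is not None:
            vectors.append((f"chunk-{idx}", embedding.tolist(), {"text": chunk}))
        # Flush once the batch is full to keep each request small
        if len(vectors) >= batch_size:
            pinecone_index.upsert(vectors)
            vectors = []
    if vectors:  # flush the remainder
        pinecone_index.upsert(vectors)
```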
How the script runs end to end:

- **Initialization:** connect to Pinecone and create the `knowledge-base` index (dimension 1536, matching `text-embedding-ada-002`) if it doesn't exist.
- **Indexing:** chunk the knowledge base with overlap, embed each chunk, and upsert the vectors; skip this step on later runs once the index is populated (one way to automate that check is sketched after this list).
- **Querying:** embed the question and retrieve the `top_k` most similar chunks along with their stored text.
- **Response Generation:** build the conversation from the retrieved chunks and the question, then call the chat completions endpoint.
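Rather than commenting out the indexing call by hand on later runs, one option is to ask the index whether it already holds vectors. A rough sketch (it assumes the stats response exposes a `total_vector_count` field, which may differ across client versions):

```python
def index_is_populated(pinecone_index):
    """Return True if the index already contains vectors, so indexing can be skipped."""
    stats = pinecone_index.describe_index_stats()
    # Assumption: the stats response supports dict-style access to total_vector_count
    return stats["total_vector_count"] > 0

# In the __main__ block:
# if not index_is_populated(pinecone_index):
#     index_embeddings(chunks, pinecone_index, api_key)
```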
Why this setup is worth it:

- **Persistent Storage:** embeddings are indexed once and reused across runs; nothing is recomputed on startup.
- **Metadata Integration:** storing the chunk text next to its vector removes the need for a separate lookup table, and extra metadata fields enable filtered queries (sketched after this list).
- **Cloud-Based Scalability:** Pinecone handles index size, sharding, and query load, so the knowledge base can grow well beyond what fits in local memory.
- **Ease of Use:** as a managed service, there is no vector-search infrastructure for you to deploy or operate.
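For example, if each chunk were upserted with an extra field such as `source` (an illustration; the script above only stores `text`), Pinecone's `filter` parameter can narrow the similarity search to matching chunks:

```python
def query_with_filter(pinecone_index, query_embedding, source, top_k=3):
    """Hypothetical helper: similarity search restricted by a metadata field."""
    results = pinecone_index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
        filter={"source": {"$eq": source}},  # assumes chunks carry a "source" metadata field
    )
    return [match["metadata"]["text"] for match in results["matches"]]
```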
This updated solution improves performance, simplifies scalability, and supports large knowledge bases efficiently.