SingleStore Notebooks
New
Launch Open-Source Apps with LangChain
Notebook
In [1]:
%%writefile requirements.txtlangchain==0.0.339openai==1.3.3pdf2image==1.0.0pdfminer==20191125pdfminer.six==20221105pillow_heif==0.13.1tabulate==0.9.0tiktoken==0.5.1unstructured==0.11.0opencv-contrib-python-headless==4.8.1.78unstructured.pytesseract==0.3.12unstructured.inference==0.7.15
In [2]:
%pip install -r requirements.txt --quiet
In [3]:
from langchain.document_loaders import OnlinePDFLoader

# Location of the PDF used throughout this notebook.
PDF_URL = "http://leavcom.com/pdf/DBpdf.pdf"

# `data` holds the documents returned by the loader; later cells read
# `len(data)` and `data[0].page_content`.
loader = OnlinePDFLoader(PDF_URL)
data = loader.load()
In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Report how many documents were loaded ...
document_count = len(data)
print (f"You have {document_count} document(s) in your data")

# ... and how many characters the first one contains.
first_document_length = len(data[0].page_content)
print (f"There are {first_document_length} characters in your document")
In [5]:
# Split the loaded documents with chunk_size=2000 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=2000,
)
texts = text_splitter.split_documents(data)

# Each entry of `texts` is one split produced above (the message calls them "pages").
split_count = len(texts)
print (f"You have {split_count} pages")
In [6]:
%%sql
DROP DATABASE IF EXISTS pdf_db;
CREATE DATABASE IF NOT EXISTS pdf_db;
Action Required
Make sure to select the `pdf_db` database from the drop-down menu at the top of this notebook. Selecting it updates the `connection_url` so that the following cells connect to that database.
In [7]:
%%sql
DROP TABLE IF EXISTS pdf_docs1;
CREATE TABLE IF NOT EXISTS pdf_docs1 (
    id INT PRIMARY KEY,
    content TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
    vector BLOB
);
In [8]:
import getpass
import os

# Ask for the key interactively rather than hardcoding it in the notebook,
# and expose it through the OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
In [9]:
import json

import sqlalchemy as sa
from langchain.embeddings import OpenAIEmbeddings
from singlestoredb import create_engine

conn = create_engine().connect()

embedder = OpenAIEmbeddings()

# Pull the plain text out of every split once, so that exactly the same
# strings are embedded and stored in the `content` column.
chunk_texts = [doc.page_content for doc in texts]

# Fetch all embeddings in one call
embeddings = embedder.embed_documents(chunk_texts)

# Build query parameters: one dict per row, ids starting at 1.
# `content` is bound to the text itself (a str). Binding the Document object
# from `texts` would hand the driver a non-string value for a TEXT column.
# The embedding is serialized to a JSON array so JSON_ARRAY_PACK_F32 can pack it.
params = [
    dict(id=row_id, content=chunk_text, vector=json.dumps(embedding))
    for row_id, (chunk_text, embedding) in enumerate(zip(chunk_texts, embeddings), start=1)
]

stmt = sa.text("""
    INSERT INTO pdf_docs1 (
        id,
        content,
        vector
    )
    VALUES (
        :id,
        :content,
        JSON_ARRAY_PACK_F32(:vector)
    )
""")

# Passing the list of dicts runs the INSERT once per parameter set.
conn.execute(stmt, params)
In [10]:
%%sql
SELECT JSON_ARRAY_UNPACK_F32(vector) as vector
FROM pdf_docs1
LIMIT 1;
In [11]:
query_text = "Will object-oriented databases be commercially successful?"

# Embed the question with the same embedder used for the stored rows.
query_embedding = embedder.embed_documents([query_text])[0]

# Score every stored row by dot product against the query embedding and
# keep only the highest-scoring one.
stmt = sa.text("""
    SELECT
        content,
        DOT_PRODUCT_F32(JSON_ARRAY_PACK_F32(:embedding), vector) AS score
    FROM pdf_docs1
    ORDER BY score DESC
    LIMIT 1
""")

search_params = {"embedding": json.dumps(query_embedding)}
results = conn.execute(stmt, search_params)

# The loop variable `row` stays bound after the loop; the next cell reads
# `row[0]` to build its prompt.
for row in results:
    print(row[0])
In [12]:
import openai

client = openai.OpenAI()

# Combine the user's question with the best-matching text found by the
# similarity search (`row` comes from the previous cell's loop).
prompt = f"The user asked: {query_text}. The most similar text from the document is: {row[0]}"

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=messages,
)

# Show only the text of the first returned choice.
answer = response.choices[0].message.content
print(answer)
Clean up
In [13]:
%%sql
DROP DATABASE IF EXISTS pdf_db
Details
About this Template
LangChain connector to use SingleStoreDB as your vector database for your apps.
This Notebook can be run in Standard and Enterprise deployments.
Tags
vectordb, genai, langchain
License
This Notebook has been released under the Apache 2.0 open source license.