Jun-07-2023, 06:02 PM
I am trying to use langchain to query a pdf document with chatgpt.
import os import openai from langchain.document_loaders import PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'YourAPIKey') # Initialize OpenAI API openai.api_key = OPENAI_API_KEY # Load the PDF documents loader = PyPDFLoader("ALJDecision.pdf") Don't really understand what is going wrong here. data = loader.load() print(f'You have {len(data)} document(s) in your data') if len(data) >= 31: print(f'There are {len(data[30].page_content)} characters in your document') else: print("Data does not have an element at index 30") # Split the documents into chunks text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0) texts = text_splitter.split_documents(data) print(f'Now you have {len(texts)} documents') # Index the documents index = {} for i, t in enumerate(texts): response = openai.Completion.create( engine="davinci", prompt=t.page_content, max_tokens=64, temperature=0.5, top_p=1.0, n=1, stop=None ) text = response.choices[0].get("text") # Get the generated text index[str(i)] = text # Query the index query = "Why did the judge deny this claim for social security disability?" response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": query}], max_tokens=128, temperature=0.5, top_p=1.0, n=1, stop=None ) query_text = response.choices[0].get("message").get("content") # Get the generated text # Perform the search results = [] for doc_id, doc_content in index.items(): if query_text in doc_content: results.append(doc_id) # Print the results for doc_id in results: print(doc_id)
Error:You have 19 document(s) in your data
Data does not have an element at index 30
Now you have 31 documents