Quantcast
Channel: プログラミング
Viewing all articles
Browse latest Browse all 7831

【読書メモ】【LangChain完全入門】Chapter3 Retrieval - 未知のデータを扱えるようにする - yagibrary

$
0
0

02 与えたPDFをもとに回答するチャットボットを作る

pip install chromadb==0.5.3

0.5.4だと上手くいきませんでした。
参考記事
github.com

chat_3.py

import chainlit as cl
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from langchain.text_splitter import SpacyTextSplitter
from langchain_community.vectorstores import Chroma

# Japanese SBERT sentence-embedding model used to vectorise document chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="oshizo/sbert-jsnli-luke-japanese-base-lite"
)

# Groq-hosted Llama 3 70B chat model that answers the user's questions.
chat = ChatGroq(model_name="llama3-70b-8192")

# Prompt that stuffs the retrieved document text and the user's query together.
prompt = PromptTemplate(
    template="""文章を元に日本語で質問に答えてください。文章:{document}質問: {query}""",
    input_variables=["document", "query"],
)

# Sentence-aware splitter (spaCy Japanese model), ~300 characters per chunk.
text_splitter = SpacyTextSplitter(chunk_size=300, pipeline="ja_core_news_sm")

@cl.on_chat_start
async def on_chat_start():
    """Ask the user for a PDF, split and embed it, and store the vector DB in the session.

    Runs once when a chat session starts. Blocks until a PDF is uploaded,
    then builds an in-memory Chroma database from its text.
    """
    # Keep prompting until the user actually uploads a file
    # (AskFileMessage returns None on timeout).
    files = None
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a pdf file to begin!", accept=["application/pdf"]
        ).send()

    file = files[0]

    await cl.Message(
        content=f"`{file.name}` uploaded"
    ).send()

    # Load the PDF and split it into sentence-aware chunks.
    documents = PyMuPDFLoader(file.path).load()
    splitted_documents = text_splitter.split_documents(documents)

    database = Chroma(
        embedding_function=embeddings,
        # No persist_directory here on purpose: the database lives in memory
        # only and is discarded when the session ends.
    )

    database.add_documents(splitted_documents)

    # Stash the database in the per-user session so on_message can query it.
    cl.user_session.set(
        "database",
        database
    )

@cl.on_message
async def on_message(input_message):
    """Answer an incoming message using similarity search over the uploaded PDF.

    Retrieves the chunks most similar to the message from the session's
    Chroma database, stuffs them into the prompt, and replies with the
    model's answer.
    """
    message_content = input_message.content
    print("入力されたメッセージ: " + message_content)
    database = cl.user_session.get("database")

    documents = database.similarity_search(message_content)

    # Concatenate the retrieved chunks, separated by a dashed divider.
    # NOTE(review): the divider was probably multi-line in the original
    # article before the scrape flattened it — confirm against the book.
    documents_string = ""
    for document in documents:
        documents_string += f"""  -------------------------  {document.page_content}"""

    result = chat([
        HumanMessage(content=prompt.format(document=documents_string, query=message_content))
    ])
    await cl.Message(content=result.content).send()


03 RetrievalQAを使ってQAシステムの構築を楽にする

query_3.py

from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from langchain_community.vectorstores import Chroma

# Groq-hosted Llama 3 70B chat model that generates the answers.
chat = ChatGroq(model_name="llama3-70b-8192")

# Japanese SBERT sentence-embedding model for vector search.
embeddings = HuggingFaceEmbeddings(
    model_name="oshizo/sbert-jsnli-luke-japanese-base-lite"
)

# Open the Chroma vector store persisted under ./.data.
database = Chroma(
    persist_directory="./.data",
    embedding_function=embeddings,
)

# Wrap the store and the LLM into a retrieval-QA chain that also
# returns the source documents it based each answer on.
qa = RetrievalQA.from_llm(
    llm=chat,
    retriever=database.as_retriever(),
    return_source_documents=True,
)

result = qa.invoke("飛行車の最高速度を教えて")

print(result["result"])
print(result["source_documents"])

Viewing all articles
Browse latest Browse all 7831

Trending Articles