|
| 1 | +import uuid |
| 2 | + |
| 3 | +from ..knowledge_base import KnowledgeBase |
| 4 | +from .base import GenerateFromSingleQuestionMixin, _LLMBasedQuestionGenerator |
| 5 | +from .prompt import QAGenerationPrompt |
| 6 | + |
# System prompt for the out-of-scope question generator. Placeholders
# {agent_description} and {language} are filled in via
# QAGenerationPrompt.to_messages(system_prompt_input=...).
OOS_PROMPT = """
You are a powerful auditor and mindful judger, your role is to generate question from a given context and
add some fake or non-existing details to the context to check whether the agent you are auditing is capable of answering questions
which have no direct answer in the provided context.

The agent you are auditing is described below:
{agent_description}

Here are your tasks, you should finish them step by step:
1. Select one fact from the context.
2. Imagine some fake details not present in the whole provided context but should be plausible based on the detail you selected in the previous step.
3. Isolate this fake detail into a single sentence.
4. Generate an open question asking about this new detail, make sure the question is relevant and can not be answered by the information in the context.

The generated question should be in the following language: {language}

You will first be provided with an example, followed by the user input. Read the example thoroughly and take inspiration from it but do not use information or name from the example in your answers.
You will return the isolated detail/fact and the question based exclusively on the new added isolated context.
You must output a single JSON object with keys 'selected_fact', 'fake_fact' and 'question', without any other wrapping text or markdown and everything is in lowercase. Make sure you only return valid JSON.
"""

# Few-shot example context shown to the model before the real user input.
OOS_QUESTION_EXAMPLE_INPUT = """
Paul usually goes to the market at 8:00 AM. He starts with the grocery store and then goes to the bakery.
He enjoys buying a fresh baguette every morning at the bakery. The bakery is located at the corner of his street."""

# Few-shot example answer: the exact JSON shape the model is expected to emit.
OOS_QUESTION_EXAMPLE_OUTPUT = """
{
    "selected_fact": "Paul likes to buy a baguette every day.",
    "fake_fact": "Paul Graham pays 1 euro for a baguette",
    "question": "How much does Paul pay for his baguette?"
}
"""

# Fixed reference answer for every out-of-scope question: by construction the
# question cannot be answered from the context, so the expected agent behavior
# is to acknowledge the missing information. Kept byte-stable because it is
# stored verbatim as `reference_answer` in generated test sets.
DUMMY_ANSWER = "This question can not be answered by the context. No sufficient information is provided in the context to answer this question."
| 42 | + |
class OutOfScopeGenerator(GenerateFromSingleQuestionMixin, _LLMBasedQuestionGenerator):
    """
    Out-of-scope question generator that generates questions from a KnowledgeBase.

    The generated questions ask about a plausible but fabricated detail that is
    absent from the knowledge base, so they deliberately cannot be answered from
    the provided context. The reference answer is always ``DUMMY_ANSWER``.

    Parameters
    ----------
    context_neighbors: int, optional
        Number of context neighbors to use for question generation.
    context_similarity_threshold: float, optional
        Similarity threshold to keep neighboring document during question generation.
    context_window_length: int, optional
        Context window length of the llm used in the `llm_client` of the generator.
    llm_client: LLMClient, optional
        The LLM client to use for question generation. If not specified, a default openai client will be used.
    llm_temperature: float, optional
        The temperature to use in the LLM for question generation. The default is 0.5.
    """

    _OOS_question_generation_prompt = QAGenerationPrompt(
        system_prompt=OOS_PROMPT,
        example_input=OOS_QUESTION_EXAMPLE_INPUT,
        example_output=OOS_QUESTION_EXAMPLE_OUTPUT,
    )

    _question_type = "out of scope"

    def generate_single_question(self, knowledge_base: KnowledgeBase, agent_description: str, language: str) -> dict:
        """
        Generate a single out-of-scope question from the knowledge base.

        Parameters
        ----------
        knowledge_base: KnowledgeBase
            The knowledge base to generate the question from.
        agent_description: str
            The description of the agent to generate the question for.
        language: str
            The language to generate the question in.

        Returns
        -------
        dict
            The generated question, with keys ``id``, ``question``,
            ``reference_answer`` (always ``DUMMY_ANSWER``), ``reference_context``,
            ``conversation_history`` and ``metadata``.
        """
        seed_document = knowledge_base.get_random_document()

        # Gather nearby documents so the reference context reflects everything
        # the agent could legitimately draw on.
        context_documents = knowledge_base.get_neighbors(
            seed_document, self._context_neighbors, self._context_similarity_threshold
        )

        reference_context = "\n\n".join(f"Document {doc.id}: {doc.content}" for doc in context_documents)

        # Build the out-of-scope (OOS) question generation prompt; only the seed
        # document is shown to the model, the neighbors are kept as reference.
        question_messages = self._OOS_question_generation_prompt.to_messages(
            system_prompt_input={"agent_description": agent_description, "language": language},
            user_input=seed_document.content,
        )

        # Expected to be a dict with keys 'selected_fact', 'fake_fact' and
        # 'question' as mandated by OOS_PROMPT; a malformed LLM response will
        # surface as a KeyError below.
        generated_qa = self._llm_complete(messages=question_messages)

        question_metadata = {
            "question_type": self._question_type,
            "seed_document_id": seed_document.id,
            "fake_fact": generated_qa["fake_fact"],
        }

        return {
            "id": str(uuid.uuid4()),
            "question": generated_qa["question"],
            "reference_answer": DUMMY_ANSWER,
            "reference_context": reference_context,
            "conversation_history": [],
            "metadata": question_metadata,
        }
| 119 | + |

# Default, ready-to-use generator instance exposed as the module-level entry
# point for out-of-scope question generation.
oos_questions = OutOfScopeGenerator()
0 commit comments