Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/agentscope/plan/_in_memory_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ def __init__(self) -> None:
super().__init__()
self.plans = OrderedDict()

# Support historical plan serialization and deserialization
self.register_state(
"plans",
lambda plans: {k: v.model_dump() for k, v in plans.items()},
lambda json_data: OrderedDict(
(k, Plan.model_validate(v)) for k, v in json_data.items()
),
)

async def add_plan(self, plan: Plan, override: bool = True) -> None:
"""Add a plan to the storage.
Expand Down
29 changes: 22 additions & 7 deletions src/agentscope/plan/_plan_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ class SubTask(BaseModel):
)
outcome: str | None = Field(
description="The actual outcome of the subtask.",
exclude=True,
default=None,
)
state: Literal["todo", "in_progress", "done", "abandoned"] = Field(
Expand All @@ -48,7 +47,6 @@ class SubTask(BaseModel):
finished_at: str | None = Field(
description="The time the subtask was finished.",
default=None,
exclude=True,
)

def finish(self, outcome: str) -> None:
Expand Down Expand Up @@ -106,7 +104,7 @@ def to_markdown(self, detailed: bool = False) -> str:
class Plan(BaseModel):
"""The plan model used in the plan module, contains a list of subtasks."""

id: str = Field(exclude=True, default_factory=shortuuid.uuid)
id: str = Field(default_factory=shortuuid.uuid)
name: str = Field(
description=(
"The plan name, should be concise, descriptive and not exceed 10 "
Expand All @@ -133,24 +131,41 @@ class Plan(BaseModel):
created_at: str = Field(
description="The time the plan was created.",
default_factory=_get_timestamp,
exclude=True,
)
state: Literal["todo", "in_progress", "done", "abandoned"] = Field(
description="The state of the plan.",
default="todo",
exclude=True,
)
finished_at: str | None = Field(
description="The time the plan was finished.",
default=None,
exclude=True,
)
outcome: str | None = Field(
description="The actual outcome of the plan.",
default=None,
exclude=True,
)

def refresh_plan_state(self) -> str:
    """Refresh the plan state based on the states of its subtasks.

    This function only switches the plan state between "todo" and
    "in_progress". A plan whose state is already "done" or "abandoned"
    is left untouched.

    Returns:
        `str`:
            A message describing the state change, or an empty string if
            the plan state was not changed.
    """
    # TODO: Handle the plan state much more formally.

    # Terminal states are never modified by this method.
    if self.state in ("done", "abandoned"):
        return ""

    any_in_progress = any(
        subtask.state == "in_progress" for subtask in self.subtasks
    )

    # Promote the plan as soon as at least one subtask is being worked on.
    if any_in_progress and self.state == "todo":
        self.state = "in_progress"
        return "The plan state has been updated to 'in_progress'."

    # Demote the plan when no subtask is in progress any more.
    if not any_in_progress and self.state == "in_progress":
        self.state = "todo"
        return "The plan state has been updated to 'todo'."

    return ""

def finish(
self,
state: Literal["done", "abandoned"],
Expand Down
15 changes: 11 additions & 4 deletions src/agentscope/plan/_plan_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ async def revise_current_plan(
async def update_subtask_state(
self,
subtask_idx: int,
state: Literal["todo", "in_progress", "deprecated"],
state: Literal["todo", "in_progress", "abandoned"],
) -> ToolResponse:
"""Update the state of a subtask by given index and state. Note if you
want to mark a subtask as done, you SHOULD call `finish_subtask`
Expand Down Expand Up @@ -487,7 +487,7 @@ async def update_subtask_state(
# Check all previous subtasks are done or abandoned
if idx < subtask_idx and subtask.state not in [
"done",
"deprecated",
"abandoned",
]:
return ToolResponse(
content=[
Expand Down Expand Up @@ -520,14 +520,18 @@ async def update_subtask_state(
)

self.current_plan.subtasks[subtask_idx].state = state

# Sync the plan state with its subtasks: switch between "todo" and
# "in_progress" depending on whether any subtask is in progress
suffix = self.current_plan.refresh_plan_state()

await self._trigger_plan_change_hooks()
return ToolResponse(
content=[
TextBlock(
type="text",
text=f"Subtask at index {subtask_idx}, named "
f"'{self.current_plan.subtasks[subtask_idx].name}' "
f"is marked as '{state}' successfully.",
f"is marked as '{state}' successfully. " + suffix,
),
],
)
Expand Down Expand Up @@ -585,7 +589,7 @@ async def finish_subtask(
for idx, subtask in enumerate(
self.current_plan.subtasks[0:subtask_idx],
):
if subtask.state not in ["done", "deprecated"]:
if subtask.state not in ["done", "abandoned"]:
return ToolResponse(
content=[
TextBlock(
Expand Down Expand Up @@ -815,12 +819,15 @@ def list_tools(
the agent.
"""
return [
# subtask related tools
self.view_subtasks,
self.update_subtask_state,
self.finish_subtask,
# plan related tools
self.create_plan,
self.revise_current_plan,
self.finish_plan,
# historical plan related tools
self.view_historical_plans,
self.recover_historical_plan,
]
Expand Down
37 changes: 27 additions & 10 deletions src/agentscope/rag/_reader/_word_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,41 @@
import base64
import hashlib
import json
from typing import Literal
from typing import Literal, TYPE_CHECKING

from docx.oxml import CT_P, CT_Tbl
from docx.table import Table
from docx.text.paragraph import Paragraph
from docx.oxml.ns import qn

from ._reader_base import ReaderBase
from ._text_reader import TextReader
from .._document import Document, DocMetadata
from ..._logging import logger
from ...message import ImageBlock, Base64Source, TextBlock

if TYPE_CHECKING:
from docx.table import Table as DocxTable
from docx.text.paragraph import Paragraph as DocxParagraph
else:
DocxTable = "docx.table.Table"
DocxParagraph = "docx.text.paragraph.Paragraph"

def _extract_text_from_paragraph(para: Paragraph) -> str:

def _extract_text_from_paragraph(para: DocxParagraph) -> str:
"""Extract text from a paragraph, including text in text boxes and shapes.
Args:
para: Paragraph object
para (`Paragraph`):
The paragraph object from which to extract text.
Returns:
str: Extracted text
`str`:
Extracted text
"""
text = ""

# Method 1: Extract all w:t elements directly from XML
# (handles revisions, hyperlinks, etc.)
from docx.oxml.ns import qn

for t_elem in para._element.findall(".//" + qn("w:t")):
if t_elem.text:
text += t_elem.text
Expand Down Expand Up @@ -62,7 +70,7 @@ def _extract_text_from_paragraph(para: Paragraph) -> str:
return text.strip()


def _extract_table_data(table: Table) -> list[list[str]]:
def _extract_table_data(table: DocxTable) -> list[list[str]]:
"""Extract table data, handling merged cells and preserving line breaks
within cells.
Expand All @@ -75,6 +83,8 @@ def _extract_table_data(table: Table) -> list[list[str]]:
Table data represented as a 2D list.
"""

from docx.oxml.ns import qn

table_data = []
# Extract table cell elements directly from XML
for tr in table._element.findall(qn("w:tr")):
Expand Down Expand Up @@ -105,7 +115,7 @@ def _extract_table_data(table: Table) -> list[list[str]]:
return table_data


def _extract_image_data(para: Paragraph) -> list[ImageBlock]:
def _extract_image_data(para: DocxParagraph) -> list[ImageBlock]:
"""Extract image data from a paragraph.
Args:
Expand All @@ -118,6 +128,8 @@ def _extract_image_data(para: Paragraph) -> list[ImageBlock]:
"""
images = []

from docx.oxml.ns import qn

# Method 1: Find all drawing elements (modern Word format)
drawings = para._element.findall(".//" + qn("w:drawing"))

Expand Down Expand Up @@ -334,6 +346,11 @@ def _get_data_blocks(self, word_path: str) -> list[TextBlock | ImageBlock]:
# Read the Word document
try:
from docx import Document as DocxDocument
from docx.oxml import CT_P, CT_Tbl
from docx.text.paragraph import Paragraph
from docx.table import Table
from docx.oxml.ns import qn

except ImportError as e:
raise ImportError(
"Please install python-docx to use the Word reader. "
Expand Down
Loading
Loading