Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion docling/datamodel/pipeline_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class BaseTableStructureOptions(BaseOptions):


class TableStructureOptions(BaseTableStructureOptions):
"""Configuration for table structure extraction using the TableFormer model."""
"""Options for the table structure (TableFormer V1 and V2)."""

kind: ClassVar[str] = "docling_tableformer"
do_cell_matching: Annotated[
Expand All @@ -118,6 +118,18 @@ class TableStructureOptions(BaseTableStructureOptions):
] = TableFormerMode.ACCURATE


class TableStructureV2Options(BaseTableStructureOptions):
    """Settings for table structure extraction with TableFormer V2."""

    # Identifier string for this options class.
    kind: ClassVar[str] = "docling_tableformer_v2"

    # do_cell_matching
    #   True:  predictions are matched back to PDF cells. This can break the
    #          table output if PDF cells are merged across table columns.
    #   False: the table structure model defines the text cells and the PDF
    #          cells are ignored.
    do_cell_matching: bool = True


class OcrOptions(BaseOptions):
"""OCR options."""

Expand Down
4 changes: 4 additions & 0 deletions docling/models/plugins/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,13 @@ def table_structure_engines():
from docling.models.stages.table_structure.table_structure_model import (
TableStructureModel,
)
from docling.models.stages.table_structure.table_structure_model_v2 import (
TableStructureModelV2,
)

return {
"table_structure_engines": [
TableStructureModel,
TableStructureModelV2,
]
}
83 changes: 83 additions & 0 deletions docling/models/stages/table_structure/table_structure_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,86 @@ def predict_tables(
predictions.append(table_prediction)

return predictions

def _do_prediction_on_image_to_table(
    self,
    *,
    table_image,  # PIL.Image.Image - table image cropped out of the page
    table_cluster,  # Cluster - contains bbox and text-cells in page coordinates
    page_no: int,
) -> Table:
    """Predict the structure of one table from an already-cropped image.

    NOTE(review): a reviewer of this change remarked that this method has no
    call sites in docling and that it should be removed here and put where
    it is needed.

    The non-empty text cells of ``table_cluster`` are shifted so that the
    cluster's left/top edge becomes the origin, multiplied by a scale factor
    and handed to the predictor as tokens. Bounding boxes of the predicted
    cells are mapped back with the inverse transform.

    Args:
        table_image: Cropped table image; ``width`` and ``height`` are read
            and the image is converted with ``numpy.asarray``.
        table_cluster: Cluster supplying ``bbox``, ``cells``, ``id`` and
            ``label``.
        page_no: Page number stored on the returned table.

    Returns:
        A ``Table`` holding the predicted cells, the row and column counts
        and the predicted sequence (``rs_seq``).
    """
    width_px = table_image.width
    height_px = table_image.height

    cluster_box = table_cluster.bbox
    origin_x = cluster_box.l
    origin_y = cluster_box.t

    # Scale factor is the image width over the cluster width; fall back to
    # the instance-level scale when the cluster box has no positive width.
    cluster_span = cluster_box.r - origin_x
    if cluster_span > 0:
        factor = width_px / cluster_span
    else:
        factor = self.scale

    # Build predictor tokens from every cell that carries visible text.
    tokens = []
    for source_cell in table_cluster.cells:
        if not source_cell.text.strip():
            continue

        # Work on a copy so the cluster's own cells are left untouched.
        shifted_cell = copy.deepcopy(source_cell)
        page_box = shifted_cell.rect.to_bounding_box()
        shifted_cell.rect = BoundingRectangle.from_bounding_box(
            BoundingBox(
                l=(page_box.l - origin_x) * factor,
                t=(page_box.t - origin_y) * factor,
                r=(page_box.r - origin_x) * factor,
                b=(page_box.b - origin_y) * factor,
                coord_origin=page_box.coord_origin,
            )
        )
        tokens.append(
            {
                "id": shifted_cell.index,
                "text": shifted_cell.text,
                "bbox": shifted_cell.rect.to_bounding_box().model_dump(),
            }
        )

    # The single table box passed to the predictor covers the whole image.
    predictions = self.tf_predictor.multi_table_predict(
        {
            "width": width_px,
            "height": height_px,
            "image": numpy.asarray(table_image),
            "tokens": tokens,
        },
        [[0.0, 0.0, float(width_px), float(height_px)]],
        do_matching=self.do_cell_matching,
    )
    prediction = predictions[0]

    cells = []
    for response in prediction["tf_responses"]:
        if not self.do_cell_matching:
            # No page backend available to retrieve text; leave token empty
            response["bbox"]["token"] = ""

        cell = TableCell.model_validate(response)
        local_box = cell.bbox
        if local_box is not None:
            # Undo the scale-and-shift applied to the input tokens.
            cell.bbox = BoundingBox(
                l=local_box.l / factor + origin_x,
                t=local_box.t / factor + origin_y,
                r=local_box.r / factor + origin_x,
                b=local_box.b / factor + origin_y,
                coord_origin=local_box.coord_origin,
            )
        cells.append(cell)

    details = prediction["predict_details"]
    return Table(
        otsl_seq=details.get("prediction", {}).get("rs_seq", []),
        table_cells=cells,
        num_rows=details.get("num_rows", 0),
        num_cols=details.get("num_cols", 0),
        id=table_cluster.id,
        page_no=page_no,
        cluster=table_cluster,
        label=table_cluster.label,
    )
Loading