Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/perserve-styles.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Optionally preserve the styles of appended documents whose id is already used by a style in the master document. [buchi]
10 changes: 9 additions & 1 deletion docxcompose/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ def setup_parser():
help="path to the output file",
metavar="file",
)
parser.add_argument(
"--preserve-styles",
action="store_true",
default=False,
)
return parser


Expand All @@ -46,7 +51,10 @@ def parse_args(parser, args):


def compose_files(parser, parsed_args):
composer = Composer(Document(parsed_args.master))
options = {
"preserve_styles": parsed_args.preserve_styles,
}
composer = Composer(Document(parsed_args.master), **options)
for slave_path in parsed_args.files:
composer.append(Document(slave_path))

Expand Down
90 changes: 75 additions & 15 deletions docxcompose/composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@

from docxcompose.image import ImageWrapper
from docxcompose.properties import CustomProperties
from docxcompose.utils import increment_name
from docxcompose.utils import NS
from docxcompose.utils import xml_elements_equal
from docxcompose.utils import xpath


Expand All @@ -34,13 +36,22 @@
RT.FOOTNOTES,
]

# Child tags passed as ``ignored_tags`` when two style elements are compared
# for equality (fully qualified with the WordprocessingML main namespace).
IGNORED_STYLE_TAGS = {
    "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}name",
    "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}rsid",
}


class Composer(object):
def __init__(self, doc):
def __init__(self, doc, preserve_styles=False):
self.doc = doc
self.pkg = doc.part.package

self.restart_numbering = True
self.preserve_styles = preserve_styles
self._preserved_styles = {}

self.reset_reference_mapping()

Expand All @@ -59,6 +70,7 @@ def append(self, doc, remove_property_fields=True):
def insert(self, index, doc, remove_property_fields=True):
"""Insert the given document at the given index."""
self.reset_reference_mapping()
self._current_preserved_styles = {}

# Remove custom property fields but keep the values
if remove_property_fields:
Expand Down Expand Up @@ -299,24 +311,61 @@ def add_styles(self, doc, element):

for style_id in used_style_ids:
our_style_id = self.mapped_style_id(style_id)
if our_style_id not in our_style_ids:
# To preserve styles with the same id from added documents, we
# create a copy and append a suffix to the id and name.
if self.preserve_styles and our_style_id in our_style_ids:
if our_style_id not in self._current_preserved_styles:
style_element = deepcopy(doc.styles.element.get_by_id(style_id))
our_style_element = self.doc.styles.element.get_by_id(our_style_id)

# Check if we already have an identical style
preserved_style_ids = self._preserved_styles.get(
our_style_id, [our_style_id]
)
matched_style_id = None
for pstyle_id in preserved_style_ids:
our_style_element = self.doc.styles.element.get_by_id(pstyle_id)
if xml_elements_equal(
style_element,
our_style_element,
ignored_tags=IGNORED_STYLE_TAGS,
):
matched_style_id = pstyle_id
self._current_preserved_styles[our_style_id] = (
style_element.styleId
)
break
# No matching style found, insert style with a new name
if matched_style_id is None:
new_id = increment_name(our_style_id)
new_name = None
if style_element.name is not None:
new_name = increment_name(style_element.name.val)
while new_id in our_style_ids:
new_id = increment_name(new_id)
if new_name is not None:
new_name = increment_name(new_name)
style_element.styleId = new_id
if new_name is not None:
style_element.name.val = new_name
self.doc.styles.element.append(style_element)
self.add_numberings(doc, style_element)
self.add_linked_styles(doc, style_element)
self._current_preserved_styles[our_style_id] = new_id
self._preserved_styles.setdefault(
our_style_id, [our_style_id]
).append(new_id)
else:
self._current_preserved_styles[our_style_id] = matched_style_id

for el in xpath(element, ".//w:tblStyle|.//w:pStyle|.//w:rStyle"):
el.val = self._current_preserved_styles[our_style_id]
elif our_style_id not in our_style_ids:
style_element = deepcopy(doc.styles.element.get_by_id(style_id))
if style_element is not None:
self.doc.styles.element.append(style_element)
self.add_numberings(doc, style_element)
# Also add linked styles
linked_style_ids = xpath(style_element, ".//w:link/@w:val")
if linked_style_ids:
linked_style_id = linked_style_ids[0]
our_linked_style_id = self.mapped_style_id(linked_style_id)
if our_linked_style_id not in our_style_ids:
our_linked_style = doc.styles.element.get_by_id(
linked_style_id
)
if our_linked_style is not None:
self.doc.styles.element.append(
deepcopy(our_linked_style)
)
self.add_linked_styles(doc, style_element)
else:
# Create a mapping for abstractNumIds used in existing styles
# This is used when adding numberings to avoid having multiple
Expand Down Expand Up @@ -360,6 +409,17 @@ def add_styles(self, doc, element):
# Update our style ids
our_style_ids = [s.style_id for s in self.doc.styles]

def add_linked_styles(self, doc, element):
    """Copy the style linked from ``element`` into our document if missing.

    Looks up the first ``w:link`` reference below ``element``. When the
    mapped id of that linked style is not among our document's style ids,
    a deep copy of the linked style from ``doc`` is appended to our styles.
    """
    link_targets = xpath(element, ".//w:link/@w:val")
    if not link_targets:
        return

    # Only the first link reference is taken into account.
    source_link_id = link_targets[0]
    mapped_link_id = self.mapped_style_id(source_link_id)
    known_style_ids = [style.style_id for style in self.doc.styles]
    if mapped_link_id in known_style_ids:
        return

    linked_style = doc.styles.element.get_by_id(source_link_id)
    if linked_style is not None:
        self.doc.styles.element.append(deepcopy(linked_style))

def add_numberings(self, doc, element):
"""Add numberings from the given document used in the given element."""
# Search for numbering references
Expand Down
9 changes: 8 additions & 1 deletion docxcompose/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from docx import Document

from docxcompose.composer import Composer
from docxcompose.utils import to_bool


CHUNK_SIZE = 65536
Expand Down Expand Up @@ -48,7 +49,7 @@ async def compose(request):
composed_filename = os.path.join(temp_dir, "composed.docx")

try:
composer = Composer(Document(documents.pop(0)))
composer = Composer(Document(documents.pop(0)), **compose_options(request))
for document in documents:
composer.append(Document(document))
composer.save(composed_filename)
Expand All @@ -63,6 +64,12 @@ async def compose(request):
)


def compose_options(request):
    """Build the keyword options for ``Composer`` from the URL query string.

    The ``preserve_styles`` query parameter is converted with ``to_bool``;
    a missing parameter is treated like an empty string.
    """
    raw_preserve_styles = request.rel_url.query.get("preserve_styles", "")
    return {"preserve_styles": to_bool(raw_preserve_styles)}


async def save_part_to_file(part, directory):
filename = os.path.join(directory, f"{part.name}_{part.filename}")
with open(filename, "wb") as file_:
Expand Down
84 changes: 84 additions & 0 deletions docxcompose/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,87 @@ def word_to_python_date_format(format_str):
for word_format, python_format in date_format_map:
format_str = re.sub(word_format, python_format, format_str)
return format_str


def increment_name(name):
    """Return ``name`` with its trailing ``_<number>`` counter incremented.

    A name that ends in an underscore followed by ASCII digits has that
    number increased by one (``"Style_1"`` -> ``"Style_2"``). Every other
    name gets ``"_1"`` appended (``"Style"`` -> ``"Style_1"``).

    Only a real ``_<digits>`` suffix counts as a counter. A purely numeric
    name such as ``"2024"`` or a signed tail such as ``"Style_-1"`` is left
    intact and simply gets ``"_1"`` appended, instead of being rewritten to
    an unrelated name.
    """
    base, separator, counter = name.rpartition("_")
    # isascii() guards against non-ASCII digit characters for which
    # isdigit() is true but int() would fail or give surprising results.
    if separator and counter.isascii() and counter.isdigit():
        return f"{base}_{int(counter) + 1}"
    return f"{name}_1"


def to_bool(value):
    """Interpret ``value`` as a boolean flag.

    Returns ``True`` when the value, ignoring case and surrounding
    whitespace, is one of ``1``, ``yes``, ``true``, ``on`` or ``ok``.
    Booleans are returned unchanged and ``None`` is treated as ``False``,
    so callers do not crash on a missing or already-parsed value.
    """
    if isinstance(value, bool):
        return value
    if value is None:
        return False
    return str(value).strip().lower() in {"1", "yes", "true", "on", "ok"}


def xml_elements_equal(
    left,
    right,
    ignored_tags=None,
    compare_text=True,
    compare_tail=False,
    compare_attributes=True,
):
    """Return whether ``left`` and ``right`` have the same signature.

    Both elements are reduced with ``xml_element_signature`` using the same
    comparison options, and the two resulting signatures are compared.
    """
    signature_options = {
        "ignored_tags": ignored_tags,
        "compare_text": compare_text,
        "compare_tail": compare_tail,
        "compare_attributes": compare_attributes,
    }
    left_signature = xml_element_signature(left, **signature_options)
    right_signature = xml_element_signature(right, **signature_options)
    return left_signature == right_signature


def xml_element_signature(
    element,
    ignored_tags=None,
    compare_text=True,
    compare_tail=False,
    compare_attributes=True,
    is_root=True,
):
    """
    Reduce an element to a nested tuple that can be compared for equality.

    Children whose tag is in ``ignored_tags`` are left out. The remaining
    child signatures are sorted, which makes the child order irrelevant.
    For the root element only the children contribute to the signature;
    its own tag, attributes, text and tail are replaced by ``None``.
    """
    own_attrs = tuple(sorted(element.attrib.items())) if compare_attributes else ()
    own_text = normalize_text(element.text) if compare_text else None
    own_tail = normalize_text(element.tail) if compare_tail else None

    skipped_tags = ignored_tags or ()
    nested = sorted(
        xml_element_signature(
            node,
            ignored_tags=ignored_tags,
            compare_text=compare_text,
            compare_tail=compare_tail,
            compare_attributes=compare_attributes,
            is_root=False,
        )
        for node in element
        if node.tag not in skipped_tags
    )

    if is_root:
        return (None, None, None, None, tuple(nested))
    return (element.tag, own_attrs, own_text, own_tail, tuple(nested))


def normalize_text(value):
    """Return ``value`` without surrounding whitespace; ``None`` becomes ``""``."""
    return "" if value is None else value.strip()
Binary file not shown.
Binary file added tests/docs/styles_preserve1.docx
Binary file not shown.
Binary file added tests/docs/styles_preserve2.docx
Binary file not shown.
12 changes: 12 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ async def test_post_returns_500_if_compose_fails(http_client):
assert text == "Failed composing documents"


async def test_post_with_url_parameters(http_client):
    """Posting with ``?preserve_styles=1`` returns the composed document."""
    # Open the uploads in a context manager so both file handles are closed
    # even when the request or an assertion fails.
    with open(docx_path("master.docx"), "rb") as master, open(
        docx_path("table.docx"), "rb"
    ) as table:
        files = {
            "master": master,
            "table": table,
        }
        resp = await http_client.post("/?preserve_styles=1", data=files)
        assert resp.status == 200
        body = await resp.read()
    composed_doc = ComparableDocument(Document(BytesIO(body)))
    composed_fixture = FixtureDocument("table.docx")
    assert composed_doc == composed_fixture


async def test_healtcheck_returns_200(http_client):
resp = await http_client.get("/healthcheck")
assert resp.status == 200
Expand Down
37 changes: 37 additions & 0 deletions tests/test_styles.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from docx import Document
from utils import ComparableDocument
from utils import ComposedDocument
from utils import docx_path
from utils import FixtureDocument
Expand Down Expand Up @@ -64,6 +65,42 @@ def test_continue_when_no_styles():
ComposedDocument("aatmay.docx", "aatmay.docx")


def test_preserve_styles_with_same_id():
    """With ``preserve_styles`` a clashing style is kept under a suffixed id."""
    master = Document(docx_path("styles_preserve1.docx"))
    composer = Composer(master, preserve_styles=True)
    appended = Document(docx_path("styles_preserve2.docx"))
    composer.append(appended)

    style_ids = [style.style_id for style in composer.doc.styles]
    for wanted_id in ("MyCustomStyle", "MyCustomStyle_1"):
        assert wanted_id in style_ids

    expected = FixtureDocument("styles_preserve.docx")
    composed = ComparableDocument(composer.doc)
    assert composed == expected


def test_ignore_styles_with_same_id():
    """Without ``preserve_styles`` no suffixed copy of the style is added."""
    master = Document(docx_path("styles_preserve1.docx"))
    composer = Composer(master)
    appended = Document(docx_path("styles_preserve2.docx"))
    composer.append(appended)

    style_ids = [style.style_id for style in composer.doc.styles]
    assert "MyCustomStyle" in style_ids
    assert "MyCustomStyle_1" not in style_ids


def test_preserve_styles_does_not_duplicate_identical_styles():
    """Appending documents with already preserved styles adds no new copies."""
    composer = Composer(
        Document(docx_path("styles_preserve1.docx")), preserve_styles=True
    )
    appended_files = (
        "styles_preserve2.docx",
        "styles_preserve2.docx",
        "styles_preserve1.docx",
    )
    for filename in appended_files:
        composer.append(Document(docx_path(filename)))

    custom_style_ids = [
        style.style_id
        for style in composer.doc.styles
        if style.style_id.startswith("MyCustomStyle")
    ]
    assert custom_style_ids == [
        "MyCustomStyle",
        "MyCustomStyleZchn",
        "MyCustomStyle_1",
    ]


@pytest.fixture
def merged_styles():
composer = Composer(Document(docx_path("styles_en.docx")))
Expand Down
43 changes: 43 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from lxml import etree

from docxcompose.utils import xml_elements_equal


def test_xml_elements_are_equal():
    """Child order and ignored tags do not affect equality."""
    left_xml = """
<root>
<a x="1">Foo</a>
<b>Bar</b>
<ignore_me>123</ignore_me>
</root>
"""
    right_xml = """
<root>
<b>Bar</b>
<a x="1">Foo</a>
<ignore_me>999</ignore_me>
</root>
"""
    left = etree.fromstring(left_xml)
    right = etree.fromstring(right_xml)
    outcome = xml_elements_equal(left, right, ignored_tags=["ignore_me"])
    assert outcome is True


def test_xml_elements_are_not_equal():
    """A differing attribute value on a compared child breaks equality."""
    left_xml = """
<root>
<a x="1">Foo</a>
<b>Bar</b>
<ignore_me>123</ignore_me>
</root>
"""
    right_xml = """
<root>
<b>Bar</b>
<a x="2">Foo</a>
<ignore_me>999</ignore_me>
</root>
"""
    left = etree.fromstring(left_xml)
    right = etree.fromstring(right_xml)
    outcome = xml_elements_equal(left, right, ignored_tags=["ignore_me"])
    assert outcome is False
Loading