diff --git a/changes/perserve-styles.feature b/changes/perserve-styles.feature
new file mode 100644
index 0000000..9f15f5d
--- /dev/null
+++ b/changes/perserve-styles.feature
@@ -0,0 +1 @@
+Optionally preserve styles of appended documents whose id already exists in the composed document by adding them under a suffixed id. [buchi]
\ No newline at end of file
diff --git a/docxcompose/command.py b/docxcompose/command.py
index 0b2e253..be50f73 100644
--- a/docxcompose/command.py
+++ b/docxcompose/command.py
@@ -27,6 +27,11 @@ def setup_parser():
help="path to the output file",
metavar="file",
)
+ parser.add_argument(
+ "--preserve-styles",
+ action="store_true",
+ default=False,
+ )
return parser
@@ -46,7 +51,10 @@ def parse_args(parser, args):
def compose_files(parser, parsed_args):
- composer = Composer(Document(parsed_args.master))
+ options = {
+ "preserve_styles": parsed_args.preserve_styles,
+ }
+ composer = Composer(Document(parsed_args.master), **options)
for slave_path in parsed_args.files:
composer.append(Document(slave_path))
diff --git a/docxcompose/composer.py b/docxcompose/composer.py
index 05a1088..72cb754 100644
--- a/docxcompose/composer.py
+++ b/docxcompose/composer.py
@@ -15,7 +15,9 @@
from docxcompose.image import ImageWrapper
from docxcompose.properties import CustomProperties
+from docxcompose.utils import increment_name
from docxcompose.utils import NS
+from docxcompose.utils import xml_elements_equal
from docxcompose.utils import xpath
@@ -34,13 +36,22 @@
RT.FOOTNOTES,
]
+# Child tags that are skipped when two style elements are compared with
+# ``xml_elements_equal`` (passed as its ``ignored_tags`` argument).
+IGNORED_STYLE_TAGS = {
+    "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}name",
+    "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}rsid",
+}
+
class Composer(object):
- def __init__(self, doc):
+ def __init__(self, doc, preserve_styles=False):
self.doc = doc
self.pkg = doc.part.package
self.restart_numbering = True
+ self.preserve_styles = preserve_styles
+ self._preserved_styles = {}
self.reset_reference_mapping()
@@ -59,6 +70,7 @@ def append(self, doc, remove_property_fields=True):
def insert(self, index, doc, remove_property_fields=True):
"""Insert the given document at the given index."""
self.reset_reference_mapping()
+ self._current_preserved_styles = {}
# Remove custom property fields but keep the values
if remove_property_fields:
@@ -299,24 +311,61 @@ def add_styles(self, doc, element):
for style_id in used_style_ids:
our_style_id = self.mapped_style_id(style_id)
- if our_style_id not in our_style_ids:
+ # To preserve styles with the same id from added documents, we
+ # create a copy and append a suffix to the id and name.
+ if self.preserve_styles and our_style_id in our_style_ids:
+ if our_style_id not in self._current_preserved_styles:
+ style_element = deepcopy(doc.styles.element.get_by_id(style_id))
+ our_style_element = self.doc.styles.element.get_by_id(our_style_id)
+
+ # Check if we already have an identical style
+ preserved_style_ids = self._preserved_styles.get(
+ our_style_id, [our_style_id]
+ )
+ matched_style_id = None
+ for pstyle_id in preserved_style_ids:
+ our_style_element = self.doc.styles.element.get_by_id(pstyle_id)
+ if xml_elements_equal(
+ style_element,
+ our_style_element,
+ ignored_tags=IGNORED_STYLE_TAGS,
+ ):
+ matched_style_id = pstyle_id
+ self._current_preserved_styles[our_style_id] = (
+ style_element.styleId
+ )
+ break
+ # No matching style found, insert style with a new name
+ if matched_style_id is None:
+ new_id = increment_name(our_style_id)
+ new_name = None
+ if style_element.name is not None:
+ new_name = increment_name(style_element.name.val)
+ while new_id in our_style_ids:
+ new_id = increment_name(new_id)
+ if new_name is not None:
+ new_name = increment_name(new_name)
+ style_element.styleId = new_id
+ if new_name is not None:
+ style_element.name.val = new_name
+ self.doc.styles.element.append(style_element)
+ self.add_numberings(doc, style_element)
+ self.add_linked_styles(doc, style_element)
+ self._current_preserved_styles[our_style_id] = new_id
+ self._preserved_styles.setdefault(
+ our_style_id, [our_style_id]
+ ).append(new_id)
+ else:
+ self._current_preserved_styles[our_style_id] = matched_style_id
+
+ for el in xpath(element, ".//w:tblStyle|.//w:pStyle|.//w:rStyle"):
+ el.val = self._current_preserved_styles[our_style_id]
+ elif our_style_id not in our_style_ids:
style_element = deepcopy(doc.styles.element.get_by_id(style_id))
if style_element is not None:
self.doc.styles.element.append(style_element)
self.add_numberings(doc, style_element)
- # Also add linked styles
- linked_style_ids = xpath(style_element, ".//w:link/@w:val")
- if linked_style_ids:
- linked_style_id = linked_style_ids[0]
- our_linked_style_id = self.mapped_style_id(linked_style_id)
- if our_linked_style_id not in our_style_ids:
- our_linked_style = doc.styles.element.get_by_id(
- linked_style_id
- )
- if our_linked_style is not None:
- self.doc.styles.element.append(
- deepcopy(our_linked_style)
- )
+ self.add_linked_styles(doc, style_element)
else:
# Create a mapping for abstractNumIds used in existing styles
# This is used when adding numberings to avoid having multiple
@@ -360,6 +409,17 @@ def add_styles(self, doc, element):
# Update our style ids
our_style_ids = [s.style_id for s in self.doc.styles]
+def add_linked_styles(self, doc, element):
+    """Copy the style linked from ``element`` into our document if missing.
+
+    Takes the first ``w:link`` value found below ``element``, maps it with
+    ``mapped_style_id`` and, when the mapped id is not among the style ids
+    of our document, appends a deep copy of the linked style from ``doc``.
+    Nothing is done when there is no link or ``doc`` has no such style.
+    """
+    link_values = xpath(element, ".//w:link/@w:val")
+    if not link_values:
+        return
+    source_id = link_values[0]
+    target_id = self.mapped_style_id(source_id)
+    known_ids = [s.style_id for s in self.doc.styles]
+    if target_id in known_ids:
+        return
+    linked_style = doc.styles.element.get_by_id(source_id)
+    if linked_style is None:
+        return
+    self.doc.styles.element.append(deepcopy(linked_style))
+
def add_numberings(self, doc, element):
"""Add numberings from the given document used in the given element."""
# Search for numbering references
diff --git a/docxcompose/server.py b/docxcompose/server.py
index 4a0a8cb..36885f4 100644
--- a/docxcompose/server.py
+++ b/docxcompose/server.py
@@ -10,6 +10,7 @@
from docx import Document
from docxcompose.composer import Composer
+from docxcompose.utils import to_bool
CHUNK_SIZE = 65536
@@ -48,7 +49,7 @@ async def compose(request):
composed_filename = os.path.join(temp_dir, "composed.docx")
try:
- composer = Composer(Document(documents.pop(0)))
+ composer = Composer(Document(documents.pop(0)), **compose_options(request))
for document in documents:
composer.append(Document(document))
composer.save(composed_filename)
@@ -63,6 +64,12 @@ async def compose(request):
)
+def compose_options(request):
+    """Build the ``Composer`` keyword arguments from the request's URL query.
+
+    ``preserve_styles`` is read from the query string (a missing parameter
+    is treated as ``""``) and converted with ``to_bool``.
+    """
+    raw_preserve_styles = request.rel_url.query.get("preserve_styles", "")
+    return {"preserve_styles": to_bool(raw_preserve_styles)}
+
+
async def save_part_to_file(part, directory):
filename = os.path.join(directory, f"{part.name}_{part.filename}")
with open(filename, "wb") as file_:
diff --git a/docxcompose/utils.py b/docxcompose/utils.py
index 36be83a..e287968 100644
--- a/docxcompose/utils.py
+++ b/docxcompose/utils.py
@@ -48,3 +48,87 @@ def word_to_python_date_format(format_str):
for word_format, python_format in date_format_map:
format_str = re.sub(word_format, python_format, format_str)
return format_str
+
+
+def increment_name(name):
+    """Return ``name`` with its trailing ``_<number>`` counter incremented.
+
+    If ``name`` ends in an underscore followed only by ASCII digits, that
+    number is increased by one (``"Foo_1"`` -> ``"Foo_2"``). Otherwise
+    ``"_1"`` is appended (``"Foo"`` -> ``"Foo_1"``).
+
+    Only a real ``_<digits>`` suffix counts as a counter: a name that is
+    numeric as a whole (``"123"``) or ends in something ``int()`` would
+    merely tolerate (``"Foo_-1"``, ``"Foo_ 1"``) gets ``"_1"`` appended
+    instead of being rewritten.
+    """
+    # DOTALL so that a name containing a newline is still matched as a whole.
+    match = re.fullmatch(r"(.*_)([0-9]+)", name, flags=re.DOTALL)
+    if match is None:
+        return f"{name}_1"
+    prefix, counter = match.groups()
+    return f"{prefix}{int(counter) + 1}"
+
+
+def to_bool(value):
+    """Return ``True`` if ``value`` is one of the accepted truthy strings.
+
+    The comparison is case-insensitive; any other string yields ``False``.
+    """
+    truthy = ("1", "yes", "true", "on", "ok")
+    lowered = value.lower()
+    return lowered in truthy
+
+
+def xml_elements_equal(
+    left,
+    right,
+    ignored_tags=None,
+    compare_text=True,
+    compare_tail=False,
+    compare_attributes=True,
+):
+    """Return whether ``left`` and ``right`` have equal signatures.
+
+    Both elements are reduced with ``xml_element_signature`` using the same
+    options, and the two results are compared with ``==``.
+    """
+    options = {
+        "ignored_tags": ignored_tags,
+        "compare_text": compare_text,
+        "compare_tail": compare_tail,
+        "compare_attributes": compare_attributes,
+    }
+    left_signature = xml_element_signature(left, **options)
+    right_signature = xml_element_signature(right, **options)
+    return left_signature == right_signature
+
+
+def xml_element_signature(
+    element,
+    ignored_tags=None,
+    compare_text=True,
+    compare_tail=False,
+    compare_attributes=True,
+    is_root=True,
+):
+    """
+    Creates a canonical, recursive representation of an element.
+
+    Child elements are included as a sorted tuple of signatures,
+    so their order is irrelevant. Children whose tag is in
+    ``ignored_tags`` are skipped. Nodes whose tag is not a string
+    (comments, processing instructions) are skipped as well, because
+    mixing them with elements would make the signatures unsortable.
+
+    For the root element (``is_root=True``) only the children
+    contribute: its own tag, attributes, text and tail are left out.
+    Every other element yields
+    ``(tag, attributes, text, tail, child_signatures)``, where
+    attributes are ``()`` and text/tail are ``None`` when the
+    corresponding ``compare_*`` flag is off.
+    """
+    child_signatures = []
+    for child in element:
+        # Comments / processing instructions expose a callable as ``tag``;
+        # it cannot be ordered against string tags when sorting below.
+        if not isinstance(child.tag, str):
+            continue
+        if ignored_tags and child.tag in ignored_tags:
+            continue
+
+        child_signatures.append(
+            xml_element_signature(
+                child,
+                ignored_tags=ignored_tags,
+                compare_text=compare_text,
+                compare_tail=compare_tail,
+                compare_attributes=compare_attributes,
+                is_root=False,
+            )
+        )
+    child_signatures.sort()
+    children = tuple(child_signatures)
+
+    if is_root:
+        return (None, None, None, None, children)
+
+    attrs = tuple(sorted(element.attrib.items())) if compare_attributes else ()
+    text = normalize_text(element.text) if compare_text else None
+    tail = normalize_text(element.tail) if compare_tail else None
+    return (element.tag, attrs, text, tail, children)
+
+
+def normalize_text(value):
+    """Return ``value`` without surrounding whitespace, or ``""`` for ``None``."""
+    return "" if value is None else value.strip()
diff --git a/tests/docs/composed_fixture/styles_preserve.docx b/tests/docs/composed_fixture/styles_preserve.docx
new file mode 100644
index 0000000..4845f7f
Binary files /dev/null and b/tests/docs/composed_fixture/styles_preserve.docx differ
diff --git a/tests/docs/styles_preserve1.docx b/tests/docs/styles_preserve1.docx
new file mode 100644
index 0000000..0329556
Binary files /dev/null and b/tests/docs/styles_preserve1.docx differ
diff --git a/tests/docs/styles_preserve2.docx b/tests/docs/styles_preserve2.docx
new file mode 100644
index 0000000..6da64a9
Binary files /dev/null and b/tests/docs/styles_preserve2.docx differ
diff --git a/tests/test_server.py b/tests/test_server.py
index 93f8360..91e3fbc 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -62,6 +62,18 @@ async def test_post_returns_500_if_compose_fails(http_client):
assert text == "Failed composing documents"
+async def test_post_with_url_parameters(http_client):
+    """Posting with ``?preserve_styles=1`` returns 200 and the ``table.docx`` fixture."""
+    uploads = dict(
+        master=open(docx_path("master.docx"), "rb"),
+        table=open(docx_path("table.docx"), "rb"),
+    )
+    response = await http_client.post("/?preserve_styles=1", data=uploads)
+    assert response.status == 200
+    body = await response.read()
+    composed_doc = ComparableDocument(Document(BytesIO(body)))
+    assert composed_doc == FixtureDocument("table.docx")
+
+
async def test_healtcheck_returns_200(http_client):
resp = await http_client.get("/healthcheck")
assert resp.status == 200
diff --git a/tests/test_styles.py b/tests/test_styles.py
index 3b0060e..cc160d7 100644
--- a/tests/test_styles.py
+++ b/tests/test_styles.py
@@ -1,5 +1,6 @@
import pytest
from docx import Document
+from utils import ComparableDocument
from utils import ComposedDocument
from utils import docx_path
from utils import FixtureDocument
@@ -64,6 +65,42 @@ def test_continue_when_no_styles():
ComposedDocument("aatmay.docx", "aatmay.docx")
+def test_preserve_styles_with_same_id():
+    """With ``preserve_styles`` a clashing style id is also present with a ``_1`` suffix."""
+    master = Document(docx_path("styles_preserve1.docx"))
+    composer = Composer(master, preserve_styles=True)
+    composer.append(Document(docx_path("styles_preserve2.docx")))
+
+    present_ids = {style.style_id for style in composer.doc.styles}
+    assert "MyCustomStyle" in present_ids
+    assert "MyCustomStyle_1" in present_ids
+
+    expected = FixtureDocument("styles_preserve.docx")
+    assert ComparableDocument(composer.doc) == expected
+
+
+def test_ignore_styles_with_same_id():
+    """Without ``preserve_styles`` no ``_1``-suffixed copy of a clashing style exists."""
+    master = Document(docx_path("styles_preserve1.docx"))
+    composer = Composer(master)
+    composer.append(Document(docx_path("styles_preserve2.docx")))
+
+    present_ids = {style.style_id for style in composer.doc.styles}
+    assert "MyCustomStyle" in present_ids
+    assert "MyCustomStyle_1" not in present_ids
+
+
+def test_preserve_styles_does_not_duplicate_identical_styles():
+    """Appending the same documents again leaves exactly three ``MyCustomStyle*`` ids."""
+    composer = Composer(
+        Document(docx_path("styles_preserve1.docx")), preserve_styles=True
+    )
+    for filename in (
+        "styles_preserve2.docx",
+        "styles_preserve2.docx",
+        "styles_preserve1.docx",
+    ):
+        composer.append(Document(docx_path(filename)))
+
+    custom_style_ids = []
+    for style in composer.doc.styles:
+        if style.style_id.startswith("MyCustomStyle"):
+            custom_style_ids.append(style.style_id)
+    assert custom_style_ids == [
+        "MyCustomStyle",
+        "MyCustomStyleZchn",
+        "MyCustomStyle_1",
+    ]
+
+
@pytest.fixture
def merged_styles():
composer = Composer(Document(docx_path("styles_en.docx")))
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..8509578
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,43 @@
+from lxml import etree
+
+from docxcompose.utils import xml_elements_equal
+
+
+def test_xml_elements_are_equal():
+ """Two parsed documents compare equal when ``ignore_me`` tags are ignored."""
+ # NOTE(review): the XML markup inside the two literals below appears to
+ # have been lost in this view (only text content remains) - confirm the
+ # literals against the original test file before relying on them.
+ xml1 = """
+
+ Foo
+ Bar
+ 123
+
+ """
+ xml2 = """
+
+ Bar
+ Foo
+ 999
+
+ """
+ e1 = etree.fromstring(xml1)
+ e2 = etree.fromstring(xml2)
+ assert xml_elements_equal(e1, e2, ignored_tags=["ignore_me"]) is True
+
+
+def test_xml_elements_are_not_equal():
+ """Two parsed documents compare unequal even when ``ignore_me`` tags are ignored."""
+ # NOTE(review): as shown here the literals carry the same text as in the
+ # "equal" test; the markup that makes them differ appears to have been
+ # lost in this view - confirm against the original test file.
+ xml1 = """
+
+ Foo
+ Bar
+ 123
+
+ """
+ xml2 = """
+
+ Bar
+ Foo
+ 999
+
+ """
+ e1 = etree.fromstring(xml1)
+ e2 = etree.fromstring(xml2)
+ assert xml_elements_equal(e1, e2, ignored_tags=["ignore_me"]) is False