py-pdf · stefan6419846 · Sep 24, 2025 · Sep 24, 2025 · Sep 24, 2025 · Sep 24, 2025
diff --git a/pypdf/generic/_files.py b/pypdf/generic/_files.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import bisect
 from functools import cached_property
 from typing import TYPE_CHECKING, cast
 
@@ -82,17 +83,22 @@ def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> Embe
         from pypdf.generic import create_string_object  # noqa: PLC0415
         filespec = DictionaryObject()
         filespec_reference = writer._add_object(filespec)
+        name_object = cast(TextStringObject, create_string_object(name))
         filespec.update(
             {
                 NameObject(PG.TYPE): NameObject("/Filespec"),
-                NameObject(FileSpecificationDictionaryEntries.F): create_string_object(name),
+                NameObject(FileSpecificationDictionaryEntries.F): name_object,
                 NameObject(FileSpecificationDictionaryEntries.EF): ef_entry,
             }
         )
 
-        # Add the name and filespec to the names array
+        # Add the name and filespec to the names array.
+        # We use the inverse order for insertion, as this allows us to re-use the
+        # same index.
         names_array = cls._get_names_array(writer)
-        names_array.extend([create_string_object(name), filespec_reference])
+        insertion_index = cls._get_insertion_index(names_array, name_object)
+        names_array.insert(insertion_index, filespec_reference)
+        names_array.insert(insertion_index, name_object)
 
         # Return an EmbeddedFile instance
         return cls(name=name, pdf_object=filespec, parent=names_array)
@@ -141,6 +147,22 @@ def _get_names_array(cls, writer: PdfWriter) -> ArrayObject:
                 names.append(name)
         return names
 
+    @classmethod
+    def _get_insertion_index(cls, names_array: ArrayObject, name: str) -> int:
+        """
+        Return the index at which the pair for ``name`` has to be inserted.
+
+        ``names_array`` is the flat ``[key, value, key, value, ...]`` array.
+        Its keys are compared by their UTF-8 bytes and are expected to be
+        sorted already. Entries with an equal key are kept in front of the
+        new one, so duplicates stay in insertion order.
+        """
+        keys = [names_array[i].encode("utf-8") for i in range(0, len(names_array), 2)]
+        # `bisect_right` covers all cases at once: the empty array, both ends,
+        # the gap between two keys and the slot after equal keys. Each key
+        # spans two array slots, thus the position is doubled.
+        return bisect.bisect_right(keys, name.encode("utf-8")) * 2
+
     @property
     def alternative_name(self) -> str | None:
         """Retrieve the alternative name (file specification)."""

diff --git a/tests/generic/test_files.py b/tests/generic/test_files.py
@@ -10,6 +10,7 @@
 from pypdf import PdfReader, PdfWriter
 from pypdf.errors import PdfReadError, PyPdfError
 from pypdf.generic import (
+    ArrayObject,
     ByteStringObject,
     DictionaryObject,
     EmbeddedFile,
@@ -489,3 +490,87 @@ def test_embedded_file__create__neither_kids_nor_names():
 
     with pytest.raises(expected_exception=PdfReadError, match=r"^Got neither Names nor Kids in embedded files tree\.$"):
         writer.add_attachment("test2.txt", b"content2")
+
+
+def test_embedded_file__get_insertion_index():
+    """
+    Check the insertion index reported for a new name in a names array.
+
+    Each case holds the keys already present (in array order), the name to
+    add and the expected index into the flat ``[key, value, ...]`` array.
+    The expected order is the one of the UTF-8 encoded names, with equal
+    names being inserted behind the existing ones.
+    """
+
+    def build_names_array(existing_keys):
+        # Every key is followed by a placeholder value, as only the keys
+        # matter for the position.
+        entries = []
+        for key in existing_keys:
+            entries.append(TextStringObject(key))
+            entries.append(NullObject())
+        return ArrayObject(entries)
+
+    cases = [
+        # Empty list.
+        ((), "test.txt", 0),
+
+        # One mismatching entry.
+        # The existing key is smaller: insert behind it.
+        (("dummy.txt",), "test.txt", 2),
+        # The existing key is greater: insert in front of it.
+        (("xxx.txt",), "test.txt", 0),
+
+        # Multiple entries.
+        # Between both keys.
+        (("dummy.txt", "xxx.txt"), "test.txt", 2),
+        # In front of all keys.
+        (("xxx.txt", "yyy.txt"), "test.txt", 0),
+        # Behind all keys.
+        (("aaa.txt", "bbb.txt"), "test.txt", 4),
+        # Behind the already existing entry with the same name.
+        (("aaa.txt", "test.txt", "zzz.txt"), "test.txt", 4),
+
+        # Length.
+        # A key being a prefix of the new name sorts first.
+        (("a",), "aa", 2),
+        # Identical names: the new entry goes last.
+        (("a",), "a", 2),
+        # The new name is a prefix of the key and thus sorts first.
+        (("aaa",), "aa", 0),
+
+        # Special characters.
+        # "e" is smaller than "é".
+        (("café",), "cafe", 0),
+        # "u" is smaller than "ü".
+        (("Tun",), "Tür", 2),
+    ]
+
+    # Build a fresh array for each case and compare against the expectation.
+    for existing_keys, new_name, expected_index in cases:
+        names_array = build_names_array(existing_keys)
+        actual_index = EmbeddedFile._get_insertion_index(names_array, new_name)
+        assert actual_index == expected_index
+
+
+def test_embedded_file__order():
+    """Attachments are stored sorted by name; equal names keep their insertion order."""
+    writer = PdfWriter()
+    writer.add_blank_page(100, 100)
+
+    first_test_txt = writer.add_attachment("test.txt", "content")
+    abc_txt = writer.add_attachment("abc.txt", "content")
+    xyz_txt = writer.add_attachment("xyz.txt", "content")
+    second_test_txt = writer.add_attachment("test.txt", "content2")
+
+    expected_content = {"abc.txt": [b"content"], "test.txt": [b"content", b"content2"], "xyz.txt": [b"content"]}
+    assert dict(writer.attachments) == expected_content
+
+    # The flat names array alternates between a name and its file specification reference.
+    names = ["abc.txt", "test.txt", "test.txt", "xyz.txt"]
+    files = [abc_txt, first_test_txt, second_test_txt, xyz_txt]
+    expected_names = []
+    for name, attachment in zip(names, files):
+        expected_names += [name, attachment.pdf_object.indirect_reference]
+
+    assert writer.root_object["/Names"]["/EmbeddedFiles"]["/Names"] == expected_names