From a9e3793cfae3073fd7eb5380ca6b86733d17e7e0 Mon Sep 17 00:00:00 2001 From: Jakob Huber Date: Sat, 29 Nov 2025 13:22:35 +0100 Subject: [PATCH 1/4] TST: Add a testcase for a choice field without /Opt key --- resources/choice_field_without_opt_key.pdf | Bin 0 -> 538 bytes tests/test_reader.py | 5 +++++ 2 files changed, 5 insertions(+) create mode 100644 resources/choice_field_without_opt_key.pdf diff --git a/resources/choice_field_without_opt_key.pdf b/resources/choice_field_without_opt_key.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3e383f74db63ede14a96e6467377f6e750f1cf4d GIT binary patch literal 538 zcmZXS!A`IO>Mj=hm}sT2D@Zi67VHG_y+PK&elL1 z*kmvN&VK(tv)hTT!wc2-p!2)`{sDy&&D#fz#~>nW(#?V_cGtUgn)P5Z0V+~=JZP0A zyEUPR^pZ*tGqWiwR3Ps1w8R%2a$$_~)>ZH9>P%O9WtME{80gqBi0Q(b(AZCOQNo;7 zHP3H&O&Vu%2;HOs5mRY4cEL<$aJjgrG}l*V%fVz{)>}!5ZqJ>)B@X}Z-_V`qf&Pr$ z9*6|H$-}GiE~ka1lvoK96tnkMms?92NDM$87Y4rX`IzBRQ;c>Ci$?RgnG;PZp4q83 z3eL2_h+8_;RaWOKvh_ng2OD|BuiL-+$D CfQO9$ literal 0 HcmV?d00001 diff --git a/tests/test_reader.py b/tests/test_reader.py index f7ac48499..fffc14ff6 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -419,6 +419,11 @@ def test_get_page_of_encrypted_file(pdffile, password, should_fail): {}, None, ), + ( + "choice_field_without_opt_key.pdf", + {}, + {"TN_Anrede": {"/FT": "/Ch", "/T": "TN_Anrede", "/V": ""}}, + ), ], ) def test_get_form(src, expected, expected_get_fields, txt_file_path): From 9ba8a4f0e957961f5469a8066b78d841060659b7 Mon Sep 17 00:00:00 2001 From: Jakob Huber Date: Sat, 29 Nov 2025 13:27:27 +0100 Subject: [PATCH 2/4] BUG: Fix reading of choice fields missing the /Opt key Closes #2838 According to the PDF 2.0 specification, the `/Opt` key is not required for choice fields --- pypdf/_doc_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py index 9d92ebb0e..fa3ca3130 100644 --- a/pypdf/_doc_common.py +++ b/pypdf/_doc_common.py @@ -599,7 +599,7 @@ def _build_field( fileobj.write("\n") retval[key] = Field(field) obj = retval[key].indirect_reference.get_object() # to get the full object - if obj.get(FA.FT, "") == "/Ch": + if obj.get(FA.FT, "") == "/Ch" and obj.get(NameObject(FA.Opt)): retval[key][NameObject("/_States_")] = obj[NameObject(FA.Opt)] if obj.get(FA.FT, "") == "/Btn" and "/AP" in obj: # Checkbox From bc5f3740c259aaa3fb6ce02d9736e1c483b072b2 Mon Sep 17 00:00:00 2001 From: Jakob Huber Date: Mon, 1 Dec 2025 14:12:40 +0100 Subject: [PATCH 3/4] TST: Refactor unittest to use PDF from github comment instead of local file --- resources/choice_field_without_opt_key.pdf | Bin 538 -> 0 bytes tests/test_reader.py | 21 +++++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) delete mode 100644 resources/choice_field_without_opt_key.pdf diff --git a/resources/choice_field_without_opt_key.pdf b/resources/choice_field_without_opt_key.pdf deleted file mode 100644 index 3e383f74db63ede14a96e6467377f6e750f1cf4d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 538 zcmZXS!A`IO>Mj=hm}sT2D@Zi67VHG_y+PK&elL1 z*kmvN&VK(tv)hTT!wc2-p!2)`{sDy&&D#fz#~>nW(#?V_cGtUgn)P5Z0V+~=JZP0A zyEUPR^pZ*tGqWiwR3Ps1w8R%2a$$_~)>ZH9>P%O9WtME{80gqBi0Q(b(AZCOQNo;7 zHP3H&O&Vu%2;HOs5mRY4cEL<$aJjgrG}l*V%fVz{)>}!5ZqJ>)B@X}Z-_V`qf&Pr$ z9*6|H$-}GiE~ka1lvoK96tnkMms?92NDM$87Y4rX`IzBRQ;c>Ci$?RgnG;PZp4q83 z3eL2_h+8_;RaWOKvh_ng2OD|BuiL-+$D CfQO9$ diff --git a/tests/test_reader.py b/tests/test_reader.py index fffc14ff6..5b5ccccef 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -418,12 +418,7 @@ def test_get_page_of_encrypted_file(pdffile, password, should_fail): "crazyones.pdf", {}, None, - ), - ( - "choice_field_without_opt_key.pdf", - {}, - {"TN_Anrede": {"/FT": "/Ch", "/T": "TN_Anrede", "/V": ""}}, - ), + ) ], ) def test_get_form(src, expected, expected_get_fields, txt_file_path): @@ -452,6 +447,20 @@ def test_get_form(src, expected, expected_get_fields, txt_file_path): field.additional_actions, ] +@pytest.mark.enable_socket +def test_reading_choice_field_without_opt_key(): + """Tests reading a choice field in a PDF without an /Opt key.""" + url = "https://github.com/user-attachments/files/23853677/Musterservicevertrag-HNRAGB_Okt2022-Blanko.pdf" + reader = PdfReader(BytesIO(get_data_from_url(url, name="Musterservicevertrag-HNRAGB_Okt2022-Blanko.pdf"))) + fields = reader.get_fields() + + tn_anrede = fields.get("TN_Anrede") + assert tn_anrede is not None + + # Ensure that parsing of a choice field without /Opt key worked + tn_anrede_opt = tn_anrede.get("/Opt") + assert tn_anrede_opt is None + @pytest.mark.parametrize( ("src", "page_number"), From 4710d786049a93f8bebcadc6d64f2cd02d8c45cc Mon Sep 17 00:00:00 2001 From: Jakob Huber Date: Mon, 1 Dec 2025 17:15:11 +0100 Subject: [PATCH 4/4] TST: Add missing newline in test_reader.py --- tests/test_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_reader.py b/tests/test_reader.py index 5b5ccccef..53527cad7 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -447,6 +447,7 @@ def test_get_form(src, expected, expected_get_fields, txt_file_path): field.additional_actions, ] + @pytest.mark.enable_socket def test_reading_choice_field_without_opt_key(): """Tests reading a choice field in a PDF without an /Opt key."""