From 2946d9626d92314b246b9d06919aa60adc88b82a Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 5 Sep 2022 22:07:32 +0200
Subject: [PATCH 1/2] ROB : fix image extraction

Fix reading of inline images when other operations are inserted between EI and Q.
The end of an inline image is now detected as [whitespace]EI[whitespace] (these 4 characters should be sufficient); a following Q operator is no longer required.
---
 PyPDF2/generic/_data_structures.py |  8 ++++----
 tests/test_workflows.py            | 13 +++++--------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/PyPDF2/generic/_data_structures.py b/PyPDF2/generic/_data_structures.py
index 283b33b225..f6630066c6 100644
--- a/PyPDF2/generic/_data_structures.py
+++ b/PyPDF2/generic/_data_structures.py
@@ -759,17 +759,17 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
                 tok = stream.read(1)
                 # Check for End Image
                 tok2 = stream.read(1)
-                if tok2 == b"I":
-                    # Data can contain EI, so check for the Q operator.
+                if tok2 == b"I" and buf[loc - 1 : loc] in WHITESPACES:
+                    # Data can contain EI, so also require surrounding whitespace ([\s]EI[\s]); these 4 chars are sufficient, the Q operator is not required.
                     tok3 = stream.read(1)
                     info = tok + tok2
-                    # We need to find whitespace between EI and Q.
+                    # We need to find at least one whitespace after.
                     has_q_whitespace = False
                     while tok3 in WHITESPACES:
                         has_q_whitespace = True
                         info += tok3
                         tok3 = stream.read(1)
-                    if tok3 == b"Q" and has_q_whitespace:
+                    if has_q_whitespace:
                         stream.seek(-1, 1)
                         break
                     else:
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index cc194f435b..200e030c71 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -425,7 +425,7 @@ def test_get_metadata(url, name):
             "https://corpora.tika.apache.org/base/docs/govdocs1/938/938702.pdf",
             "tika-938702.pdf",
             False,
-            (PdfReadError, "Unexpected end of stream"),
+            None,  # issue #1090 is now fixed
         ),
         (
             "https://corpora.tika.apache.org/base/docs/govdocs1/942/942358.pdf",
@@ -512,19 +512,16 @@ def test_extract_text(url, name, strict, exception):
         ),
         (
             "https://corpora.tika.apache.org/base/docs/govdocs1/957/957304.pdf",
-            "tika-938702.pdf",
+            "tika-957304.pdf",
         ),
     ],
 )
 def test_compress_raised(url, name):
     data = BytesIO(get_pdf_from_url(url, name=name))
     reader = PdfReader(data)
-    # TODO: which page exactly?
-    # TODO: Is it reasonable to have an exception here?
-    with pytest.raises(PdfReadError) as exc:
-        for page in reader.pages:
-            page.compress_content_streams()
-    assert exc.value.args[0] == "Unexpected end of stream"
+    # No error is raised anymore now that issue #1090 is fixed.
+    for page in reader.pages:
+        page.compress_content_streams()
 
 
 @pytest.mark.parametrize(

From 38c4e68350d46e0b8024517934cfcce9c8c9174a Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 5 Sep 2022 22:19:06 +0200
Subject: [PATCH 2/2] flake8

---
 tests/test_workflows.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 200e030c71..57cab70180 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -17,7 +17,7 @@
 from PyPDF2.constants import ImageAttributes as IA
 from PyPDF2.constants import PageAttributes as PG
 from PyPDF2.constants import Ressources as RES
-from PyPDF2.errors import PdfReadError, PdfReadWarning
+from PyPDF2.errors import PdfReadWarning
 from PyPDF2.filters import _xobj_to_image
 
 from . import get_pdf_from_url, normalize_warnings