Skip to content

Commit 8a52e86

Browse files
committed
ci fixes
1 parent c7af319 commit 8a52e86

File tree

4 files changed: 43 additions (+43) and 47 deletions (-47)

.github/workflows/ci-cd.yaml

Lines changed: 34 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -93,40 +93,40 @@ jobs:
9393
brew install poppler tesseract
9494
uv pip install --upgrade pdftotext pytesseract
9595
96-
- name: Install pdf2text and ocr dependencies (Windows w/latest Python)
97-
# Do full install and testing of pdf2text and ocr only on latest Python version
98-
if: ${{ matrix.python-version == '3.14' && runner.os == 'Windows' }}
99-
shell: pwsh
100-
run: |
101-
# Set the preference variable to display Debug messages
102-
$VerbosePreference = "Continue"
103-
104-
# Install Poppler
105-
# 1. install miniconda per https://conda.io/projects/conda/en/stable/user-guide/install/windows.html
106-
$condaInstallerUrl = "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"
107-
$condaInstallerPath = "$env:TEMP\Miniconda3-setup.exe"
108-
$installDir = "$env:TEMP\Miniconda3"
109-
Invoke-WebRequest -Uri $condaInstallerUrl -OutFile $condaInstallerPath
110-
# Run the installer silently
111-
Start-Process -FilePath $condaInstallerPath -ArgumentList "/RegisterPython=0 /D=$installDir" -Wait -NoNewWindow
112-
Remove-Item -Path $condaInstallerPath
113-
# 2. install Poppler using conda
114-
Start-Process -FilePath "$installDir\conda.exe" -ArgumentList "install -c conda-forge poppler --yes" -Wait -NoNewWindow
115-
116-
# Install Tesseract
117-
# See https://github.com/UB-Mannheim/tesseract/wiki
118-
$tesseractInstallerUrl = "https://github.com/tesseract-ocr/tesseract/releases/download/5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe" # Replace with the latest version
119-
$tesseractInstallerPath = "$env:TEMP\tesseract-ocr-setup.exe"
120-
Invoke-WebRequest -Uri $tesseractInstallerUrl -OutFile $tesseractInstallerPath
121-
# Run the installer silently
122-
Start-Process -FilePath $tesseractInstallerPath -ArgumentList "/S /D=C:\Program Files\Tesseract-OCR" -Wait -NoNewWindow
123-
Remove-Item -Path $tesseractInstallerPath
124-
# Add Tesseract to PATH
125-
$env:Path += ";C:\Program Files\Tesseract-OCR"
126-
echo "C:\Program Files\Tesseract-OCR" | Out-File -FilePath $env:GITHUB_PATH -Append
127-
128-
# Install Python packages
129-
uv pip install --upgrade pdftotext pytesseract
96+
# - name: Install pdf2text and ocr dependencies (Windows w/latest Python)
97+
# # Do full install and testing of pdf2text and ocr only on latest Python version
98+
# if: ${{ matrix.python-version == '3.14' && runner.os == 'Windows' }}
99+
# shell: pwsh
100+
# run: |
101+
# # Set the preference variable to display Verbose messages
102+
# $VerbosePreference = "Continue"
103+
104+
# # Install Poppler
105+
# # 1. install miniconda per https://conda.io/projects/conda/en/stable/user-guide/install/windows.html
106+
# $condaInstallerUrl = "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"
107+
# $condaInstallerPath = "$env:TEMP\Miniconda3-setup.exe"
108+
# $installDir = "$env:TEMP\Miniconda3"
109+
# Invoke-WebRequest -Uri $condaInstallerUrl -OutFile $condaInstallerPath -HttpVersion 2.0
110+
# # Run the installer silently
111+
# Start-Process -FilePath $condaInstallerPath -ArgumentList "/RegisterPython=0 /D=$installDir" -Wait -NoNewWindow
112+
# Remove-Item -Path $condaInstallerPath
113+
# # 2. install Poppler using conda
114+
# Start-Process -FilePath "$installDir\conda.exe" -ArgumentList "install -c conda-forge poppler --yes" -Wait -NoNewWindow
115+
116+
# # Install Tesseract
117+
# # See https://github.com/UB-Mannheim/tesseract/wiki
118+
# $tesseractInstallerUrl = "https://github.com/tesseract-ocr/tesseract/releases/download/5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe" # Replace with the latest version
119+
# $tesseractInstallerPath = "$env:TEMP\tesseract-ocr-setup.exe"
120+
# Invoke-WebRequest -Uri $tesseractInstallerUrl -OutFile $tesseractInstallerPath -HttpVersion 2.0
121+
# # Run the installer silently
122+
# Start-Process -FilePath $tesseractInstallerPath -ArgumentList "/S /D=C:\Program Files\Tesseract-OCR" -Wait -NoNewWindow
123+
# Remove-Item -Path $tesseractInstallerPath
124+
# # Add Tesseract to PATH
125+
# $env:Path += ";C:\Program Files\Tesseract-OCR"
126+
# echo "C:\Program Files\Tesseract-OCR" | Out-File -FilePath $env:GITHUB_PATH -Append
127+
128+
# # Install Python packages
129+
# uv pip install --upgrade pdftotext pytesseract
130130

131131
- name: Install all other dependencies (GIL)
132132
if: ${{ ! endsWith(matrix.python-version, 't') }}

tests/test_filters.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -122,8 +122,6 @@ def test_providing_unknown_subfilter_raises_valueerror() -> None:
122122
assert err_msg.endswith(('re, text are supported.', 'text, re are supported.'))
123123

124124

125-
# @pytest.mark.skipif(sys.platform == 'darwin', reason='Often leads to Process completed with exit code 141 on macOS')
126-
# # type: ignore[misc]
127125
def test_execute_inherits_environment_but_does_not_modify_it() -> None:
128126
# https://github.com/thp/urlwatch/issues/541
129127

@@ -155,8 +153,6 @@ def test_execute_inherits_environment_but_does_not_modify_it() -> None:
155153
assert os.environ['URLWATCH_JOB_NAME'] == 'should-not-be-overwritten'
156154

157155

158-
# @pytest.mark.skipif(sys.platform == 'darwin', reason='Often leads to Process completed with exit code 141 on macOS')
159-
# # type: ignore[misc]
160156
def test_shellpipe_inherits_environment_but_does_not_modify_it() -> None:
161157
# https://github.com/thp/urlwatch/issues/541
162158
# if os.getenv('GITHUB_ACTIONS') and sys.version_info[0:2] == (3, 6) and sys.platform == 'linux':
@@ -403,7 +399,6 @@ def test_filter_exceptions() -> None:
403399
pytest.xfail('jq not installed')
404400

405401

406-
# @pytest.mark.xfail('Not working due to an html2text bug')
407402
def test_html2text_roundtrip() -> None:
408403
html = '1 | <a href="https://www.example.com">1</a><br><strong>2 |<a href="https://www.example.com">2</a></strong>'
409404
data, _ = Html2TextFilter(job_state).filter(html, 'text/plain', {}) # type: ignore[arg-type]

tests/test_jobs.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -450,12 +450,7 @@ def test_check_ignore_connection_errors(job_data: dict[str, Any]) -> None:
450450
assert job_state.exception
451451
assert any(
452452
x in str(job_state.exception.args)
453-
for x in (
454-
'Max retries exceeded',
455-
'Connection refused',
456-
'No connection could be made',
457-
'net::ERR_CONNECTION_REFUSED',
458-
)
453+
for x in ('Connection refused', 'No connection could be made', 'net::ERR_CONNECTION_REFUSED')
459454
)
460455
assert getattr(job_state, 'error_ignored', False) is False
461456

@@ -486,7 +481,10 @@ def test_check_bad_proxy(job_data: dict[str, Any]) -> None:
486481
with JobState(ssdb_storage, job) as job_state:
487482
job_state.process()
488483
assert job_state.exception
489-
assert any(x in str(job_state.exception.args) for x in ('Max retries exceeded', 'Connection refused'))
484+
assert any(
485+
x in str(job_state.exception.args)
486+
for x in ('Connection refused', 'No connection could be made', 'ERR_PROXY_CONNECTION_FAILED ')
487+
)
490488
assert job_state.error_ignored is False
491489

492490

webchanges/jobs.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1986,7 +1986,10 @@ def ignore_error(self, exception: Exception) -> bool:
19861986
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
19871987

19881988
if isinstance(exception, (BrowserResponseError, PlaywrightError)):
1989-
chromium_error = str(exception.args[0]).split()[0]
1989+
try:
1990+
chromium_error = str(exception.args[0]).split()[1]
1991+
except IndexError:
1992+
chromium_error = str(exception.args[0])
19901993
if self.ignore_connection_errors and (
19911994
isinstance(exception, PlaywrightTimeoutError) or chromium_error in self.chromium_connection_errors
19921995
):

0 commit comments

Comments
 (0)