Skip to content

Commit b3f73d8

Browse files
authored
fix(scrubbing): Catch exceptions when downloading the required spaCy model (#733)
1 parent ca04b6d commit b3f73d8

File tree

4 files changed: 22 additions (+22) and 21 deletions (-21)

openadapt/app/cards.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from datetime import datetime
77
import multiprocessing
8-
import sys
98
import time
109

1110
from nicegui import ui
@@ -42,17 +41,11 @@ def reset(self) -> None:
4241

4342
def wait(self) -> None:
4443
"""Wait for the recording process to finish."""
45-
if sys.platform == "win32":
46-
# a bug on windows for when `record` runs for a long time, where even when
47-
# the `record` function returns a value, `record_proc` refuses to join.
48-
# this is a workaround to ensure the process is terminated
49-
while True:
50-
if self.terminate_recording.is_set():
51-
self.record_proc.terminate()
52-
return
53-
time.sleep(0.1)
54-
else:
55-
self.record_proc.join()
44+
while True:
45+
if self.terminate_recording.is_set():
46+
self.record_proc.terminate()
47+
return
48+
time.sleep(0.1)
5649

5750
def is_running(self) -> bool:
5851
"""Check if the recording process is running."""

openadapt/app/dashboard/app/scrubbing/ScrubbingUpdates.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export const ScrubbingUpdates = ({ data, resetScrubbingStatus }: Props) => {
2525
<Text component='h2'>Scrubbing updates for recording {data.recording.task_description}</Text>
2626
<Text>Provider: {data.provider}</Text>
2727
{data.copying_recording ? (
28-
<Text>Copying recording first...</Text>
28+
<Text>Copying recording (this may take a while if Spacy dependencies need to be downloaded on the first run)...</Text>
2929
) : data.error ? (
3030
<Text c="red">{data.error}</Text>
3131
) : (

openadapt/privacy/providers/presidio.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,6 @@
2222
from openadapt.privacy.base import Modality, ScrubbingProvider, TextScrubbingMixin
2323
from openadapt.privacy.providers import ScrubProvider
2424

25-
if not spacy.util.is_package(config.SPACY_MODEL_NAME): # pylint: disable=no-member
26-
logger.info(f"Downloading {config.SPACY_MODEL_NAME} model...")
27-
if not is_running_from_executable():
28-
spacy.cli.download(config.SPACY_MODEL_NAME)
29-
else:
30-
download_spacy_model(config.SPACY_MODEL_NAME)
31-
3225

3326
class PresidioScrubbingProvider(
3427
ScrubProvider, ScrubbingProvider, TextScrubbingMixin
@@ -38,6 +31,13 @@ class PresidioScrubbingProvider(
3831
name: str = ScrubProvider.PRESIDIO # pylint: disable=E1101
3932
capabilities: List[Modality] = [Modality.TEXT, Modality.PIL_IMAGE]
4033

34+
if not spacy.util.is_package(config.SPACY_MODEL_NAME): # pylint: disable=no-member
35+
logger.info(f"Downloading {config.SPACY_MODEL_NAME} model...")
36+
if not is_running_from_executable():
37+
spacy.cli.download(config.SPACY_MODEL_NAME)
38+
else:
39+
download_spacy_model(config.SPACY_MODEL_NAME)
40+
4141
def scrub_text(self, text: str, is_separated: bool = False) -> str:
4242
"""Scrub the text of all PII/PHI using Presidio ANALYZER.TRF and Anonymizer.
4343

openadapt/scrub.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,15 @@ def inner() -> None:
163163

164164
new_recording = crud.get_recording_by_id(write_session, new_recording_id)
165165

166-
scrubber = ScrubProvider.get_scrubber(provider_id)
166+
try:
167+
scrubber = ScrubProvider.get_scrubber(provider_id)
168+
except Exception as e:
169+
logger.error(e)
170+
cleanup(
171+
"Failed to download the required spacy model. Please try again after"
172+
" some time."
173+
)
174+
return
167175

168176
crud.scrub_item(new_recording_id, Recording, scrubber)
169177

0 commit comments

Comments
 (0)