Skip to content

Commit 6c0ae47

Browse files
authored
Merge pull request #40 from predictionguard/jacob/audio-doc-exts
Adding Extensions to Doc and Audio services, along with new Doc parameters
2 parents 93202ec + 4a865aa commit 6c0ae47

File tree

3 files changed

+65
-13
lines changed

3 files changed

+65
-13
lines changed

predictionguard/src/audio.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import json
2-
31
import requests
42
from typing import Any, Dict, Optional
53

@@ -46,6 +44,10 @@ def create(
4644
language: Optional[str] = "auto",
4745
temperature: Optional[float] = 0.0,
4846
prompt: Optional[str] = "",
47+
toxicity: Optional[bool] = False,
48+
pii: Optional[str] = "",
49+
replace_method: Optional[str] = "",
50+
injection: Optional[bool] = False,
4951
) -> Dict[str, Any]:
5052
"""
5153
Creates an audio transcription request to the Prediction Guard /audio/transcriptions API
@@ -55,25 +57,41 @@ def create(
5557
:param language: The language of the audio file
5658
:param temperature: The temperature parameter for model transcription
5759
:param prompt: A prompt to assist in transcription styling
60+
:param toxicity: Whether to check for output toxicity
61+
:param pii: Whether to check for or replace PII
62+
:param replace_method: Replace method for any PII that is present.
63+
:param injection: Whether to check for prompt injection
5864
:result: A dictionary containing the transcribed text.
5965
"""
6066

6167
# Create a list of tuples, each containing all the parameters for
6268
# a call to _transcribe_audio
63-
args = (model, file, language, temperature, prompt)
69+
args = (
70+
model, file, language, temperature,
71+
prompt, toxicity, pii, replace_method,
72+
injection
73+
)
6474

6575
# Run _transcribe_audio
6676
choices = self._transcribe_audio(*args)
6777
return choices
6878

69-
def _transcribe_audio(self, model, file, language, temperature, prompt):
79+
def _transcribe_audio(
80+
self, model, file,
81+
language, temperature, prompt,
82+
toxicity, pii, replace_method, injection
83+
):
7084
"""
7185
Function to transcribe an audio file.
7286
"""
7387

7488
headers = {
7589
"Authorization": "Bearer " + self.api_key,
7690
"User-Agent": "Prediction Guard Python Client: " + __version__,
91+
"Toxicity": str(toxicity),
92+
"Pii": pii,
93+
"Replace-Method": replace_method,
94+
"Injection": str(injection)
7795
}
7896

7997
with open(file, "rb") as audio_file:

predictionguard/src/documents.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import json
2-
from pyexpat import model
3-
41
import requests
52
from typing import Any, Dict, Optional
63

@@ -10,7 +7,7 @@
107
class Documents:
118
"""Documents allows you to extract text from various document file types.
129
13-
Usage::
10+
Usage:
1411
1512
from predictionguard import PredictionGuard
1613
@@ -39,34 +36,71 @@ def __init__(self, api_key, url):
3936

4037
def create(
4138
self,
42-
file: str
39+
file: str,
40+
embed_images: Optional[bool] = False,
41+
output_format: Optional[str] = None,
42+
chunk_document: Optional[bool] = False,
43+
chunk_size: Optional[int] = None,
44+
toxicity: Optional[bool] = False,
45+
pii: Optional[str] = "",
46+
replace_method: Optional[str] = "",
47+
injection: Optional[bool] = False,
4348
) -> Dict[str, Any]:
4449
"""
4550
Creates a documents request to the Prediction Guard /documents/extract API
4651
4752
:param file: Document to be parsed
53+
:param embed_images: Whether to embed images into documents
54+
:param output_format: Output format
55+
:param chunk_document: Whether to split the document into chunks
56+
:param chunk_size: Chunk size
57+
:param toxicity: Whether to check for output toxicity
58+
:param pii: Whether to check for or replace PII
59+
:param replace_method: Replace method for any PII that is present.
60+
:param injection: Whether to check for prompt injection
4861
:result: A dictionary containing the title, content, and length of the document.
4962
"""
5063

5164
# Run _extract_documents
52-
choices = self._extract_documents(file)
65+
choices = self._extract_documents(
66+
file, embed_images, output_format,
67+
chunk_document, chunk_size, toxicity,
68+
pii, replace_method, injection
69+
)
5370
return choices
5471

55-
def _extract_documents(self, file):
72+
def _extract_documents(
73+
self, file, embed_images,
74+
output_format, chunk_document,
75+
chunk_size, toxicity, pii,
76+
replace_method, injection
77+
):
5678
"""
5779
Function to extract a document.
5880
"""
5981

6082
headers = {
6183
"Authorization": "Bearer " + self.api_key,
6284
"User-Agent": "Prediction Guard Python Client: " + __version__,
85+
"Toxicity": str(toxicity),
86+
"Pii": pii,
87+
"Replace-Method": replace_method,
88+
"Injection": str(injection)
89+
}
90+
91+
data = {
92+
"embedImages": embed_images,
93+
"outputFormat": output_format,
94+
"chunkDocument": chunk_document,
95+
"chunkSize": chunk_size,
6396
}
6497

6598
with open(file, "rb") as doc_file:
6699
files = {"file": (file, doc_file)}
67100

68101
response = requests.request(
69-
"POST", self.url + "/documents/extract", headers=headers, files=files
102+
"POST", self.url + "/documents/extract",
103+
headers=headers, files=files, data=data
70104
)
71105

72106
# If the request was successful, print the proxies.

predictionguard/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# Setting the package version
2-
__version__ = "2.8.1"
2+
__version__ = "2.8.2"

0 commit comments

Comments
 (0)