22
33from __future__ import annotations
44
5- from typing import List
6-
5+ import io
6+ import os
7+ import logging
8+ import builtins
9+ from typing import List , overload
10+ from pathlib import Path
11+
12+ import anyio
713import httpx
814
915from ... import _legacy_response
3137__all__ = ["Uploads" , "AsyncUploads" ]
3238
3339
40+ # Default size of each uploaded part: 64 MiB
41+ DEFAULT_PART_SIZE = 64 * 1024 * 1024
42+
43+ log : logging .Logger = logging .getLogger (__name__ )
44+
45+
3446class Uploads (SyncAPIResource ):
3547 @cached_property
3648 def parts (self ) -> Parts :
@@ -44,6 +56,105 @@ def with_raw_response(self) -> UploadsWithRawResponse:
4456 def with_streaming_response (self ) -> UploadsWithStreamingResponse :
4557 return UploadsWithStreamingResponse (self )
4658
@overload
def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str],
    mime_type: str,
    purpose: FilePurpose,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits a file on disk into multiple parts (64MB each by default) and uploads them sequentially."""

@overload
def upload_file_chunked(
    self,
    *,
    file: bytes,
    filename: str,
    bytes: int,
    mime_type: str,
    purpose: FilePurpose,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits an in-memory file into multiple parts (64MB each by default) and uploads them sequentially."""

def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str] | bytes,
    mime_type: str,
    purpose: FilePurpose,
    filename: str | None = None,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits the given file into multiple parts and uploads them sequentially.

    The upload is created with `self.create()`, each chunk is sent with
    `self.parts.create()`, and the upload is finalised with `self.complete()`
    using the part IDs in the order they were uploaded.

    ```py
    from pathlib import Path

    client.uploads.upload_file_chunked(
        file=Path("my-paper.pdf"),
        mime_type="application/pdf",
        purpose="assistants",
    )
    ```

    Args:
      file: Either a path-like object pointing at the file to upload, or the
          raw file contents as `bytes`.

      mime_type: Forwarded unchanged to `self.create()`.

      purpose: Forwarded unchanged to `self.create()`.

      filename: Name to register the upload under. Required for in-memory
          files; for paths it defaults to the final path component.

      bytes: Total size of the upload. Required for in-memory files; for
          paths it defaults to the size reported by `stat()`.

      part_size: Maximum number of bytes read per part. Defaults to
          `DEFAULT_PART_SIZE`.

      md5: Forwarded unchanged to `self.complete()`.

    Returns:
      The `Upload` returned by `self.complete()`.

    Raises:
      TypeError: If `file` is `bytes` and `filename` or `bytes` is missing.
    """
    # NOTE: the `bytes` parameter shadows the builtin, hence `builtins.bytes`.
    if isinstance(file, builtins.bytes):
        if filename is None:
            raise TypeError("The `filename` argument must be given for in-memory files")

        if bytes is None:
            raise TypeError("The `bytes` argument must be given for in-memory files")
    else:
        if not isinstance(file, Path):
            file = Path(file)

        if not filename:
            filename = file.name

        if bytes is None:
            bytes = file.stat().st_size

    upload = self.create(
        bytes=bytes,
        filename=filename,
        mime_type=mime_type,
        purpose=purpose,
    )

    part_ids: list[str] = []

    if part_size is None:
        part_size = DEFAULT_PART_SIZE

    if isinstance(file, builtins.bytes):
        buf: io.FileIO | io.BytesIO = io.BytesIO(file)
    else:
        buf = io.FileIO(file)

    # Use the buffer as a context manager so it is closed on every exit path.
    # Previously it was only closed when an `Exception` was raised, which
    # leaked the file descriptor after each successful upload.
    with buf:
        while True:
            data = buf.read(part_size)
            if not data:
                # EOF
                break

            part = self.parts.create(upload_id=upload.id, data=data)
            log.info("Uploaded part %s for upload %s", part.id, upload.id)
            part_ids.append(part.id)

    return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
157+
47158 def create (
48159 self ,
49160 * ,
@@ -227,6 +338,116 @@ def with_raw_response(self) -> AsyncUploadsWithRawResponse:
227338 def with_streaming_response (self ) -> AsyncUploadsWithStreamingResponse :
228339 return AsyncUploadsWithStreamingResponse (self )
229340
@overload
async def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str],
    mime_type: str,
    purpose: FilePurpose,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits a file on disk into multiple parts (64MB each by default) and uploads them sequentially."""

@overload
async def upload_file_chunked(
    self,
    *,
    file: bytes,
    filename: str,
    bytes: int,
    mime_type: str,
    purpose: FilePurpose,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits an in-memory file into multiple parts (64MB each by default) and uploads them sequentially."""

async def upload_file_chunked(
    self,
    *,
    file: os.PathLike[str] | bytes,
    mime_type: str,
    purpose: FilePurpose,
    filename: str | None = None,
    bytes: int | None = None,
    part_size: int | None = None,
    md5: str | NotGiven = NOT_GIVEN,
) -> Upload:
    """Splits the given file into multiple parts and uploads them sequentially.

    The upload is created with `self.create()`, each chunk is sent with
    `self.parts.create()`, and the upload is finalised with `self.complete()`
    using the part IDs in the order they were uploaded.

    ```py
    from pathlib import Path

    await client.uploads.upload_file_chunked(
        file=Path("my-paper.pdf"),
        mime_type="application/pdf",
        purpose="assistants",
    )
    ```

    Args:
      file: Either a path-like object pointing at the file to upload, or the
          raw file contents as `bytes`.

      mime_type: Forwarded unchanged to `self.create()`.

      purpose: Forwarded unchanged to `self.create()`.

      filename: Name to register the upload under. Required for in-memory
          files; for paths it defaults to the final path component.

      bytes: Total size of the upload. Required for in-memory files; for
          paths it defaults to the size reported by `stat()`.

      part_size: Maximum number of bytes read per part. Defaults to
          `DEFAULT_PART_SIZE`.

      md5: Forwarded unchanged to `self.complete()`.

    Returns:
      The `Upload` returned by `self.complete()`.

    Raises:
      TypeError: If `file` is `bytes` and `filename` or `bytes` is missing.
    """
    # NOTE: the `bytes` parameter shadows the builtin, hence `builtins.bytes`.
    if isinstance(file, builtins.bytes):
        if filename is None:
            raise TypeError("The `filename` argument must be given for in-memory files")

        if bytes is None:
            raise TypeError("The `bytes` argument must be given for in-memory files")
    else:
        # Path inputs are normalised to `anyio.Path` so that `stat()`, `open()`
        # and `read()` can be awaited.
        if not isinstance(file, anyio.Path):
            file = anyio.Path(file)

        if not filename:
            filename = file.name

        if bytes is None:
            stat = await file.stat()
            bytes = stat.st_size

    upload = await self.create(
        bytes=bytes,
        filename=filename,
        mime_type=mime_type,
        purpose=purpose,
    )

    part_ids: list[str] = []

    if part_size is None:
        part_size = DEFAULT_PART_SIZE

    if isinstance(file, anyio.Path):
        fd = await file.open("rb")
        async with fd:
            while True:
                data = await fd.read(part_size)
                if not data:
                    # EOF
                    break

                part = await self.parts.create(upload_id=upload.id, data=data)
                log.info("Uploaded part %s for upload %s", part.id, upload.id)
                part_ids.append(part.id)
    else:
        # Use the buffer as a context manager so it is closed on every exit
        # path, mirroring the `async with` used for on-disk files. Previously
        # it was only closed when an `Exception` was raised.
        with io.BytesIO(file) as buf:
            while True:
                data = buf.read(part_size)
                if not data:
                    # EOF
                    break

                part = await self.parts.create(upload_id=upload.id, data=data)
                log.info("Uploaded part %s for upload %s", part.id, upload.id)
                part_ids.append(part.id)

    return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
450+
230451 async def create (
231452 self ,
232453 * ,
0 commit comments