Skip to content

Commit 45b7649

Browse files
authored
feat: Make file type detection and binary checks asynchronous (#3286) (#3288)
1 parent f4d077c commit 45b7649

File tree

3 files changed

+71
-50
lines changed

3 files changed

+71
-50
lines changed

packages/core/src/tools/read-many-files.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ Use this tool when the user's query implies needing the content of several files
405405
.relative(this.config.getTargetDir(), filePath)
406406
.replace(/\\/g, '/');
407407

408-
const fileType = detectFileType(filePath);
408+
const fileType = await detectFileType(filePath);
409409

410410
if (fileType === 'image' || fileType === 'pdf') {
411411
const fileExtension = path.extname(filePath).toLowerCase();

packages/core/src/utils/fileUtils.test.ts

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -142,41 +142,41 @@ describe('fileUtils', () => {
142142
}
143143
});
144144

145-
it('should return false for an empty file', () => {
145+
it('should return false for an empty file', async () => {
146146
actualNodeFs.writeFileSync(filePathForBinaryTest, '');
147-
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
147+
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
148148
});
149149

150-
it('should return false for a typical text file', () => {
150+
it('should return false for a typical text file', async () => {
151151
actualNodeFs.writeFileSync(
152152
filePathForBinaryTest,
153153
'Hello, world!\nThis is a test file with normal text content.',
154154
);
155-
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
155+
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
156156
});
157157

158-
it('should return true for a file with many null bytes', () => {
158+
it('should return true for a file with many null bytes', async () => {
159159
const binaryContent = Buffer.from([
160160
0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00,
161161
]); // "He\0llo\0\0\0\0\0"
162162
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
163-
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
163+
expect(await isBinaryFile(filePathForBinaryTest)).toBe(true);
164164
});
165165

166-
it('should return true for a file with high percentage of non-printable ASCII', () => {
166+
it('should return true for a file with high percentage of non-printable ASCII', async () => {
167167
const binaryContent = Buffer.from([
168168
0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06,
169169
]); // AB\x01\x02\x03\x04\x05CD\x06
170170
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
171-
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
171+
expect(await isBinaryFile(filePathForBinaryTest)).toBe(true);
172172
});
173173

174-
it('should return false if file access fails (e.g., ENOENT)', () => {
174+
it('should return false if file access fails (e.g., ENOENT)', async () => {
175175
// Ensure the file does not exist
176176
if (actualNodeFs.existsSync(filePathForBinaryTest)) {
177177
actualNodeFs.unlinkSync(filePathForBinaryTest);
178178
}
179-
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
179+
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
180180
});
181181
});
182182

@@ -196,64 +196,64 @@ describe('fileUtils', () => {
196196
vi.restoreAllMocks(); // Restore spies on actualNodeFs
197197
});
198198

199-
it('should detect typescript type by extension (ts)', () => {
200-
expect(detectFileType('file.ts')).toBe('text');
201-
expect(detectFileType('file.test.ts')).toBe('text');
199+
it('should detect typescript type by extension (ts)', async () => {
200+
expect(await detectFileType('file.ts')).toBe('text');
201+
expect(await detectFileType('file.test.ts')).toBe('text');
202202
});
203203

204-
it('should detect image type by extension (png)', () => {
204+
it('should detect image type by extension (png)', async () => {
205205
mockMimeLookup.mockReturnValueOnce('image/png');
206-
expect(detectFileType('file.png')).toBe('image');
206+
expect(await detectFileType('file.png')).toBe('image');
207207
});
208208

209-
it('should detect image type by extension (jpeg)', () => {
209+
it('should detect image type by extension (jpeg)', async () => {
210210
mockMimeLookup.mockReturnValueOnce('image/jpeg');
211-
expect(detectFileType('file.jpg')).toBe('image');
211+
expect(await detectFileType('file.jpg')).toBe('image');
212212
});
213213

214-
it('should detect svg type by extension', () => {
215-
expect(detectFileType('image.svg')).toBe('svg');
216-
expect(detectFileType('image.icon.svg')).toBe('svg');
214+
it('should detect svg type by extension', async () => {
215+
expect(await detectFileType('image.svg')).toBe('svg');
216+
expect(await detectFileType('image.icon.svg')).toBe('svg');
217217
});
218218

219-
it('should detect pdf type by extension', () => {
219+
it('should detect pdf type by extension', async () => {
220220
mockMimeLookup.mockReturnValueOnce('application/pdf');
221-
expect(detectFileType('file.pdf')).toBe('pdf');
221+
expect(await detectFileType('file.pdf')).toBe('pdf');
222222
});
223223

224-
it('should detect audio type by extension', () => {
224+
it('should detect audio type by extension', async () => {
225225
mockMimeLookup.mockReturnValueOnce('audio/mpeg');
226-
expect(detectFileType('song.mp3')).toBe('audio');
226+
expect(await detectFileType('song.mp3')).toBe('audio');
227227
});
228228

229-
it('should detect video type by extension', () => {
229+
it('should detect video type by extension', async () => {
230230
mockMimeLookup.mockReturnValueOnce('video/mp4');
231-
expect(detectFileType('movie.mp4')).toBe('video');
231+
expect(await detectFileType('movie.mp4')).toBe('video');
232232
});
233233

234-
it('should detect known binary extensions as binary (e.g. .zip)', () => {
234+
it('should detect known binary extensions as binary (e.g. .zip)', async () => {
235235
mockMimeLookup.mockReturnValueOnce('application/zip');
236-
expect(detectFileType('archive.zip')).toBe('binary');
236+
expect(await detectFileType('archive.zip')).toBe('binary');
237237
});
238-
it('should detect known binary extensions as binary (e.g. .exe)', () => {
238+
it('should detect known binary extensions as binary (e.g. .exe)', async () => {
239239
mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe
240-
expect(detectFileType('app.exe')).toBe('binary');
240+
expect(await detectFileType('app.exe')).toBe('binary');
241241
});
242242

243-
it('should use isBinaryFile for unknown extensions and detect as binary', () => {
243+
it('should use isBinaryFile for unknown extensions and detect as binary', async () => {
244244
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
245245
// Create a file that isBinaryFile will identify as binary
246246
const binaryContent = Buffer.from([
247247
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
248248
]);
249249
actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent);
250-
expect(detectFileType(filePathForDetectTest)).toBe('binary');
250+
expect(await detectFileType(filePathForDetectTest)).toBe('binary');
251251
});
252252

253-
it('should default to text if mime type is unknown and content is not binary', () => {
253+
it('should default to text if mime type is unknown and content is not binary', async () => {
254254
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
255255
// filePathForDetectTest is already a text file by default from beforeEach
256-
expect(detectFileType(filePathForDetectTest)).toBe('text');
256+
expect(await detectFileType(filePathForDetectTest)).toBe('text');
257257
});
258258
});
259259

packages/core/src/utils/fileUtils.ts

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
* SPDX-License-Identifier: Apache-2.0
55
*/
66

7-
import fs from 'fs';
8-
import path from 'path';
7+
import fs from 'node:fs';
8+
import path from 'node:path';
99
import { PartUnion } from '@google/genai';
1010
import mime from 'mime-types';
1111

@@ -56,22 +56,24 @@ export function isWithinRoot(
5656
/**
5757
* Determines if a file is likely binary based on content sampling.
5858
* @param filePath Path to the file.
59-
* @returns True if the file appears to be binary.
59+
* @returns Promise that resolves to true if the file appears to be binary.
6060
*/
61-
export function isBinaryFile(filePath: string): boolean {
61+
export async function isBinaryFile(filePath: string): Promise<boolean> {
62+
let fileHandle: fs.promises.FileHandle | undefined;
6263
try {
63-
const fd = fs.openSync(filePath, 'r');
64+
fileHandle = await fs.promises.open(filePath, 'r');
65+
6466
// Read up to 4KB or file size, whichever is smaller
65-
const fileSize = fs.fstatSync(fd).size;
67+
const stats = await fileHandle.stat();
68+
const fileSize = stats.size;
6669
if (fileSize === 0) {
6770
// Empty file is not considered binary for content checking
68-
fs.closeSync(fd);
6971
return false;
7072
}
7173
const bufferSize = Math.min(4096, fileSize);
7274
const buffer = Buffer.alloc(bufferSize);
73-
const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
74-
fs.closeSync(fd);
75+
const result = await fileHandle.read(buffer, 0, buffer.length, 0);
76+
const bytesRead = result.bytesRead;
7577

7678
if (bytesRead === 0) return false;
7779

@@ -84,21 +86,40 @@ export function isBinaryFile(filePath: string): boolean {
8486
}
8587
// If >30% non-printable characters, consider it binary
8688
return nonPrintableCount / bytesRead > 0.3;
87-
} catch {
89+
} catch (error) {
90+
// Log error for debugging while maintaining existing behavior
91+
console.warn(
92+
`Failed to check if file is binary: ${filePath}`,
93+
error instanceof Error ? error.message : String(error),
94+
);
8895
// If any error occurs (e.g. file not found, permissions),
8996
// treat as not binary here; let higher-level functions handle existence/access errors.
9097
return false;
98+
} finally {
99+
// Safely close the file handle if it was successfully opened
100+
if (fileHandle) {
101+
try {
102+
await fileHandle.close();
103+
} catch (closeError) {
104+
// Log close errors for debugging while continuing with cleanup
105+
console.warn(
106+
`Failed to close file handle for: ${filePath}`,
107+
closeError instanceof Error ? closeError.message : String(closeError),
108+
);
109+
// The important thing is that we attempted to clean up
110+
}
111+
}
91112
}
92113
}
93114

94115
/**
95116
* Detects the type of file based on extension and content.
96117
* @param filePath Path to the file.
97-
* @returns 'text', 'image', 'pdf', 'audio', 'video', or 'binary'.
118+
* @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
98119
*/
99-
export function detectFileType(
120+
export async function detectFileType(
100121
filePath: string,
101-
): 'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg' {
122+
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
102123
const ext = path.extname(filePath).toLowerCase();
103124

104125
// The mimetype for "ts" is MPEG transport stream (a video format) but we want
@@ -166,7 +187,7 @@ export function detectFileType(
166187

167188
// Fallback to content-based check if mime type wasn't conclusive for image/pdf
168189
// and it's not a known binary extension.
169-
if (isBinaryFile(filePath)) {
190+
if (await isBinaryFile(filePath)) {
170191
return 'binary';
171192
}
172193

@@ -227,7 +248,7 @@ export async function processSingleFileContent(
227248
);
228249
}
229250

230-
const fileType = detectFileType(filePath);
251+
const fileType = await detectFileType(filePath);
231252
const relativePathForDisplay = path
232253
.relative(rootDirectory, filePath)
233254
.replace(/\\/g, '/');

0 commit comments

Comments
 (0)