|
| 1 | +import { createDataConnector } from "../../../DataConnector"; |
| 2 | + |
| 3 | +describe('FileDataProvider', () => { |
| 4 | + it('should return correct documents', async () => { |
| 5 | + const fileDataConnector = createDataConnector({ provider: 'file' }); |
| 6 | + |
| 7 | + await fileDataConnector.setOptions({ |
| 8 | + files: [ |
| 9 | + './src/__tests__/providers/File/files/test.csv', |
| 10 | + './src/__tests__/providers/File/files/test.md', |
| 11 | + './src/__tests__/providers/File/files/test.pdf', |
| 12 | + './src/__tests__/providers/File/files/test.txt', |
| 13 | + './src/__tests__/providers/File/files/test.xml' |
| 14 | + ], |
| 15 | + }); |
| 16 | + |
| 17 | + const documents = await fileDataConnector.getDocuments(); |
| 18 | + expect(documents).not.toBe(null); |
| 19 | + expect(documents.length).toBe(5); |
| 20 | + expect(documents[0].content).not.toBe(null); |
| 21 | + expect(documents[0].content.length).toBeGreaterThan(0); |
| 22 | + expect(documents).toEqual([ |
| 23 | + { |
| 24 | + content: 'id, column1, column2, column3\n1, test, 11111, test test\n2, test2 test2, 22222, test\n3, test3, 33333, test test test', |
| 25 | + metadata: { sourceURL: expect.stringMatching(/^#FILE_\d+$/) }, |
| 26 | + provider: 'file', |
| 27 | + type: 'csv' |
| 28 | + }, |
| 29 | + { |
| 30 | + content: '# This is a test markdown file\n\nThis file is used for testing purposes. Below is a list of items:\n\n- Item 1\n- Item 2\n- Item 3\n\nEnd of file.\n', |
| 31 | + metadata: { sourceURL: expect.stringMatching(/^#FILE_\d+$/) }, |
| 32 | + provider: 'file', |
| 33 | + type: 'md' |
| 34 | + }, |
| 35 | + { |
| 36 | + content: '\n\nDummy PDF file', |
| 37 | + metadata: { sourceURL: expect.stringMatching(/^#FILE_\d+$/) }, |
| 38 | + provider: 'file', |
| 39 | + type: 'pdf' |
| 40 | + }, |
| 41 | + { |
| 42 | + content: 'This is a test file.\n', |
| 43 | + metadata: { sourceURL: expect.stringMatching(/^#FILE_\d+$/) }, |
| 44 | + provider: 'file', |
| 45 | + type: 'txt' |
| 46 | + }, |
| 47 | + { |
| 48 | + content: '<?xml version="1.0" encoding="UTF-8"?>\n<tests>\n <test>\n <id>1</id>\n <column1>test</column1>\n <column2>11111</column2>\n <column3>test test</column3>\n </test>\n <test>\n <id>2</id>\n <column1>test2 test2</column1>\n <column2>22222</column2>\n <column3>test</column3>\n </test>\n <test>\n <id>3</id>\n <column1>test3</column1>\n <column2>33333</column2>\n <column3>test test test</column3>\n </test>\n</tests>\n', |
| 49 | + metadata: { sourceURL: expect.stringMatching(/^#FILE_\d+$/) }, |
| 50 | + provider: 'file', |
| 51 | + type: 'xml' |
| 52 | + } |
| 53 | + ]); |
| 54 | + }); |
| 55 | + |
| 56 | + it('should fetch documents from URLs', async () => { |
| 57 | + const fileUrlDataConnector = createDataConnector({ provider: 'file' }); |
| 58 | + |
| 59 | + const optionsURLs = { |
| 60 | + urls: [ |
| 61 | + 'https://s3.us-east-1.amazonaws.com/storage.mendable.ai/rafa-testing/test.csv', |
| 62 | + 'https://s3.us-east-1.amazonaws.com/storage.mendable.ai/rafa-testing/test.md', |
| 63 | + 'https://s3.us-east-1.amazonaws.com/storage.mendable.ai/rafa-testing/test%20%281%29.pdf', |
| 64 | + 'https://s3.us-east-1.amazonaws.com/storage.mendable.ai/rafa-testing/test.txt', |
| 65 | + 'https://s3.us-east-1.amazonaws.com/storage.mendable.ai/rafa-testing/test.xml' |
| 66 | + ] |
| 67 | + } |
| 68 | + |
| 69 | + await fileUrlDataConnector.setOptions(optionsURLs); |
| 70 | + const documentsByURL = await fileUrlDataConnector.getDocuments(); |
| 71 | + |
| 72 | + expect(documentsByURL).not.toBe(null); |
| 73 | + expect(documentsByURL.length).toBe(5); |
| 74 | + expect(documentsByURL[0].content).not.toBe(null); |
| 75 | + expect(documentsByURL[0].content.length).toBeGreaterThan(0); |
| 76 | + expect(documentsByURL[0].metadata.sourceURL).not.toBe(null); |
| 77 | + expect(documentsByURL[0].provider).toBe('file'); |
| 78 | + expect(documentsByURL).toContainEqual({ |
| 79 | + content: 'id, column1, column2, column3\n1, test, 11111, test test\n2, test2 test2, 22222, test\n3, test3, 33333, test test test\n', |
| 80 | + metadata: { sourceURL: optionsURLs.urls[0] }, |
| 81 | + provider: 'file', |
| 82 | + type: 'csv' |
| 83 | + }); |
| 84 | + expect(documentsByURL).toContainEqual({ |
| 85 | + content: expect.stringContaining('# This is a test markdown file\n\nThis file is used for testing purposes. Below is a list of items:\n\n- Item 1\n- Item 2\n- Item 3\n\nEnd of file.\n'), |
| 86 | + metadata: { sourceURL: optionsURLs.urls[1] }, |
| 87 | + provider: 'file', |
| 88 | + type: 'md' |
| 89 | + }); |
| 90 | + expect(documentsByURL).toContainEqual({ |
| 91 | + content: expect.stringContaining('Dummy PDF file'), |
| 92 | + metadata: { sourceURL: optionsURLs.urls[2] }, |
| 93 | + provider: 'file', |
| 94 | + type: 'pdf' |
| 95 | + }); |
| 96 | + expect(documentsByURL).toContainEqual({ |
| 97 | + content: expect.stringContaining('This is a test file.'), |
| 98 | + metadata: { sourceURL: optionsURLs.urls[3] }, |
| 99 | + provider: 'file', |
| 100 | + type: 'txt' |
| 101 | + }); |
| 102 | + expect(documentsByURL).toContainEqual({ |
| 103 | + content: expect.stringContaining('<?xml version="1.0" encoding="UTF-8"?>\n<tests>\n <test>\n <id>1</id>\n <column1>test</column1>\n <column2>11111</column2>\n <column3>test test</column3>\n </test>\n <test>\n <id>2</id>\n <column1>test2 test2</column1>\n <column2>22222</column2>\n <column3>test</column3>\n </test>\n <test>\n <id>3</id>\n <column1>test3</column1>\n <column2>33333</column2>\n <column3>test test test</column3>\n </test>\n</tests>'), |
| 104 | + metadata: { sourceURL: optionsURLs.urls[4] }, |
| 105 | + provider: 'file', |
| 106 | + type: 'xml' |
| 107 | + }); |
| 108 | + }); |
| 109 | +}); |
0 commit comments