Skip to content

Commit af165e4

Browse files
committed
handle use case in which no fields are quoted
1 parent c6e7dc8 commit af165e4

3 files changed

Lines changed: 12 additions & 26 deletions

File tree

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ See below for an exhaustive list of configuration fields:
136136
// if true, unzip the file before reading it as a CSV
137137
"unzip": true,
138138

139+
// if specified, override the default CSV quoting config
140+
// More info: https://docs.python.org/3/library/csv.html#csv.QUOTE_ALL
141+
"quoting": "QUOTE_NONE",
142+
139143
// if the files don't have a header row, you can specify the field names
140144
"field_names": ["id", "first_name", "last_name"],
141145

tap_s3_csv/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Required('format'): Any('csv', 'excel'),
1616
Optional('unzip'): bool,
1717
Optional('delimiter'): str,
18+
Optional('quoting'): Any('QUOTE_MINIMAL', 'QUOTE_ALL', 'QUOTE_NONNUMERIC', 'QUOTE_NONE'),
1819
Optional('search_prefix'): str,
1920
Optional('field_names'): [str],
2021
Optional('worksheet_name'): str,

tap_s3_csv/csv_handler.py

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import codecs
22
import csv
33
import re
4-
5-
import zlib
6-
import io
4+
import gzip
75

86

97
def generator_wrapper(reader):
@@ -27,30 +25,9 @@ def generator_wrapper(reader):
2725
yield to_return
2826

2927

30-
def gunzip(stream):
31-
dec = zlib.decompressobj(32 + zlib.MAX_WBITS)
32-
for chunk in stream:
33-
rv = dec.decompress(chunk)
34-
if rv:
35-
yield rv
36-
37-
38-
def iter_lines(stream):
39-
buf = ""
40-
for chunk in stream:
41-
for byte in chunk:
42-
char = chr(byte)
43-
if char == '\n':
44-
yield buf.encode('utf-8')
45-
buf = ""
46-
else:
47-
buf += char
48-
49-
5028
def get_row_iterator(table_spec, file_handle):
51-
5229
if table_spec.get('unzip'):
53-
raw_stream = iter_lines(gunzip(file_handle._raw_stream))
30+
raw_stream = gzip.GzipFile(fileobj=file_handle._raw_stream)
5431
else:
5532
raw_stream = file_handle._raw_stream
5633

@@ -66,6 +43,10 @@ def get_row_iterator(table_spec, file_handle):
6643
field_names = table_spec['field_names']
6744

6845
delimiter = table_spec.get('delimiter', ',')
69-
reader = csv.DictReader(file_stream, delimiter=delimiter, fieldnames=field_names)
46+
47+
quote_config = table_spec.get('quote_config', 'QUOTE_MINIMAL')
48+
quoting = getattr(csv, quote_config)
49+
50+
reader = csv.DictReader(file_stream, quoting=quoting, delimiter=delimiter, fieldnames=field_names)
7051

7152
return generator_wrapper(reader)

0 commit comments

Comments
 (0)