Skip to content

Commit 48f9780

Browse files
committed
ARROW-375: Fix unicode Python 3 issue in columns argument of parquet.read_table
Author: Wes McKinney <[email protected]>
Closes #204 from wesm/ARROW-375 and squashes the following commits:
9e6f2a6 [Wes McKinney] BUG: convert unicode to utf8 bytes for column filtering
1 parent 7f048a4 commit 48f9780

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

python/pyarrow/parquet.pyx

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -93,23 +93,27 @@ cdef class ParquetReader:
9393
Integer index of the position of the column
9494
"""
9595
cdef:
96-
const FileMetaData* metadata = self.reader.get().parquet_reader().metadata()
96+
const FileMetaData* metadata = (self.reader.get()
97+
.parquet_reader().metadata())
9798
int i = 0
9899

99100
if self.column_idx_map is None:
100101
self.column_idx_map = {}
101102
for i in range(0, metadata.num_columns()):
102-
self.column_idx_map[str(metadata.schema().Column(i).path().get().ToDotString())] = i
103+
col_bytes = tobytes(metadata.schema().Column(i)
104+
.path().get().ToDotString())
105+
self.column_idx_map[col_bytes] = i
103106

104-
return self.column_idx_map[column_name]
107+
return self.column_idx_map[tobytes(column_name)]
105108

106109
def read_column(self, int column_index):
107110
cdef:
108111
Array array = Array()
109112
shared_ptr[CArray] carray
110113

111114
with nogil:
112-
check_status(self.reader.get().ReadFlatColumn(column_index, &carray))
115+
check_status(self.reader.get()
116+
.ReadFlatColumn(column_index, &carray))
113117

114118
array.init(carray)
115119
return array

0 commit comments

Comments
 (0)