@@ -83,7 +83,9 @@ impl<'a> Parser<'a> {
8383 Token :: Decimal256 => self . parse_decimal_256 ( ) ,
8484 Token :: Dictionary => self . parse_dictionary ( ) ,
8585 Token :: List => self . parse_list ( ) ,
86+ Token :: ListView => self . parse_list_view ( ) ,
8687 Token :: LargeList => self . parse_large_list ( ) ,
88+ Token :: LargeListView => self . parse_large_list_view ( ) ,
8789 Token :: FixedSizeList => self . parse_fixed_size_list ( ) ,
8890 Token :: Struct => self . parse_struct ( ) ,
8991 tok => Err ( make_error (
@@ -93,35 +95,87 @@ impl<'a> Parser<'a> {
9395 }
9496 }
9597
96- /// Parses the List type
98+ /// Parses list field name. Returns default field name if not found.
99+ fn parse_list_field_name ( & mut self , context : & str ) -> ArrowResult < String > {
100+ // field must be after a comma
101+ if self
102+ . tokenizer
103+ . next_if ( |next| matches ! ( next, Ok ( Token :: Comma ) ) )
104+ . is_none ( )
105+ {
106+ return Ok ( Field :: LIST_FIELD_DEFAULT_NAME . into ( ) ) ;
107+ }
108+
109+ // expects: `field: 'field_name'`.
110+ self . expect_token ( Token :: Field ) ?;
111+ self . expect_token ( Token :: Colon ) ?;
112+ self . parse_single_quoted_string ( context)
113+ }
114+
115+ /// Parses the List type (called after `List` has been consumed)
116+ /// E.g: List(nullable Int64, field: 'foo')
97117 fn parse_list ( & mut self ) -> ArrowResult < DataType > {
98118 self . expect_token ( Token :: LParen ) ?;
119+ let nullable = self . parse_opt_nullable ( ) ;
120+ let data_type = self . parse_next_type ( ) ?;
121+ let field = self . parse_list_field_name ( "List" ) ?;
122+ self . expect_token ( Token :: RParen ) ?;
123+ Ok ( DataType :: List ( Arc :: new ( Field :: new (
124+ field, data_type, nullable,
125+ ) ) ) )
126+ }
127+
128+ /// Parses the ListView type (called after `ListView` has been consumed)
129+ /// E.g: ListView(nullable Int64, field: 'foo')
130+ fn parse_list_view ( & mut self ) -> ArrowResult < DataType > {
131+ self . expect_token ( Token :: LParen ) ?;
132+ let nullable = self . parse_opt_nullable ( ) ;
99133 let data_type = self . parse_next_type ( ) ?;
134+ let field = self . parse_list_field_name ( "ListView" ) ?;
100135 self . expect_token ( Token :: RParen ) ?;
101- Ok ( DataType :: List ( Arc :: new ( Field :: new_list_field (
102- data_type, true ,
136+ Ok ( DataType :: ListView ( Arc :: new ( Field :: new (
137+ field , data_type, nullable ,
103138 ) ) ) )
104139 }
105140
106- /// Parses the LargeList type
141+ /// Parses the LargeList type (called after `LargeList` has been consumed)
142+ /// E.g: LargeList(nullable Int64, field: 'foo')
107143 fn parse_large_list ( & mut self ) -> ArrowResult < DataType > {
108144 self . expect_token ( Token :: LParen ) ?;
145+ let nullable = self . parse_opt_nullable ( ) ;
109146 let data_type = self . parse_next_type ( ) ?;
147+ let field = self . parse_list_field_name ( "LargeList" ) ?;
110148 self . expect_token ( Token :: RParen ) ?;
111- Ok ( DataType :: LargeList ( Arc :: new ( Field :: new_list_field (
112- data_type, true ,
149+ Ok ( DataType :: LargeList ( Arc :: new ( Field :: new (
150+ field , data_type, nullable ,
113151 ) ) ) )
114152 }
115153
116- /// Parses the FixedSizeList type
154+ /// Parses the LargeListView type (called after `LargeListView` has been consumed)
155+ /// E.g: LargeListView(nullable Int64, field: 'foo')
156+ fn parse_large_list_view ( & mut self ) -> ArrowResult < DataType > {
157+ self . expect_token ( Token :: LParen ) ?;
158+ let nullable = self . parse_opt_nullable ( ) ;
159+ let data_type = self . parse_next_type ( ) ?;
160+ let field = self . parse_list_field_name ( "LargeListView" ) ?;
161+ self . expect_token ( Token :: RParen ) ?;
162+ Ok ( DataType :: LargeListView ( Arc :: new ( Field :: new (
163+ field, data_type, nullable,
164+ ) ) ) )
165+ }
166+
167+ /// Parses the FixedSizeList type (called after `FixedSizeList` has been consumed)
168+ /// E.g: FixedSizeList(5 x nullable Int64, field: 'foo')
117169 fn parse_fixed_size_list ( & mut self ) -> ArrowResult < DataType > {
118170 self . expect_token ( Token :: LParen ) ?;
119171 let length = self . parse_i32 ( "FixedSizeList" ) ?;
120- self . expect_token ( Token :: Comma ) ?;
172+ self . expect_token ( Token :: X ) ?;
173+ let nullable = self . parse_opt_nullable ( ) ;
121174 let data_type = self . parse_next_type ( ) ?;
175+ let field = self . parse_list_field_name ( "FixedSizeList" ) ?;
122176 self . expect_token ( Token :: RParen ) ?;
123177 Ok ( DataType :: FixedSizeList (
124- Arc :: new ( Field :: new_list_field ( data_type, true ) ) ,
178+ Arc :: new ( Field :: new ( field , data_type, nullable ) ) ,
125179 length,
126180 ) )
127181 }
@@ -150,6 +204,19 @@ impl<'a> Parser<'a> {
150204 }
151205 }
152206
207+ /// Parses the next single quoted string
208+ fn parse_single_quoted_string ( & mut self , context : & str ) -> ArrowResult < String > {
209+ let token = self . next_token ( ) ?;
210+ if let Token :: SingleQuotedString ( string) = token {
211+ Ok ( string)
212+ } else {
213+ Err ( make_error (
214+ self . val ,
215+ & format ! ( "expected single quoted string for {context}, got '{token}'" ) ,
216+ ) )
217+ }
218+ }
219+
153220 /// Parses the next integer value
154221 fn parse_i64 ( & mut self , context : & str ) -> ArrowResult < i64 > {
155222 match self . next_token ( ) ? {
@@ -340,6 +407,8 @@ impl<'a> Parser<'a> {
340407 Box :: new ( value_type) ,
341408 ) )
342409 }
410+
411+ /// Parses the next Struct (called after `Struct` has been consumed)
343412 fn parse_struct ( & mut self ) -> ArrowResult < DataType > {
344413 self . expect_token ( Token :: LParen ) ?;
345414 let mut fields = Vec :: new ( ) ;
@@ -354,16 +423,13 @@ impl<'a> Parser<'a> {
354423 tok => {
355424 return Err ( make_error (
356425 self . val ,
357- & format ! ( "Expected a quoted string for a field name; got {tok:?}" ) ,
426+ & format ! ( "Expected a double quoted string for a field name; got {tok:?}" ) ,
358427 ) ) ;
359428 }
360429 } ;
361430 self . expect_token ( Token :: Colon ) ?;
362431
363- let nullable = self
364- . tokenizer
365- . next_if ( |next| matches ! ( next, Ok ( Token :: Nullable ) ) )
366- . is_some ( ) ;
432+ let nullable = self . parse_opt_nullable ( ) ;
367433 let field_type = self . parse_next_type ( ) ?;
368434 fields. push ( Arc :: new ( Field :: new ( field_name, field_type, nullable) ) ) ;
369435 match self . next_token ( ) ? {
@@ -382,6 +448,13 @@ impl<'a> Parser<'a> {
382448 Ok ( DataType :: Struct ( Fields :: from ( fields) ) )
383449 }
384450
451+ /// return and consume if the next token is `Token::Nullable`
452+ fn parse_opt_nullable ( & mut self ) -> bool {
453+ self . tokenizer
454+ . next_if ( |next| matches ! ( next, Ok ( Token :: Nullable ) ) )
455+ . is_some ( )
456+ }
457+
385458 /// return the next token, or an error if there are none left
386459 fn next_token ( & mut self ) -> ArrowResult < Token > {
387460 match self . tokenizer . next ( ) {
@@ -406,6 +479,11 @@ fn is_separator(c: char) -> bool {
406479 c == '(' || c == ')' || c == ',' || c == ':' || c == ' '
407480}
408481
/// Which quote character delimits a quoted string handled by the tokenizer.
enum QuoteType {
    /// Delimited by `"`; yields `Token::DoubleQuotedString`.
    Double,
    /// Delimited by `'`; yields `Token::SingleQuotedString`.
    Single,
}
486+
409487#[ derive( Debug ) ]
410488/// Splits a string like Dictionary(Int32, Int64) into tokens suitable for parsing
411489///
@@ -497,7 +575,9 @@ impl<'a> Tokenizer<'a> {
497575 "Date64" => Token :: SimpleType ( DataType :: Date64 ) ,
498576
499577 "List" => Token :: List ,
578+ "ListView" => Token :: ListView ,
500579 "LargeList" => Token :: LargeList ,
580+ "LargeListView" => Token :: LargeListView ,
501581 "FixedSizeList" => Token :: FixedSizeList ,
502582
503583 "s" | "Second" => Token :: TimeUnit ( TimeUnit :: Second ) ,
@@ -527,6 +607,8 @@ impl<'a> Tokenizer<'a> {
527607 "None" => Token :: None ,
528608
529609 "nullable" => Token :: Nullable ,
610+ "field" => Token :: Field ,
611+ "x" => Token :: X ,
530612
531613 "Struct" => Token :: Struct ,
532614
@@ -537,9 +619,14 @@ impl<'a> Tokenizer<'a> {
537619 Ok ( token)
538620 }
539621
540- /// Parses e.g. `"foo bar"`
541- fn parse_quoted_string ( & mut self ) -> ArrowResult < Token > {
542- if self . next_char ( ) != Some ( '\"' ) {
622+ /// Parses e.g. `"foo bar"`, `'foo bar'`
623+ fn parse_quoted_string ( & mut self , quote_type : QuoteType ) -> ArrowResult < Token > {
624+ let quote = match quote_type {
625+ QuoteType :: Double => '\"' ,
626+ QuoteType :: Single => '\'' ,
627+ } ;
628+
629+ if self . next_char ( ) != Some ( quote) {
543630 return Err ( make_error ( self . val , "Expected \" " ) ) ;
544631 }
545632
@@ -561,7 +648,7 @@ impl<'a> Tokenizer<'a> {
561648 is_escaped = true ;
562649 self . word . push ( c) ;
563650 }
564- '"' => {
651+ c if c == quote => {
565652 if is_escaped {
566653 self . word . push ( c) ;
567654 is_escaped = false ;
@@ -585,7 +672,10 @@ impl<'a> Tokenizer<'a> {
585672 return Err ( make_error ( self . val , "empty strings aren't allowed" ) ) ;
586673 }
587674
588- Ok ( Token :: DoubleQuotedString ( val) )
675+ match quote_type {
676+ QuoteType :: Double => Ok ( Token :: DoubleQuotedString ( val) ) ,
677+ QuoteType :: Single => Ok ( Token :: SingleQuotedString ( val) ) ,
678+ }
589679 }
590680}
591681
@@ -601,7 +691,10 @@ impl Iterator for Tokenizer<'_> {
601691 continue ;
602692 }
603693 '"' => {
604- return Some ( self . parse_quoted_string ( ) ) ;
694+ return Some ( self . parse_quoted_string ( QuoteType :: Double ) ) ;
695+ }
696+ '\'' => {
697+ return Some ( self . parse_quoted_string ( QuoteType :: Single ) ) ;
605698 }
606699 '(' => {
607700 self . next_char ( ) ;
@@ -652,19 +745,26 @@ enum Token {
652745 None ,
653746 Integer ( i64 ) ,
654747 DoubleQuotedString ( String ) ,
748+ SingleQuotedString ( String ) ,
655749 List ,
750+ ListView ,
656751 LargeList ,
752+ LargeListView ,
657753 FixedSizeList ,
658754 Struct ,
659755 Nullable ,
756+ Field ,
757+ X ,
660758}
661759
662760impl Display for Token {
663761 fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
664762 match self {
665763 Token :: SimpleType ( t) => write ! ( f, "{t}" ) ,
666764 Token :: List => write ! ( f, "List" ) ,
765+ Token :: ListView => write ! ( f, "ListView" ) ,
667766 Token :: LargeList => write ! ( f, "LargeList" ) ,
767+ Token :: LargeListView => write ! ( f, "LargeListView" ) ,
668768 Token :: FixedSizeList => write ! ( f, "FixedSizeList" ) ,
669769 Token :: Timestamp => write ! ( f, "Timestamp" ) ,
670770 Token :: Time32 => write ! ( f, "Time32" ) ,
@@ -687,8 +787,11 @@ impl Display for Token {
687787 Token :: Dictionary => write ! ( f, "Dictionary" ) ,
688788 Token :: Integer ( v) => write ! ( f, "Integer({v})" ) ,
689789 Token :: DoubleQuotedString ( s) => write ! ( f, "DoubleQuotedString({s})" ) ,
790+ Token :: SingleQuotedString ( s) => write ! ( f, "SingleQuotedString({s})" ) ,
690791 Token :: Struct => write ! ( f, "Struct" ) ,
691792 Token :: Nullable => write ! ( f, "nullable" ) ,
793+ Token :: Field => write ! ( f, "field" ) ,
794+ Token :: X => write ! ( f, "x" ) ,
692795 }
693796 }
694797}
@@ -828,7 +931,58 @@ mod test {
828931 ) ,
829932 ] ) ) ,
830933 DataType :: Struct ( Fields :: empty( ) ) ,
831- // TODO support more structured types (List, LargeList, Union, Map, RunEndEncoded, etc)
934+ DataType :: List ( Arc :: new( Field :: new_list_field( DataType :: Int64 , true ) ) ) ,
935+ DataType :: List ( Arc :: new( Field :: new_list_field( DataType :: Int64 , false ) ) ) ,
936+ DataType :: List ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
937+ DataType :: List ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , false ) ) ) ,
938+ DataType :: List ( Arc :: new( Field :: new(
939+ "nested_list" ,
940+ DataType :: List ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
941+ true ,
942+ ) ) ) ,
943+ DataType :: ListView ( Arc :: new( Field :: new_list_field( DataType :: Int64 , true ) ) ) ,
944+ DataType :: ListView ( Arc :: new( Field :: new_list_field( DataType :: Int64 , false ) ) ) ,
945+ DataType :: ListView ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
946+ DataType :: ListView ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , false ) ) ) ,
947+ DataType :: ListView ( Arc :: new( Field :: new(
948+ "nested_list_view" ,
949+ DataType :: ListView ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
950+ true ,
951+ ) ) ) ,
952+ DataType :: LargeList ( Arc :: new( Field :: new_list_field( DataType :: Int64 , true ) ) ) ,
953+ DataType :: LargeList ( Arc :: new( Field :: new_list_field( DataType :: Int64 , false ) ) ) ,
954+ DataType :: LargeList ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
955+ DataType :: LargeList ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , false ) ) ) ,
956+ DataType :: LargeList ( Arc :: new( Field :: new(
957+ "nested_large_list" ,
958+ DataType :: LargeList ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
959+ true ,
960+ ) ) ) ,
961+ DataType :: LargeListView ( Arc :: new( Field :: new_list_field( DataType :: Int64 , true ) ) ) ,
962+ DataType :: LargeListView ( Arc :: new( Field :: new_list_field( DataType :: Int64 , false ) ) ) ,
963+ DataType :: LargeListView ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
964+ DataType :: LargeListView ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , false ) ) ) ,
965+ DataType :: LargeListView ( Arc :: new( Field :: new(
966+ "nested_large_list_view" ,
967+ DataType :: LargeListView ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
968+ true ,
969+ ) ) ) ,
970+ DataType :: FixedSizeList ( Arc :: new( Field :: new_list_field( DataType :: Int64 , true ) ) , 2 ) ,
971+ DataType :: FixedSizeList ( Arc :: new( Field :: new_list_field( DataType :: Int64 , false ) ) , 2 ) ,
972+ DataType :: FixedSizeList ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) , 2 ) ,
973+ DataType :: FixedSizeList ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , false ) ) , 2 ) ,
974+ DataType :: FixedSizeList (
975+ Arc :: new( Field :: new(
976+ "nested_fixed_size_list" ,
977+ DataType :: FixedSizeList (
978+ Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ,
979+ 2 ,
980+ ) ,
981+ true ,
982+ ) ) ,
983+ 2 ,
984+ ) ,
985+ // TODO support more structured types (Union, Map, RunEndEncoded, etc)
832986 ]
833987 }
834988
0 commit comments