@@ -98,6 +98,8 @@ pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> {
9898 values : Vec < T :: Native > ,
9999 /// The random state used to generate hashes
100100 random_state : RandomState ,
101+
102+ append_row_indices : Vec < u32 > ,
101103}
102104
103105impl < T : ArrowPrimitiveType > GroupValuesPrimitive < T > {
@@ -109,6 +111,7 @@ impl<T: ArrowPrimitiveType> GroupValuesPrimitive<T> {
109111 values : Vec :: with_capacity ( 128 ) ,
110112 null_group : None ,
111113 random_state : Default :: default ( ) ,
114+ append_row_indices : Vec :: new ( ) ,
112115 }
113116 }
114117}
@@ -119,13 +122,18 @@ where
119122{
120123 fn intern ( & mut self , cols : & [ ArrayRef ] , groups : & mut Vec < usize > ) -> Result < ( ) > {
121124 assert_eq ! ( cols. len( ) , 1 ) ;
125+ let col = cols[ 0 ] . as_primitive :: < T > ( ) ;
126+
122127 groups. clear ( ) ;
128+ self . append_row_indices . clear ( ) ;
123129
124- for v in cols[ 0 ] . as_primitive :: < T > ( ) {
130+ let mut num_total_groups = self . values . len ( ) ;
131+ for ( row_index, v) in col. iter ( ) . enumerate ( ) {
125132 let group_id = match v {
126133 None => * self . null_group . get_or_insert_with ( || {
127- let group_id = self . values . len ( ) ;
128- self . values . push ( Default :: default ( ) ) ;
134+ let group_id = num_total_groups;
135+ self . append_row_indices . push ( row_index as u32 ) ;
136+ num_total_groups += 1 ;
129137 group_id
130138 } ) ,
131139 Some ( key) => {
@@ -140,16 +148,28 @@ where
140148 match insert {
141149 hashbrown:: hash_table:: Entry :: Occupied ( o) => o. get ( ) . 0 ,
142150 hashbrown:: hash_table:: Entry :: Vacant ( v) => {
143- let g = self . values . len ( ) ;
151+ let g = num_total_groups ;
144152 v. insert ( ( g, key) ) ;
145- self . values . push ( key) ;
153+ self . append_row_indices . push ( row_index as u32 ) ;
154+ num_total_groups += 1 ;
146155 g
147156 }
148157 }
149158 }
150159 } ;
151160 groups. push ( group_id)
152161 }
162+
163+ // If every row of this batch created a new group, append the column's whole value buffer at once
164+ if self . append_row_indices . len ( ) == col. len ( ) {
165+ self . values . extend_from_slice ( col. values ( ) ) ;
166+ } else {
167+ let col_values = col. values ( ) ;
168+ for & row_index in self . append_row_indices . iter ( ) {
169+ self . values . push ( col_values[ row_index as usize ] ) ;
170+ }
171+ }
172+
153173 Ok ( ( ) )
154174 }
155175
0 commit comments