11// SPDX-License-Identifier: Apache-2.0
22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
4+ use std:: borrow:: Borrow ;
45use std:: iter:: once;
56use std:: sync:: Arc ;
67
8+ use itertools:: Itertools ;
79use vortex_error:: VortexExpect ;
810use vortex_error:: VortexResult ;
11+ use vortex_error:: vortex_bail;
912use vortex_error:: vortex_err;
1013
1114use crate :: ArrayRef ;
@@ -16,6 +19,7 @@ use crate::array::EmptyArrayData;
1619use crate :: array:: TypedArrayRef ;
1720use crate :: array:: child_to_validity;
1821use crate :: array:: validity_to_child;
22+ use crate :: arrays:: ChunkedArray ;
1923use crate :: arrays:: Struct ;
2024use crate :: dtype:: DType ;
2125use crate :: dtype:: FieldName ;
@@ -430,9 +434,7 @@ impl Array<Struct> {
430434 } ;
431435 Some ( ( new_array, field) )
432436 }
433- }
434437
435- impl Array < Struct > {
436438 pub fn with_column ( & self , name : impl Into < FieldName > , array : ArrayRef ) -> VortexResult < Self > {
437439 let name = name. into ( ) ;
438440 let struct_dtype = self . struct_fields ( ) ;
@@ -453,4 +455,70 @@ impl Array<Struct> {
453455 pub fn remove_column_owned ( & self , name : impl Into < FieldName > ) -> Option < ( Self , ArrayRef ) > {
454456 self . remove_column ( name)
455457 }
458+
459+ pub fn try_concat < T > ( chunks : impl IntoIterator < Item = T > ) -> VortexResult < Self >
460+ where
461+ T : Borrow < Array < Struct > > ,
462+ {
463+ let mut it = chunks. into_iter ( ) ;
464+ let Some ( first) = it. next ( ) else {
465+ vortex_bail ! ( "cannot concat empty iterator of arrays" ) ;
466+ } ;
467+ let first_dtype = first. borrow ( ) . dtype ( ) . clone ( ) ;
468+ let struct_fields = first_dtype. as_struct_fields ( ) . clone ( ) ;
469+ let names = struct_fields. names ( ) ;
470+
471+ let it = [ first] . into_iter ( ) . chain ( it) ;
472+ let ( field_arrays_per_chunk, validities) = it
473+ . map ( |chunk| {
474+ let chunk = chunk. borrow ( ) ;
475+ if & first_dtype != chunk. dtype ( ) {
476+ vortex_bail ! (
477+ "cannot concatenate struct arrays with differing dtypes: {}, {}" ,
478+ first_dtype,
479+ chunk. dtype( ) ,
480+ ) ;
481+ }
482+
483+ let fields = names
484+ . iter ( )
485+ . map ( |name| {
486+ chunk
487+ . unmasked_field_by_name ( name)
488+ . vortex_expect ( "field exists because it is in dtype" )
489+ . clone ( )
490+ } )
491+ . collect :: < Vec < _ > > ( ) ;
492+ let validity = chunk. validity ( ) ?;
493+
494+ Ok ( ( fields, ( validity, chunk. len ( ) ) ) )
495+ } )
496+ . process_results ( |iter| iter. unzip :: < _ , _ , Vec < _ > , Vec < _ > > ( ) ) ?;
497+
498+ let field_arrays = struct_fields
499+ . fields ( )
500+ . enumerate ( )
501+ . map ( |( i, dtype) | {
502+ // SAFETY: We establish above that every array has the same type.
503+ let chunks = field_arrays_per_chunk
504+ . iter ( )
505+ . map ( |x| x[ i] . clone ( ) )
506+ . collect ( ) ;
507+ unsafe { ChunkedArray :: new_unchecked ( chunks, dtype) } . into_array ( )
508+ } )
509+ . collect :: < Vec < _ > > ( ) ;
510+ let len = validities. iter ( ) . map ( |( _v, len) | len) . sum ( ) ;
511+ let validity = Validity :: concat ( validities) . vortex_expect ( "verified non-empty above" ) ;
512+
513+ // SAFETY:
514+ //
515+ // 1. The field arrays, by construction, have the type specified in fields.
516+ //
517+ // 2. Each Array<Struct> has a valid len, therefore the sum of those lens should be valid
518+ // for the concatenation of each field.
519+ //
520+ // 3. Each Array<Struct> has a valid validity, so the concatenation of those validities has
521+ // the correct length and dtype harmony.
522+ Ok ( unsafe { Array :: < Struct > :: new_unchecked ( field_arrays, struct_fields, len, validity) } )
523+ }
456524}
0 commit comments