Skip to content

Commit e1d948f

Browse files
committed
feat(parquet): add all-null fast paths for level building
When an entire list, struct, fixed-size list, or leaf array is null, skip per-row iteration and instead emit a single uniform run of def/rep levels for each affected leaf via `extend_uniform_levels` (O(1) per leaf rather than O(rows)).
Signed-off-by: Hippolyte Barraud <hippolyte.barraud@datadoghq.com>
1 parent 7abb225 commit e1d948f

1 file changed

Lines changed: 148 additions & 0 deletions

File tree

parquet/src/arrow/arrow_writer/levels.rs

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,17 @@ impl LevelInfoBuilder {
311311
nulls: Option<&NullBuffer>,
312312
range: Range<usize>,
313313
) {
314+
// Fast path: entire list array is null; emit bulk null rep/def levels
315+
if let Some(nulls) = nulls {
316+
if nulls.null_count() == nulls.len() {
317+
let count = range.end - range.start;
318+
child.visit_leaves(|leaf| {
319+
leaf.extend_uniform_levels(ctx.def_level - 2, ctx.rep_level - 1, count);
320+
});
321+
return;
322+
}
323+
}
324+
314325
let offsets = &offsets[range.start..range.end + 1];
315326

316327
let write_non_null_slice =
@@ -501,6 +512,19 @@ impl LevelInfoBuilder {
501512
nulls: Option<&NullBuffer>,
502513
range: Range<usize>,
503514
) {
515+
// Fast path: entire struct array is null; emit bulk null def/rep levels
516+
if let Some(nulls) = nulls {
517+
if nulls.null_count() == nulls.len() {
518+
let len = range.end - range.start;
519+
for child in children.iter_mut() {
520+
child.visit_leaves(|info| {
521+
info.extend_uniform_levels(ctx.def_level - 1, ctx.rep_level, len);
522+
});
523+
}
524+
return;
525+
}
526+
}
527+
504528
let write_null = |children: &mut [LevelInfoBuilder], range: Range<usize>| {
505529
for child in children {
506530
child.visit_leaves(|info| {
@@ -560,6 +584,17 @@ impl LevelInfoBuilder {
560584
nulls: Option<&NullBuffer>,
561585
range: Range<usize>,
562586
) {
587+
// Fast path: entire fixed-size list array is null; emit bulk null rep/def levels
588+
if let Some(nulls) = nulls {
589+
if nulls.null_count() == nulls.len() {
590+
let count = range.end - range.start;
591+
child.visit_leaves(|leaf| {
592+
leaf.extend_uniform_levels(ctx.def_level - 2, ctx.rep_level - 1, count);
593+
});
594+
return;
595+
}
596+
}
597+
563598
let write_non_null = |child: &mut LevelInfoBuilder, start_idx: usize, end_idx: usize| {
564599
let values_start = start_idx * fixed_size;
565600
let values_end = end_idx * fixed_size;
@@ -638,6 +673,14 @@ impl LevelInfoBuilder {
638673
fn write_leaf(info: &mut ArrayLevels, range: Range<usize>) {
639674
let len = range.end - range.start;
640675

676+
// Fast path: def levels are tracked and the entire leaf array is null; emit one uniform run of null levels for the range
677+
if let Some(nulls) = &info.logical_nulls {
678+
if !matches!(info.def_levels, LevelData::Absent) && nulls.null_count() == nulls.len() {
679+
info.extend_uniform_levels(info.max_def_level - 1, info.max_rep_level, len);
680+
return;
681+
}
682+
}
683+
641684
if matches!(info.def_levels, LevelData::Absent) {
642685
info.non_null_indices.extend(range.clone());
643686
} else {
@@ -972,6 +1015,12 @@ impl ArrayLevels {
9721015
}
9731016
}
9741017

1018+
/// Bulk-emit `count` uniform def/rep levels.
///
/// Appends one run of `count` copies of `def_val` to `self.def_levels` and one run of
/// `count` copies of `rep_val` to `self.rep_levels`, both through `append_run`, so the
/// caller does not have to push levels row by row.
1019+
fn extend_uniform_levels(&mut self, def_val: i16, rep_val: i16, count: usize) {
1020+
self.def_levels.append_run(def_val, count);
1021+
self.rep_levels.append_run(rep_val, count);
1022+
}
1023+
9751024
fn append_def_level_run(&mut self, value: i16, count: usize) {
9761025
self.def_levels.append_run(value, count);
9771026
}
@@ -2442,4 +2491,103 @@ mod tests {
24422491
assert_eq!(sliced.non_null_indices, Vec::<usize>::new());
24432492
assert_eq!(sliced.array.len(), 0);
24442493
}
2494+
2495+
#[test]
2496+
fn test_all_null_struct() {
2497+
// Struct<Int32> where every struct slot is null.
2498+
// Schema: a (struct, nullable) -> c (int32, nullable)
2499+
// Data: [null, null, null, null]
2500+
//
2501+
// Expected: max_def=2, def_levels all 0 (struct is null → child never reached),
2502+
// emitted as a single uniform run, and no non-null leaf indices are recorded.
2503+
let c = Int32Array::from(vec![None::<i32>; 4]);
2504+
let leaf = Arc::new(c) as ArrayRef;
2505+
let c_field = Arc::new(Field::new("c", DataType::Int32, true));
2506+
let a = StructArray::from((vec![(c_field, leaf.clone())], Buffer::from([0b00000000]))); // all-zero validity byte: every struct slot is null
2507+
let a_field = Field::new("a", a.data_type().clone(), true);
2508+
let a_array = Arc::new(a) as ArrayRef;
2509+
2510+
let levels = calculate_array_levels(&a_array, &a_field).unwrap();
2511+
assert_eq!(levels.len(), 1); // exactly one leaf column: c
2512+
2513+
let expected = ArrayLevels {
2514+
def_levels: LevelData::Uniform { value: 0, count: 4 }, // one run covering all 4 rows
2515+
rep_levels: LevelData::Absent,
2516+
non_null_indices: vec![],
2517+
max_def_level: 2,
2518+
max_rep_level: 0,
2519+
array: leaf,
2520+
logical_nulls: Some(NullBuffer::new_null(4)),
2521+
};
2522+
assert_eq!(&levels[0], &expected);
2523+
}
2524+
2525+
#[test]
2526+
fn test_all_null_nested_struct() {
2527+
// Struct<Struct<Int32>> where the outer struct is entirely null (the inner struct `b` is built all-null as well).
2528+
// Schema: a (struct, nullable) -> b (struct, nullable) -> c (int32, nullable)
2529+
// Data: [null, null, null]
2530+
//
2531+
// Expected: max_def=3, def_levels all 0 as a single uniform run; no non-null leaf indices.
2532+
let c = Int32Array::from(vec![None::<i32>; 3]);
2533+
let leaf = Arc::new(c) as ArrayRef;
2534+
let c_field = Arc::new(Field::new("c", DataType::Int32, true));
2535+
let b = StructArray::from((vec![(c_field, leaf.clone())], Buffer::from([0b00000000]))); // inner struct: all validity bits cleared
2536+
let b_field = Arc::new(Field::new("b", b.data_type().clone(), true));
2537+
let a = StructArray::from((
2538+
vec![(b_field, Arc::new(b) as ArrayRef)],
2539+
Buffer::from([0b00000000]), // outer struct: all validity bits cleared
2540+
));
2541+
let a_field = Field::new("a", a.data_type().clone(), true);
2542+
let a_array = Arc::new(a) as ArrayRef;
2543+
2544+
let levels = calculate_array_levels(&a_array, &a_field).unwrap();
2545+
assert_eq!(levels.len(), 1); // exactly one leaf column: c
2546+
2547+
let expected = ArrayLevels {
2548+
def_levels: LevelData::Uniform { value: 0, count: 3 }, // one run covering all 3 rows
2549+
rep_levels: LevelData::Absent,
2550+
non_null_indices: vec![],
2551+
max_def_level: 3,
2552+
max_rep_level: 0,
2553+
array: leaf,
2554+
logical_nulls: Some(NullBuffer::new_null(3)),
2555+
};
2556+
assert_eq!(&levels[0], &expected);
2557+
}
2558+
2559+
#[test]
2560+
fn test_all_null_struct_multiple_children() {
2561+
// Struct with two leaf children, entirely null.
2562+
// Schema: a (struct, nullable) -> { c1 (int32, nullable), c2 (int32, nullable) }
2563+
// Data: [null, null]
2564+
//
2565+
// Both leaf columns should get uniform def_levels=0 (one run of 2) and no non-null indices.
2566+
let c1 = Arc::new(Int32Array::from(vec![None::<i32>; 2])) as ArrayRef;
2567+
let c2 = Arc::new(Int32Array::from(vec![None::<i32>; 2])) as ArrayRef;
2568+
let c1_field = Arc::new(Field::new("c1", DataType::Int32, true));
2569+
let c2_field = Arc::new(Field::new("c2", DataType::Int32, true));
2570+
let a = StructArray::from((
2571+
vec![(c1_field, c1.clone()), (c2_field, c2.clone())],
2572+
Buffer::from([0b00000000]), // all validity bits cleared: every struct slot is null
2573+
));
2574+
let a_field = Field::new("a", a.data_type().clone(), true);
2575+
let a_array = Arc::new(a) as ArrayRef;
2576+
2577+
let levels = calculate_array_levels(&a_array, &a_field).unwrap();
2578+
assert_eq!(levels.len(), 2); // one ArrayLevels per leaf column: c1 and c2
2579+
2580+
// Compare positionally: levels[i] is checked against the i-th child (c1, then c2).
for (i, leaf) in [c1, c2].into_iter().enumerate() {
2581+
let expected = ArrayLevels {
2582+
def_levels: LevelData::Uniform { value: 0, count: 2 },
2583+
rep_levels: LevelData::Absent,
2584+
non_null_indices: vec![],
2585+
max_def_level: 2,
2586+
max_rep_level: 0,
2587+
array: leaf,
2588+
logical_nulls: Some(NullBuffer::new_null(2)),
2589+
};
2590+
assert_eq!(&levels[i], &expected, "leaf {i} mismatch");
2591+
}
2592+
}
24452593
}

0 commit comments

Comments
 (0)