diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 02099f6b8..9a6efafca 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -165,6 +165,7 @@ set(ICEBERG_DATA_SOURCES data/position_delete_writer.cc data/writer.cc deletes/position_delete_index.cc + deletes/position_delete_range_consumer.cc deletes/roaring_position_bitmap.cc puffin/file_metadata.cc puffin/json_serde.cc diff --git a/src/iceberg/data/delete_loader.cc b/src/iceberg/data/delete_loader.cc index 35bc926ba..805142bb5 100644 --- a/src/iceberg/data/delete_loader.cc +++ b/src/iceberg/data/delete_loader.cc @@ -19,14 +19,21 @@ #include "iceberg/data/delete_loader.h" +#include +#include #include #include +#include + +#include "iceberg/arrow/nanoarrow_status_internal.h" #include "iceberg/arrow_c_data_guard_internal.h" #include "iceberg/deletes/position_delete_index.h" +#include "iceberg/deletes/position_delete_range_consumer.h" #include "iceberg/file_reader.h" #include "iceberg/manifest/manifest_entry.h" #include "iceberg/metadata_columns.h" +#include "iceberg/result.h" #include "iceberg/row/arrow_array_wrapper.h" #include "iceberg/schema.h" #include "iceberg/util/macros.h" @@ -57,6 +64,24 @@ Result> OpenDeleteFile(const DataFile& file, return ReaderFactoryRegistry::Open(file.file_format, options); } +/// Raw `int64` values buffer (offset-adjusted). Skips the validity bitmap: +/// `kDeleteFilePos` is required by the V2 spec. +const int64_t* Int64ValuesBuffer(const ArrowArrayView* view) { + return view->buffer_views[1].data.as_int64 + view->offset; +} + +/// String-equals at `row_idx` via nanoarrow's unsafe direct-buffer access. +/// Skips the validity bitmap: `kDeleteFilePath` is required by the V2 spec. 
+bool StringEquals(const ArrowArrayView* view, int64_t row_idx,
+                  std::string_view target) {
+  ArrowStringView sv = ArrowArrayViewGetStringUnsafe(view, row_idx);
+  if (static_cast<size_t>(sv.size_bytes) != target.size()) {
+    return false;
+  }
+  return target.empty() ||
+         std::memcmp(sv.data, target.data(), target.size()) == 0;
+}
+
 }  // namespace
 
 DeleteLoader::DeleteLoader(std::shared_ptr<FileIO> io) : io_(std::move(io)) {}
@@ -71,6 +96,39 @@ Status DeleteLoader::LoadPositionDelete(const DataFile& file, PositionDeleteInde
   ICEBERG_ASSIGN_OR_RAISE(auto arrow_schema, reader->Schema());
   internal::ArrowSchemaGuard schema_guard(&arrow_schema);
 
+  // Reused across batches; reads child buffers directly to avoid the
+  // per-row `Scalar` dispatch in `ArrowArrayStructLike`.
+  //
+  // Start from nanoarrow's empty state before `view_guard` is armed:
+  // `ArrowArrayViewInitFromSchema` can fail before it initializes the view,
+  // and the guard's cleanup must never see indeterminate memory.
+  ArrowArrayView array_view;
+  ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_UNINITIALIZED);
+  internal::ArrowArrayViewGuard view_guard(&array_view);
+  ArrowError error;
+  ICEBERG_NANOARROW_RETURN_UNEXPECTED_WITH_ERROR(
+      ArrowArrayViewInitFromSchema(&array_view, &arrow_schema, &error), error);
+
+  // The batch loop indexes `children[1]` and reads it as a raw `int64`
+  // buffer without per-row checks, so reject any other layout here rather
+  // than read out of bounds on a malformed or mis-projected delete file.
+  if (array_view.n_children < 2 ||
+      array_view.children[1]->storage_type != NANOARROW_TYPE_INT64) {
+    return InvalidArgument(
+        "Position delete file must expose (file_path, pos: int64) columns");
+  }
+
+  // Fast path when the writer's `referenced_data_file` hint matches our
+  // target: skip the path column, hand `pos_data` straight to
+  // `ForEachPositionDelete`. Trusts the hint -- spec-compliant writers
+  // only set it when all rows share one data file.
+  const bool use_referenced_data_file_fast_path =
+      file.referenced_data_file.has_value() &&
+      file.referenced_data_file.value() == data_file_path;
+
+  // Filter-path staging buffer; reused across batches via `clear()`.
+ std::vector positions; + while (true) { ICEBERG_ASSIGN_OR_RAISE(auto batch_opt, reader->Next()); if (!batch_opt.has_value()) break; @@ -78,23 +122,34 @@ Status DeleteLoader::LoadPositionDelete(const DataFile& file, PositionDeleteInde auto& batch = batch_opt.value(); internal::ArrowArrayGuard batch_guard(&batch); - ICEBERG_ASSIGN_OR_RAISE( - auto row, ArrowArrayStructLike::Make(arrow_schema, batch, /*row_index=*/0)); + ICEBERG_NANOARROW_RETURN_UNEXPECTED_WITH_ERROR( + ArrowArrayViewSetArray(&array_view, &batch, &error), error); - for (int64_t i = 0; i < batch.length; ++i) { - if (i > 0) { - ICEBERG_RETURN_UNEXPECTED(row->Reset(i)); - } - // Field 0: file_path - ICEBERG_ASSIGN_OR_RAISE(auto path_scalar, row->GetField(0)); - auto path = std::get(path_scalar); - - if (path == data_file_path) { - // Field 1: pos - ICEBERG_ASSIGN_OR_RAISE(auto pos_scalar, row->GetField(1)); - index.Delete(std::get(pos_scalar)); + const int64_t length = batch.length; + if (length <= 0) { + continue; + } + + // Child indices must match `PosDeleteSchema()`: 0 = file_path, 1 = pos. 
+ const ArrowArrayView* pos_view = array_view.children[1]; + const int64_t* pos_data = Int64ValuesBuffer(pos_view); + + if (use_referenced_data_file_fast_path) { + ForEachPositionDelete(std::span(pos_data, length), index); + continue; + } + + const ArrowArrayView* path_view = array_view.children[0]; + positions.clear(); + if (positions.capacity() < static_cast(length)) { + positions.reserve(static_cast(length)); + } + for (int64_t i = 0; i < length; ++i) { + if (StringEquals(path_view, i, data_file_path)) { + positions.push_back(pos_data[i]); } } + ForEachPositionDelete(positions, index); } return reader->Close(); diff --git a/src/iceberg/deletes/position_delete_index.cc b/src/iceberg/deletes/position_delete_index.cc index 0ff8f8303..afae4fab5 100644 --- a/src/iceberg/deletes/position_delete_index.cc +++ b/src/iceberg/deletes/position_delete_index.cc @@ -39,4 +39,9 @@ void PositionDeleteIndex::Merge(const PositionDeleteIndex& other) { bitmap_.Or(other.bitmap_); } +void PositionDeleteIndex::BulkAddForKey(int32_t key, const uint32_t* positions, + size_t n) { + bitmap_.AddManyForKey(key, positions, n); +} + } // namespace iceberg diff --git a/src/iceberg/deletes/position_delete_index.h b/src/iceberg/deletes/position_delete_index.h index 5de82a591..a30c8e25b 100644 --- a/src/iceberg/deletes/position_delete_index.h +++ b/src/iceberg/deletes/position_delete_index.h @@ -24,17 +24,19 @@ #include #include +#include #include "iceberg/deletes/roaring_position_bitmap.h" #include "iceberg/iceberg_data_export.h" namespace iceberg { -/// \brief Tracks deleted row positions using a bitmap. +/// \brief Tracks deleted row positions for an Iceberg MOR data file. +/// Positions are 0-based row indices. /// -/// This class provides a domain-specific API for position deletes -/// in Iceberg MOR (merge-on-read) tables. Positions are 0-based -/// row indices within a data file. +/// \note Not thread-safe. 
Callers must externally serialize every access +/// -- including read-only methods -- whenever any thread might mutate +/// the instance. Distinct instances are independent. class ICEBERG_DATA_EXPORT PositionDeleteIndex { public: PositionDeleteIndex() = default; @@ -65,6 +67,14 @@ class ICEBERG_DATA_EXPORT PositionDeleteIndex { void Merge(const PositionDeleteIndex& other); private: + // Bulk-add `n` positions sharing high-32-bit `key`. Private hook for + // `ForEachPositionDelete`'s bulk path; keeps `Delete` the sole public + // mutation surface. + void BulkAddForKey(int32_t key, const uint32_t* positions, size_t n); + + friend void ForEachPositionDelete(std::span positions, + PositionDeleteIndex& target); + RoaringPositionBitmap bitmap_; }; diff --git a/src/iceberg/deletes/position_delete_range_consumer.cc b/src/iceberg/deletes/position_delete_range_consumer.cc new file mode 100644 index 000000000..cdbb48bfe --- /dev/null +++ b/src/iceberg/deletes/position_delete_range_consumer.cc @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "iceberg/deletes/position_delete_range_consumer.h" + +#include +#include +#include +#include + +#include "iceberg/deletes/position_delete_index.h" +#include "iceberg/deletes/roaring_position_bitmap.h" + +namespace iceberg { + +namespace { + +bool IsValidPosition(int64_t pos) { + return pos >= 0 && pos <= RoaringPositionBitmap::kMaxPosition; +} + +// Unsigned subtraction so negative or wrap-around input can't +// false-positive via signed overflow. +bool IsAdjacent(int64_t prev, int64_t next) { + return (static_cast(next) - static_cast(prev)) == 1; +} + +// `RoaringPositionBitmap` shards positions by their high 32 bits; the +// bulk path groups by this key before flushing via `BulkAddForKey`. +int32_t HighKeyFromPosition(int64_t pos) { + return static_cast(pos >> 32); +} + +// Emit `[range_start, last_position]`, collapsing singletons. Callers +// pre-filter via `IsValidPosition`, so `last_position + 1` cannot overflow. +void EmitRange(PositionDeleteIndex& target, int64_t range_start, + int64_t last_position) { + if (range_start == last_position) { + target.Delete(range_start); + } else { + target.Delete(range_start, last_position + 1); + } +} + +// Emit closed-interval runs; out-of-range positions are silently skipped +// to match `Delete(pos)`. 
+void CoalesceIntoRanges(std::span<const int64_t> positions,
+                        PositionDeleteIndex& target) {
+  const size_t n = positions.size();
+
+  size_t i = 0;
+  while (i < n && !IsValidPosition(positions[i])) {
+    ++i;
+  }
+  if (i == n) {
+    return;
+  }
+
+  int64_t range_start = positions[i];
+  int64_t last_position = range_start;
+  ++i;
+
+  for (; i < n; ++i) {
+    const int64_t pos = positions[i];
+    if (!IsValidPosition(pos)) {
+      continue;
+    }
+    if (!IsAdjacent(last_position, pos)) {
+      EmitRange(target, range_start, last_position);
+      range_start = pos;
+    }
+    last_position = pos;
+  }
+
+  EmitRange(target, range_start, last_position);
+}
+
+}  // namespace
+
+void ForEachPositionDelete(std::span<const int64_t> positions,
+                           PositionDeleteIndex& target) {
+  if (positions.empty()) {
+    return;
+  }
+
+  // Below this size the bulk path's fixed overhead exceeds coalescing
+  // even on fully scattered input; skip the sniff.
+  constexpr size_t kMinSniffSize = 64;
+  if (positions.size() < kMinSniffSize) {
+    CoalesceIntoRanges(positions, target);
+    return;
+  }
+
+  // Estimate boundary density (fraction of adjacent pairs where
+  // `pos[i] != pos[i-1] + 1`) over a bounded prefix. Misclassification is
+  // performance-only -- both paths produce identical contents.
+  constexpr size_t kSniffSize = 1024;
+  // 10% threshold: boundary-heavy inputs go to bulk addMany; run-heavy
+  // inputs stay on coalesce where Roaring's addRange collapses runs.
+  constexpr size_t kBulkThresholdPercent = 10;
+
+  const size_t sniff = std::min(positions.size(), kSniffSize);
+  size_t boundaries = 0;
+  for (size_t i = 1; i < sniff; ++i) {
+    boundaries += static_cast<size_t>(!IsAdjacent(positions[i - 1], positions[i]));
+  }
+
+  // boundaries / (sniff - 1) > kBulkThresholdPercent / 100, without FP.
+  if (boundaries * 100 > (sniff - 1) * kBulkThresholdPercent) {
+    // Bulk path: group by high-32-bit key, flush each group via CRoaring's
+    // `addMany` (through `BulkAddForKey`). The thread-local buffer is
+    // reused across calls; nested invocations on the same thread would
+    // corrupt it -- see `\warning` on `ForEachPositionDelete`.
+    thread_local std::vector<uint32_t> bulk_key_positions;
+    const size_t n = positions.size();
+    size_t i = 0;
+    while (i < n) {
+      while (i < n && !IsValidPosition(positions[i])) {
+        ++i;
+      }
+      if (i == n) {
+        break;
+      }
+      const int32_t key = HighKeyFromPosition(positions[i]);
+      bulk_key_positions.clear();
+      while (i < n && IsValidPosition(positions[i]) &&
+             HighKeyFromPosition(positions[i]) == key) {
+        bulk_key_positions.push_back(
+            static_cast<uint32_t>(positions[i] & 0xFFFFFFFFu));
+        ++i;
+      }
+      target.BulkAddForKey(key, bulk_key_positions.data(),
+                           bulk_key_positions.size());
+    }
+    return;
+  }
+
+  CoalesceIntoRanges(positions, target);
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/deletes/position_delete_range_consumer.h b/src/iceberg/deletes/position_delete_range_consumer.h
new file mode 100644
index 000000000..463bdaa25
--- /dev/null
+++ b/src/iceberg/deletes/position_delete_range_consumer.h
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +#pragma once + +#include +#include + +#include "iceberg/iceberg_data_export.h" + +namespace iceberg { + +class PositionDeleteIndex; + +/// \brief Apply `positions` to `target` as deletes; semantically equivalent +/// to calling `target.Delete(pos)` for each entry. Out-of-range positions +/// are silently ignored. Sorted, mostly-contiguous input is fastest. +/// Mirrors Java's `PositionDeleteRangeConsumer.forEach`. +/// +/// \warning Not safe to call recursively or interleaved on the same thread: +/// the bulk dispatch path uses a thread-local staging buffer that a +/// nested invocation would corrupt. Concurrent calls on different +/// threads are safe with disjoint `target` (see `PositionDeleteIndex`). +void ICEBERG_DATA_EXPORT ForEachPositionDelete(std::span positions, + PositionDeleteIndex& target); + +} // namespace iceberg diff --git a/src/iceberg/deletes/roaring_position_bitmap.cc b/src/iceberg/deletes/roaring_position_bitmap.cc index 2bf749589..1afe0a1a0 100644 --- a/src/iceberg/deletes/roaring_position_bitmap.cc +++ b/src/iceberg/deletes/roaring_position_bitmap.cc @@ -105,6 +105,12 @@ void RoaringPositionBitmap::Add(int64_t pos) { impl_->bitmaps[key].add(pos32); } +void RoaringPositionBitmap::AddManyForKey(int32_t key, const uint32_t* positions, + size_t n) { + impl_->AllocateBitmapsIfNeeded(key + 1); + impl_->bitmaps[key].addMany(n, positions); +} + void RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) { pos_start = std::max(pos_start, int64_t{0}); pos_end = std::min(pos_end, kMaxPosition + 1); diff --git a/src/iceberg/deletes/roaring_position_bitmap.h b/src/iceberg/deletes/roaring_position_bitmap.h index 8d4b3586d..9108a60f0 100644 --- a/src/iceberg/deletes/roaring_position_bitmap.h +++ b/src/iceberg/deletes/roaring_position_bitmap.h @@ -22,6 +22,7 @@ /// \file iceberg/deletes/roaring_position_bitmap.h /// A 64-bit position bitmap using an array of 32-bit Roaring bitmaps. 
+#include #include #include #include @@ -33,6 +34,8 @@ namespace iceberg { +class PositionDeleteIndex; + /// \brief A bitmap that supports positive 64-bit positions, optimized /// for cases where most positions fit in 32 bits. /// @@ -110,6 +113,12 @@ class ICEBERG_DATA_EXPORT RoaringPositionBitmap { std::unique_ptr impl_; explicit RoaringPositionBitmap(std::unique_ptr impl); + + // Bulk-add positions sharing high-32-bit `key`. Internal hook for + // `PositionDeleteIndex::BulkAddForKey`; per-key grouping is the caller's + // job, keeping this a thin wrapper around CRoaring's `addMany`. + void AddManyForKey(int32_t key, const uint32_t* positions, size_t n); + friend class PositionDeleteIndex; }; } // namespace iceberg diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 41a5c2dd3..9e9de0776 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -146,6 +146,7 @@ iceberg_data_sources = files( 'data/position_delete_writer.cc', 'data/writer.cc', 'deletes/position_delete_index.cc', + 'deletes/position_delete_range_consumer.cc', 'deletes/roaring_position_bitmap.cc', 'puffin/file_metadata.cc', 'puffin/json_serde.cc', diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 6b98951ad..7e3805037 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -129,6 +129,7 @@ add_iceberg_test(util_test location_util_test.cc roaring_position_bitmap_test.cc position_delete_index_test.cc + position_delete_range_consumer_test.cc retry_util_test.cc string_util_test.cc struct_like_set_test.cc diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index e168d08bf..7a8ef0261 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -92,6 +92,7 @@ iceberg_tests = { 'lazy_test.cc', 'location_util_test.cc', 'position_delete_index_test.cc', + 'position_delete_range_consumer_test.cc', 'retry_util_test.cc', 'roaring_position_bitmap_test.cc', 'string_util_test.cc', diff 
--git a/src/iceberg/test/position_delete_range_consumer_test.cc b/src/iceberg/test/position_delete_range_consumer_test.cc new file mode 100644 index 000000000..765e7c87f --- /dev/null +++ b/src/iceberg/test/position_delete_range_consumer_test.cc @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/deletes/position_delete_range_consumer.h" + +#include +#include +#include +#include +#include + +#include + +#include "iceberg/deletes/position_delete_index.h" +#include "iceberg/deletes/roaring_position_bitmap.h" + +namespace iceberg { + +namespace { + +// Reference set: positions a per-pos `Delete(pos)` loop would accept. +std::set ExpectedValidSet(const std::vector& positions) { + std::set expected; + for (int64_t pos : positions) { + if (pos >= 0 && pos <= RoaringPositionBitmap::kMaxPosition) { + expected.insert(pos); + } + } + return expected; +} + +// Strict contents check: cardinality plus per-position membership. +// Weaker checks would miss divergences at the 32-bit key boundary. 
+void AssertMatchesBaseline(const std::vector<int64_t>& positions) {
+  PositionDeleteIndex index;
+  ForEachPositionDelete(std::span(positions), index);
+  const auto expected = ExpectedValidSet(positions);
+  ASSERT_EQ(index.Cardinality(), static_cast<int64_t>(expected.size()));
+  for (int64_t pos : expected) {
+    ASSERT_TRUE(index.IsDeleted(pos)) << "missing pos=" << pos;
+  }
+}
+
+}  // namespace
+
+TEST(PositionDeleteRangeConsumerTest, EmptySpan) { AssertMatchesBaseline({}); }
+
+TEST(PositionDeleteRangeConsumerTest, SinglePosition) { AssertMatchesBaseline({42}); }
+
+TEST(PositionDeleteRangeConsumerTest, FullyContiguousRunBecomesSingleRange) {
+  std::vector<int64_t> positions;
+  for (int64_t i = 100; i < 200; ++i) {
+    positions.push_back(i);
+  }
+  AssertMatchesBaseline(positions);
+}
+
+TEST(PositionDeleteRangeConsumerTest, AlternatingPositionsProduceNoCoalescing) {
+  std::vector<int64_t> positions;
+  for (int64_t i = 0; i < 50; ++i) {
+    positions.push_back(i * 2);
+  }
+  AssertMatchesBaseline(positions);
+}
+
+TEST(PositionDeleteRangeConsumerTest, MixedShortAndLongRuns) {
+  AssertMatchesBaseline({1, 2, 3, 7, 10, 11, 20, 30, 31, 32, 33, 34});
+}
+
+TEST(PositionDeleteRangeConsumerTest, UnsortedInputStillCorrect) {
+  AssertMatchesBaseline({10, 5, 11, 12, 4, 13, 100});
+}
+
+TEST(PositionDeleteRangeConsumerTest, DuplicatesAreIdempotent) {
+  AssertMatchesBaseline({5, 5, 5, 6, 6, 7});
+}
+
+TEST(PositionDeleteRangeConsumerTest, InvalidPositionsSilentlySkipped) {
+  // Invalids at the edges, mid-run, and mixed with valid contiguous runs
+  // must all be dropped without breaking coalescing around them. We stay
+  // well below `kMaxPosition` to avoid forcing the bitmap to resize its
+  // backing vector to ~2^31 empty containers.
+  AssertMatchesBaseline({std::numeric_limits<int64_t>::min(), -5, -4, 10, 11,
+                         -999, 12, 13, RoaringPositionBitmap::kMaxPosition + 1,
+                         std::numeric_limits<int64_t>::max()});
+}
+
+TEST(PositionDeleteRangeConsumerTest, ContiguousRunAcrossKeyBoundary) {
+  // Pins `last_position + 1` and the adjacency check at a non-zero
+  // high-32 key. The coalesced run must survive the key transition.
+  constexpr int64_t kBoundary = int64_t{1} << 32;
+  std::vector<int64_t> positions;
+  for (int64_t i = kBoundary - 3; i < kBoundary + 3; ++i) {
+    positions.push_back(i);
+  }
+  AssertMatchesBaseline(positions);
+}
+
+TEST(PositionDeleteRangeConsumerTest, DispatcherAgreesAtBothDensities) {
+  // Above the sniff threshold at densities below and above the 10%
+  // cutoff. We can't observe the choice directly; agreement with the
+  // baseline is the contract.
+  std::vector<int64_t> low_density;
+  std::vector<int64_t> high_density;
+  int64_t lo = 0;
+  int64_t hi = 0;
+  for (int64_t i = 0; i < 2'048; ++i) {
+    low_density.push_back(lo);
+    ++lo;
+    if ((i + 1) % 20 == 0) {
+      lo += 5;
+    }
+    high_density.push_back(hi);
+    hi += ((i % 5 == 0) ? 5 : 1);
+  }
+  AssertMatchesBaseline(low_density);
+  AssertMatchesBaseline(high_density);
+}
+
+TEST(PositionDeleteRangeConsumerTest, DispatcherSkipsSniffOnSmallInputs) {
+  // Below the 64-element threshold the dispatcher bypasses the sniff.
+  // Exercise both a scattered tiny input (where bulk would win at large
+  // n) and a contiguous tiny input (the range path always wins).
+  std::vector<int64_t> scattered;
+  std::vector<int64_t> contiguous;
+  for (int64_t i = 0; i < 32; ++i) {
+    scattered.push_back(i * 100);
+    contiguous.push_back(i);
+  }
+  AssertMatchesBaseline(scattered);
+  AssertMatchesBaseline(contiguous);
+}
+
+TEST(PositionDeleteRangeConsumerTest, DispatcherAgreesAtThresholdBoundary) {
+  // The dispatcher selects the bulk path when
+  //   boundaries * 100 > (sniff - 1) * kBulkThresholdPercent
+  // With `sniff = 1024` and `kBulkThresholdPercent = 10`, the cutoff is
+  // 102.3 boundaries: 102 stays on coalesce, 103 flips to bulk. Both
+  // inputs must still produce the same cardinality and membership as
+  // the per-position baseline; this test guards against arithmetic
+  // regressions around the threshold constant.
+  auto build = [](int64_t target_boundaries) {
+    std::vector<int64_t> positions;
+    positions.reserve(1024);
+    int64_t pos = 0;
+    positions.push_back(pos);
+    for (int64_t i = 1; i < 1024; ++i) {
+      pos += (i <= target_boundaries) ? 2 : 1;
+      positions.push_back(pos);
+    }
+    return positions;
+  };
+  AssertMatchesBaseline(build(/*target_boundaries=*/102));
+  AssertMatchesBaseline(build(/*target_boundaries=*/103));
+}
+
+TEST(PositionDeleteRangeConsumerTest, BulkPathGroupsByKeyAndSkipsInvalid) {
+  // Scattered input (every adjacent pair is a boundary) that is large
+  // enough to pass the 64-element sniff gate, so the dispatcher takes the
+  // bulk path. The positions cross the first high-32 key boundary and are
+  // interleaved with out-of-range values, which exercises per-key flushing
+  // and invalid skipping on that path. The smaller key-boundary and
+  // invalid-input tests stay under the gate and only reach coalescing.
+  constexpr int64_t kBoundary = int64_t{1} << 32;
+  std::vector<int64_t> positions;
+  for (int64_t i = 0; i < 200; ++i) {
+    positions.push_back(kBoundary - 300 + i * 3);
+    if (i % 10 == 0) {
+      positions.push_back(-1 - i);
+      positions.push_back(RoaringPositionBitmap::kMaxPosition + 1);
+    }
+  }
+  AssertMatchesBaseline(positions);
+}
+
+}  // namespace iceberg