From 63f269c9a97eacd8d512ebdfdf5880253d04db51 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 2 May 2026 12:41:51 +0000
Subject: [PATCH 1/6] Iteration 302: add squeeze, hash_pandas_object, infer_objects benchmarks

Run: https://github.com/githubnext/tsessebe/actions/runs/25251927678

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 benchmarks/pandas/bench_hash_pandas_object.py | 30 ++++++++++++++++++
 benchmarks/pandas/bench_infer_objects.py      | 29 +++++++++++++++++
 benchmarks/pandas/bench_squeeze.py            | 31 +++++++++++++++++++
 benchmarks/tsb/bench_hash_pandas_object.ts    | 31 +++++++++++++++++++
 benchmarks/tsb/bench_infer_objects.ts         | 30 ++++++++++++++++++
 benchmarks/tsb/bench_squeeze.ts               | 30 ++++++++++++++++++
 6 files changed, 181 insertions(+)
 create mode 100644 benchmarks/pandas/bench_hash_pandas_object.py
 create mode 100644 benchmarks/pandas/bench_infer_objects.py
 create mode 100644 benchmarks/pandas/bench_squeeze.py
 create mode 100644 benchmarks/tsb/bench_hash_pandas_object.ts
 create mode 100644 benchmarks/tsb/bench_infer_objects.ts
 create mode 100644 benchmarks/tsb/bench_squeeze.ts

diff --git a/benchmarks/pandas/bench_hash_pandas_object.py b/benchmarks/pandas/bench_hash_pandas_object.py
new file mode 100644
index 00000000..beb286a2
--- /dev/null
+++ b/benchmarks/pandas/bench_hash_pandas_object.py
@@ -0,0 +1,30 @@
+import pandas as pd
+import numpy as np
+import json
+import time
+
+N = 10_000
+nums = list(range(N))
+strs = [f"label_{i}" for i in range(N)]
+
+num_series = pd.Series(nums, dtype=float)
+df = pd.DataFrame({"a": nums, "b": strs})
+
+# Warm-up
+for _ in range(10):
+    pd.util.hash_pandas_object(num_series)
+    pd.util.hash_pandas_object(df)
+
+iterations = 50
+start = time.perf_counter()
+for _ in range(iterations):
+    pd.util.hash_pandas_object(num_series)
+    pd.util.hash_pandas_object(df)
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "hash_pandas_object",
+    "mean_ms": total_ms / iterations,
+    "iterations": iterations,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_infer_objects.py b/benchmarks/pandas/bench_infer_objects.py
new file mode 100644
index 00000000..6557d275
--- /dev/null
+++ b/benchmarks/pandas/bench_infer_objects.py
@@ -0,0 +1,29 @@
+import pandas as pd
+import numpy as np
+import json
+import time
+
+N = 100_000
+object_data = [None if i % 10 == 0 else i for i in range(N)]
+
+obj_series = pd.Series(object_data, dtype=object)
+obj_df = pd.DataFrame({"a": object_data, "b": [None if v is None else v * 2 for v in object_data]})
+
+# Warm-up
+for _ in range(10):
+    obj_series.infer_objects()
+    obj_df.infer_objects()
+
+iterations = 100
+start = time.perf_counter()
+for _ in range(iterations):
+    obj_series.infer_objects()
+    obj_df.infer_objects()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "infer_objects",
+    "mean_ms": total_ms / iterations,
+    "iterations": iterations,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_squeeze.py b/benchmarks/pandas/bench_squeeze.py
new file mode 100644
index 00000000..1f0c7c3a
--- /dev/null
+++ b/benchmarks/pandas/bench_squeeze.py
@@ -0,0 +1,31 @@
+import pandas as pd
+import numpy as np
+import json
+import time
+
+N = 100_000
+data = list(range(N))
+
+# For Series.squeeze: multi-element returns self unchanged
+big_series = pd.Series(data, dtype=float)
+# For DataFrame.squeeze(axis=1): single-column DataFrame
+single_col_df = pd.DataFrame({"a": data})
+
+# Warm-up
+for _ in range(20):
+    big_series.squeeze()
+    single_col_df.squeeze(axis=1)
+
+iterations = 500
+start = time.perf_counter()
+for _ in range(iterations):
+    big_series.squeeze()
+    single_col_df.squeeze(axis=1)
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "squeeze",
+    "mean_ms": total_ms / iterations,
+    "iterations": iterations,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/tsb/bench_hash_pandas_object.ts b/benchmarks/tsb/bench_hash_pandas_object.ts
new file mode 100644
index 00000000..d7f41480
--- /dev/null
+++ b/benchmarks/tsb/bench_hash_pandas_object.ts
@@ -0,0 +1,31 @@
+import { Series, DataFrame, hashPandasObject } from "../../src/index.ts";
+
+const N = 10_000;
+const nums = Float64Array.from({ length: N }, (_, i) => i);
+const strs: string[] = Array.from({ length: N }, (_, i) => `label_${i}`);
+
+const numSeries = new Series({ data: nums });
+const df = DataFrame.fromColumns({ a: nums, b: strs });
+
+// Warm-up
+for (let i = 0; i < 10; i++) {
+  hashPandasObject(numSeries);
+  hashPandasObject(df);
+}
+
+const iterations = 50;
+const start = performance.now();
+for (let i = 0; i < iterations; i++) {
+  hashPandasObject(numSeries);
+  hashPandasObject(df);
+}
+const total_ms = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "hash_pandas_object",
+    mean_ms: total_ms / iterations,
+    iterations,
+    total_ms,
+  }),
+);
diff --git a/benchmarks/tsb/bench_infer_objects.ts b/benchmarks/tsb/bench_infer_objects.ts
new file mode 100644
index 00000000..669d2467
--- /dev/null
+++ b/benchmarks/tsb/bench_infer_objects.ts
@@ -0,0 +1,30 @@
+import { Series, DataFrame, inferObjectsSeries, inferObjectsDataFrame } from "../../src/index.ts";
+
+const N = 100_000;
+// Object-dtype series with mixed integer values (infer_objects will infer int dtype)
+const objectData: (number | null)[] = Array.from({ length: N }, (_, i) => (i % 10 === 0 ? null : i));
+const objSeries = new Series({ data: objectData });
+const objDf = DataFrame.fromColumns({ a: objectData, b: objectData.map((v) => (v !== null ? v * 2 : null)) });
+
+// Warm-up
+for (let i = 0; i < 10; i++) {
+  inferObjectsSeries(objSeries, { objectOnly: false });
+  inferObjectsDataFrame(objDf, { objectOnly: false });
+}
+
+const iterations = 100;
+const start = performance.now();
+for (let i = 0; i < iterations; i++) {
+  inferObjectsSeries(objSeries, { objectOnly: false });
+  inferObjectsDataFrame(objDf, { objectOnly: false });
+}
+const total_ms = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "infer_objects",
+    mean_ms: total_ms / iterations,
+    iterations,
+    total_ms,
+  }),
+);
diff --git a/benchmarks/tsb/bench_squeeze.ts b/benchmarks/tsb/bench_squeeze.ts
new file mode 100644
index 00000000..d5061402
--- /dev/null
+++ b/benchmarks/tsb/bench_squeeze.ts
@@ -0,0 +1,30 @@
+import { Series, DataFrame, squeezeSeries, squeezeDataFrame } from "../../src/index.ts";
+
+const N = 100_000;
+// For squeezeSeries: a multi-element Series (returns self unchanged)
+const bigSeries = new Series({ data: Float64Array.from({ length: N }, (_, i) => i) });
+// For squeezeDataFrame: a single-column DataFrame (axis=1 squeezes to Series)
+const singleColDf = DataFrame.fromColumns({ a: Float64Array.from({ length: N }, (_, i) => i) });
+
+// Warm-up
+for (let i = 0; i < 20; i++) {
+  squeezeSeries(bigSeries);
+  squeezeDataFrame(singleColDf, 1);
+}
+
+const iterations = 500;
+const start = performance.now();
+for (let i = 0; i < iterations; i++) {
+  squeezeSeries(bigSeries);
+  squeezeDataFrame(singleColDf, 1);
+}
+const total_ms = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "squeeze",
+    mean_ms: total_ms / iterations,
+    iterations,
+    total_ms,
+  }),
+);

From 938756fadb8fe7dd38171e5f3f816554662760a7 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Sat, 2 May 2026 12:41:54 +0000
Subject: [PATCH 2/6] ci: trigger checks


From 2411b05eb45e5b8daafaea57de52212a4eb10403 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 3 May 2026 07:15:38 +0000
Subject: [PATCH 3/6] Iteration 303: Add benchmarks for keepTrue/keepFalse, mergeOrdered, and Styler

Run: https://github.com/githubnext/tsessebe/actions/runs/25272621402

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 benchmarks/pandas/bench_keep_true_false.py | 31 +++++++++++++++++++
 benchmarks/pandas/bench_merge_ordered.py   | 32 +++++++++++++++++++
 benchmarks/pandas/bench_styler.py          | 34 ++++++++++++++++++++
 benchmarks/tsb/bench_keep_true_false.ts    | 33 ++++++++++++++++++++
 benchmarks/tsb/bench_merge_ordered.ts      | 36 ++++++++++++++++++++++
 benchmarks/tsb/bench_styler.ts             | 32 +++++++++++++++++++
 6 files changed, 198 insertions(+)
 create mode 100644 benchmarks/pandas/bench_keep_true_false.py
 create mode 100644 benchmarks/pandas/bench_merge_ordered.py
 create mode 100644 benchmarks/pandas/bench_styler.py
 create mode 100644 benchmarks/tsb/bench_keep_true_false.ts
 create mode 100644 benchmarks/tsb/bench_merge_ordered.ts
 create mode 100644 benchmarks/tsb/bench_styler.ts

diff --git a/benchmarks/pandas/bench_keep_true_false.py b/benchmarks/pandas/bench_keep_true_false.py
new file mode 100644
index 00000000..c5b9dee7
--- /dev/null
+++ b/benchmarks/pandas/bench_keep_true_false.py
@@ -0,0 +1,31 @@
+"""Benchmark: keepTrue / keepFalse equivalent — boolean mask filtering on a 100k-element Series"""
+import json
+import time
+import pandas as pd
+import numpy as np
+
+N = 100_000
+WARMUP = 2
+ITERATIONS = 5
+
+data = list(range(N))
+mask = [i % 2 == 0 for i in range(N)]
+s = pd.Series(data, dtype=float)
+bool_mask = pd.array(mask, dtype=bool)
+
+for _ in range(WARMUP):
+    s[bool_mask]
+    s[~bool_mask]
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    s[bool_mask]
+    s[~bool_mask]
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "keep_true_false",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_merge_ordered.py b/benchmarks/pandas/bench_merge_ordered.py
new file mode 100644
index 00000000..34ee8f19
--- /dev/null
+++ b/benchmarks/pandas/bench_merge_ordered.py
@@ -0,0 +1,32 @@
+"""Benchmark: merge_ordered — ordered merge of two 10k-row DataFrames on a key column"""
+import json
+import time
+import pandas as pd
+import numpy as np
+
+N = 10_000
+WARMUP = 2
+ITERATIONS = 5
+
+keys1 = list(range(0, N * 2, 2))
+vals1 = [i * 1.0 for i in range(N)]
+keys2 = list(range(0, N * 3, 3))
+vals2 = [i * 2.0 for i in range(N)]
+
+df1 = pd.DataFrame({"key": keys1, "val1": vals1})
+df2 = pd.DataFrame({"key": keys2, "val2": vals2})
+
+for _ in range(WARMUP):
+    pd.merge_ordered(df1, df2, on="key")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.merge_ordered(df1, df2, on="key")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "merge_ordered",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_styler.py b/benchmarks/pandas/bench_styler.py
new file mode 100644
index 00000000..e8ae731b
--- /dev/null
+++ b/benchmarks/pandas/bench_styler.py
@@ -0,0 +1,34 @@
+"""Benchmark: Styler — highlight max/min and background gradient on a 1000-row DataFrame"""
+import json
+import time
+import math
+import pandas as pd
+import numpy as np
+
+N = 1_000
+WARMUP = 2
+ITERATIONS = 5
+
+a = [i * 1.0 for i in range(N)]
+b = [(N - i) * 2.0 for i in range(N)]
+c = [math.sin(i / 100) * 100 for i in range(N)]
+df = pd.DataFrame({"a": a, "b": b, "c": c})
+
+def run_styler():
+    styler = df.style.highlight_max().highlight_min().background_gradient()
+    styler.to_html()  # force rendering
+
+for _ in range(WARMUP):
+    run_styler()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_styler()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "styler",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/tsb/bench_keep_true_false.ts b/benchmarks/tsb/bench_keep_true_false.ts
new file mode 100644
index 00000000..02cb2ce9
--- /dev/null
+++ b/benchmarks/tsb/bench_keep_true_false.ts
@@ -0,0 +1,33 @@
+/**
+ * Benchmark: keepTrue / keepFalse — boolean mask filtering on a 100k-element Series
+ */
+import { Series, keepTrue, keepFalse } from "../../src/index.js";
+
+const N = 100_000;
+const WARMUP = 2;
+const ITERATIONS = 5;
+
+const data = Array.from({ length: N }, (_, i) => i * 1.0);
+const mask = Array.from({ length: N }, (_, i) => i % 2 === 0);
+const s = new Series({ data });
+
+for (let i = 0; i < WARMUP; i++) {
+  keepTrue(s, mask);
+  keepFalse(s, mask);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  keepTrue(s, mask);
+  keepFalse(s, mask);
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "keep_true_false",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_merge_ordered.ts b/benchmarks/tsb/bench_merge_ordered.ts
new file mode 100644
index 00000000..45ed012f
--- /dev/null
+++ b/benchmarks/tsb/bench_merge_ordered.ts
@@ -0,0 +1,36 @@
+/**
+ * Benchmark: mergeOrdered — ordered merge of two 10k-row DataFrames on a key column
+ */
+import { DataFrame, mergeOrdered } from "../../src/index.js";
+
+const N = 10_000;
+const WARMUP = 2;
+const ITERATIONS = 5;
+
+// Two sorted DataFrames sharing some keys
+const keys1 = Array.from({ length: N }, (_, i) => i * 2);
+const vals1 = Array.from({ length: N }, (_, i) => i * 1.0);
+const keys2 = Array.from({ length: N }, (_, i) => i * 3);
+const vals2 = Array.from({ length: N }, (_, i) => i * 2.0);
+
+const df1 = DataFrame.fromColumns({ key: keys1, val1: vals1 });
+const df2 = DataFrame.fromColumns({ key: keys2, val2: vals2 });
+
+for (let i = 0; i < WARMUP; i++) {
+  mergeOrdered(df1, df2, { on: "key" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  mergeOrdered(df1, df2, { on: "key" });
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "merge_ordered",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_styler.ts b/benchmarks/tsb/bench_styler.ts
new file mode 100644
index 00000000..dae1fca3
--- /dev/null
+++ b/benchmarks/tsb/bench_styler.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: Styler — highlight max/min and background gradient on a 1000-row DataFrame
+ */
+import { DataFrame, dataFrameStyle } from "../../src/index.js";
+
+const N = 1_000;
+const WARMUP = 2;
+const ITERATIONS = 5;
+
+const a = Array.from({ length: N }, (_, i) => i * 1.0);
+const b = Array.from({ length: N }, (_, i) => (N - i) * 2.0);
+const c = Array.from({ length: N }, (_, i) => Math.sin(i / 100) * 100);
+const df = DataFrame.fromColumns({ a, b, c });
+
+for (let i = 0; i < WARMUP; i++) {
+  dataFrameStyle(df).highlightMax().highlightMin().backgroundGradient().exportStyles();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  dataFrameStyle(df).highlightMax().highlightMin().backgroundGradient().exportStyles();
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "styler",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);

From a097726e2c85e0c5f8d0facd4bcc2e0f640c9c66 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Sun, 3 May 2026 07:15:40 +0000
Subject: [PATCH 4/6] ci: trigger checks


From 3f0e36f69047a9373b6f55f82fc5cfc2c834b102 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 4 May 2026 01:21:37 +0000
Subject: [PATCH 5/6] Iteration 304: Add corrwith, dot_matmul, eval_query benchmarks

Run: https://github.com/githubnext/tsessebe/actions/runs/25296135822

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
Review notes (placed after "---", so they are not part of the commit message):
- pandas/bench_dot_matmul.py: dfB is now indexed by dfA's column labels.
  DataFrame.dot aligns the left columns with the right index and raises
  "matrices are not aligned" when they differ (string labels vs. the default
  integer index).
- pandas/bench_corrwith.py: now uses the same deterministic inputs and the
  same operations (Series.autocorr + DataFrame.corrwith) as the tsb script,
  so the two benchmarks measure the same workload.
- tsb/bench_corrwith.ts, tsb/bench_dot_matmul.ts: Series is constructed with
  the `{ data }` options form used by the other tsb benchmarks in this
  series, and `Record` is given its two type arguments.
- The "index" blob ids of the edited files are carried over unchanged and no
  longer describe the post-image; regenerate the series if exact ids matter.

 benchmarks/pandas/bench_corrwith.py   | 30 ++++++++++++++++
 benchmarks/pandas/bench_dot_matmul.py | 39 +++++++++++++++++++++
 benchmarks/pandas/bench_eval_query.py | 31 +++++++++++++++++
 benchmarks/tsb/bench_corrwith.ts      | 38 +++++++++++++++++++++
 benchmarks/tsb/bench_dot_matmul.ts    | 49 +++++++++++++++++++++++++++
 benchmarks/tsb/bench_eval_query.ts    | 35 +++++++++++++++++++
 6 files changed, 222 insertions(+)
 create mode 100644 benchmarks/pandas/bench_corrwith.py
 create mode 100644 benchmarks/pandas/bench_dot_matmul.py
 create mode 100644 benchmarks/pandas/bench_eval_query.py
 create mode 100644 benchmarks/tsb/bench_corrwith.ts
 create mode 100644 benchmarks/tsb/bench_dot_matmul.ts
 create mode 100644 benchmarks/tsb/bench_eval_query.ts

diff --git a/benchmarks/pandas/bench_corrwith.py b/benchmarks/pandas/bench_corrwith.py
new file mode 100644
index 00000000..b3c4523b
--- /dev/null
+++ b/benchmarks/pandas/bench_corrwith.py
@@ -0,0 +1,30 @@
+"""Benchmark: Series.autocorr on a 10k-element Series and DataFrame.corrwith on a 3-column DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 10_000
+WARMUP = 3
+ITERATIONS = 10
+
+idx = np.arange(ROWS)
+s = pd.Series(np.sin(idx * 0.05) * 50 + (idx % 7) * 2.0)
+df = pd.DataFrame({"a": np.sin(idx * 0.03) * 40, "b": np.cos(idx * 0.07) * 20, "c": (idx % 5) * 3.0})
+other = pd.Series(np.sin(idx * 0.04) * 35)
+
+for _ in range(WARMUP):
+    s.autocorr(lag=1)
+    df.corrwith(other)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.autocorr(lag=1)
+    df.corrwith(other)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "corrwith",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dot_matmul.py b/benchmarks/pandas/bench_dot_matmul.py
new file mode 100644
index 00000000..523c1631
--- /dev/null
+++ b/benchmarks/pandas/bench_dot_matmul.py
@@ -0,0 +1,39 @@
+"""Benchmark: Series.dot and DataFrame.dot"""
+import json, time
+import numpy as np
+import pandas as pd
+
+N = 1_000
+K = 10
+WARMUP = 3
+ITERATIONS = 10
+
+a = np.arange(N) * 0.1
+b = (N - np.arange(N)) * 0.2
+sa = pd.Series(a)
+sb = pd.Series(b)
+
+# dfA: N rows × K columns (column labels "0".."K-1")
+# dfB: K rows (index = dfA's column labels, which DataFrame.dot aligns on) × K columns
+colsA = {str(c): (np.arange(N) + c) * 0.01 for c in range(K)}
+dfA = pd.DataFrame(colsA)
+
+colsB = {str(c): [(i * K + c) * 0.1 for i in range(K)] for c in range(K)}
+dfB = pd.DataFrame(colsB, index=dfA.columns)
+
+for _ in range(WARMUP):
+    sa.dot(sb)
+    dfA.dot(dfB)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    sa.dot(sb)
+    dfA.dot(dfB)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "dot_matmul",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_eval_query.py b/benchmarks/pandas/bench_eval_query.py
new file mode 100644
index 00000000..d4ac845c
--- /dev/null
+++ b/benchmarks/pandas/bench_eval_query.py
@@ -0,0 +1,31 @@
+"""Benchmark: DataFrame.query and DataFrame.eval on a 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+df = pd.DataFrame({
+    "a": np.arange(ROWS) * 0.5,
+    "b": (ROWS - np.arange(ROWS)) * 0.3,
+    "c": (np.arange(ROWS) % 100) * 1.0,
+})
+
+for _ in range(WARMUP):
+    df.query("a > 10000 and b < 20000")
+    df.eval("a + b * 2")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    df.query("a > 10000 and b < 20000")
+    df.eval("a + b * 2")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "eval_query",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/tsb/bench_corrwith.ts b/benchmarks/tsb/bench_corrwith.ts
new file mode 100644
index 00000000..6ef2fb0b
--- /dev/null
+++ b/benchmarks/tsb/bench_corrwith.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: autoCorr on a 10k-element Series and corrWith on a DataFrame
+ */
+import { Series, DataFrame, autoCorr, corrWith } from "../../src/index.js";
+
+const ROWS = 10_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.05) * 50 + (i % 7) * 2.0);
+const s = new Series({ data });
+const df = DataFrame.fromColumns({
+  a: Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.03) * 40),
+  b: Float64Array.from({ length: ROWS }, (_, i) => Math.cos(i * 0.07) * 20),
+  c: Float64Array.from({ length: ROWS }, (_, i) => (i % 5) * 3.0),
+});
+const other = new Series({ data: Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.04) * 35) });
+
+for (let i = 0; i < WARMUP; i++) {
+  autoCorr(s, 1);
+  corrWith(df, other);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  autoCorr(s, 1);
+  corrWith(df, other);
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "corrwith",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_dot_matmul.ts b/benchmarks/tsb/bench_dot_matmul.ts
new file mode 100644
index 00000000..e106e109
--- /dev/null
+++ b/benchmarks/tsb/bench_dot_matmul.ts
@@ -0,0 +1,49 @@
+/**
+ * Benchmark: seriesDotSeries and dataFrameDotDataFrame
+ */
+import { Series, DataFrame, seriesDotSeries, dataFrameDotDataFrame } from "../../src/index.js";
+
+const N = 1_000;
+const K = 10;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const a = Float64Array.from({ length: N }, (_, i) => i * 0.1);
+const b = Float64Array.from({ length: N }, (_, i) => (N - i) * 0.2);
+const sa = new Series({ data: a });
+const sb = new Series({ data: b });
+
+// dfA: N rows × K columns (colnames 0..K-1)
+// dfB: K rows (index 0..K-1) × K columns — so left.columns aligns with right.index
+const colsA: Record<string, Float64Array> = {};
+for (let c = 0; c < K; c++) {
+  colsA[String(c)] = Float64Array.from({ length: N }, (_, i) => (i + c) * 0.01);
+}
+const dfA = DataFrame.fromColumns(colsA);
+
+const colsB: Record<string, number[]> = {};
+for (let c = 0; c < K; c++) {
+  colsB[String(c)] = Array.from({ length: K }, (_, i) => (i * K + c) * 0.1);
+}
+const dfB = DataFrame.fromColumns(colsB);
+
+for (let i = 0; i < WARMUP; i++) {
+  seriesDotSeries(sa, sb);
+  dataFrameDotDataFrame(dfA, dfB);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  seriesDotSeries(sa, sb);
+  dataFrameDotDataFrame(dfA, dfB);
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "dot_matmul",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_eval_query.ts b/benchmarks/tsb/bench_eval_query.ts
new file mode 100644
index 00000000..7b8288ee
--- /dev/null
+++ b/benchmarks/tsb/bench_eval_query.ts
@@ -0,0 +1,35 @@
+/**
+ * Benchmark: queryDataFrame and evalDataFrame on a 100k-row DataFrame
+ */
+import { DataFrame, queryDataFrame, evalDataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const df = DataFrame.fromColumns({
+  a: Float64Array.from({ length: ROWS }, (_, i) => i * 0.5),
+  b: Float64Array.from({ length: ROWS }, (_, i) => (ROWS - i) * 0.3),
+  c: Float64Array.from({ length: ROWS }, (_, i) => (i % 100) * 1.0),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+  queryDataFrame(df, "a > 10000 and b < 20000");
+  evalDataFrame(df, "a + b * 2");
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  queryDataFrame(df, "a > 10000 and b < 20000");
+  evalDataFrame(df, "a + b * 2");
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "eval_query",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);

From 8fb468c809e753817519187f332a9e335f97d615 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Mon, 4 May 2026 01:21:38 +0000
Subject: [PATCH 6/6] ci: trigger checks