githubnext · mrjf · May 4, 2026 · May 2, 2026 · May 2, 2026 · May 3, 2026
diff --git a/benchmarks/pandas/bench_corrwith.py b/benchmarks/pandas/bench_corrwith.py
@@ -0,0 +1,30 @@
+"""Benchmark: autoCorr and corrWith on 10k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 10_000
+WARMUP = 3
+ITERATIONS = 10
+
+rng = np.random.default_rng(42)
+data = np.sin(np.arange(ROWS) * 0.05) * 50 + rng.random(ROWS) * 10
+s = pd.Series(data)
+s2 = pd.Series(np.cos(np.arange(ROWS) * 0.05) * 30 + rng.random(ROWS) * 5)
+
+for _ in range(WARMUP):
+    s.autocorr(lag=1)
+    s.corr(s2)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.autocorr(lag=1)
+    s.corr(s2)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "corrwith",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dot_matmul.py b/benchmarks/pandas/bench_dot_matmul.py
@@ -0,0 +1,39 @@
+"""Benchmark: Series.dot and DataFrame.dot"""
+import json, time
+import numpy as np
+import pandas as pd
+
+N = 1_000
+K = 10
+WARMUP = 3
+ITERATIONS = 10
+
+a = np.arange(N) * 0.1
+b = (N - np.arange(N)) * 0.2
+sa = pd.Series(a)
+sb = pd.Series(b)
+
+# dfA: N rows × K columns (colnames 0..K-1)
+# dfB: K rows (index 0..K-1) × K columns
+colsA = {str(c): (np.arange(N) + c) * 0.01 for c in range(K)}
+dfA = pd.DataFrame(colsA)
+
+colsB = {str(c): [(i * K + c) * 0.1 for i in range(K)] for c in range(K)}
+dfB = pd.DataFrame(colsB)
+
+for _ in range(WARMUP):
+    sa.dot(sb)
+    dfA.dot(dfB)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    sa.dot(sb)
+    dfA.dot(dfB)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "dot_matmul",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_eval_query.py b/benchmarks/pandas/bench_eval_query.py
@@ -0,0 +1,31 @@
+"""Benchmark: DataFrame.query and DataFrame.eval on a 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+df = pd.DataFrame({
+    "a": np.arange(ROWS) * 0.5,
+    "b": (ROWS - np.arange(ROWS)) * 0.3,
+    "c": (np.arange(ROWS) % 100) * 1.0,
+})
+
+for _ in range(WARMUP):
+    df.query("a > 10000 and b < 20000")
+    df.eval("a + b * 2")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    df.query("a > 10000 and b < 20000")
+    df.eval("a + b * 2")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "eval_query",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_hash_pandas_object.py b/benchmarks/pandas/bench_hash_pandas_object.py
@@ -0,0 +1,30 @@
+"""Benchmark: pd.util.hash_pandas_object on a float Series and an int/str DataFrame (10k rows each)."""
+import pandas as pd
+import numpy as np
+import json
+import time
+
+N = 10_000
+nums = list(range(N))
+strs = [f"label_{i}" for i in nums]
+
+# Inputs: one float Series, plus a frame pairing the integers with string labels.
+num_series = pd.Series(nums, dtype=float)
+df = pd.DataFrame({"a": nums, "b": strs})
+
+# Untimed warm-up passes.
+for _ in range(10):
+    pd.util.hash_pandas_object(num_series)
+    pd.util.hash_pandas_object(df)
+
+iterations = 50
+t0 = time.perf_counter()
+for _ in range(iterations):
+    pd.util.hash_pandas_object(num_series)
+    pd.util.hash_pandas_object(df)
+total_ms = (time.perf_counter() - t0) * 1000
+
+# Report total and per-iteration wall time in milliseconds as one JSON line.
+stats = {"function": "hash_pandas_object", "mean_ms": total_ms / iterations,
+         "iterations": iterations, "total_ms": total_ms}
+print(json.dumps(stats))
diff --git a/benchmarks/pandas/bench_infer_objects.py b/benchmarks/pandas/bench_infer_objects.py
@@ -0,0 +1,29 @@
+"""Benchmark: infer_objects on an object-dtype Series and DataFrame (100k rows, every 10th value None)."""
+import pandas as pd
+import numpy as np
+import json
+import time
+
+N = 100_000
+object_data = [None if i % 10 == 0 else i for i in range(N)]
+doubled = [None if v is None else v * 2 for v in object_data]
+
+obj_series = pd.Series(object_data, dtype=object)
+# dtype=object is required: without it the constructor infers float64 and infer_objects has nothing to convert.
+obj_df = pd.DataFrame({"a": object_data, "b": doubled}, dtype=object)
+
+# Warm-up
+for _ in range(10):
+    obj_series.infer_objects()
+    obj_df.infer_objects()
+
+iterations = 100
+start = time.perf_counter()
+for _ in range(iterations):
+    obj_series.infer_objects()
+    obj_df.infer_objects()
+total_ms = (time.perf_counter() - start) * 1000
+
+stats = {"function": "infer_objects", "mean_ms": total_ms / iterations,
+         "iterations": iterations, "total_ms": total_ms}
+print(json.dumps(stats))
diff --git a/benchmarks/pandas/bench_keep_true_false.py b/benchmarks/pandas/bench_keep_true_false.py
@@ -0,0 +1,31 @@
+"""Benchmark: keepTrue / keepFalse equivalent — boolean mask filtering on a 100k-element Series"""
+import json
+import time
+import pandas as pd
+import numpy as np
+
+N = 100_000
+WARMUP = 2
+ITERATIONS = 5
+
+data = list(range(N))
+mask = [i % 2 == 0 for i in range(N)]
+s = pd.Series(data, dtype=float)
+bool_mask = pd.array(mask, dtype=bool)
+
+for _ in range(WARMUP):
+    s[bool_mask]
+    s[~bool_mask]
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    s[bool_mask]
+    s[~bool_mask]
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "keep_true_false",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_merge_ordered.py b/benchmarks/pandas/bench_merge_ordered.py
@@ -0,0 +1,32 @@
+"""Benchmark: merge_ordered — ordered merge of two 10k-row DataFrames on a key column"""
+import json
+import time
+import pandas as pd
+import numpy as np
+
+N = 10_000
+WARMUP = 2
+ITERATIONS = 5
+
+keys1 = list(range(0, N * 2, 2))
+vals1 = [i * 1.0 for i in range(N)]
+keys2 = list(range(0, N * 3, 3))
+vals2 = [i * 2.0 for i in range(N)]
+
+df1 = pd.DataFrame({"key": keys1, "val1": vals1})
+df2 = pd.DataFrame({"key": keys2, "val2": vals2})
+
+for _ in range(WARMUP):
+    pd.merge_ordered(df1, df2, on="key")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.merge_ordered(df1, df2, on="key")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "merge_ordered",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_squeeze.py b/benchmarks/pandas/bench_squeeze.py
@@ -0,0 +1,31 @@
+"""Benchmark: Series.squeeze on a 100k-element Series and DataFrame.squeeze(axis=1) on one column."""
+import pandas as pd
+import numpy as np
+import json
+import time
+
+N = 100_000
+data = list(range(N))
+
+# Multi-element Series: squeeze() has nothing to collapse, so it returns the Series unchanged.
+big_series = pd.Series(data, dtype=float)
+
+# Single-column DataFrame, squeezed along axis=1.
+single_col_df = pd.DataFrame({"a": data})
+
+# Untimed warm-up passes.
+for _ in range(20):
+    big_series.squeeze()
+    single_col_df.squeeze(axis=1)
+
+iterations = 500
+t0 = time.perf_counter()
+for _ in range(iterations):
+    big_series.squeeze()
+    single_col_df.squeeze(axis=1)
+total_ms = (time.perf_counter() - t0) * 1000
+
+# Emit total and per-iteration wall time in milliseconds as one JSON line.
+stats = {"function": "squeeze", "mean_ms": total_ms / iterations,
+         "iterations": iterations, "total_ms": total_ms}
+print(json.dumps(stats))
diff --git a/benchmarks/pandas/bench_styler.py b/benchmarks/pandas/bench_styler.py
@@ -0,0 +1,34 @@
+"""Benchmark: Styler — highlight max/min and background gradient on a 1000-row DataFrame"""
+import json
+import time
+import math
+import pandas as pd
+import numpy as np
+
+N = 1_000
+WARMUP = 2
+ITERATIONS = 5
+
+a = [i * 1.0 for i in range(N)]
+b = [(N - i) * 2.0 for i in range(N)]
+c = [math.sin(i / 100) * 100 for i in range(N)]
+df = pd.DataFrame({"a": a, "b": b, "c": c})
+
+def run_styler():
+    styler = df.style.highlight_max().highlight_min().background_gradient()
+    styler.to_html()  # force rendering
+
+for _ in range(WARMUP):
+    run_styler()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_styler()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "styler",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/tsb/bench_corrwith.ts b/benchmarks/tsb/bench_corrwith.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: autoCorr over a 10k-element Series plus corrWith between a
+ * three-column DataFrame and a second Series.
+ */
+import { Series, DataFrame, autoCorr, corrWith } from "../../src/index.js";
+
+const ROWS = 10_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+/** Builds a ROWS-long Float64Array whose i-th entry is fn(i). */
+const column = (fn: (i: number) => number) =>
+  Float64Array.from({ length: ROWS }, (_, i) => fn(i));
+
+const data = column((i) => Math.sin(i * 0.05) * 50 + (i % 7) * 2.0);
+const s = new Series(data);
+const df = DataFrame.fromColumns({
+  a: column((i) => Math.sin(i * 0.03) * 40),
+  b: column((i) => Math.cos(i * 0.07) * 20),
+  c: column((i) => (i % 5) * 3.0),
+});
+const other = new Series(column((i) => Math.sin(i * 0.04) * 35));
+
+for (let pass = 0; pass < WARMUP; pass++) {
+  autoCorr(s, 1);
+  corrWith(df, other);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass++) {
+  autoCorr(s, 1);
+  corrWith(df, other);
+}
+const total = performance.now() - t0;
+
+// Report total and per-iteration wall time in milliseconds as one JSON line.
+const report = { function: "corrwith", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total };
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_dot_matmul.ts b/benchmarks/tsb/bench_dot_matmul.ts
@@ -0,0 +1,49 @@
+/**
+ * Benchmark: seriesDotSeries on two N-element Series and
+ * dataFrameDotDataFrame on an N×K frame times a K×K frame.
+ */
+import { Series, DataFrame, seriesDotSeries, dataFrameDotDataFrame } from "../../src/index.js";
+
+const N = 1_000;
+const K = 10;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const a = Float64Array.from({ length: N }, (_, i) => i * 0.1);
+const b = Float64Array.from({ length: N }, (_, i) => (N - i) * 0.2);
+const sa = new Series(a);
+const sb = new Series(b);
+
+// Column ids 0..K-1, reused as the column names of both frames.
+const colIds = Array.from({ length: K }, (_, c) => c);
+
+// dfA: N rows × K columns (colnames 0..K-1)
+const colsA: Record<string, Float64Array> = {};
+colIds.forEach((c) => {
+  colsA[String(c)] = Float64Array.from({ length: N }, (_, i) => (i + c) * 0.01);
+});
+const dfA = DataFrame.fromColumns(colsA);
+
+// dfB: K rows (index 0..K-1) × K columns — so left.columns aligns with right.index
+const colsB: Record<string, number[]> = {};
+colIds.forEach((c) => {
+  colsB[String(c)] = colIds.map((i) => (i * K + c) * 0.1);
+});
+const dfB = DataFrame.fromColumns(colsB);
+
+// Untimed warm-up passes.
+for (let pass = 0; pass < WARMUP; pass++) {
+  seriesDotSeries(sa, sb);
+  dataFrameDotDataFrame(dfA, dfB);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass++) {
+  seriesDotSeries(sa, sb);
+  dataFrameDotDataFrame(dfA, dfB);
+}
+const total = performance.now() - t0;
+
+// Report total and per-iteration wall time in milliseconds as one JSON line.
+const report = { function: "dot_matmul", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total };
+console.log(JSON.stringify(report));