Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions benchmarks/pandas/bench_corrwith.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Benchmark: autoCorr and corrWith on 10k-element Series"""
import json, time
import numpy as np
import pandas as pd

ROWS = 10_000
WARMUP = 3
ITERATIONS = 10

# Deterministic inputs mirroring benchmarks/tsb/bench_corrwith.ts so that both
# implementations are timed on the same workload: an autocorrelation of one
# Series plus a column-wise correlation of a 3-column DataFrame against
# another Series (DataFrame.corrwith, which is what this benchmark reports).
idx = np.arange(ROWS)
s = pd.Series(np.sin(idx * 0.05) * 50 + (idx % 7) * 2.0)
df = pd.DataFrame(
    {
        "a": np.sin(idx * 0.03) * 40,
        "b": np.cos(idx * 0.07) * 20,
        "c": (idx % 5) * 3.0,
    }
)
other = pd.Series(np.sin(idx * 0.04) * 35)

# Untimed warm-up passes.
for _ in range(WARMUP):
    s.autocorr(lag=1)
    df.corrwith(other)

start = time.perf_counter()
for _ in range(ITERATIONS):
    s.autocorr(lag=1)
    df.corrwith(other)
# perf_counter() returns seconds; the report is in milliseconds.
total = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "corrwith",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
39 changes: 39 additions & 0 deletions benchmarks/pandas/bench_dot_matmul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Benchmark: Series.dot and DataFrame.dot"""
import json, time
import numpy as np
import pandas as pd

N = 1_000
K = 10
WARMUP = 3
ITERATIONS = 10

# Two length-N float vectors for the Series.dot measurement.
a = np.arange(N) * 0.1
b = (N - np.arange(N)) * 0.2
sa = pd.Series(a)
sb = pd.Series(b)

# dfA: N rows x K columns, columns labelled "0".."K-1" (strings).
colsA = {str(c): (np.arange(N) + c) * 0.01 for c in range(K)}
dfA = pd.DataFrame(colsA)

# dfB: K rows x K columns. DataFrame.dot aligns the left frame's columns with
# the right frame's index BY LABEL, so dfB's index must carry the same string
# labels as dfA's columns. A default integer RangeIndex does not match the
# string column labels and dot() raises "matrices are not aligned".
colsB = {str(c): [(i * K + c) * 0.1 for i in range(K)] for c in range(K)}
dfB = pd.DataFrame(colsB, index=dfA.columns)

# Untimed warm-up passes.
for _ in range(WARMUP):
    sa.dot(sb)
    dfA.dot(dfB)

start = time.perf_counter()
for _ in range(ITERATIONS):
    sa.dot(sb)
    dfA.dot(dfB)
# perf_counter() returns seconds; the report is in milliseconds.
total = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "dot_matmul",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
31 changes: 31 additions & 0 deletions benchmarks/pandas/bench_eval_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Benchmark: DataFrame.query and DataFrame.eval on a 100k-row DataFrame"""
import json, time
import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Three float columns derived from the row number: ascending ("a"),
# descending ("b") and a repeating 0..99 cycle ("c").
df = pd.DataFrame(
    {
        "a": np.arange(ROWS) * 0.5,
        "b": (ROWS - np.arange(ROWS)) * 0.3,
        "c": (np.arange(ROWS) % 100) * 1.0,
    }
)


def _filter_and_evaluate():
    """Run one boolean-filter query and one arithmetic eval against df."""
    df.query("a > 10000 and b < 20000")
    df.eval("a + b * 2")


# Untimed passes first.
for _ in range(WARMUP):
    _filter_and_evaluate()

began = time.perf_counter()
for _ in range(ITERATIONS):
    _filter_and_evaluate()
elapsed_seconds = time.perf_counter() - began
total = elapsed_seconds * 1000  # milliseconds

report = {
    "function": "eval_query",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}
print(json.dumps(report))
30 changes: 30 additions & 0 deletions benchmarks/pandas/bench_hash_pandas_object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
import json
import time

N = 10_000
nums = list(range(N))
strs = ["label_" + str(i) for i in nums]

# One float Series and one mixed int/string DataFrame to hash.
num_series = pd.Series(nums, dtype=float)
df = pd.DataFrame({"a": nums, "b": strs})


def _hash_both():
    """Hash the numeric Series, then the two-column DataFrame."""
    pd.util.hash_pandas_object(num_series)
    pd.util.hash_pandas_object(df)


# Untimed warm-up passes.
for _ in range(10):
    _hash_both()

iterations = 50
began = time.perf_counter()
for _ in range(iterations):
    _hash_both()
elapsed_seconds = time.perf_counter() - began
total_ms = elapsed_seconds * 1000

report = {
    "function": "hash_pandas_object",
    "mean_ms": total_ms / iterations,
    "iterations": iterations,
    "total_ms": total_ms,
}
print(json.dumps(report))
29 changes: 29 additions & 0 deletions benchmarks/pandas/bench_infer_objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pandas as pd
import numpy as np
import json
import time

N = 100_000
# Every tenth entry is None; the rest are Python ints.
object_data = [None if i % 10 == 0 else i for i in range(N)]

obj_series = pd.Series(object_data, dtype=object)
# dtype=object is required here: without it the DataFrame constructor infers
# a numeric dtype up front, leaving infer_objects() with no object columns to
# convert, so the DataFrame half of the benchmark would measure nothing.
obj_df = pd.DataFrame(
    {
        "a": object_data,
        "b": [None if v is None else v * 2 for v in object_data],
    },
    dtype=object,
)

# Warm-up
for _ in range(10):
    obj_series.infer_objects()
    obj_df.infer_objects()

iterations = 100
start = time.perf_counter()
for _ in range(iterations):
    obj_series.infer_objects()
    obj_df.infer_objects()
# perf_counter() returns seconds; the report is in milliseconds.
total_ms = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "infer_objects",
    "mean_ms": total_ms / iterations,
    "iterations": iterations,
    "total_ms": total_ms,
}))
31 changes: 31 additions & 0 deletions benchmarks/pandas/bench_keep_true_false.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Benchmark: keepTrue / keepFalse equivalent — boolean mask filtering on a 100k-element Series"""
import json
import time
import pandas as pd
import numpy as np

N = 100_000
WARMUP = 2
ITERATIONS = 5

data = list(range(N))
# True at even positions, False at odd positions.
mask = [value % 2 == 0 for value in data]
s = pd.Series(data, dtype=float)
bool_mask = pd.array(mask, dtype=bool)


def _select_both_halves():
    """Index s with the mask, then with the mask inverted on the spot."""
    s[bool_mask]
    s[~bool_mask]


# Untimed warm-up passes.
for _ in range(WARMUP):
    _select_both_halves()

began = time.perf_counter()
for _ in range(ITERATIONS):
    _select_both_halves()
elapsed_seconds = time.perf_counter() - began
total = elapsed_seconds * 1000  # milliseconds

report = {
    "function": "keep_true_false",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}
print(json.dumps(report))
32 changes: 32 additions & 0 deletions benchmarks/pandas/bench_merge_ordered.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Benchmark: merge_ordered — ordered merge of two 10k-row DataFrames on a key column"""
import json
import time
import pandas as pd
import numpy as np

N = 10_000
WARMUP = 2
ITERATIONS = 5

# Left keys step by 2, right keys step by 3, so the key sets only partly
# overlap.
keys1 = list(range(0, N * 2, 2))
vals1 = [float(i) for i in range(N)]
keys2 = list(range(0, N * 3, 3))
vals2 = [i * 2.0 for i in range(N)]

df1 = pd.DataFrame({"key": keys1, "val1": vals1})
df2 = pd.DataFrame({"key": keys2, "val2": vals2})

# Untimed warm-up passes.
for _ in range(WARMUP):
    pd.merge_ordered(df1, df2, on="key")

began = time.perf_counter()
for _ in range(ITERATIONS):
    pd.merge_ordered(df1, df2, on="key")
elapsed_seconds = time.perf_counter() - began
total = elapsed_seconds * 1000  # milliseconds

report = {
    "function": "merge_ordered",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}
print(json.dumps(report))
31 changes: 31 additions & 0 deletions benchmarks/pandas/bench_squeeze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pandas as pd
import numpy as np
import json
import time

N = 100_000
data = list(range(N))

# Series.squeeze() input: a Series with many elements.
big_series = pd.Series(data, dtype=float)
# DataFrame.squeeze(axis=1) input: a DataFrame with exactly one column.
single_col_df = pd.DataFrame({"a": data})


def _squeeze_both():
    """Squeeze the large Series, then the one-column DataFrame along axis 1."""
    big_series.squeeze()
    single_col_df.squeeze(axis=1)


# Untimed warm-up passes.
for _ in range(20):
    _squeeze_both()

iterations = 500
began = time.perf_counter()
for _ in range(iterations):
    _squeeze_both()
elapsed_seconds = time.perf_counter() - began
total_ms = elapsed_seconds * 1000

report = {
    "function": "squeeze",
    "mean_ms": total_ms / iterations,
    "iterations": iterations,
    "total_ms": total_ms,
}
print(json.dumps(report))
34 changes: 34 additions & 0 deletions benchmarks/pandas/bench_styler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Benchmark: Styler — highlight max/min and background gradient on a 1000-row DataFrame"""
import json
import time
import math
import pandas as pd
import numpy as np

N = 1_000
WARMUP = 2
ITERATIONS = 5

a = [i * 1.0 for i in range(N)]
b = [(N - i) * 2.0 for i in range(N)]
c = [math.sin(i / 100) * 100 for i in range(N)]
df = pd.DataFrame({"a": a, "b": b, "c": c})

def run_styler():
styler = df.style.highlight_max().highlight_min().background_gradient()
styler.to_html() # force rendering

for _ in range(WARMUP):
run_styler()

start = time.perf_counter()
for _ in range(ITERATIONS):
run_styler()
total = (time.perf_counter() - start) * 1000

print(json.dumps({
"function": "styler",
"mean_ms": total / ITERATIONS,
"iterations": ITERATIONS,
"total_ms": total,
}))
38 changes: 38 additions & 0 deletions benchmarks/tsb/bench_corrwith.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* Benchmark: autoCorr on a 10k-element Series and corrWith on a DataFrame
*/
import { Series, DataFrame, autoCorr, corrWith } from "../../src/index.js";

const ROWS = 10_000;
const WARMUP = 3;
const ITERATIONS = 10;

/** Fill a Float64Array of ROWS values, one per row index. */
function column(valueAt: (row: number) => number): Float64Array {
  return Float64Array.from({ length: ROWS }, (_, row) => valueAt(row));
}

// Series passed to autoCorr.
const s = new Series(column((row) => Math.sin(row * 0.05) * 50 + (row % 7) * 2.0));

// Three-column frame passed to corrWith together with `other`.
const df = DataFrame.fromColumns({
  a: column((row) => Math.sin(row * 0.03) * 40),
  b: column((row) => Math.cos(row * 0.07) * 20),
  c: column((row) => (row % 5) * 3.0),
});
const other = new Series(column((row) => Math.sin(row * 0.04) * 35));

/** One benchmark pass: autoCorr(s, 1) followed by corrWith(df, other). */
function runOnce(): void {
  autoCorr(s, 1);
  corrWith(df, other);
}

// Untimed warm-up passes.
for (let pass = 0; pass < WARMUP; pass += 1) {
  runOnce();
}

const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass += 1) {
  runOnce();
}
const total = performance.now() - startedAt;

const report = {
  function: "corrwith",
  mean_ms: total / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: total,
};
console.log(JSON.stringify(report));
49 changes: 49 additions & 0 deletions benchmarks/tsb/bench_dot_matmul.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
* Benchmark: seriesDotSeries and dataFrameDotDataFrame
*/
import { Series, DataFrame, seriesDotSeries, dataFrameDotDataFrame } from "../../src/index.js";

const N = 1_000;
const K = 10;
const WARMUP = 3;
const ITERATIONS = 10;

// Two length-N vectors for the Series · Series measurement.
const sa = new Series(Float64Array.from({ length: N }, (_, i) => i * 0.1));
const sb = new Series(Float64Array.from({ length: N }, (_, i) => (N - i) * 0.2));

// dfA: N rows × K columns keyed "0".."K-1".
const colsA: Record<string, Float64Array> = Object.fromEntries(
  Array.from({ length: K }, (_slot, c): [string, Float64Array] => [
    String(c),
    Float64Array.from({ length: N }, (_row, i) => (i + c) * 0.01),
  ]),
);
const dfA = DataFrame.fromColumns(colsA);

// dfB: K rows × K columns keyed "0".."K-1"; intended so that dfA's columns
// line up with dfB's row index.
const colsB: Record<string, number[]> = Object.fromEntries(
  Array.from({ length: K }, (_slot, c): [string, number[]] => [
    String(c),
    Array.from({ length: K }, (_row, i) => (i * K + c) * 0.1),
  ]),
);
const dfB = DataFrame.fromColumns(colsB);

/** One benchmark pass: vector dot product, then frame × frame product. */
function runOnce(): void {
  seriesDotSeries(sa, sb);
  dataFrameDotDataFrame(dfA, dfB);
}

// Untimed warm-up passes.
for (let pass = 0; pass < WARMUP; pass += 1) {
  runOnce();
}

const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass += 1) {
  runOnce();
}
const total = performance.now() - startedAt;

const report = {
  function: "dot_matmul",
  mean_ms: total / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: total,
};
console.log(JSON.stringify(report));
Loading
Loading