diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 0c9c9bd..8eb76c8 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -159,7 +159,7 @@ source .env314/bin/activate # or whichever venv the user picked pip install -e .[test] # editable install with test deps pytest -vv # run full suite pip install -e .[linting] # linting deps -flake8 src/ test/ # lint check +flake8 src/ test/ examples/ # lint check ``` The private `bocpy._internal_test` C extension (used by diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index ae35d77..f829dbd 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -22,6 +22,9 @@ jobs: uses: pypa/cibuildwheel@v3.4.0 env: CIBW_BUILD: ${{ matrix.python }}-win* + CIBW_TEST_REQUIRES: "pytest setuptools wheel" + CIBW_ENVIRONMENT: "BOCPY_TEST_WHEEL=1" + CIBW_TEST_COMMAND: "pytest {project}/test/test_public_c_abi.py -v" with: package-dir: ${{github.workspace}} @@ -58,6 +61,9 @@ jobs: env: CIBW_BUILD: ${{ matrix.python }}-macosx* CIBW_ARCHS_MACOS: arm64 + CIBW_TEST_REQUIRES: "pytest setuptools wheel" + CIBW_ENVIRONMENT: "BOCPY_TEST_WHEEL=1" + CIBW_TEST_COMMAND: "pytest {project}/test/test_public_c_abi.py -v" with: package-dir: ${{github.workspace}} @@ -95,6 +101,9 @@ jobs: CIBW_BUILD: ${{ matrix.python }}-macosx* CIBW_ARCHS_MACOS: x86_64 MACOSX_DEPLOYMENT_TARGET: 14.8.1 + CIBW_TEST_REQUIRES: "pytest setuptools wheel" + CIBW_ENVIRONMENT: "BOCPY_TEST_WHEEL=1" + CIBW_TEST_COMMAND: "pytest {project}/test/test_public_c_abi.py -v" with: package-dir: ${{github.workspace}} @@ -121,6 +130,9 @@ jobs: env: CIBW_BUILD: ${{ matrix.python }}-manylinux* CIBW_BEFORE_ALL: yum makecache + CIBW_TEST_REQUIRES: "pytest setuptools wheel" + CIBW_ENVIRONMENT: "BOCPY_TEST_WHEEL=1" + CIBW_TEST_COMMAND: "pytest {project}/test/test_public_c_abi.py -v" with: package-dir: ${{github.workspace}} @@ -147,6 +159,9 @@ jobs: uses: 
pypa/cibuildwheel@v3.4.0 env: CIBW_BUILD: ${{ matrix.python }}-musllinux* + CIBW_TEST_REQUIRES: "pytest setuptools wheel" + CIBW_ENVIRONMENT: "BOCPY_TEST_WHEEL=1" + CIBW_TEST_COMMAND: "pytest {project}/test/test_public_c_abi.py -v" with: package-dir: ${{github.workspace}} diff --git a/.github/workflows/pr_gate.yml b/.github/workflows/pr_gate.yml index f392083..55e184c 100644 --- a/.github/workflows/pr_gate.yml +++ b/.github/workflows/pr_gate.yml @@ -29,7 +29,104 @@ jobs: run: pip install -e .[linting] - name: Run flake8 - run: flake8 src/bocpy test + run: flake8 src/bocpy test examples + + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Use Python 3.14 + uses: actions/setup-python@v6 + with: + python-version: 3.14 + + - name: Install docs deps + run: pip install -e .[docs] + + - name: Build Sphinx docs (warnings as errors) + run: sphinx-build -W -b html sphinx/source sphinx/build/html + + c-abi-consumer: + # Build a standalone downstream extension against the bocpy public + # C ABI (templates/c_abi_consumer/) and run its pytest suite. This + # exercises ``bocpy.get_include()`` / ``bocpy.get_sources()`` and + # the ``<bocpy/bocpy.h>`` umbrella from a fresh process that does + # not share build flags with bocpy itself. The Windows leg also + # compiles ``bocpy_msvc.c`` (returned by ``get_sources()`` on + # win32) end-to-end. + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python_version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Use Python ${{matrix.python_version}} + uses: actions/setup-python@v6 + with: + python-version: ${{matrix.python_version}} + + - name: Install bocpy + # Scrub the workflow-level BOCPY_BUILD_INTERNAL_TESTS=1 here: + # this job never invokes the internal-test extension, so + # building it is pure overhead. 
+ env: + BOCPY_BUILD_INTERNAL_TESTS: "" + run: pip install -e .[test] --verbose + + - name: Build downstream consumer extension + run: pip install --no-build-isolation ./templates/c_abi_consumer + + - name: Run consumer tests + run: pytest -vv templates/c_abi_consumer/test + + sdist: + runs-on: ubuntu-latest + # The new sdist must build cleanly without the internal-test + # opt-in; setuptools excludes those sources from the tarball, so + # leaving the workflow-level env in scope would attempt to build + # missing files via PEP 517. + env: + BOCPY_BUILD_INTERNAL_TESTS: "" + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Use Python 3.14 + uses: actions/setup-python@v6 + with: + python-version: 3.14 + + - name: Install build tool + run: pip install build + + - name: Build sdist + run: python -m build --sdist + + - name: Install from sdist + run: pip install dist/*.tar.gz --verbose + + - name: Smoke-test import + run: python -c "import bocpy; print(bocpy.__name__)" + + - name: Smoke-test example with data file + run: | + python -c "import importlib, importlib.resources as r; \ + m = importlib.import_module('bocpy.examples'); \ + assert (r.files(m) / 'cheese.txt').is_file(), 'cheese.txt missing'; \ + assert (r.files(m) / 'menu.txt').is_file(), 'menu.txt missing'" + + - name: Wheel allow-list (no internal C/H ships) + env: + BOCPY_TEST_WHEEL: "1" + run: | + pip install pytest + pytest -vv test/test_public_c_abi.py cpp-format: runs-on: ubuntu-latest @@ -37,6 +134,7 @@ jobs: matrix: path: - check: src/bocpy + - check: templates/c_abi_consumer/src steps: - name: Checkout uses: actions/checkout@v6 diff --git a/CHANGELOG.md b/CHANGELOG.md index 1033a33..c77dcd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,52 @@ +## 2026-05-10 - Version 0.6.0 +Public C ABI for downstream extensions, enabling C-level participation +in behavior-oriented concurrency across worker sub-interpreters. 
 + +**New Features** + +- **Decorator composition with ``@when``** — decorators stacked below + ``@when`` are now preserved on the generated behavior function and + compose with the behavior body on the worker. Decorators placed + above ``@when`` raise a ``SyntaxError`` at transpile time with + actionable guidance. ``async def`` functions with ``@when`` are + also explicitly rejected. +- **Public C ABI (``<bocpy/bocpy.h>``)** — downstream C extensions can + now link against bocpy to register custom Python types as + cross-interpreter shareable so :class:`Cown` can carry instances of + them across worker interpreters. The header is C-only, version-gated + via the ``BOCPY_ABI`` macro, and bumped on any incompatible change + to ``bocpy.h`` or ``xidata.h``. Wheels remain CPython-version-tagged + so a runtime ABI mismatch cannot occur. +- **`bocpy.get_include()` / `bocpy.get_sources()`** — Python-level + helpers that downstream ``setup.py`` files use to locate the bocpy + headers and the small set of C sources that must be compiled into + the consuming extension. +- **`templates/c_abi_consumer/`** — a ready-to-copy template for + building a C extension against the bocpy ABI, including a + ``setup.py``, a probe extension exercising the public surface, and + a pytest suite (``test_public_c_abi.py``) that validates the ABI + end-to-end. +- **C source reorganisation** — the per-subsystem translation units + introduced in 0.5.0 have been renamed with a ``boc_`` prefix + (``boc_compat.[ch]``, ``boc_sched.[ch]``, ``boc_tags.[ch]``, + ``boc_terminator.[ch]``, ``boc_noticeboard.[ch]``, ``boc_cown.h``) + to give the public ABI a stable, namespaced identity. ``xidata.h`` + has moved under ``include/bocpy/`` alongside ``bocpy.h``. + +**Documentation** + +- New :doc:`c_abi`, :doc:`messaging`, and :doc:`noticeboard` pages + in the Sphinx site; the API reference has been expanded to cover + the public ABI surface. 
+ +**Breaking Changes** + +- **`noticeboard_version` removed** — the global monotonic version + counter introduced in 0.4.0 has been removed. It exposed an + implementation detail of the snapshot cache that did not survive + the C ABI review and had no use case that was not better served + by ``notice_sync`` plus an explicit ``noticeboard()`` read. + ## 2026-04-29 - Version 0.5.0 Verona-RT-style work-stealing scheduler, C source split into per-subsystem translation units, and a portable atomics / threading layer. diff --git a/CITATION.cff b/CITATION.cff index d331a6a..7a91c56 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -5,6 +5,6 @@ authors: given-names: "Matthew Alastair" orcid: "https://orcid.org/0000-0002-1019-8036" title: "bocpy" -version: 0.5.0 -date-released: 2026-04-29 +version: 0.6.0 +date-released: 2026-05-10 url: "https://github.com/microsoft/bocpy" \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..408bd9f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,7 @@ +include src/bocpy/*.h +include src/bocpy/*.c +include src/bocpy/include/bocpy/*.h +include src/bocpy/include/bocpy/*.c +include src/bocpy/py.typed +include src/bocpy/*.pyi +recursive-include examples *.txt diff --git a/README.md b/README.md index a9efb44..10fc70a 100644 --- a/README.md +++ b/README.md @@ -104,13 +104,11 @@ xychart-beta title "bocpy speedup vs. worker count (chain-ring benchmark, CPython 3.14)" x-axis "Workers" [1, 2, 3, 4, 5, 6, 7, 8] y-axis "Speedup vs. 1 worker" 0 --> 9 - bar [1.00, 1.97, 2.94, 3.90, 4.87, 5.82, 6.75, 7.54] - line [1, 2, 3, 4, 5, 6, 7, 8] + line [1.00, 1.97, 2.94, 3.90, 4.87, 5.82, 6.75, 7.54] ``` -The line is the ideal `y = x` reference; the bars are measured speedup. Up -to 8 workers, BOC delivers roughly linear scaling on this microbenchmark +Up to 8 workers, BOC delivers roughly linear scaling on this microbenchmark (≈7.5× at 8 workers). 
Real applications carry serial costs that this benchmark deliberately strips out — see the docstring at the top of [examples/benchmark.py](examples/benchmark.py) for the load-bearing @@ -280,27 +278,32 @@ We provide a few examples to show different ways of using BOC in a program: ## Why BOC for Python? -For many Python programmers, the GIL has established a programming model in which -they do not have to think about the many potential issues that are introduced by -concurrency, in particular data races. One of the best features of BOC is that, due -to the way behaviors interact with concurrently owned data (*cowns*), each behavior -can operate over its data without a need to change this familiar programming model. -Even in a free-threading context, BOC will reduce contention on locks and provide -programs which are data-race free by construction. Our initial research and experiments -with BOC have shown near linear scaling over cores, with up to 32 concurrent worker -sub-interpreters. +Python has always had data races — compound operations like `x += 1` are not +atomic, even under the GIL — and with the arrival of free-threaded builds +(Python 3.13t+) the surface area for concurrency bugs is only growing. BOC +eliminates these problems by construction: because behaviors interact with +shared data exclusively through *cowns*, each behavior operates over its data +as if it were single-threaded. There is no lock ordering to get right, no +forgotten `acquire()`/`release()`, and no possibility of deadlock. This holds +whether your program runs under the GIL, on per-interpreter GIL (3.12+), or +on a free-threaded interpreter. ### This library Our implementation is built on top of the sub-interpreters mechanism and the Cross-Interpreter Data (`XIData`) API. As of Python 3.12 each sub-interpreter has its own GIL, so behaviors scheduled by `bocpy` run truly in parallel. 
-In addition to the `when` function decorator, the library also exposes -low-level Erlang-style `send` and selective `receive` functions which enable -lock-free communication across threads and sub-interpreters. See the -[`bocpy-primes`](https://github.com/microsoft/bocpy/blob/main/src/bocpy/examples/primes.py) and -[`bocpy-calculator`](https://github.com/microsoft/bocpy/blob/main/src/bocpy/examples/calculator.py) -examples for the usage of these lower-level functions. +The core scheduling engine is written in C — it is **not** a wrapper around +locks, message queues, or `asyncio`. Each `Cown` is backed by a C-level +capsule that embeds an MCS-style queue of pending behaviors. When you call +`@when(a, b)`, the runtime performs **two-phase locking** (2PL) over the +sorted cown IDs entirely in C (releasing the GIL across the lock-free link +loops). Once all cowns in a behavior's request set are acquired, the behavior +is dispatched directly to a worker — there is no central scheduler thread and +no OS-level lock acquisition on the fast path. Releasing a cown unlinks the +MCS node and hands ownership to the next waiting behavior in O(1), which is +then dispatched without touching any shared queue. This gives bocpy the same +deadlock-freedom-by-construction guarantee as the original Verona runtime. For cross-behavior data sharing that does not warrant a `Cown`, the library also provides a small **noticeboard** — a global key-value store of up to 64 @@ -310,6 +313,23 @@ read a frozen snapshot via `noticeboard()` / `notice_read()`. The [`bocpy-prime-factor`](https://github.com/microsoft/bocpy/blob/main/src/bocpy/examples/prime_factor.py) example uses it to coordinate early termination across worker behaviors. +The library also includes lower-level Erlang-style messaging primitives +(`send` / `receive`) for channel-based communication patterns; see the +[API documentation](https://microsoft.github.io/bocpy/messaging.html) for +details. 
+ +### Waiting for completion + +Call `wait()` after scheduling all your behaviors. It blocks the calling +thread until every scheduled behavior has finished, then tears down the +runtime (joins workers, closes the noticeboard). The next `@when` call will +spin up a fresh runtime automatically. + +```python +wait() # block indefinitely +wait(timeout=5) # raise TimeoutError if not done in 5 s +``` + ### Additional Info BOC is built on a solid foundation of serious scholarship and engineering. For further reading, please see: 1. [When Concurrency Matters: Behaviour-Oriented Concurrency](https://dl.acm.org/doi/10.1145/3622852) diff --git a/examples/README.md b/examples/README.md index 779ba8d..b3bea5e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -53,22 +53,3 @@ for a result before doing a batch of trial divisions. When any lane finds a factor it writes to the noticeboard, and the remaining lanes see the result on their next check and stop early. Demonstrates the "behavior loop" pattern and cross-behavior coordination via the noticeboard. - -## Send/Receive -In addition to exposing the higher-level behavior primitives (*i.e.*, -`when`, `Cown`, `wait`), the library also exposes the lower-level functions -[`send`](http://microsoft.github.io/bocpy/sphinx/api.html#bocpy.send) and -[`receive`](http://microsoft.github.io/bocpy/sphinx/api.html#bocpy.receive), which provide -lock-free Erlang-style send and selective receive. As this paradigm may be -unfamiliar, we provide a few examples for this lower-level API as well. - -### Calculator -In this example, several clients send arithmetic commands concurrently in -parallel to a calculator server, which performs the operations and prints the -result. Shows basic `send`/`receive` functionality and how to provide timeout -information. - -### Primes -In this example, you have a coordination thread producing work (in this case, -batches of integers) and worker threads doing work (here, counting primes). 
-Shows how to use `send`/`receive` to share work across multiple worker threads. diff --git a/examples/bank.py b/examples/bank.py index 2110993..3478d39 100644 --- a/examples/bank.py +++ b/examples/bank.py @@ -34,6 +34,12 @@ def do_transfer(src: Cown[Account], dst: Cown[Account]): else: print("failure") + # Schedule follow-up behaviors that each acquire only a single + # account cown. These demonstrate that a behavior body can + # schedule further behaviors on a subset of its cowns — the + # inner behaviors will run after the outer one releases. The + # two inner behaviors are independent (they hold disjoint + # cowns) and may run in either order or in parallel. @when(src) def _(a: Cown[Account]): print("src (after transfer):", a.value) diff --git a/examples/benchmark.py b/examples/benchmark.py index e0e9710..0f66a3d 100644 --- a/examples/benchmark.py +++ b/examples/benchmark.py @@ -21,6 +21,8 @@ """ import argparse +from dataclasses import asdict, dataclass, field +from datetime import datetime import json import os import socket @@ -28,13 +30,11 @@ import subprocess import sys import time -from dataclasses import asdict, dataclass, field -from datetime import datetime from typing import Optional -from bocpy import (Cown, Matrix, noticeboard, notice_write, receive, send, - start, wait, when) from bocpy import _core +from bocpy import (Cown, Matrix, notice_write, noticeboard, receive, send, + start, wait, when) # Sentinels for the parent/child JSON protocol. Uppercase so the # transpiler keeps them as module-level constants in the worker export. 
diff --git a/examples/fanout_benchmark.py b/examples/fanout_benchmark.py index 012ef8d..475559b 100644 --- a/examples/fanout_benchmark.py +++ b/examples/fanout_benchmark.py @@ -25,6 +25,8 @@ """ import argparse +from dataclasses import asdict, dataclass, field +from datetime import datetime import json import os import socket @@ -32,8 +34,6 @@ import subprocess import sys import time -from dataclasses import asdict, dataclass, field -from datetime import datetime from typing import Optional from bocpy import Cown, Matrix, receive, send, start, wait, when diff --git a/pyproject.toml b/pyproject.toml index d4d1584..dc7501e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "bocpy" -version = "0.5.0" +version = "0.6.0" authors = [ {name = "bocpy Team", email="bocpy@microsoft.com"} ] @@ -27,7 +27,7 @@ documentation = "https://microsoft.github.io/bocpy/sphinx/index.html" issues = "https://github.com/microsoft/bocpy/issues" [project.optional-dependencies] -test = ["pytest-md", "pytest-emoji", "pytest-cov", "pytest"] +test = ["pytest-md", "pytest-emoji", "pytest-cov", "pytest", "setuptools", "wheel"] linting = ["flake8", "flake8-bugbear", "flake8-builtins", "flake8-docstrings", "flake8-import-order", "flake8-quotes", "pep8-naming"] docs = ["sphinx", "sphinx-autodoc-typehints", "enum-tools[sphinx]"] boids = ["pyglet"] @@ -47,10 +47,24 @@ bocpy-sketches = "bocpy.examples.sketches:main" [tool.setuptools] packages = ["bocpy", "bocpy.examples"] +include-package-data = false [tool.setuptools.package-dir] "" = "src" "bocpy.examples" = "examples" [tool.setuptools.package-data] +"bocpy" = [ + "py.typed", + "*.pyi", + "include/bocpy/bocpy.h", + "include/bocpy/xidata.h", + "include/bocpy/bocpy_msvc.c", +] "bocpy.examples" = ["*.txt"] + +[tool.pytest.ini_options] +# Confine pytest collection to the in-tree test suite. 
The downstream +# template at templates/c_abi_consumer/ has its own pytest suite that +# the dedicated `c-abi-consumer` PR-gate job invokes explicitly. +testpaths = ["test"] diff --git a/setup.py b/setup.py index a7721f8..0656fbf 100644 --- a/setup.py +++ b/setup.py @@ -36,15 +36,40 @@ and not _building_distribution ) +_headers = [ + "src/bocpy/include/bocpy/bocpy.h", + "src/bocpy/include/bocpy/xidata.h", + "src/bocpy/boc_compat.h", + "src/bocpy/boc_cown.h", + "src/bocpy/boc_noticeboard.h", + "src/bocpy/boc_sched.h", + "src/bocpy/boc_tags.h", + "src/bocpy/boc_terminator.h", +] + +# Both directories are on the include path: +# - ``src/bocpy/include`` is the public root, so internal C files +# refer to public headers as ```` and downstream +# consumers resolved via ``bocpy.get_include()`` see exactly the +# same surface. +# - ``src/bocpy`` is the private root, scoped by the ``boc_`` prefix +# to avoid colliding with system headers (``sched.h``, ``tags.h``, +# etc.) when this directory ends up on a downstream ``-I`` path. 
+_include_dirs = ["src/bocpy/include", "src/bocpy"] + _ext_modules = [ Extension( name="bocpy._core", - sources=["src/bocpy/_core.c", "src/bocpy/compat.c", "src/bocpy/noticeboard.c", - "src/bocpy/sched.c", "src/bocpy/tags.c", "src/bocpy/terminator.c"], + sources=["src/bocpy/_core.c", "src/bocpy/boc_compat.c", "src/bocpy/boc_noticeboard.c", + "src/bocpy/boc_sched.c", "src/bocpy/boc_tags.c", "src/bocpy/boc_terminator.c"], + depends=_headers, + include_dirs=_include_dirs, ), Extension( name="bocpy._math", - sources=["src/bocpy/_math.c", "src/bocpy/compat.c"], + sources=["src/bocpy/_math.c", "src/bocpy/boc_compat.c"], + depends=_headers, + include_dirs=_include_dirs, ), ] @@ -57,9 +82,11 @@ "src/bocpy/_internal_test_atomics.c", "src/bocpy/_internal_test_bq.c", "src/bocpy/_internal_test_wsq.c", - "src/bocpy/compat.c", - "src/bocpy/sched.c", + "src/bocpy/boc_compat.c", + "src/bocpy/boc_sched.c", ], + depends=_headers, + include_dirs=_include_dirs, ) ) diff --git a/sphinx/source/api.rst b/sphinx/source/api.rst index ac2c8f5..ae94893 100644 --- a/sphinx/source/api.rst +++ b/sphinx/source/api.rst @@ -14,19 +14,80 @@ Behaviors :members: :undoc-members: -.. autofunction:: wait .. autodecorator:: when +.. autofunction:: wait .. autofunction:: start +Cown Groups +^^^^^^^^^^^ + +In addition to passing individual cowns to ``@when``, you can pass a +**list of cowns** to acquire an entire group atomically. 
The list is +delivered to the behavior parameter as a ``list[Cown]``:: + + from bocpy import Cown, when, wait + + items = [Cown(i) for i in range(5)] + + @when(items) + def _(items): + # `items` is a list[Cown] — all five acquired together + total = sum(c.value for c in items) + print("Sum:", total) + + wait() + +You can mix individual cowns and groups freely:: + + summary = Cown(0) + items = [Cown(i) for i in range(5)] + + @when(summary, items) + def _(summary, items): + summary.value = sum(c.value for c in items) + +Each argument to ``@when`` becomes one parameter of the decorated function: +a single :class:`Cown` is passed directly, while a list is delivered as a +``list[Cown]``. + +Runtime Lifecycle +^^^^^^^^^^^^^^^^^ + +The bocpy runtime follows a simple lifecycle: + +1. **Start** — the first ``@when`` call (or an explicit :func:`start`) spawns + the worker sub-interpreters and the noticeboard thread. +2. **Schedule** — ``@when`` / :func:`whencall` schedules behaviors against + cowns. Scheduling and release run on the caller and worker threads; there + is no central scheduler thread. +3. **Wait** — :func:`wait` blocks until all scheduled behaviors complete, then + tears down the runtime (joins workers, closes the noticeboard). +4. **Re-start** — after ``wait()`` returns, the next ``@when`` call spins up + a fresh runtime. The noticeboard is cleared and worker statistics are + reset; existing :class:`Cown` objects survive and can be scheduled + against the new runtime. + +.. autodata:: WORKER_COUNT + + +Advanced +^^^^^^^^ + +.. autofunction:: whencall + Noticeboard ----------- +See the :ref:`noticeboard` guide for a conceptual overview, consistency model, +and worked examples. + .. autofunction:: notice_write .. autofunction:: notice_update .. autofunction:: notice_delete .. autofunction:: noticeboard .. autofunction:: notice_read +.. autofunction:: notice_sync .. 
autodata:: REMOVED @@ -42,7 +103,21 @@ Math Messaging --------- +See the :ref:`messaging` guide for a conceptual overview, the selective-receive +pattern, timeouts, and a worked calculator example. + .. autofunction:: send .. autofunction:: receive .. autofunction:: set_tags .. autofunction:: drain +.. autodata:: TIMEOUT + + +C ABI +----- + +See :ref:`c-abi` for the full usage contract for downstream C extensions +that want to interoperate with bocpy at the C level. + +.. autofunction:: get_include +.. autofunction:: get_sources diff --git a/sphinx/source/c_abi.rst b/sphinx/source/c_abi.rst new file mode 100644 index 0000000..03a48e5 --- /dev/null +++ b/sphinx/source/c_abi.rst @@ -0,0 +1,498 @@ +.. _c-abi: + +C ABI +===== + +This page documents the public C ABI shipped with bocpy. Use it when +writing a downstream C extension that needs to participate in +behavior-oriented concurrency at the C level — typically by registering +a custom Python type as cross-interpreter shareable so :class:`Cown` +can hand instances of it across worker interpreters. + +When do I need this? +-------------------- + +You do **not** need this header to use a custom Python type with +:class:`Cown`. Any type that has been registered as cross-interpreter +shareable through CPython's ``_PyXIData_*`` / ``_PyCrossInterpreterData_*`` +machinery — by whatever means — is automatically handled by bocpy's +scheduler and message queue. Registration is what makes a type usable +across worker sub-interpreters; bocpy does not impose any additional +requirement on top of that. + +The public C ABI is provided as a **convenience** for extension +authors who want to do that registration from C. 
It exists for two +reasons: + +- **Cross-platform atomics.** ``<bocpy/bocpy.h>`` exposes a small + sequentially-consistent atomics surface + (``atomic_load`` / ``atomic_store`` / ``atomic_fetch_add`` / + ``atomic_compare_exchange_strong`` over ``atomic_int_least64_t``, + plus a ``thread_local`` macro) that compiles on MSVC as well as + every toolchain that ships ``<stdatomic.h>``. On non-MSVC builds + the umbrella simply pulls in ``<stdatomic.h>``; on MSVC it provides + the missing prototypes so the same source compiles unchanged. +- **Portability across Python versions.** The ``_PyXIData_*`` API has + changed shape several times between CPython 3.12, 3.13, 3.14, and + 3.15 (free-threaded builds included), and differs again on the + legacy ``BOC_NO_MULTIGIL`` path. ``<bocpy/bocpy.h>`` and + ``<bocpy/xidata.h>`` paper over those differences with a single + set of macros, so one source file builds unchanged across every + supported Python version that bocpy itself supports. + +If neither of those concerns applies to your extension, you can ignore +the C ABI entirely and rely on whatever cross-interpreter registration +your type already has. + +.. note:: + + The bocpy public C ABI is **C only**. Including ``bocpy.h`` from a + C++ translation unit is not supported in this release. C++ consumers + must wrap the bocpy ABI in a thin C translation unit and call into + that from C++. + +Quickstart +---------- + +In your downstream ``setup.py``: + +.. code-block:: python + + import bocpy + from setuptools import setup, Extension + + setup( + ext_modules=[ + Extension( + "myext", + sources=["myext.c"] + bocpy.get_sources(), + include_dirs=[bocpy.get_include()], + ) + ], + ) + +In your C source: + +.. code-block:: c + + #include <bocpy/bocpy.h> + + /* <bocpy/bocpy.h> includes <Python.h> internally and is + * order-insensitive with respect to itself (which is + * idempotent). It must still appear *before* any system header + * (<stdio.h>, <stdlib.h>, ...) in the same translation unit, the + * same way <Python.h> must — CPython forbids system headers + before Python.h. 
*/ +ABI versioning -------------- ``bocpy.h`` defines a single integer macro ``BOCPY_ABI``. Compare it +with ``>=`` if you want to gate code on a minimum bocpy ABI revision. +The value is bumped on any incompatible change to ``bocpy.h`` or +``xidata.h``. Wheels are CPython-version-tagged (currently ``cp310``, +``cp311``, ``cp312``, ``cp313``, ``cp314``), so a runtime ABI mismatch +between bocpy and its host CPython cannot occur: each wheel embeds +the ``xidata.h`` ladder arm appropriate for its target CPython. + +Atomic surface +-------------- + +The atomic surface is a minimal, sequentially-consistent shim over +``int_least64_t``. On non-MSVC compilers it is just ``<stdatomic.h>``; +on MSVC the four functions below have out-of-line bodies in +``bocpy_msvc.c``, which downstream extensions pick up automatically +via :func:`bocpy.get_sources`. + +.. list-table:: + :header-rows: 1 + :widths: 35 65 + + * - Symbol + - Description + * - ``atomic_int_least64_t`` + - Type alias for a 64-bit atomic integer. + * - ``atomic_load(ptr)`` + - Sequentially-consistent load of ``*ptr``. + * - ``atomic_store(ptr, value)`` + - Sequentially-consistent store of ``value`` into ``*ptr``. + * - ``atomic_fetch_add(ptr, value)`` + - Sequentially-consistent ``*ptr += value`` returning the old value. + * - ``atomic_compare_exchange_strong(ptr, expected, desired)`` + - Sequentially-consistent CAS. Returns ``true`` on success; + on failure writes the observed value through ``expected``. + +All operations are sequentially consistent on every supported MSVC +target (x86, x64, ARM64). The MSVC shim implements ``atomic_load`` +via ``InterlockedOr64(ptr, 0)`` and ``atomic_store`` via +``InterlockedExchange64`` on x64/ARM64 (full barriers); on x86 both +go through ``InterlockedCompareExchange64``. The RMW ops are +``InterlockedExchangeAdd64`` / ``InterlockedCompareExchange64`` on +x64/ARM64 and CAS-loops on x86 — all already full barriers. 
The shim +deliberately does not expose ``_explicit`` variants or weaker memory +orders. + +Ownership helpers +----------------- + +The XIData callbacks shown in the worked example below flip a single +``atomic_int_least64_t`` owner field as the resource crosses +interpreter boundaries. ``bocpy.h`` exposes two helpers for that +pattern so downstream code does not have to redefine them: + +.. list-table:: + :header-rows: 1 + :widths: 35 65 + + * - Symbol + - Description + * - ``BOCPY_NO_OWNER`` + - Sentinel value (``-2``) meaning "no interpreter currently owns + this resource". Use it as the initial value of an owner field + and as the CAS target during the producer-side + ``XIDATA_GETDATA_FUNC`` callback. Negative so it never collides + with a real ``PyInterpreterState_GetID()`` return value. + * - ``bocpy_interpid()`` + - ``static inline int_least64_t``: returns the running + interpreter's ID, pre-typed for the + ``atomic_compare_exchange_strong`` parameter list. Must be + called with the GIL held (or while attached to an interpreter + on free-threaded builds) — same contract as the underlying + ``PyInterpreterState_GetID(PyInterpreterState_Get())``. + +The two are designed to be used together: producer-side, CAS the +owner from ``bocpy_interpid()`` to ``BOCPY_NO_OWNER`` before calling +``XIDATA_INIT``; consumer-side, CAS it back from ``BOCPY_NO_OWNER`` to +``bocpy_interpid()`` inside the ``new_object`` callback. See the +worked example below. + +Proto-Region semantics +---------------------- + +The ownership pattern shown in the worked example is a deliberately +narrow approximation of the **Region** discipline from Pyrona's +*Lungfish* model (Stoldt et al., `Dynamic Region Ownership for +Concurrency Safety +`__, +PLDI 2025). 
Lungfish is a dynamic ownership model for Python in which mutable state is grouped into +*regions*: at any point in time at most one thread has access to a +region, transferring a region into a `cown` makes it sharable +between threads, and acquiring the cown moves the region into the +acquiring thread for the duration of the behavior. The bocpy public +ABI does not implement Lungfish — there is no region graph, no +freeze, no merge, no borrow tracking — but the ``BOCPY_NO_OWNER`` / +``bocpy_interpid()`` pair gives downstream C extensions enough +machinery to model the *single most important* invariant a region +provides: **a mutable C resource is owned by exactly one interpreter +at a time, and any other interpreter that still holds a wrapper +around it cannot read or write its contents.** + +What proto-Region buys you +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Wrapping a C struct in a refcounted Python type and registering it +through ``XIDATA_REGISTERCLASS`` already moves the *pointer* between +sub-interpreters efficiently — no copy, no pickle. But pointers alone +are unsafe: nothing stops a worker from racing the previous owner on +the same impl after the handoff, exactly the unrestricted-shared- +mutable-state hazard regions are designed to eliminate (see Pyrona §1 +and Fig. 1: ``share(x, T2)`` between threads is unsafe). The owner +field, flipped atomically as the impl crosses the XIData boundary, +turns that hazard into a deterministic ``RuntimeError`` rather than a +data race. + +The contract +~~~~~~~~~~~~ + +A custom C resource opting into proto-Region semantics commits to all +of the following: + +1. **Single owner field.** The resource carries one + ``atomic_int_least64_t owner`` field, initialised to + ``bocpy_interpid()`` of the constructing interpreter. ``Matrix``'s + ``matrix_impl`` and the consumer template's ``counter_impl`` are + the canonical examples. +2. 
**CAS in the producer callback.** ``XIDATA_GETDATA_FUNC`` CASes + the owner from ``bocpy_interpid()`` to ``BOCPY_NO_OWNER`` before + calling ``XIDATA_INIT``. A failed CAS surfaces as a + ``RuntimeError`` and aborts the handoff: the resource is not + transferred. +3. **CAS in the consumer callback.** The ``new_object`` callback + CASes the owner from ``BOCPY_NO_OWNER`` to ``bocpy_interpid()`` + before constructing the new wrapper. If wrapper allocation fails + after the CAS succeeds, the callback must store the owner back to + ``BOCPY_NO_OWNER`` so a future retry of the handoff can succeed. +4. **Ownership check on data accessors.** Any method or getter that + reads or writes the resource's payload must verify + ``bocpy_interpid() == atomic_load(&impl->owner)`` and raise + ``RuntimeError`` otherwise. ``Matrix``'s + ``impl_check_acquired`` and the consumer template's + ``counter_impl_check_acquired`` are the canonical helpers. + Identity-only accessors (e.g. ``Counter.address``, + ``Counter.refcount``) are allowed to skip the check — the same way + you may print the address of a Lungfish bridge object without + acquiring its region. +5. **No raw send of the cown's value.** Inside a ``@when``, + ``send("tag", c.value)`` is *not* the right primitive for shipping + a proto-Region resource to another behavior: it would atomically + move the impl out of the cown mid-behavior and leave the worker + unable to release the cown afterwards. Send a copy + (``c.value.copy()``, like ``examples/boids.py``) or send primitive + summary data (``c.value.address``, ``c.value.count``). The cown + itself is the right primitive for handing the resource to a + different behavior — schedule a downstream ``@when`` on the same + cown. 
+ +What proto-Region does **not** give you +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bocpy ABI does **not** implement these parts of Lungfish, and a +downstream extension that wants them must build them on top: + +* **Transitive closure.** Lungfish regions enforce isolation across a + whole object graph; proto-Region tracks ownership of a single + ``impl`` pointer. If your resource holds further heap-allocated + state, you are responsible for keeping that state private (no + outgoing references) or applying the same owner-CAS pattern to + each piece. ``Matrix`` does this implicitly: ``matrix_impl`` + encapsulates ``data`` and ``row_ptrs``, both private to the impl. +* **Freezing and immutability.** There is no equivalent of + ``freeze(b)``. Resources are either owned-and-mutable or in flight. +* **Merge / region trees.** There is no nesting; one resource = one + owner field. +* **Borrowed references from a "local region".** The bocpy worker + loop is the closest equivalent — a worker behavior is the period + during which the impl is owned by that interpreter — but there is + no first-class borrow type, and a borrowed-style reference held + past the end of a behavior is undefined behavior. + +In short: proto-Region is the **smallest** thing that turns a +shareable C struct into a Region-like resource, and it slots into +the existing ``XIDATA_REGISTERCLASS`` lifecycle without any +additional machinery beyond the two ABI symbols +``BOCPY_NO_OWNER`` and ``bocpy_interpid()``. + +XIData ladder +------------- + +The cross-interpreter data ("XIData") API is a thin macro ladder over +CPython's internal cross-interpreter data primitives, smoothing over +the rename from ``_PyCrossInterpreterData`` (3.12, 3.13) to +``_PyXIData`` (3.14+). All macros and the typedef below are exposed +by ``bocpy.h`` via its ``#include "xidata.h"``. + +.. 
list-table:: + :header-rows: 1 + :widths: 35 65 + + * - Symbol + - Description + * - ``XIDATA_T`` + - Opaque struct holding a serialised cross-interpreter handoff. + * - ``XIDATA_NEW()`` + - Allocate and zero a fresh ``XIDATA_T *``. + * - ``XIDATA_INIT(xidata, interp, data, obj, new_object)`` + - Initialise an allocated ``XIDATA_T``. See safety contract. + * - ``XIDATA_GETXIDATA(value, xidata)`` + - Ask CPython to populate ``xidata`` from the producer-side + Python object ``value``. + * - ``XIDATA_NEWOBJECT`` + - Field type used for the consumer-side reconstruction callback. + * - ``XIDATA_FREE(xidata)`` + - Release any resources owned by ``xidata`` and free the buffer. + * - ``XIDATA_SET_FREE(xidata, fn)`` + - Install a custom free callback into ``xidata``. + * - ``XIDATA_REGISTERCLASS(type, cb)`` + - Register a Python ``type`` as cross-interpreter shareable with + producer-side callback ``cb``. See safety contract. + +The lifecycle is **register once, then per-handoff init/get/free**: +register each shareable type at module init; on the producer side, +allocate an ``XIDATA_T``, populate it with ``XIDATA_GETXIDATA`` (which +internally calls the registered callback that will end up calling +``XIDATA_INIT``), hand the buffer to the consumer interpreter; on the +consumer side, call the ``new_object`` callback recorded during init +to reconstruct the Python object, then ``XIDATA_FREE`` to release. + +Safety contract +--------------- + +These contracts are mirrored from the canonical doc-comments in +``xidata.h``; the C header is the single source of truth. + +* ``XIDATA_INIT(xidata, interp, …)`` — ``interp`` must be the + interpreter that currently owns ``data``. Passing the wrong + ``interp`` produces a use-after-free across the worker handoff. + The ``xidata`` buffer must be freshly allocated (or zeroed) and + must not have been initialised before; double-init is undefined. 
+ +* ``XIDATA_REGISTERCLASS(type, cb)`` — must be called once per + ``(type, cb)`` pair **per interpreter**, from inside that + interpreter's module-exec slot (or equivalent module-init code that + runs on every import in every interpreter). The standard idiom is + to call it from a ``Py_mod_exec`` slot — see ``_core.c`` / + ``_math.c`` for the canonical pattern, mirrored by the consumer + template under ``templates/c_abi_consumer/``. Registering the same type + twice with different callbacks in the same interpreter is + undefined. + +* ``BOC_NO_MULTIGIL`` — internal marker. Defined only on CPython + <3.12, where the host interpreter has no per-interpreter GIL and + the bocpy runtime falls back to a single-interpreter mode. The + ``XIDATA_*`` ladder still exposes the same macros on these + versions; downstream consumers do not need to special-case this + macro themselves. (The ``XIDATA_GETDATA_FUNC`` macro — see below — + hides the only consumer-visible signature change for you.) + +Worked example: ``bocpy.Matrix`` +-------------------------------- + +The bocpy-shipped :class:`Matrix` type uses every entry in the table +above. The annotated extracts below come from ``src/bocpy/_math.c``. + +**1. Module init: register the type from the exec slot.** This runs +once per interpreter, on every import — main interpreter and every +worker sub-interpreter that imports the module. Use multi-phase +initialisation (``Py_mod_exec``) and declare +``Py_MOD_PER_INTERPRETER_GIL_SUPPORTED`` so the bocpy runtime can +load the module inside worker sub-interpreters: + +.. 
code-block:: c + + /* _math_module_exec */ + if (XIDATA_REGISTERCLASS(state->matrix_type, _matrix_shared)) { + Py_FatalError( + "could not register MatrixObject for cross-interpreter sharing"); + return -1; + } + +A single-phase ``PyModule_Create`` module that registers from +``PyInit`` will load fine in the main interpreter but cannot satisfy +``Py_MOD_PER_INTERPRETER_GIL_SUPPORTED``; if a transpiled ``@when`` +body imports it, the worker sub-interpreter import will not run the +registration and the consumer callback will see no type registered +in its registry. See ``templates/c_abi_consumer/src/_bocpy_probe.c`` for +the full multi-phase template. + +**2. Producer side: prepare the underlying C matrix for handoff.** +``_matrix_shared`` is the ``cb`` registered above; CPython invokes it +when ``XIDATA_GETXIDATA`` is called against a ``MatrixObject`` from +the producer interpreter. Declare the callback with +``XIDATA_GETDATA_FUNC`` so the body has the same +``(tstate, obj, xidata)`` parameter list on every supported CPython — +the macro emits a small trampoline on Python <3.12 (where the runtime +calls the callback with ``(obj, xidata)`` only) so the body never has +to special-case ``BOC_NO_MULTIGIL``: + +.. code-block:: c + + XIDATA_GETDATA_FUNC(_matrix_shared) { + MatrixObject *matrix = (MatrixObject *)obj; + matrix_impl *impl = matrix->impl; + + /* Atomically transfer ownership: this interpreter -> BOCPY_NO_OWNER. 
*/ + int_least64_t expected = bocpy_interpid(); + int_least64_t desired = BOCPY_NO_OWNER; + if (!atomic_compare_exchange_strong(&impl->owner, + &expected, desired)) { + PyErr_Format(PyExc_RuntimeError, /* … */); + return -1; + } + + XIDATA_INIT(xidata, tstate->interp, impl, obj, _new_matrix_object); + return 0; + } + +Note the use of ``atomic_compare_exchange_strong`` from the atomic +surface to flip the matrix's owner field, and ``XIDATA_INIT`` to wire +``xidata`` to the C-level ``impl``, the original Python ``obj``, and +the consumer-side reconstruction callback ``_new_matrix_object``. + +**3. Consumer side: reconstruct a Python wrapper for the C matrix.** +``_new_matrix_object`` is the ``new_object`` callback recorded by +``XIDATA_INIT``; CPython invokes it on the consumer interpreter: + +.. code-block:: c + + static PyObject *_new_matrix_object(XIDATA_T *xidata) { + matrix_impl *impl = (matrix_impl *)xidata->data; + + /* Atomically take ownership: BOCPY_NO_OWNER -> this interpreter. */ + int_least64_t expected = BOCPY_NO_OWNER; + int_least64_t desired = bocpy_interpid(); + if (!atomic_compare_exchange_strong(&impl->owner, + &expected, desired)) { + PyErr_Format(PyExc_RuntimeError, /* … */); + return NULL; + } + + PyTypeObject *type = LOCAL_STATE->matrix_type; + MatrixObject *matrix = (MatrixObject *)type->tp_alloc(type, 0); + /* … wrap and return … */ + } + +Consumer modules and worker sub-interpreters +-------------------------------------------- + +Workers always run in sub-interpreters, on every supported CPython. +What varies across versions is whether each sub-interpreter owns its +own GIL (3.12+) or shares the legacy global GIL (3.10/3.11, marked +internally by ``BOC_NO_MULTIGIL``). The execution model — +per-interpreter module state, multi-phase init, the per-interpreter +``XIDATA_REGISTERCLASS`` registry — is identical across versions. 
+ +Because every ``XIDATA_REGISTERCLASS`` ladder lives in a per-interpreter +exec slot (see step 1 above), a consumer extension's ``Matrix``-like +type is only registered in interpreters that actually imported the +extension. Any consumer module whose XIData wrappers will travel into +a ``@when`` body must be **imported at module scope** in the file the +worker exec'd from. + +The transpiler propagates module-scope ``import`` statements into the +exported per-worker module, but it does **not** see runtime imports +(``importlib.import_module(...)``, ``pytest.importorskip(...)``, +``__import__(...)`` from inside a function, …). A worker that imports +the transpiled body without the consumer extension will load the +shared object via the OS loader but skip the per-interpreter exec +slot, leaving its ``LOCAL_STATE`` (or equivalent module-state cache) +NULL. The consumer callback will then segfault on the first reconstruction. + +Practical rule for downstream authors: + +* Use a top-level ``import _your_extension`` in any test or example + file that schedules ``@when`` bodies which observe your extension's + types. ``pytest.importorskip`` is not transpiler-visible. +* Mirror the per-interpreter state pattern (heap-allocated type from + ``PyType_FromModuleAndSpec``, per-module state, ``thread_local`` + cache primed in the exec slot) shown in + ``templates/c_abi_consumer/src/_bocpy_probe.c``. + +What is NOT public +------------------ + +The wheel ships only ``bocpy.h``, ``xidata.h``, and ``bocpy_msvc.c`` +under the package directory. The following internal headers and +surfaces are **not** part of the public C ABI; do not depend on them: + +* Internal headers: ``boc_compat.h``, ``boc_cown.h``, ``boc_sched.h``, ``boc_tags.h``, + ``boc_terminator.h``, ``boc_noticeboard.h``. As a general rule, + every ``boc_*`` file in the package directory is private — only + ``bocpy.h``, ``xidata.h``, and ``bocpy_msvc.c`` are public. 
+* Ordered atomics (``boc_atomic_*_explicit`` and the typed + ``boc_atomic_*_u64`` / ``_intptr`` API). +* BOC mutex / condition-variable types (``BOCMutex``, ``BOCCond``) + and the ``boc_mtx_*`` / ``boc_cnd_*`` helpers. +* ``boc_yield``, ``boc_now_s``, ``boc_now_ns``, ``boc_sleep_ns``, the + physical-CPU helpers, and any other ``boc_``-prefixed function not + exposed via ``bocpy.h``. + +C++ consumer support is also a non-goal for this release. + +CPython version skew +-------------------- + +bocpy wheels are tagged cp310 / cp311 / cp312 / cp313 / cp314. +Downstream extensions ship the same per-CPython matrix and thereby +pick up the matching ``bocpy.h`` view of the cross-interpreter data +ladder. ``BOCPY_ABI`` is bumped on any incompatible change to +``bocpy.h`` or ``xidata.h``. diff --git a/sphinx/source/conf.py b/sphinx/source/conf.py index 3526c8e..4a433df 100644 --- a/sphinx/source/conf.py +++ b/sphinx/source/conf.py @@ -14,7 +14,7 @@ project = 'bocpy' copyright = '2026, Microsoft' author = 'Microsoft' -release = '0.5.0' +release = '0.6.0' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/sphinx/source/index.rst b/sphinx/source/index.rst index aaccde1..c387e92 100644 --- a/sphinx/source/index.rst +++ b/sphinx/source/index.rst @@ -85,11 +85,17 @@ behavior once its cowns are free. The two transfers serialise on the ``Alice``/``Bob`` cowns, so their effects are interleaved in a deadlock-free, data-race-free order chosen by the runtime. +For cross-behavior shared state see :ref:`noticeboard`. For lower-level +Erlang-style ``send`` / ``receive`` channels see :ref:`messaging`. + .. 
toctree:: :maxdepth: 2 :caption: Contents: api + noticeboard + messaging + c_abi Indices and Tables ================== diff --git a/sphinx/source/messaging.rst b/sphinx/source/messaging.rst new file mode 100644 index 0000000..bcc8a89 --- /dev/null +++ b/sphinx/source/messaging.rst @@ -0,0 +1,269 @@ +.. _messaging: + +Messaging +========= + +.. module:: bocpy + :noindex: + +bocpy includes an Erlang-style **message-passing** subsystem built on top of +lock-free multi-producer single-consumer (MPSC) ring buffers implemented in C. +Messages can be sent from any thread or sub-interpreter and received by any +other — they are the primary mechanism for communication that does *not* +require shared ownership of a cown. + +.. note:: + + Messaging is a **lower-level** facility than ``@when`` / :class:`Cown`. + Most programs should model coordination through cowns and behaviors; reach + for ``send`` / ``receive`` when you need a channel-like pattern (producer– + consumer queues, heartbeat loops, event buses) or need to communicate with + code running outside the behavior runtime (plain threads, the main thread + before ``wait()``). + +Concepts +-------- + +Tags +^^^^ + +Every message carries a **tag** — a short string label (max 63 UTF-8 bytes) +that acts as a routing key. The runtime maintains 16 internal queues; each +tag is assigned to the first free slot the first time it is used. Receivers +specify one or more tags and only dequeue messages whose tag matches. + +There is no declaration step: the first ``send("my-tag", ...)`` auto-assigns +the tag to a queue. If you want deterministic queue assignment (useful for +benchmarks or when you need to isolate traffic), call :func:`set_tags` before +any sends. + +Selective Receive +^^^^^^^^^^^^^^^^^ + +:func:`receive` blocks the calling thread until a message with a matching tag +arrives. 
You can pass a single tag or a sequence of tags to listen on multiple +channels simultaneously:: + + # Wait for whichever arrives first + msg = receive(["order-ready", "order-cancelled"]) + +The return value is a two-element list ``[tag, contents]``. + +Timeouts and the ``after`` Callback +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`receive` accepts an optional **timeout** (in seconds). When the timeout +fires: + +- If an ``after`` callback is provided, the runtime calls ``after()`` and + returns its result **directly** as the value of ``receive(...)``. By + convention the callback returns a ``(tag, contents)`` tuple so the caller + can pattern-match it the same way as a normal message, but the runtime + itself does not interpret the value — nothing is enqueued and no other + receiver sees it. +- If no ``after`` is provided, ``receive`` returns ``(TIMEOUT, None)``. + :data:`TIMEOUT` is the *tag* slot of the synthetic two-element result; + compare ``msg[0]`` against it. + +:: + + from bocpy import receive, TIMEOUT + + msg = receive("heartbeat", timeout=2.0) + if msg[0] == TIMEOUT: + print("No heartbeat in 2 seconds") + +With an ``after`` callback:: + + def after(): + return "heartbeat", "self-tick" + + msg = receive("heartbeat", timeout=1.0, after=after) + # msg == ("heartbeat", "self-tick") if nothing arrived in 1 s + +The ``after`` function may return any value; the ``(tag, contents)`` +shape is purely a convention so the caller can pattern-match it +uniformly with normal ``receive`` results. + +Worked Example: Calculator Service +----------------------------------- + +The following example (adapted from +`examples/calculator.py `_) +demonstrates a concurrent calculator service. Multiple client threads send +arithmetic operations to a server thread via the ``"calculator"`` tag. The +server uses selective receive with a timeout to detect when clients have gone +silent. + +.. 
code-block:: python + + """Concurrent calculator using message-passing channels.""" + + import random + from threading import Thread + import time + + from bocpy import receive, send + + + def client(num_operations: int): + """Send random arithmetic operations to the calculator channel.""" + actions = ["+", "-", "/", "*"] + for _ in range(num_operations): + time.sleep(random.random() * 0.1) + action = random.choice(actions) + value = random.random() * 10 - 5 + send("calculator", (action, value)) + + + def server(timeout): + """Receive and process arithmetic operations until stopped.""" + value = 0 + num_operations = 0 + running = True + + def after(): + return "calculator", ("print", True) + + while running: + match receive("calculator", timeout, after): + case [_, ("+", x)]: + num_operations += 1 + value += x + + case [_, ("-", x)]: + num_operations += 1 + value -= x + + case [_, ("*", x)]: + num_operations += 1 + value *= x + + case [_, ("/", x)]: + num_operations += 1 + value /= x + + case [_, ("print", _)]: + print("Total operations:", num_operations) + print("Final value:", value) + running = False + + + # Start the server with a 2-second idle timeout + server_thread = Thread(target=server, args=(2.0,)) + server_thread.start() + + # Spawn 4 clients, each sending 5 operations + clients = [Thread(target=client, args=(5,)) for _ in range(4)] + for c in clients: + c.start() + for c in clients: + c.join() + + # Once clients finish, send a shutdown signal (or let the timeout fire) + send("calculator", ("print", False)) + server_thread.join() + +**Key observations:** + +- ``send("calculator", ...)`` is non-blocking and thread-safe — all four + clients fire concurrently. +- The server's ``match`` on ``receive(...)`` is a *selective receive*: it + pattern-matches on the message contents, not just the tag. +- The ``after`` callback fires when no message arrives within ``timeout`` + seconds, causing the server to print results and exit gracefully. 
+
+- No locks, no shared mutable state — the only coordination is the message
+  queue.
+
+Pre-assigning Tags
+^^^^^^^^^^^^^^^^^^
+
+For performance-sensitive applications where you know your tag set ahead of
+time, :func:`set_tags` pins tags to specific internal queues, making the
+queue layout deterministic::
+
+    from bocpy import set_tags
+
+    set_tags(["orders", "heartbeat", "shutdown"])
+
+Calling ``set_tags`` **clears all queued messages** and reassigns the queue
+layout. Call it once at startup, before any sends.
+
+Draining Queues
+^^^^^^^^^^^^^^^
+
+:func:`drain` discards all pending messages for one or more tags::
+
+    from bocpy import drain
+
+    drain("calculator")          # clear one tag
+    drain(["orders", "events"])  # clear multiple tags
+
+This is useful for cleanup between test runs or when resetting a subsystem.
+
+.. warning::
+
+   If new messages are arriving faster than they can be drained, ``drain``
+   may not return promptly.
+
+Sending Custom Types Across Sub-interpreters
+--------------------------------------------
+
+Messages cross sub-interpreter boundaries through CPython's
+**cross-interpreter data** (XIData) machinery, with a **pickle fallback**
+when no XIData handler is registered for the payload's type. The runtime
+makes no attempt to ship class definitions along with the message — the
+receiver must already be able to resolve the type by its fully qualified
+name.
+
+In practice this means:
+
+- **Builtins and stdlib containers just work.** Numbers, strings, bytes,
+  ``tuple``, ``list``, ``dict``, ``set``, ``frozenset`` and similar types
+  either have a native XIData handler or pickle cleanly to types every
+  interpreter already knows about.
+- **C extension types can register a custom XIData handler** to transfer
+  ownership directly without going through pickle. :class:`Cown` and
+  :class:`Matrix` use this path; see :ref:`c-abi` for how to expose your
+  own type through the same mechanism. 
+- **Pure-Python custom classes fall back to pickle.** Unpickling only + succeeds if the receiving interpreter can already import the class by + its fully qualified name. If a worker has never executed + ``import my_pkg.my_module``, then receiving an instance of + ``my_pkg.my_module.MyClass`` will fail with a ``ModuleNotFoundError`` or + ``AttributeError`` raised from inside ``receive``. +- **Closures, lambdas, and locally-defined classes cannot be sent at all** + — the pickle fallback cannot resolve them by qualified name from any + interpreter, and they have no XIData handler. + +Inside ``@when`` behaviors the :ref:`transpiler ` handles the +import-side of this automatically: it rewrites the decorated module so +each worker imports the same set of names the caller had in scope, and +any class referenced by a behavior is therefore resolvable on the worker +side. When you use ``send`` / ``receive`` from a *plain* thread, a +sub-interpreter spawned outside the behavior runtime, or from inside a +behavior body but with a type that was not part of the captured +environment, **you are responsible for ensuring the class is importable +on the receiver** (or for registering an XIData handler that bypasses +pickle entirely). + +The simplest way to satisfy the pickle path is to define message payload +types at module scope in a module that every participating interpreter +imports at startup — for example, a shared ``messages.py`` that the main +program, the worker bootstrap, and any auxiliary threads all import +before the first ``send``. + + +API Reference +------------- + +.. autofunction:: send + :no-index: +.. autofunction:: receive + :no-index: +.. autofunction:: set_tags + :no-index: +.. autofunction:: drain + :no-index: +.. autodata:: TIMEOUT + :no-index: diff --git a/sphinx/source/noticeboard.rst b/sphinx/source/noticeboard.rst new file mode 100644 index 0000000..519f2cb --- /dev/null +++ b/sphinx/source/noticeboard.rst @@ -0,0 +1,253 @@ +.. 
_noticeboard: + +Noticeboard +=========== + +.. module:: bocpy + :noindex: + +The **noticeboard** is a global key-value store (up to 64 entries) designed for +cross-behavior data sharing that does not warrant a dedicated :class:`Cown`. +It is eventually consistent: writes are fire-and-forget, and readers see a +snapshot that may lag behind the latest committed state. + +When to Use the Noticeboard +--------------------------- + +Use the noticeboard when: + +- Multiple behaviors need to observe shared configuration or summary data + without taking exclusive ownership. +- You want to broadcast a value (*e.g.,* "stop" flag, running totals, discovered + results) that many independent behaviors can poll. +- The data does not need strict read-your-writes ordering between behaviors. + +If you need strict sequencing or exclusive access, use a :class:`Cown` instead. + +Consistency Model +----------------- + +All mutations (``notice_write``, ``notice_update``, ``notice_delete``) are +serialized through a dedicated **noticeboard thread**. The calling behavior (or +thread) hands off the mutation and returns immediately — this is the +"fire-and-forget" property. + +Readers call :func:`noticeboard` or :func:`notice_read` to take a **snapshot** +that is cached for the lifetime of the behavior. The snapshot is consistent +(all entries come from the same committed version), but may not reflect writes +that were posted after the snapshot was taken — or even writes posted *before* +it, if the noticeboard thread has not yet committed them. + +.. important:: + + The noticeboard is **not** a synchronization channel. Do not rely on a + subsequent behavior seeing a prior behavior's write just because the two + are chained through a cown. If you need read-your-writes ordering, model + the shared state as a :class:`Cown` instead. 
+ +Worked Example: Early Termination +---------------------------------- + +The +`prime_factor example `_ +uses the noticeboard to coordinate early termination across parallel worker +behaviors. A simplified version of the pattern: + +.. note:: + + ``expensive_computation``, ``is_final_answer`` and ``get_work`` below are + placeholders for application logic; substitute your own when adapting + the example. + +.. code-block:: python + + from functools import partial + from bocpy import (Cown, notice_read, notice_update, notice_write, + receive, send, wait, when) + + + def append_result(existing, new_item): + """Append new_item to the shared partials list (used with notice_update).""" + return existing + [new_item] + + + class WorkerState: + def __init__(self, worker_id, items): + self.worker_id = worker_id + self.items = items + + + def process_batch(state: Cown[WorkerState]): + @when(state) + def _(state): + # Check if another worker already signalled completion. + if notice_read("done", False): + return + + item = state.value.items.pop(0) + result = expensive_computation(item) + + if is_final_answer(result): + # Signal all other workers to stop and publish the answer + # over a message channel (the noticeboard is torn down by + # wait(), so it cannot carry the result back to main). + notice_write("done", True) + send("answer", result) + else: + # Record the partial result for diagnostics, then continue. + notice_update("partials", partial(append_result, new_item=result), + default=[]) + if state.value.items: + process_batch(state) + + + # Launch parallel workers. + workers = [Cown(WorkerState(i, get_work(i))) for i in range(4)] + for w in workers: + process_batch(w) + + # Collect the first final answer produced by any worker, then drain. + answer = receive("answer")[1] + wait() + print("final answer:", answer) + # After wait(), the noticeboard is torn down; "partials" is no longer + # readable. 
Snapshot it from inside a behavior before wait() returns + # if you need to inspect it. + +**Key points:** + +- ``notice_write("done", True)`` is non-blocking — the worker doesn't wait + for the write to commit. +- Other workers poll ``notice_read("done", False)`` at the start of each + batch. They will *eventually* see the flag and stop. +- ``notice_update("partials", append_result, ...)`` shows the read-modify- + write pattern: ``append_result`` is run atomically against the current + list, so concurrent appends from different workers don't lose entries. +- The final answer is delivered over the message queue rather than the + noticeboard, because :func:`wait` tears the noticeboard down before + control returns to the main thread. +- The pattern is cooperative: there is no hard cancellation. Workers stop + at the next polling point. + +Reading the Noticeboard +----------------------- + +From inside a behavior, call :func:`noticeboard` to get a read-only mapping +of all entries, or :func:`notice_read` for a single key:: + + from bocpy import noticeboard, notice_read, when, Cown + + c = Cown(None) + + @when(c) + def _(c): + # Full snapshot + snap = noticeboard() + for key, value in snap.items(): + print(f"{key} = {value}") + + # Single key with a default + threshold = notice_read("threshold", 0.5) + +The snapshot is taken once per behavior and cached — multiple calls to +:func:`noticeboard` or :func:`notice_read` within the same behavior return +data from the same point in time. + +Writing and Updating +-------------------- + +:func:`notice_write` sets a key unconditionally:: + + from bocpy import notice_write + + notice_write("config.max_retries", 3) + notice_write("status", "running") + +:func:`notice_update` performs an atomic read-modify-write. 
The function +``fn`` receives the current value (or ``default`` if the key is absent) and +returns the new value:: + + from functools import partial + from operator import add + from bocpy import notice_update + + # Increment a counter + notice_update("counter", partial(add, 1), default=0) + + # Append to a list + def append_item(lst, item): + return lst + [item] + + notice_update("results", partial(append_item, item="found!"), default=[]) + +.. warning:: + + ``fn`` must be **picklable** — lambdas and closures are not. + Use ``functools.partial`` with module-level functions, or ``operator`` + functions. + +If ``fn`` returns :data:`REMOVED`, the entry is deleted:: + + from bocpy import notice_update, REMOVED + + def clear_if_empty(value): + return REMOVED if not value else value + + notice_update("buffer", clear_if_empty, default=[]) + +Deleting Entries +---------------- + +:func:`notice_delete` removes a single key (no-op if absent):: + + from bocpy import notice_delete + + notice_delete("temporary_flag") + +``notice_sync`` (Testing Only) +------------------------------- + +:func:`notice_sync` blocks until every mutation the calling thread has +posted so far has been committed by the noticeboard thread. It exists to +make the noticeboard's eventual consistency tractable for **tests** — a +test can write a value, call ``notice_sync()``, and then assert that a +subsequently scheduled behavior observes the write — not as a primitive +for application code. + +.. warning:: + + Outside of tests, reaching for ``notice_sync`` is almost always an + anti-pattern. The guarantee it provides is much weaker than it looks: + + - It only orders the **calling thread's prior writes** against the + **next per-behavior snapshot** taken on any thread. Snapshots are + captured once per behavior, so a behavior already executing when + ``notice_sync`` returns will keep seeing its existing snapshot. 
+   - It does **not** refresh the calling behavior's own snapshot — you
+     cannot ``notice_sync`` and then ``notice_read`` to see your write.
+   - It establishes no happens-before relationship between unrelated
+     behaviors and is not a substitute for cown-mediated ordering.
+
+   If application code needs read-your-writes ordering, model the shared
+   state as a :class:`Cown`. If you find yourself wanting
+   ``notice_sync`` outside a test, that is a strong signal the noticeboard
+   is the wrong primitive for the problem.
+
+
+API Reference
+-------------
+
+.. autofunction:: notice_write
+   :no-index:
+.. autofunction:: notice_update
+   :no-index:
+.. autofunction:: notice_delete
+   :no-index:
+.. autofunction:: noticeboard
+   :no-index:
+.. autofunction:: notice_read
+   :no-index:
+.. autofunction:: notice_sync
+   :no-index:
+.. autodata:: REMOVED
+   :no-index:
diff --git a/src/bocpy/__init__.py b/src/bocpy/__init__.py
index 5501a15..97e87f5 100644
--- a/src/bocpy/__init__.py
+++ b/src/bocpy/__init__.py
@@ -1,14 +1,55 @@
 """Behavior-oriented Concurrency."""
+import os
+import sys
+
 from ._core import drain, receive, send, set_tags, TIMEOUT
 from ._math import Matrix
 from .behaviors import (Behaviors, Cown, notice_delete, notice_read,
                         notice_sync, notice_update, notice_write, noticeboard,
-                        noticeboard_version, REMOVED,
+                        REMOVED,
                         start, wait, when, whencall, WORKER_COUNT)
+
+
+def get_include() -> str:
+    """Return the absolute path to the bocpy public C header root.
+
+    Use the returned path as an additional ``include_dirs`` entry on a
+    downstream :class:`setuptools.Extension` so its translation units
+    can ``#include <bocpy/bocpy.h>``. The directory contains a single
+    ``bocpy/`` subdirectory holding the public ABI surface; bocpy's
+    private headers are not exposed.
+
+    :return: Absolute filesystem path to the include root (the parent
+        of the ``bocpy/`` subdirectory containing ``bocpy.h`` and
+        ``xidata.h``).
+ :rtype: str + """ + return os.path.join(os.path.dirname(os.path.abspath(__file__)), + "include") + + +def get_sources() -> list[str]: + """Return platform-specific extra C sources for downstream extensions. + + On Windows the returned list contains the absolute path to + ``bocpy_msvc.c``, which provides MSVC out-of-line bodies for the + atomic ops declared in ``<bocpy/bocpy.h>``. On non-Windows + platforms the list is empty (``<bocpy/bocpy.h>`` provides + everything). + + :return: A list of absolute paths to add to a downstream + :class:`setuptools.Extension`'s ``sources=`` list. + :rtype: list[str] + """ + if sys.platform == "win32": + return [os.path.join(get_include(), "bocpy", "bocpy_msvc.c")] + return [] + + + __all__ = ["Behaviors", "Cown", "Matrix", "REMOVED", "TIMEOUT", - "WORKER_COUNT", "drain", "notice_delete", "notice_read", + "WORKER_COUNT", "drain", "get_include", "get_sources", + "notice_delete", "notice_read", "notice_sync", "notice_update", "notice_write", "noticeboard", - "noticeboard_version", "receive", + "receive", "send", "set_tags", "start", "wait", "when", "whencall"] diff --git a/src/bocpy/__init__.pyi b/src/bocpy/__init__.pyi index 0f319b8..267d643 100644 --- a/src/bocpy/__init__.pyi +++ b/src/bocpy/__init__.pyi @@ -7,6 +7,10 @@ TIMEOUT: str REMOVED: object """Sentinel returned by a ``notice_update`` fn to delete the entry.""" +WORKER_COUNT: int +"""Default worker-pool size used when :func:`start` is called without an +explicit ``workers`` argument (CPU count - 1, minimum 1).""" + def drain(tags: Union[str, Sequence[str]]) -> None: """Drain all messages associated with one or more tags. @@ -514,33 +518,7 @@ def notice_read(key: str, default: Any = None) -> Any: """ -def noticeboard_version() -> int: - """Return the current noticeboard version counter. - - The counter is incremented every time the noticeboard is - successfully written, updated, or cleared. 
Two reads returning the - same value mean no commit happened between them; a strictly larger - value means at least one commit happened. - - The counter is global (across all threads and interpreters) and - monotonic. Useful as a *hint* for detecting noticeboard changes - without taking a full snapshot. - - .. note:: - - This is *not* a synchronization primitive. Because - :func:`notice_write`, :func:`notice_update`, and - :func:`notice_delete` are fire-and-forget, the version may not - have advanced yet when a behavior that depends on a write - observes the noticeboard. For strict read-your-writes ordering, - use :func:`notice_sync`. - - :return: The current noticeboard version. - :rtype: int - """ - - -def notice_sync(timeout: Optional[float] = 30.0) -> int: +def notice_sync(timeout: Optional[float] = 30.0) -> None: """Block until the caller's prior noticeboard mutations are committed. Because :func:`notice_write`, :func:`notice_update`, and @@ -560,8 +538,6 @@ def notice_sync(timeout: Optional[float] = 30.0) -> int: :raises TimeoutError: If the barrier does not complete within *timeout* seconds. :raises RuntimeError: If the runtime is not started. - :return: The :func:`noticeboard_version` after the flush. - :rtype: int """ @@ -640,6 +616,21 @@ def when(*cowns): the result of executing the behavior. This :class:`Cown` can be used for further coordination. + Decorators **below** ``@when`` compose with the behavior body and run + on the worker (e.g. ``@when(x) @my_decorator def f(x): ...``). + Decorators **above** ``@when`` are not supported and will raise a + ``SyntaxError`` at transpile time. ``async def`` functions are also + rejected — there is no event loop on workers to drive coroutines. + ``@staticmethod`` / ``@classmethod`` / ``@property`` below ``@when`` + are also rejected because the generated behavior runs as a + module-level function, where these descriptors are not callable. + + .. note:: + + The transpiler matches ``@when`` by literal name. 
Aliasing the + import (``from bocpy import when as boc_when``) is not + supported — the rewrite will not fire and the worker will fail. + :param cowns: Zero or more :class:`Cown` objects or ``list[Cown]`` groups to acquire before running the decorated function. Each argument becomes one parameter of the decorated function: a single @@ -679,3 +670,34 @@ def whencall(thunk: str, args: list[Union[Cown, list[Cown]]], captures: list[Any :return: A :class:`Cown` that will hold the behavior's return value. :rtype: Cown """ + + +def get_include() -> str: + """Return the absolute path to the bocpy public C header root. + + Use the returned path as an additional ``include_dirs`` entry on a + downstream :class:`setuptools.Extension` so its translation units + can ``#include <bocpy/bocpy.h>``. The directory contains a single + ``bocpy/`` subdirectory holding the public ABI surface; bocpy's + private headers are not exposed. + + :return: Absolute filesystem path to the include root (the parent + of the ``bocpy/`` subdirectory containing ``bocpy.h`` and + ``xidata.h``). + :rtype: str + """ + + +def get_sources() -> list[str]: + """Return platform-specific extra C sources for downstream extensions. + + On Windows the returned list contains the absolute path to + ``bocpy_msvc.c``, which provides MSVC out-of-line bodies for the + atomic ops declared in ``<bocpy/bocpy.h>``. On non-Windows + platforms the list is empty (``<bocpy/bocpy.h>`` provides + everything). + + :return: A list of absolute paths to add to a downstream + :class:`setuptools.Extension`'s ``sources=`` list. 
+ :rtype: list[str] + """ diff --git a/src/bocpy/_core.c b/src/bocpy/_core.c index 97019f5..705413e 100644 --- a/src/bocpy/_core.c +++ b/src/bocpy/_core.c @@ -1,12 +1,12 @@ #define PY_SSIZE_T_CLEAN -#include "compat.h" -#include "cown.h" -#include "noticeboard.h" -#include "sched.h" -#include "tags.h" -#include "terminator.h" -#include "xidata.h" +#include "boc_compat.h" +#include "boc_cown.h" +#include "boc_noticeboard.h" +#include "boc_sched.h" +#include "boc_tags.h" +#include "boc_terminator.h" +#include <bocpy/xidata.h> // Forward declaration — BOCQueue is defined below. typedef struct boc_queue BOCQueue; @@ -46,7 +46,6 @@ static inline void boc_park_wait(BOCQueue *q); const struct timespec SLEEP_TS = {0, 1000}; const char *BOC_TIMEOUT = "__timeout__"; const int BOC_CAPACITY = 1024 * 16; -const PY_INT64_T NO_OWNER = -2; atomic_int_least64_t BOC_COUNT = 0; atomic_int_least64_t BOC_COWN_COUNT = 0; @@ -125,7 +124,7 @@ typedef struct boc_queue { // boc_enqueue / boc_dequeue. Read by `_core.queue_stats()`. Grouped // and padded so they sit on their own cacheline and do not // false-share with the hot head/tail/state above. Typed via - // `compat.h` so the build works on MSVC (which has no `_Atomic`). + // `boc_compat.h` so the build works on MSVC (which has no `_Atomic`). /// @brief CAS retries observed by enqueuers contending on @c tail. boc_atomic_u64_t enqueue_cas_retries; /// @brief CAS retries observed by dequeuers contending on @c head. boc_atomic_u64_t dequeue_cas_retries; @@ -287,14 +286,6 @@ static void boc_ref_tracking(bool is_cown, int_least64_t delta) { #define BOC_REF_TRACKING_REPORT(...) #endif -/// @brief Convenience method to obtain the interpreter ID -/// @return the ID of the currently running interpreter -static inline PY_INT64_T get_interpid() { - PyThreadState *ts = PyThreadState_GET(); - PyInterpreterState *is = PyThreadState_GetInterpreter(ts); - return PyInterpreterState_GetID(is); -} - /// @brief Allocates a new MPSC RecycleQueue. 
/// @details This will add it to the queue list and then return the reference. /// @return A new RecycleQueue @@ -614,23 +605,6 @@ static PyObject *_core_noticeboard_cache_clear(PyObject *self, Py_RETURN_NONE; } -/// @brief Return the current noticeboard version counter -/// @details The counter is incremented under @ref Noticeboard::mutex on -/// every successful @c notice_write, @c notice_delete, or -/// @c noticeboard_clear. Read with sequentially-consistent semantics. -/// Two reads returning the same value mean no commit happened between -/// them; a strictly larger value means at least one commit happened. -/// Useful for detecting noticeboard changes without taking a full -/// snapshot. -/// @param self The module (unused) -/// @param args Unused -/// @return A Python int with the current noticeboard version -static PyObject *_core_noticeboard_version(PyObject *self, - PyObject *Py_UNUSED(args)) { - BOC_STATE_SET(self); - return PyLong_FromLongLong((long long)noticeboard_version()); -} - /// @brief Register the calling thread as the noticeboard mutator thread /// @details Must be called from the noticeboard thread before it processes /// any noticeboard mutation messages. Subsequent calls to @@ -1001,11 +975,11 @@ static void BOCRecycleQueue_enqueue(BOCRecycleQueue *queue, XIDATA_T *xidata); // release hot path costs measurable throughput. Mirror CPython's // Py_INCREF (inline header macro) vs _Py_IncRef (out-of-line ABI export) // pattern: keep `static inline` bodies as the in-TU implementation, -// expose extern wrappers under the names declared in `cown.h` for -// noticeboard.c, and override the macros from cown.h to bind locally to +// expose extern wrappers under the names declared in `boc_cown.h` for +// boc_noticeboard.c, and override the macros from boc_cown.h to bind locally to // the inline versions. 
The one earlier callsite (the write_direct error // rollback above this point) is on an error path and stays bound to the -// extern wrapper from cown.h — not hot. +// extern wrapper from boc_cown.h — not hot. static inline int_least64_t cown_decref_inline(BOCCown *cown) { int_least64_t rc = atomic_fetch_add(&cown->rc, -1) - 1; @@ -1023,7 +997,7 @@ static inline int_least64_t cown_decref_inline(BOCCown *cown) { // we can clear the object and recycle the xidata if (cown->value != NULL) { - assert(cown->owner == get_interpid()); + assert(cown->owner == bocpy_interpid()); Py_CLEAR(cown->value); } @@ -1036,7 +1010,7 @@ static inline int_least64_t cown_decref_inline(BOCCown *cown) { return 0; } -/// @brief Out-of-line export consumed by other TUs (see @ref cown.h). +/// @brief Out-of-line export consumed by other TUs (see @ref boc_cown.h). int_least64_t cown_decref(BOCCown *cown) { return cown_decref_inline(cown); } #define COWN_WEAK_DECREF(c) cown_weak_decref(c) @@ -1051,7 +1025,7 @@ static inline int_least64_t cown_incref_inline(BOCCown *cown) { return rc; } -/// @brief Out-of-line export consumed by other TUs (see @ref cown.h). +/// @brief Out-of-line export consumed by other TUs (see @ref boc_cown.h). 
int_least64_t cown_incref(BOCCown *cown) { return cown_incref_inline(cown); } // Rebind COWN_INCREF / COWN_DECREF to the inline forms so every @@ -1126,7 +1100,7 @@ static BOCCown *BOCCown_new(PyObject *value) { atomic_store(&cown->weak_rc, 1); cown_set_value(cown, value); assert(cown->value != NULL); - atomic_store(&cown->owner, get_interpid()); + atomic_store(&cown->owner, bocpy_interpid()); PRINTDBG("BOCCown_new(cid=%" PRIdLEAST64 ", value=", cown->id); PRINTOBJDBG(value); PRINTFDBG(")\n"); @@ -1383,7 +1357,7 @@ static int CownCapsule_init(PyObject *op, PyObject *args, /// @param set_error Whether to set an error message /// @return whether the currently running interpreter has acquired the cown static bool cown_check_acquired(BOCCown *cown, bool set_error) { - PY_INT64_T current_id = get_interpid(); + PY_INT64_T current_id = bocpy_interpid(); if (current_id != atomic_load(&cown->owner)) { if (set_error) { PyErr_SetString(PyExc_RuntimeError, @@ -1474,17 +1448,17 @@ static PyObject *CownCapsule_acquired(PyObject *op, /// @brief Attempts to acquire the cown /// @note On failure, the cown's owner is restored to its prior value: either -/// NO_OWNER (if deserialisation failed after the CAS succeeded) or the actual -/// owning interpreter (if the CAS itself failed). Callers can therefore rely -/// on the invariant that a -1 return never leaves the cown in a half-acquired -/// (owner=me, value=NULL, xidata non-NULL) state. This is required by the -/// worker-side recovery arm in `worker.run_behavior`, which calls -/// `behavior.release()` after an acquire failure. +/// BOCPY_NO_OWNER (if deserialisation failed after the CAS succeeded) or the +/// actual owning interpreter (if the CAS itself failed). Callers can therefore +/// rely on the invariant that a -1 return never leaves the cown in a +/// half-acquired (owner=me, value=NULL, xidata non-NULL) state. 
This is +/// required by the worker-side recovery arm in `worker.run_behavior`, which +/// calls `behavior.release()` after an acquire failure. /// @param cown The cown to acquire /// @return -1 if failure, 0 if success static int cown_acquire(BOCCown *cown) { - int_least64_t expected = NO_OWNER; - int_least64_t desired = get_interpid(); + int_least64_t expected = BOCPY_NO_OWNER; + int_least64_t desired = bocpy_interpid(); if (!atomic_compare_exchange_strong(&cown->owner, &expected, desired)) { if (expected == desired) { // already acquired by this interpreter @@ -1503,13 +1477,13 @@ static int cown_acquire(BOCCown *cown) { cown->value = xidata_to_object(cown->xidata, cown->pickled); if (cown->value == NULL) { - // Deserialisation failed. We CAS'd owner from NO_OWNER to desired above, - // so we must roll it back; otherwise the cown is permanently stuck in a - // (owner=me, value=NULL, xidata non-NULL) half-acquired state and any + // Deserialisation failed. We CAS'd owner from BOCPY_NO_OWNER to desired + // above, so we must roll it back; otherwise the cown is permanently stuck + // in a (owner=me, value=NULL, xidata non-NULL) half-acquired state and any // future acquire from any interpreter (including the worker-side // recovery arm) sees "already acquired by N" instead of being able to // retry. xidata stays in place for a future retry. 
- atomic_store(&cown->owner, (int_least64_t)NO_OWNER); + atomic_store(&cown->owner, (int_least64_t)BOCPY_NO_OWNER); return -1; } @@ -1541,10 +1515,10 @@ static PyObject *CownCapsule_acquire(PyObject *op, PyObject *Py_UNUSED(dummy)) { /// @param cown The cown to release /// @return -1 if error, 0 otherwise static int cown_release(BOCCown *cown) { - int_least64_t expected = get_interpid(); + int_least64_t expected = bocpy_interpid(); int_least64_t owner = atomic_load(&cown->owner); if (owner != expected) { - if (owner == NO_OWNER) { + if (owner == BOCPY_NO_OWNER) { // already released return 0; } @@ -1572,7 +1546,7 @@ static int cown_release(BOCCown *cown) { cown->pickled = Py_IsTrue(pickled); Py_CLEAR(cown->value); - int_least64_t desired = NO_OWNER; + int_least64_t desired = BOCPY_NO_OWNER; if (!atomic_compare_exchange_strong(&cown->owner, &expected, desired)) { // this should never happen PyErr_SetString(PyExc_RuntimeError, @@ -1602,13 +1576,13 @@ static PyObject *CownCapsule_release(PyObject *op, PyObject *Py_UNUSED(dummy)) { } /// @brief Abandons the cown value without serializing it -/// @details Clears the value and resets ownership to NO_OWNER. This is used -/// during worker cleanup to safely discard orphan cowns before the owning +/// @details Clears the value and resets ownership to BOCPY_NO_OWNER. This is +/// used during worker cleanup to safely discard orphan cowns before the owning /// interpreter is destroyed. 
/// @param cown The cown to disown /// @return -1 if error, 0 otherwise static int cown_disown(BOCCown *cown) { - int_least64_t expected = get_interpid(); + int_least64_t expected = bocpy_interpid(); int_least64_t owner = atomic_load(&cown->owner); if (owner != expected) { PyErr_Format(PyExc_RuntimeError, @@ -1623,7 +1597,7 @@ static int cown_disown(BOCCown *cown) { Py_CLEAR(cown->value); - int_least64_t desired = NO_OWNER; + int_least64_t desired = BOCPY_NO_OWNER; if (!atomic_compare_exchange_strong(&cown->owner, &expected, desired)) { PyErr_SetString(PyExc_RuntimeError, "Panic: contention on cown during disown"); @@ -1940,18 +1914,9 @@ static PyObject *_new_cown_object(XIDATA_T *xidata) { /// @brief Initialises an xidata that shares a cown. /// @param tstate the state of the current thread /// @param obj the CownCapsule object -/// @param fallback a fallback xidata method /// @param xidata the xidata object /// @return 0 if successful -static int _cown_shared( -#ifndef BOC_NO_MULTIGIL - PyThreadState *tstate, -#endif - PyObject *obj, XIDATA_T *xidata) { -#ifdef BOC_NO_MULTIGIL - PyThreadState *tstate = PyThreadState_GET(); -#endif - +XIDATA_GETDATA_FUNC(_cown_shared) { CownCapsuleObject *capsule = (CownCapsuleObject *)obj; BOCCown *cown = capsule->cown; @@ -2362,7 +2327,7 @@ static BOCMessage *boc_message_new(PyObject *tag, PyObject *contents) { /// consumer draining -- in practice this only happens for a tag /// where producers vastly outpace consumers. Behaviour dispatch /// does not go through a tag at all (it routes through per-worker -/// queues in @c sched.c). +/// queues in @c boc_sched.c). /// /// On overflow this returns -1 without setting a Python exception; the /// caller (typically @c behavior_resolve_one) reports the error. 
Once @@ -3347,7 +3312,7 @@ static int BehaviorCapsule_init(PyObject *op, PyObject *args, /// /// **Cown-side residue on dispatch failure.** When the count==0 /// transition fires here AND @c boc_sched_dispatch returns -1 -/// (runtime-down sentinel; see @c boc_sched_dispatch in @c sched.c), +/// (runtime-down sentinel; see @c boc_sched_dispatch in @c boc_sched.c), /// the behavior's BOCRequest array has already been linked onto every /// target cown's MCS chain by the link/finish 2PL phases. The /// rollback below DECREFs only the queue-owned BEHAVIOR_INCREF; it @@ -3670,11 +3635,11 @@ static PyObject *BehaviorCapsule_set_exception(PyObject *op, PyObject *args) { /// @brief Mark a never-executed behavior's result Cown with a drop exception. /// @details For behaviors drained during stop() that never had a chance to /// run. The result Cown is in the published-and-released state -/// (owner=NO_OWNER, xidata=set, value=NULL) that ``Cown(None)``'s +/// (owner=BOCPY_NO_OWNER, xidata=set, value=NULL) that ``Cown(None)``'s /// constructor leaves it in. Mirrors the worker exception path /// (``worker.py``: acquire → set_exception → release) but condensed into /// one C call: cown_acquire takes ownership on the main thread, the -/// exception is stored, then cown_release pickles back to NO_OWNER so a +/// exception is stored, then cown_release pickles back to BOCPY_NO_OWNER so a /// caller awaiting ``cown.value`` / ``cown.exception`` after stop() /// observes a clear diagnostic instead of a permanent ``None``. 
/// @param op The BehaviorCapsule object @@ -3951,15 +3916,7 @@ static PyObject *_new_behavior_object(XIDATA_T *xidata) { return (PyObject *)capsule; } -static int _behavior_shared( -#ifndef BOC_NO_MULTIGIL - PyThreadState *tstate, -#endif - PyObject *obj, XIDATA_T *xidata) { -#ifdef BOC_NO_MULTIGIL - PyThreadState *tstate = PyThreadState_GET(); -#endif - +XIDATA_GETDATA_FUNC(_behavior_shared) { BehaviorCapsuleObject *capsule = (BehaviorCapsuleObject *)obj; BOCBehavior *behavior = capsule->behavior; @@ -4592,7 +4549,7 @@ static PyObject *_core_scheduler_runtime_start(PyObject *Py_UNUSED(module), // are zero-initialised so every refcount / cown-array field is the // safe NULL state, and `is_token = 1` discriminates them at the // worker-pop site. Allocation lives here (and not in - // `boc_sched_init`) because `sched.c` deliberately treats + // `boc_sched_init`) because `boc_sched.c` deliberately treats // `BOCBehavior` as opaque. for (Py_ssize_t i = 0; i < (Py_ssize_t)n; ++i) { BOCBehavior *token = (BOCBehavior *)PyMem_RawCalloc(1, sizeof(BOCBehavior)); @@ -4959,9 +4916,6 @@ static PyMethodDef _core_module_methods[] = { {"noticeboard_cache_clear", _core_noticeboard_cache_clear, METH_NOARGS, "noticeboard_cache_clear($module, /)" "\n--\n\nClears the thread-local snapshot cache."}, - {"noticeboard_version", _core_noticeboard_version, METH_NOARGS, - "noticeboard_version($module, /)" - "\n--\n\nReturns the global noticeboard version counter."}, {"set_noticeboard_thread", _core_set_noticeboard_thread, METH_NOARGS, "set_noticeboard_thread($module, /)" "\n--\n\nRegisters the calling thread as the noticeboard mutator " diff --git a/src/bocpy/_internal_test.c b/src/bocpy/_internal_test.c index 32953e9..ed46957 100644 --- a/src/bocpy/_internal_test.c +++ b/src/bocpy/_internal_test.c @@ -14,7 +14,7 @@ /// (`_internal_test_bq.c`). /// /// The module deliberately does NOT link against `_core` or `_math`. 
-/// It links only the units it tests (`compat.c`, `sched.c`) so the +/// It links only the units it tests (`boc_compat.c`, `boc_sched.c`) so the /// test surface stays minimal and there is no sub-interpreter /// machinery in the way of the test threads. diff --git a/src/bocpy/_internal_test_atomics.c b/src/bocpy/_internal_test_atomics.c index 0550285..7f24c6b 100644 --- a/src/bocpy/_internal_test_atomics.c +++ b/src/bocpy/_internal_test_atomics.c @@ -1,7 +1,7 @@ /// @file _internal_test_atomics.c /// @brief Atomics-domain tests for the `bocpy._internal_test` extension. /// -/// Exposes the typed `boc_atomic_*_explicit` API from `compat.h` to +/// Exposes the typed `boc_atomic_*_explicit` API from `boc_compat.h` to /// Python so `test/test_compat_atomics.py` can drive the inline /// atomic primitives from real Python threads (which give us true /// parallelism either via free-threaded CPython or via @@ -18,7 +18,7 @@ #include #include -#include "compat.h" +#include "boc_compat.h" // Single shared block of atomic slots, accessed by every test entry // point through a PyCapsule handle. Cacheline-sized (64B) to avoid diff --git a/src/bocpy/_internal_test_bq.c b/src/bocpy/_internal_test_bq.c index 64246c6..59bcde0 100644 --- a/src/bocpy/_internal_test_bq.c +++ b/src/bocpy/_internal_test_bq.c @@ -2,7 +2,7 @@ /// @brief BQ-domain (Verona MPMC behaviour queue) tests for /// `bocpy._internal_test`. /// -/// Exposes the `boc_bq_*` API from `sched.h` to Python so +/// Exposes the `boc_bq_*` API from `boc_sched.h` to Python so /// `test/test_internal_mpmcq.py` can stress the queue from multiple /// real threads. Methods are registered on the `bocpy._internal_test` /// module under the `bq_*` prefix. 
@@ -21,8 +21,8 @@ #include #include -#include "compat.h" -#include "sched.h" +#include "boc_compat.h" +#include "boc_sched.h" // --------------------------------------------------------------------------- // Node and queue capsule helpers @@ -213,7 +213,7 @@ static PyObject *bq_dequeue_all(PyObject *Py_UNUSED(self), PyObject *args) { } // Walk the segment via segment_take_one. take_one returns NULL for // three reasons (mpmcq.h:67-89, also documented at - // sched.c::boc_sched_steal): + // boc_sched.c::boc_sched_steal): // 1. fully empty (impossible here — guarded above), // 2. singleton segment (end == &start->next_in_queue) — append // start as the tail and return, diff --git a/src/bocpy/_internal_test_wsq.c b/src/bocpy/_internal_test_wsq.c index e6576d4..fba715a 100644 --- a/src/bocpy/_internal_test_wsq.c +++ b/src/bocpy/_internal_test_wsq.c @@ -2,7 +2,7 @@ /// @brief WSQ-domain (work-stealing queue cursor + spread) tests for /// `bocpy._internal_test`. /// -/// Exposes the inline `boc_wsq_*` helpers from `sched.h` so +/// Exposes the inline `boc_wsq_*` helpers from `boc_sched.h` so /// `test/test_internal_wsq.py` can verify the cursor-wrap arithmetic /// and the `enqueue_spread` distribution invariant directly, without /// going through the full scheduler runtime. 
@@ -28,8 +28,8 @@ #include #include -#include "compat.h" -#include "sched.h" +#include "boc_compat.h" +#include "boc_sched.h" // --------------------------------------------------------------------------- // Worker fixture capsule diff --git a/src/bocpy/_math.c b/src/bocpy/_math.c index 058b105..2468c42 100644 --- a/src/bocpy/_math.c +++ b/src/bocpy/_math.c @@ -7,21 +7,12 @@ #include #include -#include "compat.h" -#include "xidata.h" +#include #ifndef _WIN32 #include #endif -/// @brief Convenience method to obtain the interpreter ID -/// @return the ID of the currently running interpreter -static inline PY_INT64_T get_interpid() { - PyThreadState *ts = PyThreadState_GET(); - PyInterpreterState *is = PyThreadState_GetInterpreter(ts); - return PyInterpreterState_GetID(is); -} - /// @brief Underlying C-based matrix implementation typedef struct boc_matrix_impl { /// @brief The raw double values of the matrix @@ -119,7 +110,7 @@ static matrix_impl *impl_new(size_t rows, size_t columns) { matrix->rows = rows; matrix->columns = columns; - atomic_store(&matrix->owner, get_interpid()); + atomic_store(&matrix->owner, bocpy_interpid()); atomic_store(&matrix->rc, 0); if (update_row_ptrs(matrix) < 0) { @@ -669,7 +660,7 @@ static matrix_impl *impl_new_from_sequence(PyObject *sequence, bool as_column) { } static bool impl_check_acquired(matrix_impl *matrix, bool set_error) { - PY_INT64_T current_id = get_interpid(); + PY_INT64_T current_id = bocpy_interpid(); if (current_id != atomic_load(&matrix->owner)) { if (set_error) { PyErr_SetString(PyExc_RuntimeError, @@ -2386,8 +2377,6 @@ static PyType_Spec Matrix_Spec = {.name = "bocpy._math.Matrix", Py_TPFLAGS_IMMUTABLETYPE, .slots = Matrix_slots}; -const PY_INT64_T NO_OWNER = -2; - /// @brief Wraps a matrix sent from another interpreter. 
/// @details The underlying C matrix, when it arrives at another interpreter, is /// wrapped by this method in a MatrixObject so that it can be used from that @@ -2398,8 +2387,8 @@ static PyObject *_new_matrix_object(XIDATA_T *xidata) { matrix_impl *impl = (matrix_impl *)xidata->data; // take ownership of the C matrix - int_least64_t expected = NO_OWNER; - int_least64_t desired = get_interpid(); + int_least64_t expected = BOCPY_NO_OWNER; + int_least64_t desired = bocpy_interpid(); if (!atomic_compare_exchange_strong(&impl->owner, &expected, desired)) { PyErr_Format(PyExc_RuntimeError, "%" PRIdLEAST64 @@ -2415,7 +2404,7 @@ static PyObject *_new_matrix_object(XIDATA_T *xidata) { if (matrix == NULL) { // attempt to roll back the ownership change int_least64_t rollback_expected = desired; - desired = NO_OWNER; + desired = BOCPY_NO_OWNER; atomic_compare_exchange_strong(&impl->owner, &rollback_expected, desired); return NULL; } @@ -2428,30 +2417,22 @@ static PyObject *_new_matrix_object(XIDATA_T *xidata) { } /// @brief Prepare the underlying C matrix for sharing with another interpreter. 
-/// @param tstate The thread state of the current interpreter (> 3.11) +/// @param tstate The thread state of the current interpreter /// @param obj The MatrixObject instance /// @param xidata An empty xidata package -/// @return 0 if successful, < o on error -static int _matrix_shared( -#ifndef BOC_NO_MULTIGIL - PyThreadState *tstate, -#endif - PyObject *obj, XIDATA_T *xidata) { -#ifdef BOC_NO_MULTIGIL - PyThreadState *tstate = PyThreadState_GET(); -#endif - +/// @return 0 if successful, < 0 on error +XIDATA_GETDATA_FUNC(_matrix_shared) { MatrixObject *matrix = (MatrixObject *)obj; matrix_impl *impl = matrix->impl; // put the underlying C matrix in an ownerless state during transport - int_least64_t expected = get_interpid(); - int_least64_t desired = NO_OWNER; + int_least64_t expected = bocpy_interpid(); + int_least64_t desired = BOCPY_NO_OWNER; if (!atomic_compare_exchange_strong(&impl->owner, &expected, desired)) { PyErr_Format(PyExc_RuntimeError, "%" PRIdLEAST64 " cannot release matrix (acquired by %" PRIdLEAST64 ")", - get_interpid(), expected); + bocpy_interpid(), expected); return -1; } diff --git a/src/bocpy/behaviors.py b/src/bocpy/behaviors.py index aa902e4..8f41aa6 100644 --- a/src/bocpy/behaviors.py +++ b/src/bocpy/behaviors.py @@ -240,14 +240,39 @@ def __init__(self, num_workers: Optional[int]): self.final_cowns: tuple[Cown, ...] = () self.bid = 0 - def lookup_behavior(self, line_number: int) -> BehaviorInfo: - """Resolve behavior info from a source line number.""" + def lookup_behavior(self, line_number: int, max_decorator_stack=32) -> BehaviorInfo: + """Resolve behavior info from a source line number. + + ``behavior_lookup`` is keyed by the line of the ``@when(...)`` + decorator as it appears in the AST. 
The runtime frame line we + get from ``inspect.currentframe().f_back.f_lineno`` depends on + the CPython version: + + - Python >= 3.11 attributes each decorator's application to + that decorator's own source line, so the frame line equals + the lookup key. + - Python <= 3.10 attributes all decorator applications on a + ``def`` to the ``def`` line itself, so the frame line is + ``def_line``, which is ``len(decorators)`` greater than the + ``@when`` decorator's line. + + Walking from ``line_number`` downward to the largest key + ``<= line_number`` covers both cases for any decorator stack + height. We bound the walk so a stale frame deep in unrelated + code cannot silently mis-resolve to a distant earlier + behavior. + """ if line_number in self.behavior_lookup: return self.behavior_lookup[line_number] - # 3.10: Might be off by one - if line_number - 1 in self.behavior_lookup: - return self.behavior_lookup[line_number - 1] + # Bound the backward search: a decorator stack of depth N + # leaves the @when line N below the def line in 3.10, but + # realistic stacks are tiny. 32 is plenty and still small + # enough to catch a stale-frame mis-resolution before it + # silently returns the wrong behavior. + for offset in range(1, max_decorator_stack + 1): + if line_number - offset in self.behavior_lookup: + return self.behavior_lookup[line_number - offset] return None @@ -1117,6 +1142,10 @@ def when(*cowns): The function itself will be replaced by a Cown which will hold the result of executing the behavior. This Cown can be used for further coordination. + + Note: the transpiler matches ``@when`` by literal name. Aliasing + the import (``from bocpy import when as boc_when``) is not + supported -- the rewrite will not fire and the worker will fail. 
""" def when_factory(func): @@ -1464,35 +1493,7 @@ def notice_read(key: str, default: Any = None) -> Any: return _core.noticeboard_snapshot().get(key, default) -def noticeboard_version() -> int: - """Return the current noticeboard version counter. - - The counter is incremented every time the noticeboard is - successfully written, updated, or cleared. Two reads returning the - same value mean no commit happened between them; a strictly larger - value means at least one commit happened. - - The counter is global (across all threads and interpreters) and - monotonic. Useful as a *hint* for detecting noticeboard changes - without taking a full snapshot — for example, polling for any - change before deciding whether to refresh a derived view. - - .. note:: - - This is *not* a synchronization primitive. Because - :func:`notice_write`, :func:`notice_update`, and - :func:`notice_delete` are fire-and-forget, the version may not - have advanced yet when a behavior that depends on a write - observes the noticeboard. For strict read-your-writes ordering, - use :func:`notice_sync`. - - :return: The current noticeboard version. - :rtype: int - """ - return _core.noticeboard_version() - - -def notice_sync(timeout: Optional[float] = 30.0) -> int: +def notice_sync(timeout: Optional[float] = 30.0) -> None: """Block until the caller's prior noticeboard mutations are committed. Because :func:`notice_write`, :func:`notice_update`, and @@ -1514,8 +1515,6 @@ def notice_sync(timeout: Optional[float] = 30.0) -> int: :raises TimeoutError: If the noticeboard thread does not drain the caller's sentinel within *timeout* seconds. :raises RuntimeError: If the runtime is not started. - :return: The :func:`noticeboard_version` after the flush. 
- :rtype: int """ if _core.is_primary() and BEHAVIORS is None: raise RuntimeError("cannot notice_sync before the runtime is started") @@ -1523,4 +1522,3 @@ def notice_sync(timeout: Optional[float] = 30.0) -> int: _core.send("boc_noticeboard", ("sync", seq)) if not _core.notice_sync_wait(seq, timeout): raise TimeoutError(f"notice_sync({timeout}s) timed out waiting for seq={seq}") - return _core.noticeboard_version() diff --git a/src/bocpy/compat.c b/src/bocpy/boc_compat.c similarity index 90% rename from src/bocpy/compat.c rename to src/bocpy/boc_compat.c index 312edaa..daf1990 100644 --- a/src/bocpy/compat.c +++ b/src/bocpy/boc_compat.c @@ -1,16 +1,21 @@ -/// @file compat.c +/// @file boc_compat.c /// @brief Out-of-line definitions for the cross-platform shims declared in -/// `compat.h`. +/// `boc_compat.h`. /// /// On POSIX the C11 `` machinery is fully header-only, so this /// translation unit is essentially empty there. On MSVC the `atomic_*` /// functions on `int_least64_t` are kept as out-of-line definitions /// (linked into `_core.o` and `_math.o` from `compat.o`). -#include "compat.h" +#include "boc_compat.h" #ifdef _WIN32 +/* The bytes between @atomic-bodies-begin and @atomic-bodies-end must + * be byte-identical to the marker region in + * src/bocpy/include/bocpy/bocpy_msvc.c (enforced by + * test_msvc_bodies_in_lockstep). 
*/ +/* @atomic-bodies-begin */ int_least64_t atomic_fetch_add(atomic_int_least64_t *ptr, int_least64_t value) { #if defined(_M_IX86) int_least64_t old = *ptr; @@ -25,14 +30,6 @@ int_least64_t atomic_fetch_add(atomic_int_least64_t *ptr, int_least64_t value) { #endif } -int_least64_t atomic_fetch_sub(atomic_int_least64_t *ptr, int_least64_t value) { -#if defined(_M_IX86) - return atomic_fetch_add(ptr, -value); -#else - return InterlockedExchangeAdd64(ptr, -value); -#endif -} - bool atomic_compare_exchange_strong(atomic_int_least64_t *ptr, atomic_int_least64_t *expected, int_least64_t desired) { @@ -50,35 +47,49 @@ int_least64_t atomic_load(atomic_int_least64_t *ptr) { #if defined(_M_IX86) return InterlockedCompareExchange64(ptr, 0, 0); #else - return *ptr; + /* Seq-cst load. Plain `*ptr` is acquire/release at best on x64 + * and gives no ordering on ARM64; InterlockedOr64(ptr, 0) is a + * full barrier on every supported MSVC target. */ + return InterlockedOr64(ptr, 0); #endif } -int_least64_t atomic_exchange(atomic_int_least64_t *ptr, int_least64_t value) { +void atomic_store(atomic_int_least64_t *ptr, int_least64_t value) { #if defined(_M_IX86) int_least64_t old = *ptr; for (;;) { int_least64_t prev = InterlockedCompareExchange64(ptr, value, old); if (prev == old) - return old; + return; old = prev; } #else - return InterlockedExchange64(ptr, value); + /* Seq-cst store. Plain `*ptr = value` does not forbid StoreLoad + * reordering on x64/ARM64; InterlockedExchange64 is a full barrier. 
*/ + (void)InterlockedExchange64(ptr, value); #endif } +/* @atomic-bodies-end */ -void atomic_store(atomic_int_least64_t *ptr, int_least64_t value) { +int_least64_t atomic_fetch_sub(atomic_int_least64_t *ptr, int_least64_t value) { +#if defined(_M_IX86) + return atomic_fetch_add(ptr, -value); +#else + return InterlockedExchangeAdd64(ptr, -value); +#endif +} + +int_least64_t atomic_exchange(atomic_int_least64_t *ptr, int_least64_t value) { #if defined(_M_IX86) int_least64_t old = *ptr; for (;;) { int_least64_t prev = InterlockedCompareExchange64(ptr, value, old); if (prev == old) - return; + return old; old = prev; } #else - *ptr = value; + return InterlockedExchange64(ptr, value); #endif } @@ -90,7 +101,7 @@ void thrd_sleep(const struct timespec *duration, struct timespec *remaining) { } // --------------------------------------------------------------------------- -// Physical CPU detection (Windows arm). See compat.h for contract. +// Physical CPU detection (Windows arm). See boc_compat.h for contract. // --------------------------------------------------------------------------- int boc_physical_cpu_count(void) { @@ -128,7 +139,7 @@ int boc_physical_cpu_count(void) { #elif defined(__APPLE__) // --------------------------------------------------------------------------- -// Physical CPU detection (macOS arm). See compat.h for contract. +// Physical CPU detection (macOS arm). See boc_compat.h for contract. // --------------------------------------------------------------------------- #include @@ -152,7 +163,7 @@ int boc_physical_cpu_count(void) { #else // assume Linux / glibc-compatible // --------------------------------------------------------------------------- -// Physical CPU detection (Linux arm). See compat.h for contract. +// Physical CPU detection (Linux arm). See boc_compat.h for contract. 
// --------------------------------------------------------------------------- #include diff --git a/src/bocpy/compat.h b/src/bocpy/boc_compat.h similarity index 99% rename from src/bocpy/compat.h rename to src/bocpy/boc_compat.h index 0b22207..c98b381 100644 --- a/src/bocpy/compat.h +++ b/src/bocpy/boc_compat.h @@ -1,14 +1,14 @@ -/// @file compat.h +/// @file boc_compat.h /// @brief Cross-platform portability shims for bocpy C extensions. /// /// Centralises the platform-specific atomic, mutex, condition-variable, /// thread-local, sleep, and monotonic-time primitives used by `_core.c`, -/// `_math.c`, and `sched.c`. +/// `_math.c`, and `boc_sched.c`. /// /// **Linkage:** all heavy-weight platform primitives are exposed as /// `static inline` wrappers around the platform's native API, except for /// the MSVC `atomic_*` functions on `int_least64_t` (kept as out-of-line -/// definitions in `compat.c` to preserve their original symbol shape). +/// definitions in `boc_compat.c` to preserve their original symbol shape). /// /// Also exposes the `boc_atomic_*_explicit` typed atomics API that the /// work-stealing scheduler depends on for ARM64-correct memory ordering diff --git a/src/bocpy/cown.h b/src/bocpy/boc_cown.h similarity index 93% rename from src/bocpy/cown.h rename to src/bocpy/boc_cown.h index 5ff10ec..cfb1a74 100644 --- a/src/bocpy/cown.h +++ b/src/bocpy/boc_cown.h @@ -1,8 +1,8 @@ -/// @file cown.h +/// @file boc_cown.h /// @brief Minimal cross-TU surface for the cown refcount API. /// /// This header exists so that translation units other than `_core.c` -/// (for now: `noticeboard.c`) can hold strong references to a +/// (for now: `boc_noticeboard.c`) can hold strong references to a /// `BOCCown` without needing to know its layout. The full struct /// definition and the implementation of @ref cown_incref / @ref /// cown_decref live in `_core.c`. 
The per-call cost of the indirect diff --git a/src/bocpy/noticeboard.c b/src/bocpy/boc_noticeboard.c similarity index 98% rename from src/bocpy/noticeboard.c rename to src/bocpy/boc_noticeboard.c index 77fe629..7c6f4be 100644 --- a/src/bocpy/noticeboard.c +++ b/src/bocpy/boc_noticeboard.c @@ -1,7 +1,7 @@ -/// @file noticeboard.c +/// @file boc_noticeboard.c /// @brief Implementation of the global noticeboard subsystem. /// -/// See @ref noticeboard.h for the public API and the thread/PyErr +/// See @ref boc_noticeboard.h for the public API and the thread/PyErr /// discipline. This TU owns: /// /// - The fixed-capacity entry table @c NB plus its mutex. @@ -12,7 +12,7 @@ /// - The notice_sync barrier primitives (@c NB_SYNC_REQUESTED, /// @c NB_SYNC_PROCESSED, @c NB_SYNC_MUTEX, @c NB_SYNC_COND). -#include "noticeboard.h" +#include "boc_noticeboard.h" #include @@ -133,7 +133,7 @@ void noticeboard_destroy(void) { mtx_destroy(&NB.mutex); // NB_SYNC_MUTEX / NB_SYNC_COND are SRWLOCK / CONDITION_VARIABLE on // Windows (no destroy needed) and pthread / mtx_t on POSIX (handled - // by mtx_destroy / cnd_destroy in compat.h shims). The original + // by mtx_destroy / cnd_destroy in boc_compat.h shims). The original // _core.c module-free path never destroyed these; preserve that // behaviour to keep the symbol-additions-only invariant. } @@ -190,8 +190,6 @@ void noticeboard_drop_local_cache(void) { void noticeboard_cache_clear_for_behavior(void) { NB_VERSION_CHECKED = false; } -int_least64_t noticeboard_version(void) { return atomic_load(&NB_VERSION); } - // --------------------------------------------------------------------------- // Pin helper. 
// --------------------------------------------------------------------------- diff --git a/src/bocpy/noticeboard.h b/src/bocpy/boc_noticeboard.h similarity index 94% rename from src/bocpy/noticeboard.h rename to src/bocpy/boc_noticeboard.h index 0d097c6..1352cea 100644 --- a/src/bocpy/noticeboard.h +++ b/src/bocpy/boc_noticeboard.h @@ -1,4 +1,4 @@ -/// @file noticeboard.h +/// @file boc_noticeboard.h /// @brief Public API for the global cross-behavior key-value noticeboard. /// /// The noticeboard is a fixed-capacity table (max @ref NB_MAX_ENTRIES @@ -14,13 +14,13 @@ /// Python-level read-modify-write helpers (e.g. @c notice_update). /// Snapshot reads (@ref noticeboard_snapshot) are unrestricted — /// readers cache the result thread-locally and revalidate against -/// @ref noticeboard_version once per behavior boundary. +/// the version counter once per behavior boundary. /// /// **PyErr discipline.** Functions that interact with the Python C /// API (@ref noticeboard_snapshot, @ref nb_pin_cowns, /// @ref noticeboard_write, @ref noticeboard_delete) set a Python /// exception and return -1 / NULL on failure. Functions that are -/// pure C (@ref noticeboard_clear, @ref noticeboard_version, +/// pure C (@ref noticeboard_clear, /// @ref notice_sync_*) cannot fail. #ifndef BOCPY_NOTICEBOARD_H @@ -32,9 +32,9 @@ #include #include -#include "compat.h" -#include "cown.h" -#include "xidata.h" +#include "boc_compat.h" +#include "boc_cown.h" +#include /// @brief Maximum number of entries the noticeboard can hold. #define NB_MAX_ENTRIES 64 @@ -75,13 +75,10 @@ void noticeboard_drop_local_cache(void); /// @brief Mark the calling thread's cache as needing one version check. /// @details Called by the worker loop at every behavior boundary so /// the next @ref noticeboard_snapshot in this thread does exactly one -/// atomic load against @ref noticeboard_version before reusing the +/// atomic load against the version counter before reusing the /// cached proxy. 
Cheaper than dropping the cache outright. void noticeboard_cache_clear_for_behavior(void); -/// @brief Read the noticeboard's monotonic version counter. -int_least64_t noticeboard_version(void); - /// @brief Walk a Python sequence of integer cown pointers, returning the /// underlying @ref BOCCown array. /// @details Each pointer in @p cowns is interpreted as a raw @@ -125,7 +122,7 @@ int noticeboard_write(const char *key, Py_ssize_t key_len, XIDATA_T *xidata, int noticeboard_delete(const char *key, Py_ssize_t key_len); /// @brief Drop every entry, freeing XIData and pins. -/// @details Bumps @ref noticeboard_version. Cannot fail. +/// @details Bumps the version counter. Cannot fail. void noticeboard_clear(void); /// @brief Build (or reuse) the calling thread's read-only snapshot proxy. diff --git a/src/bocpy/sched.c b/src/bocpy/boc_sched.c similarity index 99% rename from src/bocpy/sched.c rename to src/bocpy/boc_sched.c index 941f061..63545dc 100644 --- a/src/bocpy/sched.c +++ b/src/bocpy/boc_sched.c @@ -1,4 +1,4 @@ -// sched.c — Work-stealing scheduler. +// boc_sched.c — Work-stealing scheduler. // // Owns the per-worker MPMC queues, parking protocol, work-stealing, // and per-worker fairness tokens. @@ -9,7 +9,7 @@ // `incarnation` counter; pause/unpause epoch protocol), and // `core.h` (fairness token). -#include "sched.h" +#include "boc_sched.h" #include #include @@ -195,7 +195,7 @@ bool boc_bq_is_empty(boc_bq_t *q) { // Per-worker scheduler state // =========================================================================== -// The per-worker struct (`boc_sched_worker_t`) is defined in `sched.h` +// The per-worker struct (`boc_sched_worker_t`) is defined in `boc_sched.h` // so dispatch and pop call sites can refer to its fields without an // extra indirection. Cacheline padding and `static_assert`s live with // the type definition. 
@@ -239,12 +239,12 @@ static boc_atomic_u64_t INCARNATION = 0; // Each scheduler-aware thread (worker sub-interpreter, or any other // thread that calls boc_sched_dispatch from a worker context) keeps // its dispatch state in TLS slots rather than in `boc_sched_worker_t` -// fields. The bocpy precedent: this matches `noticeboard.c`'s +// fields. The bocpy precedent: this matches `boc_noticeboard.c`'s // `nb_cache_*` thread-locals. Verona equivalent: the same fields // are members of `SchedulerThread`, which is itself one-per-OS-thread // — TLS is the same effect with one fewer indirection. // -// All slots use the `compat.h` `thread_local` macro (`_Thread_local` +// All slots use the `boc_compat.h` `thread_local` macro (`_Thread_local` // on POSIX, `__declspec(thread)` on MSVC) with the **default** TLS // model. @@ -360,7 +360,7 @@ int boc_sched_init(Py_ssize_t worker_count) { // per-interpreter, so an allocation made in interpreter A would // be invalid (and unfreeable) from interpreter B. The raw // allocator is process-wide and GIL-independent. Zero-init gives - // every counter, every typed atomic slot (compat.h + // every counter, every typed atomic slot (boc_compat.h // `boc_atomic_*_t` are layout-compatible with the underlying // scalar; zero is the well-defined "false" / NULL / 0 state on // every supported platform), and every reserved slot the correct @@ -374,7 +374,7 @@ int boc_sched_init(Py_ssize_t worker_count) { // Per-worker non-trivial initialisation: bq queue, mutex, // condvar, owner-interp placeholder, and the ring-link. - // Mutex and condvar wrappers come from `compat.h` (pthread on + // Mutex and condvar wrappers come from `boc_compat.h` (pthread on // POSIX, SRWLock / CONDITION_VARIABLE on MSVC). 
for (Py_ssize_t i = 0; i < worker_count; ++i) { boc_sched_worker_t *w = &WORKERS[i]; @@ -1300,7 +1300,7 @@ static boc_bq_node_t *boc_sched_steal(boc_sched_worker_t *self) { } // Brief sleep so two concurrently-failing thieves do not pin - // their cores. Using `boc_sleep_ns` (compat.h) rather than + // their cores. Using `boc_sleep_ns` (boc_compat.h) rather than // `sched_yield` because we want a hard backoff: a yield is // ineffective when there is no other runnable thread (the // case during quiescence). diff --git a/src/bocpy/sched.h b/src/bocpy/boc_sched.h similarity index 99% rename from src/bocpy/sched.h rename to src/bocpy/boc_sched.h index 1b4b0a2..10e2898 100644 --- a/src/bocpy/sched.h +++ b/src/bocpy/boc_sched.h @@ -1,4 +1,4 @@ -/// @file sched.h +/// @file boc_sched.h /// @brief Work-stealing scheduler: per-worker MPMC queues, parking, stats. /// /// This translation unit owns: @@ -22,7 +22,7 @@ #include -#include "compat.h" +#include "boc_compat.h" // --------------------------------------------------------------------------- // Verona MPMC behaviour queue (`boc_bq_*`) @@ -47,7 +47,7 @@ typedef struct boc_bq_node { /// @brief Intrusive forward link, payload type /// `struct boc_bq_node *` stored in a `boc_atomic_ptr_t` slot for - /// MSVC compatibility (see `compat.h`). + /// MSVC compatibility (see `boc_compat.h`). /// @details Reads use @c BOC_MO_ACQUIRE (mpmcq.h:78,145); writes /// use @c BOC_MO_RELEASE (mpmcq.h:113,174) or @c BOC_MO_RELAXED /// (mpmcq.h:103,131) per Verona. @@ -388,10 +388,10 @@ typedef struct boc_sched_stats_atomic { // // Holds the per-worker MPMC queue, the fairness-token slot // (`token_work` / `should_steal_for_fairness`), the parking-protocol -// `cv_mu` / `cv` pair (`compat.h` `BOCMutex` / `BOCCond`, pthread on +// `cv_mu` / `cv` pair (`boc_compat.h` `BOCMutex` / `BOCCond`, pthread on // POSIX, SRWLock on MSVC), the ring-link `next_in_ring` pointer, the // per-worker counter block, and a reserved terminator-delta slot. 
-// Atomics use the typed `compat.h` shim (`boc_atomic_*_t` + +// Atomics use the typed `boc_compat.h` shim (`boc_atomic_*_t` + // `boc_atomic_*_explicit`) so the layout compiles identically on POSIX // and MSVC ARM64. // @@ -468,7 +468,7 @@ struct boc_sched_worker_payload_ { /// - @c owner_interp_id: sub-interpreter id of the worker that /// called `boc_sched_worker_register` for this slot. Used for /// wrong-thread asserts in `pop`. -/// - @c cv_mu / @c cv: parking-protocol mutex/condvar (compat.h +/// - @c cv_mu / @c cv: parking-protocol mutex/condvar (boc_compat.h /// wrappers). /// - @c next_in_ring: forms a circular singly-linked ring over /// @ref boc_sched_worker_count workers; immutable after @@ -657,7 +657,7 @@ Py_ssize_t boc_sched_worker_count(void); /// @details Returns a non-owning pointer into the @c WORKERS array /// for use with the @c boc_bq_* primitives (e.g. orphan-drain on /// shutdown calls @c boc_bq_dequeue(&boc_sched_worker_at(i)->q) -/// to walk each per-task queue from outside @c sched.c). The +/// to walk each per-task queue from outside @c boc_sched.c). The /// returned pointer is invalidated by @ref boc_sched_shutdown. /// @param worker_index Zero-based worker slot. /// @return Borrowed worker pointer, or NULL if @p worker_index is @@ -741,7 +741,7 @@ Py_ssize_t boc_sched_worker_register(void); /// owning behaviour via the standard `container_of` arithmetic /// (see @c BEHAVIOR_FROM_BQ_NODE in @c _core.c). Keeping the /// scheduler decoupled from the @c BOCBehavior layout avoids a -/// circular header dependency between @c sched.h and +/// circular header dependency between @c boc_sched.h and /// @c _core.c's behaviour struct. 
boc_bq_node_t *boc_sched_worker_pop_slow(boc_sched_worker_t *self); diff --git a/src/bocpy/tags.c b/src/bocpy/boc_tags.c similarity index 95% rename from src/bocpy/tags.c rename to src/bocpy/boc_tags.c index e3bfdd1..c0395c1 100644 --- a/src/bocpy/tags.c +++ b/src/bocpy/boc_tags.c @@ -1,8 +1,8 @@ -/// @file tags.c +/// @file boc_tags.c /// @brief Out-of-line implementations for the message-tag API. /// /// Hot-path operations (incref / decref / disable check) are -/// `static inline` in `tags.h`; this TU houses the cold helpers +/// `static inline` in `boc_tags.h`; this TU houses the cold helpers /// (alloc / free / unicode bridges / comparisons). #define PY_SSIZE_T_CLEAN @@ -10,7 +10,7 @@ #include #include -#include "tags.h" +#include "boc_tags.h" BOCTag *tag_from_PyUnicode(PyObject *unicode, BOCQueue *queue) { if (!PyUnicode_CheckExact(unicode)) { diff --git a/src/bocpy/tags.h b/src/bocpy/boc_tags.h similarity index 98% rename from src/bocpy/tags.h rename to src/bocpy/boc_tags.h index 7bd6e2c..c2dd774 100644 --- a/src/bocpy/tags.h +++ b/src/bocpy/boc_tags.h @@ -1,4 +1,4 @@ -/// @file tags.h +/// @file boc_tags.h /// @brief Message-tag table API shared between TUs. /// /// A `BOCTag` names a message stream and pins one of the 16 fixed @@ -19,7 +19,7 @@ #include -#include "compat.h" +#include "boc_compat.h" /// @brief Forward declaration. Body defined in `_core.c` (later /// `message_queue.h`); tags only carry a pointer. diff --git a/src/bocpy/terminator.c b/src/bocpy/boc_terminator.c similarity index 96% rename from src/bocpy/terminator.c rename to src/bocpy/boc_terminator.c index c005990..940b1e8 100644 --- a/src/bocpy/terminator.c +++ b/src/bocpy/boc_terminator.c @@ -1,13 +1,13 @@ -/// @file terminator.c +/// @file boc_terminator.c /// @brief Implementation of the process-global rundown counter. /// /// All state lives in file-scope statics so that every sub-interpreter /// in the same process shares one counter, mutex, and condvar. 
See -/// `terminator.h` for the public API and lifecycle contract. +/// `boc_terminator.h` for the public API and lifecycle contract. -#include "terminator.h" +#include "boc_terminator.h" -#include "compat.h" +#include "boc_compat.h" /// @brief Active behavior count + the Pyrona seed. static atomic_int_least64_t TERMINATOR_COUNT = 0; diff --git a/src/bocpy/terminator.h b/src/bocpy/boc_terminator.h similarity index 97% rename from src/bocpy/terminator.h rename to src/bocpy/boc_terminator.h index b4ad4f6..8165cce 100644 --- a/src/bocpy/terminator.h +++ b/src/bocpy/boc_terminator.h @@ -1,4 +1,4 @@ -/// @file terminator.h +/// @file boc_terminator.h /// @brief Process-global rundown counter API shared between TUs. /// /// The terminator is the C-level barrier that gates `Behaviors.wait()` / @@ -8,7 +8,7 @@ /// the count positive between the runtime starting and `stop()` taking /// it down via @ref terminator_seed_dec. /// -/// State is process-global (file-scope statics in `terminator.c`, NOT +/// State is process-global (file-scope statics in `boc_terminator.c`, NOT /// per-interpreter) so every sub-interpreter sees the same counter, /// mutex, and condvar. /// diff --git a/src/bocpy/include/bocpy/bocpy.h b/src/bocpy/include/bocpy/bocpy.h new file mode 100644 index 0000000..1f68358 --- /dev/null +++ b/src/bocpy/include/bocpy/bocpy.h @@ -0,0 +1,89 @@ +/// @file bocpy.h +/// @brief Public C ABI umbrella header for the bocpy package. +/// +/// This header is the single supported include for downstream C +/// extensions that want to interoperate with bocpy at the C level. It +/// re-exports the cross-interpreter data macros from `xidata.h`, a +/// minimal sequentially-consistent atomic surface compatible with +/// CPython's MSVC builds, and the `BOCPY_NO_OWNER` / `bocpy_interpid()` +/// pair used to flip per-resource ownership during XIData handoffs. +/// +/// **C-only and order-insensitive.** This header may be included before +/// or after ``. 
Including it from C++ translation units is +/// not supported in this release; downstream C++ consumers must wrap +/// the bocpy ABI in a thin C translation unit. See :ref:`c-abi` for +/// the full usage contract. + +#ifndef BOCPY_H +#define BOCPY_H + +/// Public C ABI revision. Bumped on any incompatible change to this +/// header or `xidata.h`. +#define BOCPY_ABI 1 + +#define PY_SSIZE_T_CLEAN + +#include <Python.h> +#include <stdbool.h> +#include <stdint.h> + +#include "xidata.h" + +#if defined(_MSC_VER) + +#ifndef thread_local +#define thread_local __declspec(thread) +#endif + +typedef volatile int_least64_t atomic_int_least64_t; + +/// @brief Sequentially-consistent fetch-and-add: `*ptr += value`. +/// @return The previous value of `*ptr` (before the add). +int_least64_t atomic_fetch_add(atomic_int_least64_t *ptr, int_least64_t value); + +/// @brief Sequentially-consistent compare-and-swap. +/// @return ``true`` if the swap happened. On failure, writes the +/// observed value through ``expected``. +bool atomic_compare_exchange_strong(atomic_int_least64_t *ptr, + atomic_int_least64_t *expected, + int_least64_t desired); + +/// @brief Sequentially-consistent load of `*ptr`. +int_least64_t atomic_load(atomic_int_least64_t *ptr); + +/// @brief Sequentially-consistent store of `value` into `*ptr`. +void atomic_store(atomic_int_least64_t *ptr, int_least64_t value); + +#else + +#include <stdatomic.h> + +#ifndef thread_local +#define thread_local _Thread_local +#endif + +#endif + +/// @brief Sentinel owner value meaning "no interpreter currently owns this +/// cross-interpreter resource". +/// +/// Use it as the initial value of any per-resource owner field that downstream +/// code flips with `atomic_compare_exchange_strong` during the producer-side +/// `XIDATA_GETDATA_FUNC` callback (this-interpreter -> `BOCPY_NO_OWNER`) and +/// the consumer-side `new_object` callback (`BOCPY_NO_OWNER` -> +/// this-interpreter). 
Chosen to be negative so it never collides with a real +/// `PyInterpreterState_GetID()` return value (which is non-negative). +#define BOCPY_NO_OWNER (-2) + +/// @brief Return the running interpreter's ID as `int_least64_t`. +/// +/// Convenience wrapper over +/// `PyInterpreterState_GetID(PyInterpreterState_Get())`, pre-typed for the +/// `atomic_int_least64_t` owner-field pattern paired with `BOCPY_NO_OWNER`. +/// Must be called with the GIL held (or while attached to an interpreter, on +/// free-threaded builds) — same contract as the underlying CPython API. +static inline int_least64_t bocpy_interpid(void) { + return (int_least64_t)PyInterpreterState_GetID(PyInterpreterState_Get()); +} + +#endif // BOCPY_H diff --git a/src/bocpy/include/bocpy/bocpy_msvc.c b/src/bocpy/include/bocpy/bocpy_msvc.c new file mode 100644 index 0000000..13bb9ba --- /dev/null +++ b/src/bocpy/include/bocpy/bocpy_msvc.c @@ -0,0 +1,73 @@ +/* MSVC out-of-line bodies for the four atomic ops declared in + * bocpy.h. This file is package data — compiled by downstream + * extensions via bocpy.get_sources(), NOT linked into any bocpy + * extension itself (boc_compat.c provides identical bodies for the + * bocpy build). + * + * Add this to a downstream setuptools.Extension's sources= list + * only on Windows. The file is a no-op on non-MSVC compilers. */ +#include "bocpy.h" +#if defined(_MSC_VER) +/* `windows.h` provides the `Interlocked*64` intrinsics used in the + * marker region below. The marker region itself must stay byte- + * identical to `boc_compat.c`, so the include lives outside it. */ +#include <windows.h> +/* The bytes between @atomic-bodies-begin and @atomic-bodies-end must + * be byte-identical to the marker region in src/bocpy/boc_compat.c + * (enforced by test_msvc_bodies_in_lockstep). 
*/ +/* @atomic-bodies-begin */ +int_least64_t atomic_fetch_add(atomic_int_least64_t *ptr, int_least64_t value) { +#if defined(_M_IX86) + int_least64_t old = *ptr; + for (;;) { + int_least64_t prev = InterlockedCompareExchange64(ptr, old + value, old); + if (prev == old) + return old; + old = prev; + } +#else + return InterlockedExchangeAdd64(ptr, value); +#endif +} + +bool atomic_compare_exchange_strong(atomic_int_least64_t *ptr, + atomic_int_least64_t *expected, + int_least64_t desired) { + int_least64_t prev; + prev = InterlockedCompareExchange64(ptr, desired, *expected); + if (prev == *expected) { + return true; + } + + *expected = prev; + return false; +} + +int_least64_t atomic_load(atomic_int_least64_t *ptr) { +#if defined(_M_IX86) + return InterlockedCompareExchange64(ptr, 0, 0); +#else + /* Seq-cst load. Plain `*ptr` is acquire/release at best on x64 + * and gives no ordering on ARM64; InterlockedOr64(ptr, 0) is a + * full barrier on every supported MSVC target. */ + return InterlockedOr64(ptr, 0); +#endif +} + +void atomic_store(atomic_int_least64_t *ptr, int_least64_t value) { +#if defined(_M_IX86) + int_least64_t old = *ptr; + for (;;) { + int_least64_t prev = InterlockedCompareExchange64(ptr, value, old); + if (prev == old) + return; + old = prev; + } +#else + /* Seq-cst store. Plain `*ptr = value` does not forbid StoreLoad + * reordering on x64/ARM64; InterlockedExchange64 is a full barrier. */ + (void)InterlockedExchange64(ptr, value); +#endif +} +/* @atomic-bodies-end */ +#endif diff --git a/src/bocpy/xidata.h b/src/bocpy/include/bocpy/xidata.h similarity index 50% rename from src/bocpy/xidata.h rename to src/bocpy/include/bocpy/xidata.h index 60b0dc7..b658580 100644 --- a/src/bocpy/xidata.h +++ b/src/bocpy/include/bocpy/xidata.h @@ -18,6 +18,94 @@ /// unchanged. Helper functions are `static inline` so a TU that does /// not call (e.g.) `xidata_supported` does not emit an unused-function /// warning. 
+/// +/// ## Functional overview +/// +/// The `XIDATA_*` macros expose three distinct codepaths. Each is +/// shown end-to-end below; see `_math.c` (Matrix) and the consumer +/// template at `templates/c_abi_consumer/` for working examples. +/// +/// ### 1. Allocate, initialise, and fill an XIData (producer side) +/// +/// Called from inside an `XIDATA_GETDATA_FUNC` callback (which the +/// runtime invokes once per cross-interpreter handoff, on the +/// interpreter that currently owns the object). The job of the +/// callback is to take ownership of the underlying resource on behalf +/// of `XIDATA_T` and record the per-resource `new_object` callback the +/// receiving interpreter will use to reconstruct a Python object. +/// +/// @code +/// XIDATA_GETDATA_FUNC(_my_shared) { +/// MyObj *o = (MyObj *)obj; +/// +/// // 1a. Hand off / refcount the underlying resource so it +/// // survives the source object being decref'd. +/// my_impl *impl = my_impl_acquire(o->impl); +/// +/// // 1b. Initialise the caller-allocated XIDATA_T with the +/// // owning interpreter, the raw payload pointer, the +/// // source PyObject (kept alive via Py_NewRef internally), +/// // and the new_object reconstruction callback. +/// XIDATA_INIT(xidata, tstate->interp, impl, obj, _new_my_object); +/// +/// // 1c. Tell the runtime how to free the payload if the +/// // receiving interpreter never claims it (or claims and +/// // later drops it). Skip if XIDATA_INIT already wired +/// // this for you via new_object's destructor. 
+/// XIDATA_SET_FREE(xidata, (void (*)(void *))my_impl_release); +/// +/// return 0; +/// } +/// @endcode +/// +/// The `xidata` buffer itself is allocated by the caller of +/// `XIDATA_GETXIDATA` (typically with `XIDATA_NEW()`): +/// +/// @code +/// XIDATA_T *xidata = XIDATA_NEW(); +/// if (XIDATA_GETXIDATA(value, xidata) < 0) { +/// PyMem_RawFree(xidata); +/// return NULL; +/// } +/// // xidata is now ready to be enqueued onto a cross-interpreter +/// // channel / queue / behavior payload. +/// @endcode +/// +/// ### 2. Free an XIData +/// +/// Once a payload has been delivered (or the producer has decided to +/// drop it), call `XIDATA_FREE`. This invokes the `free` callback set +/// during step 1, releases the borrowed `obj` reference, and frees the +/// `XIDATA_T` allocation itself. Always free on the interpreter +/// recorded in `xidata->interp` — not on the receiver — because the +/// `free` callback may touch interpreter-owned state. +/// +/// @code +/// XIDATA_FREE(xidata); +/// @endcode +/// +/// ### 3. Register a class as cross-interpreter shareable +/// +/// The XIData registry is **per interpreter**, so registration must run +/// once in every interpreter that will ever reconstruct an instance of +/// the type. The standard idiom is to call `XIDATA_REGISTERCLASS` from +/// a `Py_mod_exec` slot, so it re-runs on every import in every worker. +/// +/// @code +/// static int my_module_exec(PyObject *module) { +/// my_module_state *state = PyModule_GetState(module); +/// // ... create state->my_type ... +/// if (XIDATA_REGISTERCLASS(state->my_type, _my_shared) < 0) { +/// return -1; +/// } +/// return 0; +/// } +/// @endcode +/// +/// Registering the same type twice with different callbacks in the +/// same interpreter is undefined behaviour. See `XIDATA_GETDATA_FUNC` +/// (below) for the callback signature shim that hides the pre-/post-3.12 +/// argument-list change. 
#ifndef BOCPY_XIDATA_H #define BOCPY_XIDATA_H @@ -28,8 +116,18 @@ #include <Python.h> #if PY_VERSION_HEX >= 0x030D0000 +/* `internal/pycore_crossinterp.h` requires Py_BUILD_CORE; save and + * restore the prior state so a TU that already had it set is not + * silently turned off after this header. */ +#ifndef Py_BUILD_CORE #define Py_BUILD_CORE +#define BOCPY_INTERNAL_DEFINED_PY_BUILD_CORE +#endif #include <internal/pycore_crossinterp.h> +#ifdef BOCPY_INTERNAL_DEFINED_PY_BUILD_CORE +#undef Py_BUILD_CORE +#undef BOCPY_INTERNAL_DEFINED_PY_BUILD_CORE +#endif #endif #if PY_VERSION_HEX >= 0x030E0000 // 3.14 @@ -121,6 +219,17 @@ static inline void xidata_free(void *arg) { #else +/** + * @brief Internal marker: this CPython has no per-interpreter GIL. + * + * Defined only on Python < 3.12. The bocpy runtime uses this internally + * to fall back to a single-interpreter mode on these versions (workers + * still live in sub-interpreters but share the global GIL instead of + * owning per-interpreter ones). Downstream consumers do **not** need + * to special-case this macro: the `xidata.h` ladder exposes the same + * `XIDATA_*` macros on every supported CPython, and with one shared + * GIL there is nothing extra to serialise — the GIL already does. + */ #define BOC_NO_MULTIGIL #define XIDATA_NEWOBJECT _PyCrossInterpreterData_NewObject @@ -203,4 +312,50 @@ static inline PyObject *PyErr_GetRaisedException(void) { #endif +/** + * @brief Declare an `XIDATA_REGISTERCLASS` getdata callback. + * + * The CPython getdata callback signature changes shape across the + * per-interpreter-GIL boundary: + * + * - Python 3.12+ : `(PyThreadState *tstate, PyObject *obj, XIDATA_T *)` + * - Python <3.12 : `(PyObject *obj, XIDATA_T *)` (BOC_NO_MULTIGIL) + * + * On <3.12, `tstate` is not a parameter — the runtime passes only + * `(obj, xidata)`. This macro hides that split by emitting a + * trampoline on the legacy path that calls a `<name>_xi_body` function with + * the unified 3-arg signature.
The user writes the body once, in + * `(tstate, obj, xidata)` form, and it works on every supported + * CPython: + * + * @code + * XIDATA_GETDATA_FUNC(_my_shared) { + * MyObj *o = (MyObj *)obj; + * XIDATA_INIT(xidata, tstate->interp, o->impl, obj, + * _new_my_object); + * return 0; + * } + * + * // In the module exec slot: + * XIDATA_REGISTERCLASS(state->my_type, _my_shared); + * @endcode + * + * On <3.12 the macro emits an extra `<name>_xi_body` symbol and one + * stack frame of indirection per callback invocation. `<name>` itself + * is always the public symbol that callers (and `XIDATA_REGISTERCLASS`) + * see, regardless of CPython version. + */ +#ifndef BOC_NO_MULTIGIL +#define XIDATA_GETDATA_FUNC(name) \ + static int name(PyThreadState *tstate, PyObject *obj, XIDATA_T *xidata) +#else +#define XIDATA_GETDATA_FUNC(name) \ + static int name##_xi_body(PyThreadState *, PyObject *, XIDATA_T *); \ + static int name(PyObject *obj, XIDATA_T *xidata) { \ + return name##_xi_body(PyThreadState_GET(), obj, xidata); \ + } \ + static int name##_xi_body(PyThreadState *tstate, PyObject *obj, \ + XIDATA_T *xidata) +#endif + #endif // BOCPY_XIDATA_H diff --git a/src/bocpy/transpiler.py b/src/bocpy/transpiler.py index 4608f69..eafbd06 100644 --- a/src/bocpy/transpiler.py +++ b/src/bocpy/transpiler.py @@ -36,7 +36,7 @@ def clear(self): self.used_vars.clear() self.captured_vars.clear() - def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 + def visit_FunctionDef(self, node): # noqa: N802 """Collect locals and recurse to find captured variables.""" for arg in node.args.args: self.local_vars.add(arg.arg) @@ -48,7 +48,7 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 self.local_vars.add(node.args.kwarg.arg) for stmt in node.body: - if isinstance(stmt, ast.FunctionDef): + if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): self.local_vars.add(stmt.name) # A nested @when is rewritten by WhenTransformer into a # whencall(...) at this position.
The cown arguments and the @@ -71,6 +71,8 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 self.captured_vars = self.used_vars - self.local_vars - self.known_vars + visit_AsyncFunctionDef = visit_FunctionDef # noqa: N815 + def visit_Name(self, node: ast.Name): # noqa: N802 """Track variable usage to determine captures.""" if isinstance(node.ctx, ast.Load): @@ -93,11 +95,22 @@ def __init__(self): self.classes = set() self.functions = set() self.imports = set() + self.constants = set() def known_vars(self): """Return identifiers known at module scope for capture exclusion.""" return self.classes | self.functions | self.imports + def module_scope_names(self): + """Return all names available at module scope in the exported module. + + This is a superset of ``known_vars`` that also includes + UPPERCASE constants and literal assignments kept by + ``visit_Assign``. It is used for decorator name-resolution + validation only — NOT for capture exclusion. + """ + return self.classes | self.functions | self.imports | self.constants + def visit_Import(self, node: ast.Import): # noqa: N802 """Record imported names and keep the node.""" for name in node.names: @@ -136,9 +149,26 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 return node + visit_AsyncFunctionDef = visit_FunctionDef # noqa: N815 + + def _record_constant_targets(self, targets): + """Record every ``Name`` (including nested in tuple targets) as a constant.""" + for tgt in targets: + if isinstance(tgt, ast.Name): + self.constants.add(tgt.id) + elif isinstance(tgt, (ast.Tuple, ast.List)): + for elt in tgt.elts: + if isinstance(elt, ast.Name): + self.constants.add(elt.id) + def visit_Assign(self, node: ast.Assign): # noqa: N802 """Add module-level constants.""" if isinstance(node.value, ast.Constant): + # Constant assignments survive in the export. 
Record every + # target name (including chained ``A = B = 1`` and tuple + # ``A, B = 1, 2``) so the decorator validator can resolve + # them. + self._record_constant_targets(node.targets) return node if len(node.targets) > 1: @@ -149,8 +179,25 @@ def visit_Assign(self, node: ast.Assign): # noqa: N802 if isinstance(name, ast.Name): # use naming convention to allow some non-constant values as well if name.id.isupper(): + self.constants.add(name.id) return node + if isinstance(name, (ast.Tuple, ast.List)) and all( + isinstance(e, ast.Name) and e.id.isupper() for e in name.elts): + for elt in name.elts: + self.constants.add(elt.id) + return node + + return None + + def visit_AnnAssign(self, node: ast.AnnAssign): # noqa: N802 + """Keep annotated module-level constants and uppercase names.""" + if isinstance(node.target, ast.Name): + is_constant = isinstance(node.value, ast.Constant) + is_upper = node.target.id.isupper() + if is_constant or is_upper: + self.constants.add(node.target.id) + return node return None def generic_visit(self, node): @@ -181,14 +228,123 @@ class WhenTransformer(ast.NodeTransformer): the function with a call to `whencall` for that behavior. """ - def __init__(self, known_vars: set, path: str): + # Best-effort early warning for stdlib decorators that produce + # non-callable descriptors at module scope (``staticmethod``, + # ``classmethod``, ``property``). Applied below ``@when``, these + # would silently break worker dispatch — the generated + # ``__behavior__N`` is invoked as a plain function on the worker, + # but the descriptor is not callable that way; ``property`` even + # raises ``TypeError`` at import time. + # + # This is **not** a correctness guarantee. The transpiler can only + # see decorator *syntax*, not what the expression evaluates to at + # import time on the worker, so any third-party decorator with the + # same shape (e.g., ``functools.cached_property``, custom + # descriptor factories) will slip through. 
Treat the set below as a + # convenience: a precise, actionable error for the few stdlib names + # we can recognise from the AST. Users applying exotic decorators + # below ``@when`` are on their own. + _BANNED_BELOW_DECORATORS = frozenset({"staticmethod", "classmethod", "property"}) + + def __init__(self, known_vars: set, path: str, module_scope_names: set): """Prepare behavior extraction with known identifiers and file path.""" self.known_vars = known_vars + self.module_scope_names = module_scope_names self.cap_finder = CapturedVariableFinder(known_vars) self.nodes = [] self.behaviors = {} self.path = path + def _validate_decorator_names(self, dec: ast.AST): + """Reject free names in ``dec`` that the worker cannot resolve. + + Walks the decorator subtree honoring lexical scope: parameters + of ``Lambda`` and target names of comprehensions / generator + expressions are *local* to those forms and must not be flagged. + Free ``Name(Load)`` references must appear in + ``module_scope_names`` (imports, classes, functions, constants, + builtins) so they resolve when the exported module is imported + on a worker. + """ + bound_stack: list[set] = [] + + def is_bound(name: str) -> bool: + return any(name in s for s in bound_stack) + + def lambda_locals(args: ast.arguments) -> set: + local = set() + for grp in (args.posonlyargs, args.args, args.kwonlyargs): + for a in grp: + local.add(a.arg) + if args.vararg: + local.add(args.vararg.arg) + if args.kwarg: + local.add(args.kwarg.arg) + return local + + def collect_targets(target: ast.AST, into: set) -> None: + if isinstance(target, ast.Name): + into.add(target.id) + elif isinstance(target, (ast.Tuple, ast.List)): + for elt in target.elts: + collect_targets(elt, into) + elif isinstance(target, ast.Starred): + collect_targets(target.value, into) + + def visit(node: ast.AST) -> None: + if isinstance(node, ast.Lambda): + # Defaults are evaluated in the *outer* scope. 
+ for d in node.args.defaults: + visit(d) + for d in node.args.kw_defaults: + if d is not None: + visit(d) + bound_stack.append(lambda_locals(node.args)) + visit(node.body) + bound_stack.pop() + return + + if isinstance(node, (ast.ListComp, ast.SetComp, + ast.GeneratorExp, ast.DictComp)): + local: set = set() + for i, gen in enumerate(node.generators): + # The *first* iter is evaluated in the enclosing + # scope; later iters see prior targets. + if i == 0: + visit(gen.iter) + else: + bound_stack.append(local) + visit(gen.iter) + bound_stack.pop() + collect_targets(gen.target, local) + bound_stack.append(local) + for if_ in gen.ifs: + visit(if_) + bound_stack.pop() + bound_stack.append(local) + if isinstance(node, ast.DictComp): + visit(node.key) + visit(node.value) + else: + visit(node.elt) + bound_stack.pop() + return + + if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load): + if not is_bound(node.id) and node.id not in self.module_scope_names: + raise SyntaxError( + f"Decorator references '{node.id}' which is " + f"not defined as an import, class, function, or " + f"constant at module level. Ensure it is " + f"importable in the worker.", + (self.path, node.lineno, node.col_offset, None), + ) + + for child in ast.iter_child_nodes(node): + visit(child) + + visit(dec) + def visit_Module(self, node: ast.Module): # noqa: N802 """Remove when-call expressions and append generated behaviors.""" new_body = [] @@ -222,6 +378,26 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 if when_dec is None: return self.generic_visit(node) + # Reject async functions — there is no event loop on workers. + if isinstance(node, ast.AsyncFunctionDef): + raise SyntaxError( + "@when does not support async functions", + (self.path, node.lineno, node.col_offset, None), + ) + + # Reject decorators above @when — they would wrap the + # scheduling call (a Cown), not the behavior body. 
+ when_idx = node.decorator_list.index(when_dec) + if when_idx > 0: + bad = node.decorator_list[0] + above = [ast.unparse(d) for d in node.decorator_list[:when_idx]] + raise SyntaxError( + "Decorators above @when are not supported — move them " + "below @when to apply them to the behavior body: " + + ", ".join(above), + (self.path, bad.lineno, bad.col_offset, None), + ) + # first create a deep copy of the function behavior_node = copy.deepcopy(node) ast.copy_location(behavior_node, node) @@ -239,9 +415,40 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 for name in captures: behavior_node.args.args.append(ast.Name(id=name)) - # strip the @when decorator (and any other decorators, they are - # not supported) - behavior_node.decorator_list.clear() + # Remove only @when decorators; other decorators compose with + # the behavior body and are preserved in the exported module. + behavior_node.decorator_list = [ + d for d in behavior_node.decorator_list + if not (isinstance(d, ast.Call) + and isinstance(d.func, ast.Name) + and d.func.id == "when") + ] + + # Reject descriptor-producing decorators that would silently + # break worker dispatch when applied to a module-level + # ``__behavior__N`` (the worker calls it as a plain function). + for dec in behavior_node.decorator_list: + banned = None + if isinstance(dec, ast.Name) and dec.id in self._BANNED_BELOW_DECORATORS: + banned = dec.id + elif (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Name) + and dec.func.id in self._BANNED_BELOW_DECORATORS): + banned = dec.func.id + if banned is not None: + raise SyntaxError( + f"@{banned} is not supported below @when — the generated " + f"behavior runs as a module-level function on the worker, " + f"where {banned} produces a non-callable descriptor.", + (self.path, dec.lineno, dec.col_offset, None), + ) + + # Validate that remaining decorator expressions only reference + # names available at module scope in the worker. 
Walk only + # *free* variables — names bound by ``Lambda`` / + # comprehension / generator-expression scopes inside the + # decorator are local and must not be flagged. + for dec in behavior_node.decorator_list: + self._validate_decorator_names(dec) # deal with any recursive behaviors within this behavior behavior_node = self.visit(behavior_node) @@ -268,6 +475,8 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 ast.fix_missing_locations(when_call) return ast.Expr(ast.Assign([ast.Name(id=node.name)], when_call)) + visit_AsyncFunctionDef = visit_FunctionDef # noqa: N815 + ExportResult = NamedTuple("ExportResult", [("code", str), ("classes", Set[str]), ("functions", Set[str]), @@ -297,7 +506,11 @@ def export_module(tree: ast.Module, path: str = None) -> ExportResult: boc_export = BOCModuleTransformer() boc_export.visit(tree) - when_transformer = WhenTransformer(boc_export.known_vars() | builtins, path) + when_transformer = WhenTransformer( + boc_export.known_vars() | builtins, + path, + module_scope_names=boc_export.module_scope_names() | builtins, + ) when_transformer.visit(tree) tree.body.extend(when_transformer.nodes) diff --git a/templates/c_abi_consumer/README.md b/templates/c_abi_consumer/README.md new file mode 100644 index 0000000..a2043ab --- /dev/null +++ b/templates/c_abi_consumer/README.md @@ -0,0 +1,74 @@ +# bocpy C-ABI consumer smoke test + +This directory is **both** a CI smoke test for the bocpy public C ABI +and the canonical downstream template for an extension that wants to +build against it. + +## Files + +| File | Purpose | +|----------------------------|-------------------------------------------------------------------------| +| `src/_bocpy_probe.c` | Tiny C extension; `#include "bocpy.h"` only; exercises the atomic surface and the XIData allocator; calls `XIDATA_REGISTERCLASS` once at module init. 
| +| `setup.py` | Uses `bocpy.get_include()` and `bocpy.get_sources()` — copy this verbatim into your own project and change the module name. | +| `pyproject.toml` | PEP 517 metadata; declares `bocpy` as a build- and run-time dependency. | +| `test/test_consumer.py` | pytest module that imports `_bocpy_probe` and asserts the documented behaviour. | + +## Running locally + +Run all commands from the bocpy repo root: + +```bash +pip install -e .[test] # install bocpy itself +pip install --no-build-isolation ./templates/c_abi_consumer # build and install the consumer +pytest templates/c_abi_consumer/test # run the consumer's tests +``` + +``--no-build-isolation`` is required so the consumer is built +against the same `bocpy` install you import at test time, rather +than whatever PyPI happens to publish. + +CI runs the same three commands on every supported (Python, OS) cell; +if anything in the public C ABI silently regresses (a leaked +`Py_BUILD_CORE`, a renamed atomic op, a `bocpy.get_include()` that no +longer points at `bocpy.h`, …) one of those three steps fails loudly. + +## Using this as a template + +Drop `setup.py` and `pyproject.toml` into your own project, change +`_bocpy_probe` to your module name in **all three** of: + +* `pyproject.toml` (the `[project].name` field, plus the + `[build-system].requires` list if you keep it), +* `setup.py` (the first argument to `Extension(...)`), +* `src/_bocpy_probe.c` (the `PyInit__bocpy_probe` function name and + the `_bocpy_probe_module*` identifiers — they must match the + module name CPython looks up). + +Then replace `src/_bocpy_probe.c` with your own sources. The +`bocpy.get_sources()` call appends the MSVC out-of-line bodies on +Windows and is a no-op elsewhere, so the same build script works on +every platform. + +### Per-interpreter requirements + +`_bocpy_probe.c` uses multi-phase initialisation (`Py_mod_exec`) and +declares `Py_MOD_PER_INTERPRETER_GIL_SUPPORTED`. 
bocpy workers run in +sub-interpreters on every supported CPython, and +`XIDATA_REGISTERCLASS` registers types into a per-interpreter +registry, so a single-phase `PyModule_Create` module that registers +from `PyInit` will load in the main interpreter but segfault when a +worker reconstructs one of your types. + +Two corollaries for downstream code: + +1. The `Counter` type is heap-allocated via `PyType_FromModuleAndSpec` + and stored on per-module state, with a `thread_local` cache primed + in the exec slot. Mirror this pattern for your own types. +2. Any test or `@when`-scheduling code that reconstructs your types + in a worker must contain a top-level `import` of your extension. + The transpiler propagates module-scope imports into worker + interpreters; runtime helpers like `pytest.importorskip` are + invisible to it. + +See the bocpy C-ABI documentation ("Consumer modules and worker +sub-interpreters") for the full contract. diff --git a/templates/c_abi_consumer/pyproject.toml b/templates/c_abi_consumer/pyproject.toml new file mode 100644 index 0000000..d80e6b2 --- /dev/null +++ b/templates/c_abi_consumer/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +# `bocpy` is listed here so an isolated PEP 517 build can satisfy the +# `import bocpy` in setup.py. Always install with --no-build-isolation +# so the build resolves headers against the bocpy install actually +# being tested. See README.md. +requires = ["setuptools", "wheel", "bocpy"] +build-backend = "setuptools.build_meta" + +[project] +name = "bocpy-c-abi-consumer" +version = "0.0.0" +description = "Smoke test and canonical downstream template for the bocpy public C ABI." +requires-python = ">=3.10" +dependencies = ["bocpy"] diff --git a/templates/c_abi_consumer/setup.py b/templates/c_abi_consumer/setup.py new file mode 100644 index 0000000..490057e --- /dev/null +++ b/templates/c_abi_consumer/setup.py @@ -0,0 +1,21 @@ +"""Build script for the bocpy C-ABI consumer smoke test. 
+ +Doubles as the canonical downstream template: any extension that wants +to interoperate with bocpy at the C level can copy this file and the +neighbouring ``pyproject.toml``, change the module name, and replace +``_bocpy_probe.c`` with their own sources. +""" + +from setuptools import Extension, setup + +import bocpy + +setup( + ext_modules=[ + Extension( + "_bocpy_probe", + sources=["src/_bocpy_probe.c"] + bocpy.get_sources(), + include_dirs=[bocpy.get_include()], + ), + ], +) diff --git a/templates/c_abi_consumer/src/_bocpy_probe.c b/templates/c_abi_consumer/src/_bocpy_probe.c new file mode 100644 index 0000000..3262c13 --- /dev/null +++ b/templates/c_abi_consumer/src/_bocpy_probe.c @@ -0,0 +1,419 @@ +/// @file _bocpy_probe.c +/// @brief Non-trivial downstream consumer of the bocpy public C ABI. +/// +/// This translation unit is built by `templates/c_abi_consumer/setup.py` +/// against the headers reported by `bocpy.get_include()`. It serves +/// two purposes: +/// +/// 1. CI smoke test for the public C ABI (compile + import + behave). +/// 2. Canonical worked example for downstream extension authors. +/// +/// ### Design +/// +/// The extension exposes a `Counter` Python type. Each instance wraps +/// a pointer to a heap-allocated `counter_impl` C struct that lives +/// outside any single Python object's lifetime. `Counter` is +/// registered as cross-interpreter shareable via +/// `XIDATA_REGISTERCLASS` with a producer-side getdata callback and a +/// consumer-side reconstruction callback. The reconstructed wrapper +/// shares the same underlying `counter_impl` pointer; an atomic +/// `count` field on the impl is bumped each time the consumer +/// callback runs, so Python tests can observe round-trip identity +/// and ordering as a `Counter` cown is shipped between workers via +/// `@when`. 
+/// +/// ### Ownership (proto-Region semantics) +/// +/// `counter_impl` carries an atomic `owner` field tagged with the +/// interpreter ID that may currently read or write it. The producer +/// callback CASes `owner` from `bocpy_interpid()` to `BOCPY_NO_OWNER` +/// before initialising the xidata, and the consumer callback CASes +/// it back from `BOCPY_NO_OWNER` to its own `bocpy_interpid()` before +/// constructing the new wrapper. Reading `count` from a wrapper +/// whose interpreter does not own the impl raises `RuntimeError` — +/// stale wrappers left behind in the producer interpreter cannot +/// observe the value any more. This mirrors `bocpy.Matrix` and is +/// the pattern documented in the C ABI page under "Proto-Region +/// semantics". +/// +/// ### Refcounting +/// +/// `counter_impl` carries its own atomic refcount. Each `Counter` +/// wrapper holds one ref; `Counter.__dealloc__` drops it. The +/// consumer callback creates a fresh wrapper and bumps the refcount +/// for it. The xidata keeps the producer wrapper alive (via the +/// `obj` slot recorded by `XIDATA_INIT`), so the impl cannot be +/// freed mid-handoff. When the last wrapper goes away, the impl is +/// freed. Refcounting is independent of ownership: any interpreter +/// holding a wrapper drops its ref on dealloc, regardless of who +/// currently owns the impl. This mirrors the `Matrix` pattern in +/// `src/bocpy/_math.c`. +/// +/// ### Module init +/// +/// The module uses multi-phase initialisation (`Py_mod_exec`) and +/// declares `Py_MOD_PER_INTERPRETER_GIL_SUPPORTED`. bocpy workers +/// always run in sub-interpreters (sharing the legacy global GIL on +/// 3.10/3.11, owning per-interpreter GILs on 3.12+), and +/// `XIDATA_REGISTERCLASS` registers types into a per-interpreter +/// registry, so the registration must run in every interpreter that +/// reconstructs a `Counter`. 
`Counter` itself is a heap type created +/// via `PyType_FromModuleAndSpec`, owned by per-module state, with +/// the `XIDATA_REGISTERCLASS` call living in the exec slot. + +#include <bocpy.h> + +/* Compile-time guard: bocpy.h must not leak Py_BUILD_CORE. If a + * future refactor of xidata.h forgets the #undef, this file fails + * to compile, which fails CI louder than any runtime test could. */ +#ifdef Py_BUILD_CORE +#error "Py_BUILD_CORE leaked from bocpy.h" +#endif + +#include <stdatomic.h> + +/* ------------------------------------------------------------------ */ +/* counter_impl: heap-allocated, shared by reference */ +/* ------------------------------------------------------------------ */ + +typedef struct { + atomic_int_least64_t refcount; /* number of Counter wrappers */ + atomic_int_least64_t count; /* number of XIData round-trips */ + /* Interpreter ID currently allowed to read/write the impl, or + * BOCPY_NO_OWNER while the impl is in flight between interpreters. + * Flipped by the producer/consumer XIData callbacks; checked by + * Counter_get_count. */ + atomic_int_least64_t owner; +} counter_impl; + +static counter_impl *counter_impl_new(void) { + counter_impl *impl = PyMem_RawMalloc(sizeof(*impl)); + if (impl == NULL) { + return NULL; + } + atomic_store(&impl->refcount, 1); + atomic_store(&impl->count, 0); + /* Born owned by the constructing interpreter. */ + atomic_store(&impl->owner, bocpy_interpid()); + return impl; +} + +static void counter_impl_incref(counter_impl *impl) { + atomic_fetch_add(&impl->refcount, 1); +} + +static void counter_impl_decref(counter_impl *impl) { + /* fetch_add returns the *old* value; if it was 1 we are the last + * holder. */ + int_least64_t old = atomic_fetch_add(&impl->refcount, -1); + if (old == 1) { + PyMem_RawFree(impl); + } +} + +/* Returns true if the current interpreter currently owns the impl. + * Used by data-reading accessors (Counter_get_count). 
Identity-only + * accessors (address, refcount) deliberately do not call this: they + * are valid to inspect from any interpreter holding a wrapper, the + * same way you may print the address of a Region handle without + * being inside the Region. */ +static bool counter_impl_check_acquired(counter_impl *impl, bool set_error) { + if (bocpy_interpid() != atomic_load(&impl->owner)) { + if (set_error) { + PyErr_SetString(PyExc_RuntimeError, + "the current interpreter does not own this Counter"); + } + return false; + } + return true; +} + +/* ------------------------------------------------------------------ */ +/* Counter: Python wrapper around a counter_impl pointer */ +/* ------------------------------------------------------------------ */ + +/* Forward declaration so the per-interpreter state lookup helper can + * reference the module def by address. */ +static struct PyModuleDef _bocpy_probe_module; + +/* Per-interpreter module state. Each interpreter that imports the + * module gets its own copy, with its own heap-allocated `counter_type`. + * `LOCAL_STATE` is a thread-local cache populated by the exec slot, + * so callbacks (the XIData consumer side, methods, …) can reach the + * right `counter_type` without re-walking PyModule_GetState every + * call. Mirrors the LOCAL_STATE pattern in `src/bocpy/_math.c`. */ +typedef struct { + PyTypeObject *counter_type; +} _bocpy_probe_module_state; + +static thread_local _bocpy_probe_module_state *LOCAL_STATE; + +#define LOCAL_STATE_SET(m) \ + do { \ + LOCAL_STATE = (_bocpy_probe_module_state *)PyModule_GetState(m); \ + } while (0) + +typedef struct { + PyObject_HEAD counter_impl *impl; +} CounterObject; + +static int Counter_init(CounterObject *self, PyObject *args, PyObject *kwds) { + /* Counter takes no arguments; reject anything passed in to surface + * mistakes loudly rather than silently dropping kwargs. 
*/ + static char *kwlist[] = {NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, ":Counter", kwlist)) { + return -1; + } + /* Guard against re-initialisation: __init__ is callable more than + * once on the same instance, and without this check the second + * call would leak the first impl. */ + if (self->impl != NULL) { + counter_impl_decref(self->impl); + self->impl = NULL; + } + self->impl = counter_impl_new(); + if (self->impl == NULL) { + PyErr_NoMemory(); + return -1; + } + return 0; +} + +static void Counter_dealloc(PyObject *op) { + CounterObject *self = (CounterObject *)op; + if (self->impl != NULL) { + counter_impl_decref(self->impl); + self->impl = NULL; + } + Py_TYPE(self)->tp_free(self); +} + +static PyObject *Counter_get_count(CounterObject *self, void *closure) { + if (self->impl == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Counter not initialised"); + return NULL; + } + if (!counter_impl_check_acquired(self->impl, true)) { + return NULL; + } + return PyLong_FromLongLong((long long)atomic_load(&self->impl->count)); +} + +static PyObject *Counter_get_address(CounterObject *self, void *closure) { + if (self->impl == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Counter not initialised"); + return NULL; + } + return PyLong_FromVoidPtr(self->impl); +} + +static PyObject *Counter_get_refcount(CounterObject *self, void *closure) { + if (self->impl == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Counter not initialised"); + return NULL; + } + return PyLong_FromLongLong((long long)atomic_load(&self->impl->refcount)); +} + +static PyGetSetDef Counter_getset[] = { + {"count", (getter)Counter_get_count, NULL, + "Number of XIData round-trips the underlying impl has seen.", NULL}, + {"address", (getter)Counter_get_address, NULL, + "Identity of the underlying counter_impl pointer (as int).", NULL}, + {"refcount", (getter)Counter_get_refcount, NULL, + "Number of Counter wrappers currently holding the impl.", NULL}, + {NULL, NULL, NULL, NULL, NULL}, +}; + 
+static PyType_Slot Counter_slots[] = { + {Py_tp_doc, (void *)"Counter()\n--\n\n" + "A refcounted counter shareable across interpreters."}, + {Py_tp_new, PyType_GenericNew}, + {Py_tp_init, (void *)Counter_init}, + {Py_tp_dealloc, (void *)Counter_dealloc}, + {Py_tp_getset, Counter_getset}, + {0, NULL}, +}; + +static PyType_Spec Counter_Spec = { + .name = "_bocpy_probe.Counter", + .basicsize = sizeof(CounterObject), + .itemsize = 0, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, + .slots = Counter_slots, +}; + +/* ------------------------------------------------------------------ */ +/* XIData callbacks */ +/* ------------------------------------------------------------------ */ + +/// @brief Wraps a counter sent from another interpreter. +/// @details The underlying counter_impl, when it arrives at another +/// interpreter, is wrapped by this method in a CounterObject so that +/// it can be used from code running in that interpreter. +/// @param xidata The xidata containing the counter_impl +/// @return a new CounterObject reference, or NULL on error +static PyObject *_new_counter_object(XIDATA_T *xidata) { + counter_impl *impl = (counter_impl *)xidata->data; + + /* Take ownership of the impl: BOCPY_NO_OWNER -> this interpreter. + * The producer callback parked the impl at BOCPY_NO_OWNER before + * handing it off; if the CAS fails, something else has already + * claimed it (a bug in the cross-interpreter handoff machinery). + */ + int_least64_t expected = BOCPY_NO_OWNER; + int_least64_t desired = bocpy_interpid(); + if (!atomic_compare_exchange_strong(&impl->owner, &expected, desired)) { + PyErr_Format(PyExc_RuntimeError, + "cannot acquire Counter (expected BOCPY_NO_OWNER, " + "observed owner=%lld)", + (long long)expected); + return NULL; + } + + atomic_fetch_add(&impl->count, 1); + + /* Use this interpreter's heap-allocated copy of the type. 
*/ + PyTypeObject *type = LOCAL_STATE->counter_type; + CounterObject *counter = (CounterObject *)type->tp_alloc(type, 0); + if (counter == NULL) { + /* Roll the owner back so a future retry of the handoff can + * succeed and the impl is not stranded with us as owner while + * we have no wrapper to release it. */ + atomic_store(&impl->owner, BOCPY_NO_OWNER); + return NULL; + } + counter->impl = impl; + counter_impl_incref(impl); + return (PyObject *)counter; +} + +/// @brief Prepare the underlying counter_impl for sharing with another +/// interpreter. +/// @param tstate The thread state of the current interpreter +/// @param obj The CounterObject instance +/// @param xidata An empty xidata package +/// @return 0 if successful, < 0 on error +XIDATA_GETDATA_FUNC(_counter_shared) { + CounterObject *counter = (CounterObject *)obj; + counter_impl *impl = counter->impl; + if (impl == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Counter not initialised"); + return -1; + } + + /* Release ownership: this interpreter -> BOCPY_NO_OWNER. The + * consumer-side callback will CAS it from NO_OWNER to its own + * interpreter ID. Failing here means another interpreter already + * owns the impl, so it cannot be lawfully shipped from us. 
*/ + int_least64_t expected = bocpy_interpid(); + int_least64_t desired = BOCPY_NO_OWNER; + if (!atomic_compare_exchange_strong(&impl->owner, &expected, desired)) { + PyErr_Format(PyExc_RuntimeError, + "cannot share Counter (owned by interpreter %lld, " + "this interpreter is %lld)", + (long long)expected, (long long)bocpy_interpid()); + return -1; + } + + XIDATA_INIT(xidata, tstate->interp, impl, obj, _new_counter_object); + return 0; +} + +/* ------------------------------------------------------------------ */ +/* Module-level methods */ +/* ------------------------------------------------------------------ */ + +static PyMethodDef _bocpy_probe_methods[] = { + {NULL, NULL, 0, NULL}, +}; + +/* ------------------------------------------------------------------ */ +/* Module init: multi-phase, per-interpreter-GIL aware */ +/* ------------------------------------------------------------------ */ + +/// @brief Module exec slot. +/// +/// Runs once per interpreter, on every import. Allocates this +/// interpreter's heap-allocated `Counter` type, registers it as +/// cross-interpreter shareable, and primes the `LOCAL_STATE` +/// thread-local cache so XIData callbacks and methods can find the +/// type without walking module state on every call. +/// +/// The `XIDATA_REGISTERCLASS` call must happen here (not in `PyInit`) +/// because CPython's cross-interpreter type registry is +/// per-interpreter — each interpreter that wants to share an instance +/// needs the type registered in its own registry. +static int _bocpy_probe_module_exec(PyObject *module) { + _bocpy_probe_module_state *state = + (_bocpy_probe_module_state *)PyModule_GetState(module); + + state->counter_type = + (PyTypeObject *)PyType_FromModuleAndSpec(module, &Counter_Spec, NULL); + if (state->counter_type == NULL) { + return -1; + } + if (PyModule_AddType(module, state->counter_type) < 0) { + return -1; + } + + /* Register Counter as cross-interpreter shareable. 
The producer + * callback runs whenever something asks XIData to package a + * Counter — in particular, every time a `Counter` cown is shipped + * to a worker via `@when` or sent through the bocpy message queue. + */ + if (XIDATA_REGISTERCLASS(state->counter_type, _counter_shared)) { + PyErr_SetString(PyExc_RuntimeError, + "could not register Counter for cross-interpreter sharing"); + return -1; + } + + LOCAL_STATE_SET(module); + return 0; +} + +static int _bocpy_probe_module_clear(PyObject *module) { + _bocpy_probe_module_state *state = + (_bocpy_probe_module_state *)PyModule_GetState(module); + Py_CLEAR(state->counter_type); + return 0; +} + +static void _bocpy_probe_module_free(void *module) { + (void)_bocpy_probe_module_clear((PyObject *)module); +} + +static int _bocpy_probe_module_traverse(PyObject *module, visitproc visit, + void *arg) { + _bocpy_probe_module_state *state = + (_bocpy_probe_module_state *)PyModule_GetState(module); + Py_VISIT(state->counter_type); + return 0; +} + +static PyModuleDef_Slot _bocpy_probe_module_slots[] = { + {Py_mod_exec, (void *)_bocpy_probe_module_exec}, +#if PY_VERSION_HEX >= 0x030C0000 + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, +#endif +#if PY_VERSION_HEX >= 0x030D0000 + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, +#endif + {0, NULL}, +}; + +static struct PyModuleDef _bocpy_probe_module = { + PyModuleDef_HEAD_INIT, + .m_name = "_bocpy_probe", + .m_doc = "Smoke test and worked example for the bocpy public C ABI.", + .m_size = sizeof(_bocpy_probe_module_state), + .m_methods = _bocpy_probe_methods, + .m_slots = _bocpy_probe_module_slots, + .m_traverse = _bocpy_probe_module_traverse, + .m_clear = _bocpy_probe_module_clear, + .m_free = (freefunc)_bocpy_probe_module_free, +}; + +PyMODINIT_FUNC PyInit__bocpy_probe(void) { + return PyModuleDef_Init(&_bocpy_probe_module); +} diff --git a/templates/c_abi_consumer/test/test_consumer.py b/templates/c_abi_consumer/test/test_consumer.py new file mode 100644 index 
0000000..885b3fb --- /dev/null +++ b/templates/c_abi_consumer/test/test_consumer.py @@ -0,0 +1,156 @@ +"""End-to-end exercise of the bocpy public C ABI via the runtime itself. + +Building ``_bocpy_probe`` against ``bocpy.get_include()`` and +``bocpy.get_sources()`` proves the ABI compiles. Importing it proves +the headers and atomic shim link correctly. The headline test then +shows that downstream extensions can ride the cross-interpreter +machinery — ``Cown``, ``@when`` and ``send``/``receive`` — for free: + + * A ``Counter`` (a downstream type registered via + ``XIDATA_REGISTERCLASS``) is wrapped in a ``Cown``. + * A tail-recursive ``@when`` chain re-schedules itself on a worker + sub-interpreter until the impl's atomic ``count`` reaches a + target. Each behavior dispatch round-trips the impl through the + registered producer + consumer callbacks, which is what bumps the + count. + * The terminal behavior reads ``c.value.address`` and + ``c.value.count`` *inside* the ``@when`` (where it owns the cown + under the proto-Region discipline) and ``send``s the assertion + pairs ``(addr, expected_addr)`` and ``(count >= TARGET, True)`` + back to the main thread. The test ``receive``s them and fails if + either pair disagrees, proving the impl pointer survived every + XIData hop and the consumer callback fired on every dispatch. + +Together this exercises the real BOC scheduler, the real worker +handoff, and the real MPSC message queue — not just an in-process +round-trip of a single XIData callback. +""" + +# Top-level, unconditional import. The transpiler propagates module- +# level ``import`` statements into worker sub-interpreters, where the +# extension's per-interpreter exec slot must run before the consumer +# callback can dereference its ``LOCAL_STATE``. ``pytest.importorskip`` +# is a runtime call the transpiler does not see, so it would leave the +# worker without the probe and segfault on the first reconstruction. 
+import _bocpy_probe +import pytest + +from bocpy import Cown, drain, receive, send, TIMEOUT, wait, when + + +# --- construction smoke checks ------------------------------------------- +# +# These do not need BOC. They just confirm the extension built and that +# the per-interpreter exec slot ran on the main interpreter. + + +def test_counter_construction(): + """Default-constructed Counter exposes a non-NULL impl with count=0.""" + c = _bocpy_probe.Counter() + assert c.count == 0 + assert c.refcount == 1 + assert isinstance(c.address, int) + assert c.address != 0 + + +def test_counter_uninitialised_raises(): + """Getters must refuse to dereference a NULL impl rather than segfault. + + ``Counter.__new__(Counter)`` skips ``__init__``, so the wrapper has + ``impl == NULL``. Each getter must raise ``RuntimeError`` instead of + crashing. + """ + c = _bocpy_probe.Counter.__new__(_bocpy_probe.Counter) + with pytest.raises(RuntimeError, match="not initialised"): + c.count + with pytest.raises(RuntimeError, match="not initialised"): + c.address + with pytest.raises(RuntimeError, match="not initialised"): + c.refcount + + +# --- BOC-driven XIData round-trip ---------------------------------------- + +TARGET = 5 +RECEIVE_TIMEOUT = 10 + + +def _step(c, expected_addr): + """Schedule one round of the tail loop. + + Defined at module level so the transpiler can resolve it from the + worker interpreter when the recursive call inside the behavior is + executed. ``expected_addr`` is closed over by value at schedule + time (the transpiler snapshots captures into a tuple) so the + terminal behavior can compare it against the impl pointer it + observes from inside the worker. + """ + @when(c) + def _(c): + # Counter follows proto-Region semantics: only the interpreter + # currently owning the cown may inspect ``c.value``. Do all + # checks here, inside the @when, where ownership is held. 
+ addr = c.value.address + count = c.value.count + if count < TARGET: + _step(c, expected_addr) + else: + # Identity check: the impl pointer must survive every + # @when handoff in the tail loop. If XIData ever lost it, + # ``addr`` would not match the cown's original address. + send("assert", (addr, expected_addr)) + # Progress check: the consumer callback bumps ``count`` on + # every reconstruction, so by the terminal behavior we + # must have round-tripped at least TARGET times. + send("assert", (count >= TARGET, True)) + + +class TestBOCRoundtrip: + """BOC-driven round-trip of a ``Counter`` cown via ``@when`` + send.""" + + @classmethod + def teardown_class(cls): + """Drain pending behaviors so the runtime can shut cleanly.""" + wait() + + def receive_asserts(self, count): + """Collect ``count`` assertion messages and fail on mismatch. + + Mirrors the helper from .github/skills/testing-with-boc — uses + a timeout so a stalled behavior fails the test loudly instead + of hanging, and drains the queue on the way out. + """ + failed = None + timed_out = False + try: + for _ in range(count): + result = receive("assert", RECEIVE_TIMEOUT) + if result[0] == TIMEOUT: + timed_out = True + break + _, (actual, expected) = result + if failed is None and actual != expected: + failed = (actual, expected) + finally: + drain("assert") + + assert not timed_out, ( + "tail-recursive @when chain never reached its terminal " + "send('assert', ...). 
Either XIData round-trip is not " + "incrementing the counter or the behavior chain stalled.") + if failed is not None: + actual, expected = failed + assert actual == expected, f"expected {expected!r}, got {actual!r}" + + def test_tail_loop_roundtrips_counter_through_when_and_send(self): + """Ship a Counter cown through a tail-recursive @when chain.""" + counter = _bocpy_probe.Counter() + expected_addr = counter.address + c = Cown(counter) + + _step(c, expected_addr) + + # Two asserts from the terminal behavior: address identity + # and count progress. ``receive_asserts`` blocks until both + # arrive (or times out), so no extra sentinel is needed. + self.receive_asserts(2) diff --git a/test/test_boc.py b/test/test_boc.py index 17c3747..19788f8 100644 --- a/test/test_boc.py +++ b/test/test_boc.py @@ -1303,3 +1303,81 @@ def patched_set_drop(self, exc): # cap_ki's release_all was attempted too (the KI was raised # from set_drop_exception, which runs *before* release_all). assert cap_ki.released + + +def add_one(fn): + """Module-level decorator that adds 1 to the return value.""" + @functools.wraps(fn) + def wrapper(*args, **kwargs): + return fn(*args, **kwargs) + 1 + return wrapper + + +def times_two(fn): + """Module-level decorator that multiplies the return value by 2.""" + @functools.wraps(fn) + def wrapper(*args, **kwargs): + return fn(*args, **kwargs) * 2 + return wrapper + + +class TestDecoratorComposition: + """Decorators below @when should compose with the behavior body.""" + + @classmethod + def teardown_class(cls): + """Ensure runtime is drained after suite.""" + wait() + + def test_decorator_modifies_return_value(self): + x = Cown(10) + + @when(x) + @add_one + def doubled_plus_one(x): + return x.value * 2 + + @when(doubled_plus_one) + def _(result): + send("assert", (result.value, 21)) + + receive_asserts() + + def test_stacked_below_decorators_apply_in_order(self): + """Stacked below-decorators compose innermost-first on the worker. 
+ + ``@times_two @add_one def f(x): return x.value`` should compute + ``(x + 1) * 2`` because ``add_one`` wraps the body first, then + ``times_two`` wraps the result. + """ + x = Cown(10) + + @when(x) + @times_two + @add_one + def composed(x): + return x.value + + @when(composed) + def _(result): + send("assert", (result.value, 22)) + + receive_asserts() + + def test_below_decorator_inside_nested_when(self): + """A nested ``@when`` body may itself carry a below-decorator.""" + x = Cown(10) + y = Cown(7) + + @when(x) + def outer(x): + @when(y) + @add_one + def inner(y): + return y.value + + @when(inner) + def _(result): + send("assert", (result.value, 8)) + + receive_asserts() diff --git a/test/test_compat_atomics.py b/test/test_compat_atomics.py index 080759f..893f5c2 100644 --- a/test/test_compat_atomics.py +++ b/test/test_compat_atomics.py @@ -1,4 +1,4 @@ -"""Tests for the typed `boc_atomic_*_explicit` API in `compat.h`. +"""Tests for the typed `boc_atomic_*_explicit` API in `boc_compat.h`. These tests drive the C extension `bocpy._internal_test` (atomics domain, `atomics_*` methods) from real Python threads. On @@ -10,7 +10,7 @@ On x86/x64 these tests are smoke tests (every Interlocked* on those architectures is a full barrier). On ARM64 they are the canonical weak-memory correctness tests for the `__ldar*`/`__stlr*` and -`Interlocked*_{nf,acq,rel}` dispatch in `compat.h`. +`Interlocked*_{nf,acq,rel}` dispatch in `boc_compat.h`. """ import threading diff --git a/test/test_internal_wsq.py b/test/test_internal_wsq.py index dbd1cdc..54c6c77 100644 --- a/test/test_internal_wsq.py +++ b/test/test_internal_wsq.py @@ -1,4 +1,4 @@ -"""Unit tests for the inline ``boc_wsq_*`` helpers in ``sched.h``. +"""Unit tests for the inline ``boc_wsq_*`` helpers in ``boc_sched.h``. 
These tests exercise the work-stealing-queue cursor arithmetic and ``enqueue_spread`` distribution invariant directly via the diff --git a/test/test_noticeboard.py b/test/test_noticeboard.py index f85efc3..188064b 100644 --- a/test/test_noticeboard.py +++ b/test/test_noticeboard.py @@ -6,7 +6,7 @@ from bocpy import (Cown, drain, notice_delete, notice_read, notice_sync, notice_update, notice_write, noticeboard, - noticeboard_version, receive, + receive, REMOVED, send, start, TIMEOUT, wait, when) import bocpy._core as _core @@ -1400,81 +1400,6 @@ def step3(x, _): receive_asserts(3) - def test_snapshot_reused_when_no_writes_intervene(self): - """Version is unchanged across read-only behaviors. - - With no writes in flight, the version counter must stay constant - no matter how many read-only behaviors run. - """ - from bocpy import noticeboard_version - - x = Cown(0) - - @when(x) - def seed(x): - notice_write("k", 1) - notice_sync() - - # Drain the seed behavior by chaining a subsequent read; this - # ensures the write has landed before we sample the version. - @when(x, seed) - def warm(x, _): - send("assert", (notice_read("k"), 1)) - - receive_asserts() - - # Now run N read-only behaviors and watch the version. - before = noticeboard_version() - n = 20 - - for _ in range(n): - @when(x) - def reader(x): - send("assert", (notice_read("k"), 1)) - - receive_asserts(n) - - after = noticeboard_version() - assert after == before, ( - f"version moved from {before} to {after} across {n} " - f"read-only behaviors; no writes were issued") - - def test_writes_advance_version(self): - """Each notice_write strictly increases the version counter.""" - x = Cown(0) - - @when(x) - def seed(x): - notice_write("vk", 0) - notice_sync() - # Reading noticeboard_version() from the test thread would - # race the noticeboard mutator thread; the result-cown of - # this behavior carries the sample safely into `check`. 
- return noticeboard_version() - - n = 5 - for _ in range(n): - @when(x) - def writer(x): - notice_write("vk", 1) - notice_sync() - - @when(x) - def sample(x): - # Runs after every writer because all share `x`. Every - # writer's notice_sync() committed before its behavior - # released x, so the version we read here reflects all of - # them. - return noticeboard_version() - - @when(seed, sample) - def check(before, after): - # `before` and `after` are the result-cowns of the upstream - # behaviors; their values are the noticeboard_version() ints. - send("assert", (after.value - before.value, n)) - - receive_asserts() - def test_cross_behavior_visibility_preserved(self): """Sanity: write in A is visible in B (no regression vs baseline).""" x = Cown(0) @@ -1491,19 +1416,21 @@ def reader(x, _): receive_asserts() -class TestNoticeboardVersionAPI: - """Public-API surface tests for ``noticeboard_version``.""" +class TestNoticeSyncReturnType: + """Pin the documented return type of ``notice_sync()`` (None).""" - def test_returns_int(self): - """The version is an int.""" - from bocpy import noticeboard_version - v = noticeboard_version() - assert isinstance(v, int) - assert v >= 0 + @classmethod + def teardown_class(cls): + """Drain the runtime after the suite.""" + wait() + + def test_returns_none_inside_behavior(self): + x = Cown(0) - def test_monotonic(self): - """The version never decreases between consecutive reads.""" - from bocpy import noticeboard_version - a = noticeboard_version() - b = noticeboard_version() - assert b >= a + @when(x) + def _(x): + notice_write("rk", 1) + result = notice_sync() + send("assert", (result, None)) + + receive_asserts() diff --git a/test/test_public_c_abi.py b/test/test_public_c_abi.py new file mode 100644 index 0000000..dc2d64e --- /dev/null +++ b/test/test_public_c_abi.py @@ -0,0 +1,258 @@ +"""Lightweight smoke tests for the bocpy public C ABI. 
+ +Compile-time and runtime behaviour of the ABI is covered by the +standalone ``templates/c_abi_consumer`` extension, which CI builds and +imports separately. The tests here cover only what does not need a +C compiler: + + * ``get_include`` / ``get_sources`` shape. + * The wheel allow-list (no internal ``.h`` / ``.c`` leaks). + * Byte-identity between the MSVC atomic bodies in ``boc_compat.c`` and + ``bocpy_msvc.c``. + * Static parameter-signature parity between the prototypes in + ``bocpy.h`` and the bodies in ``bocpy_msvc.c``. + +See :ref:`c-abi` for the full usage contract. +""" + +from __future__ import annotations + +import os +import pathlib +import re +import sys +import textwrap + +import pytest + +import bocpy + + +EXPECTED_PUBLIC_C_FILES = {"bocpy.h", "xidata.h", "bocpy_msvc.c"} + +# Filename extensions a wheel install of bocpy is allowed to ship. +_ALLOWED_SHIPPED_EXTS = { + ".py", # source modules + ".pyc", # bytecode in __pycache__ + ".pyi", # type stubs + ".so", # Linux/BSD compiled extensions + ".pyd", # Windows compiled extensions + ".dylib", # macOS dynamic libraries (defensive) + ".dll", # Windows dynamic libraries (defensive) + ".txt", # bocpy.examples ships menu.txt / cheese.txt +} +# Filenames (full basename, no extension) that are allowed even +# though they don't match _ALLOWED_SHIPPED_EXTS. 
+_ALLOWED_SHIPPED_NAMES = {"py.typed"} + +EXPECTED_ATOMIC_NAMES = { + "atomic_load", + "atomic_store", + "atomic_fetch_add", + "atomic_compare_exchange_strong", +} + + +# --------------------------------------------------------------------------- +# get_include / get_sources +# --------------------------------------------------------------------------- + + +def test_get_include_points_at_headers(): + inc = bocpy.get_include() + assert os.path.isabs(inc) + assert os.path.isfile(os.path.join(inc, "bocpy", "bocpy.h")) + assert os.path.isfile(os.path.join(inc, "bocpy", "xidata.h")) + + +def test_get_sources_shape(): + sources = bocpy.get_sources() + if sys.platform == "win32": + assert len(sources) == 1 + assert sources[0].endswith("bocpy_msvc.c") + assert os.path.isfile(sources[0]) + else: + assert sources == [] + + +# --------------------------------------------------------------------------- +# Wheel allow-list (no internal .h / .c leaks) +# --------------------------------------------------------------------------- + + +def _assert_only_public_artefacts(package_dir: str) -> None: + """Walk ``package_dir`` and assert every shipped file is allowed. + + ``.c`` / ``.h`` files must appear in :data:`EXPECTED_PUBLIC_C_FILES`. + Every other file must either match :data:`_ALLOWED_SHIPPED_EXTS` + by extension or :data:`_ALLOWED_SHIPPED_NAMES` by exact basename. + Anything else is treated as an internal-implementation leak. 
+ """ + forbidden_c = set() + forbidden_other = set() + for _root, _dirs, files in os.walk(package_dir, followlinks=True): + for name in files: + ext = os.path.splitext(name)[1] + if ext in (".c", ".h"): + if name not in EXPECTED_PUBLIC_C_FILES: + forbidden_c.add(name) + elif ext in _ALLOWED_SHIPPED_EXTS: + continue + elif name in _ALLOWED_SHIPPED_NAMES: + continue + else: + forbidden_other.add(name) + assert not forbidden_c, ( + f"forbidden internal C/H files shipped: {sorted(forbidden_c)}") + assert not forbidden_other, ( + f"forbidden internal files shipped (unknown extension): " + f"{sorted(forbidden_other)}") + + +@pytest.mark.skipif( + os.environ.get("BOCPY_TEST_WHEEL") != "1", + reason="set BOCPY_TEST_WHEEL=1 to run wheel-content checks") +def test_wheel_ships_no_internal_files(): + package_dir = os.path.dirname(bocpy.__file__) + _assert_only_public_artefacts(package_dir) + + +def test_wheel_allowlist_assertion_actually_fires(tmp_path): + fake = tmp_path / "fake_pkg" + fake.mkdir() + (fake / "bocpy.h").write_text("/* allowed */\n") + (fake / "compat.h").write_text("/* forbidden */\n") + with pytest.raises(AssertionError) as exc_info: + _assert_only_public_artefacts(str(fake)) + assert "compat.h" in str(exc_info.value) + + +def test_wheel_allowlist_rejects_unknown_extension(tmp_path): + """An internal artefact with a non-C/H extension must also be flagged.""" + fake = tmp_path / "fake_pkg" + fake.mkdir() + (fake / "bocpy.h").write_text("/* allowed */\n") + (fake / "secrets.json").write_text("{}\n") + with pytest.raises(AssertionError) as exc_info: + _assert_only_public_artefacts(str(fake)) + assert "secrets.json" in str(exc_info.value) + + +# --------------------------------------------------------------------------- +# MSVC atomic bodies in lockstep (boc_compat.c vs bocpy_msvc.c) +# --------------------------------------------------------------------------- + + +_MARKER_BEGIN = "/* @atomic-bodies-begin */" +_MARKER_END = "/* @atomic-bodies-end */" + + +def 
_extract_marker_region(path: str) -> str: + text = pathlib.Path(path).read_text() + begin = text.find(_MARKER_BEGIN) + end = text.find(_MARKER_END) + assert begin != -1, f"begin marker missing in {path}" + assert end != -1, f"end marker missing in {path}" + assert begin < end, f"markers out of order in {path}" + return text[begin + len(_MARKER_BEGIN):end] + + +def test_msvc_bodies_in_lockstep(): + repo_root = pathlib.Path(__file__).resolve().parent.parent + compat_c = repo_root / "src" / "bocpy" / "boc_compat.c" + msvc_c = (repo_root / "src" / "bocpy" / "include" / "bocpy" + / "bocpy_msvc.c") + if not compat_c.is_file() or not msvc_c.is_file(): + pytest.skip( + "source files not present (running against installed wheel)") + a = _extract_marker_region(str(compat_c)) + b = _extract_marker_region(str(msvc_c)) + assert a == b, "marker regions differ — atomic bodies have drifted" + + +# --------------------------------------------------------------------------- +# Static prototype/body parameter-signature parity (bocpy.h vs bocpy_msvc.c) +# --------------------------------------------------------------------------- + + +def _extract_atomic_signatures(text: str) -> dict[str, str]: + """Return the parameter list of every atomic declaration in ``text``. + + The result is a mapping ``{name: parenthesised-param-list}`` with + one entry per occurrence of an :data:`EXPECTED_ATOMIC_NAMES` name + followed by a ``(``. The match walks balanced parentheses forward + from the opening ``(`` so multi-line declarations (e.g. the three- + line CAS prototype) are captured intact. Whitespace inside the + captured signature is normalised so single-line and multi-line + shapes compare equal. 
+ """ + out: dict[str, str] = {} + name_re = re.compile( + r"\b(" + "|".join(re.escape(n) for n in EXPECTED_ATOMIC_NAMES) + + r")\s*\(") + for m in name_re.finditer(text): + name = m.group(1) + i = m.end() - 1 + depth = 0 + j = i + while j < len(text): + c = text[j] + if c == "(": + depth += 1 + elif c == ")": + depth -= 1 + if depth == 0: + j += 1 + break + j += 1 + out[name] = " ".join(text[i:j].split()) + return out + + +def test_msvc_prototypes_match_bodies(): + inc = bocpy.get_include() + bocpy_h = pathlib.Path(inc, "bocpy", "bocpy.h").read_text() + msvc_c = pathlib.Path(inc, "bocpy", "bocpy_msvc.c").read_text() + d_h = _extract_atomic_signatures(bocpy_h) + d_msvc = _extract_atomic_signatures(msvc_c) + assert len(d_h) == 4, ( + f"bocpy.h: expected 4 atomic decls, got {sorted(d_h)}") + assert len(d_msvc) == 4, ( + f"bocpy_msvc.c: expected 4 atomic decls, got {sorted(d_msvc)}") + assert set(d_h.keys()) == EXPECTED_ATOMIC_NAMES + assert set(d_msvc.keys()) == EXPECTED_ATOMIC_NAMES + assert d_h == d_msvc, ( + "prototype/body parameter signatures diverge between bocpy.h and " + "bocpy_msvc.c") + + +def test_msvc_prototype_extraction_actually_fires(): + """Lock the extractor against vacuous-pass and multi-line regression.""" + _cas_one_line = ( + "bool atomic_compare_exchange_strong(" + "atomic_int_least64_t *ptr, " + "atomic_int_least64_t *expected, " + "int_least64_t desired);") + single_line_all_four = textwrap.dedent("""\ + int_least64_t atomic_load(atomic_int_least64_t *ptr); + void atomic_store(atomic_int_least64_t *ptr, int_least64_t value); + int_least64_t atomic_fetch_add(atomic_int_least64_t *ptr, int_least64_t value); + """) + _cas_one_line + "\n" + d = _extract_atomic_signatures(single_line_all_four) + assert len(d) == 4 + assert set(d.keys()) == EXPECTED_ATOMIC_NAMES + + multi_line_cas = textwrap.dedent("""\ + int_least64_t atomic_load(atomic_int_least64_t *ptr); + void atomic_store(atomic_int_least64_t *ptr, int_least64_t value); + int_least64_t 
atomic_fetch_add(atomic_int_least64_t *ptr, int_least64_t value); + bool atomic_compare_exchange_strong(atomic_int_least64_t *ptr, + atomic_int_least64_t *expected, + int_least64_t desired); + """) + d2 = _extract_atomic_signatures(multi_line_cas) + assert len(d2) == 4, f"multi-line CAS not captured: {sorted(d2)}" + assert set(d2.keys()) == EXPECTED_ATOMIC_NAMES + + no_atomics = "int unrelated(int x) { return x + 1; }\n" + assert _extract_atomic_signatures(no_atomics) == {} diff --git a/test/test_transpiler.py b/test/test_transpiler.py index 2f4d51e..2f4007f 100644 --- a/test/test_transpiler.py +++ b/test/test_transpiler.py @@ -422,15 +422,15 @@ def identity(x): assert info.captures == [] -class TestExportDecoratorStripping: - """Generated behavior functions must not carry any decorators.""" +class TestExportDecoratorComposition: + """Decorator handling: @when is stripped, others are preserved.""" @staticmethod def _export(source, path="/tmp/test.py"): tree = ast.parse(textwrap.dedent(source)) return export_module(tree, path) - def test_no_decorator_on_behavior(self): + def test_when_stripped_from_behavior(self): result = self._export("""\ from bocpy import when, whencall, Cown @@ -443,9 +443,293 @@ def f(x): gen_tree = ast.parse(result.code) for node in ast.walk(gen_tree): if isinstance(node, ast.FunctionDef) and node.name.startswith("__behavior__"): - assert node.decorator_list == [], ( - f"{node.name} still has decorators" + for dec in node.decorator_list: + dec_src = ast.unparse(dec) + assert "when" not in dec_src, ( + f"{node.name} still has @when decorator" + ) + + def test_below_decorator_preserved(self): + result = self._export("""\ + from bocpy import when, whencall, Cown + import functools + + x = Cown(1) + + def identity(fn): + return fn + + @when(x) + @identity + def f(x): + return x.value + """) + gen_tree = ast.parse(result.code) + found = False + for node in ast.walk(gen_tree): + if isinstance(node, ast.FunctionDef) and 
node.name.startswith("__behavior__"): + assert len(node.decorator_list) == 1, ( + f"expected 1 decorator, got {len(node.decorator_list)}" ) + assert ast.unparse(node.decorator_list[0]) == "identity" + found = True + assert found, "no __behavior__ function found" + + def test_above_decorator_raises(self): + import pytest + with pytest.raises(SyntaxError, match="above @when"): + self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + + def log_calls(fn): + return fn + + @log_calls + @when(x) + def f(x): + return x.value + """) + + def test_unresolvable_decorator_name_raises(self): + import pytest + with pytest.raises(SyntaxError, match="not_importable"): + self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + + @when(x) + @not_importable + def f(x): + return x.value + """) + + def test_decorator_with_module_level_constant_arg(self): + result = self._export("""\ + from bocpy import when, whencall, Cown + + MAX_RETRIES = 3 + + def retry(n): + def decorator(fn): + return fn + return decorator + + x = Cown(1) + + @when(x) + @retry(MAX_RETRIES) + def f(x): + return x.value + """) + gen_tree = ast.parse(result.code) + for node in ast.walk(gen_tree): + if isinstance(node, ast.FunctionDef) and node.name.startswith("__behavior__"): + assert len(node.decorator_list) == 1 + assert "retry" in ast.unparse(node.decorator_list[0]) + + def test_async_def_with_when_raises(self): + import pytest + with pytest.raises(SyntaxError, match="async"): + self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + + @when(x) + async def f(x): + return x.value + """) + + def test_lambda_in_decorator_does_not_false_positive(self): + """Names bound by a Lambda inside a decorator must not be flagged.""" + result = self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + + def retry(fn): + def deco(target): + return target + return deco + + @when(x) + @retry(lambda x: x * 2) + def f(x): + return x.value + """) + assert 
"__behavior__" in result.code + + def test_comprehension_in_decorator_does_not_false_positive(self): + """Comprehension targets are local to the comprehension scope.""" + result = self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + REGISTRY = [1, 2, 3] + + def register(items): + def deco(fn): + return fn + return deco + + @when(x) + @register([item for item in REGISTRY]) + def f(x): + return x.value + """) + assert "__behavior__" in result.code + + def test_genexp_in_decorator_does_not_false_positive(self): + """Generator-expression bound names are local to the genexp.""" + result = self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + REGISTRY = [1, 2, 3] + + def use(items): + def deco(fn): + return fn + return deco + + @when(x) + @use(sum(item for item in REGISTRY)) + def f(x): + return x.value + """) + assert "__behavior__" in result.code + + def test_dictcomp_in_decorator_does_not_false_positive(self): + """DictComp key/value names are local to the DictComp scope.""" + result = self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + REGISTRY = [1, 2, 3] + + def use(d): + def deco(fn): + return fn + return deco + + @when(x) + @use({k: k * 2 for k in REGISTRY}) + def f(x): + return x.value + """) + assert "__behavior__" in result.code + + def test_staticmethod_below_when_raises(self): + import pytest + with pytest.raises(SyntaxError, match="staticmethod"): + self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + + @when(x) + @staticmethod + def f(x): + return x.value + """) + + def test_classmethod_below_when_raises(self): + import pytest + with pytest.raises(SyntaxError, match="classmethod"): + self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + + @when(x) + @classmethod + def f(x): + return x.value + """) + + def test_property_below_when_raises(self): + import pytest + with pytest.raises(SyntaxError, match="property"): + self._export("""\ + from 
bocpy import when, whencall, Cown + + x = Cown(1) + + @when(x) + @property + def f(x): + return x.value + """) + + def test_stacked_below_decorators_preserved_in_order(self): + """Multiple below-decorators are preserved with their source order.""" + result = self._export("""\ + from bocpy import when, whencall, Cown + + x = Cown(1) + + def deco_a(fn): + return fn + + def deco_b(fn): + return fn + + @when(x) + @deco_a + @deco_b + def f(x): + return x.value + """) + gen_tree = ast.parse(result.code) + for node in ast.walk(gen_tree): + if isinstance(node, ast.FunctionDef) and node.name.startswith("__behavior__"): + names = [ast.unparse(d) for d in node.decorator_list] + assert names == ["deco_a", "deco_b"], names + + def test_annassign_constant_resolves_in_decorator(self): + """``X: int = 3`` makes ``X`` resolvable to a decorator argument.""" + result = self._export("""\ + from bocpy import when, whencall, Cown + + MAX_RETRIES: int = 3 + + def retry(n): + def deco(fn): + return fn + return deco + + x = Cown(1) + + @when(x) + @retry(MAX_RETRIES) + def f(x): + return x.value + """) + assert "__behavior__" in result.code + + def test_tuple_constant_target_resolves_in_decorator(self): + """Tuple-target uppercase assignment makes targets resolvable.""" + result = self._export("""\ + from bocpy import when, whencall, Cown + + A, B = 1, 2 + + def use(x): + def deco(fn): + return fn + return deco + + x = Cown(1) + + @when(x) + @use(A + B) + def f(x): + return x.value + """) + assert "__behavior__" in result.code class TestExportFileRewrite: