Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/mwparserfromhell/parser/ctokenizer/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ SOFTWARE.
#endif

// Redirect the C allocation routines used by the tokenizer to CPython's
// object allocator. All four names are remapped together so that allocation
// and release always use the same allocator: memory obtained from libc's raw
// calloc must never be released through PyObject_Free.
#define malloc PyObject_Malloc // XXX: yuck
#define calloc PyObject_Calloc
#define realloc PyObject_Realloc
#define free PyObject_Free

Expand Down
31 changes: 31 additions & 0 deletions tests/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
from __future__ import annotations

import os
import subprocess
import sys
import textwrap
from collections.abc import Generator
from dataclasses import dataclass

Expand Down Expand Up @@ -145,3 +148,31 @@ def test_describe_context():
assert "" == contexts.describe(0)
ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY | contexts.HAS_TEXT)
assert "TEMPLATE_PARAM_KEY|HAS_TEXT" == ctx


@pytest.mark.skipif(CTokenizer is None, reason="CTokenizer not available")
def test_entity_does_not_corrupt_heap():
    """Check that the C tokenizer's entity path does not mix allocators.

    Regression test: the C tokenizer must not pair raw libc ``calloc`` with
    ``PyObject_Free`` when handling an ampersand that is not a valid entity.

    The parsing runs in a child interpreter started with
    ``PYTHONMALLOC=debug`` so that an allocator mismatch on the entity-parsing
    path is reported as a fatal error (non-zero exit status) rather than
    silent heap corruption. The child's stdout and stderr are included in the
    assertion message on failure.

    Raises:
        subprocess.TimeoutExpired: if the child interpreter does not finish
            within the timeout, so a hung child fails the test instead of
            stalling the whole test session.
    """
    # Generous upper bound: the child only imports the package and parses a
    # handful of tiny strings, so hitting this limit means it is stuck.
    timeout_seconds = 120

    # NOTE(review): the last two inputs below are identical as shown here;
    # confirm they were not meant to be two distinct entity spellings.
    program = textwrap.dedent(
        """
        import mwparserfromhell
        from mwparserfromhell.parser._tokenizer import CTokenizer
        assert isinstance(mwparserfromhell.parser.Parser()._tokenizer, CTokenizer)
        for text in ("a & b", "{{T|p=a & b}}", "&", "*", "*"):
            assert str(mwparserfromhell.parse(text)) == text
        """
    )
    # Inherit the current environment and only override the allocator mode.
    env = {**os.environ, "PYTHONMALLOC": "debug"}
    result = subprocess.run(
        [sys.executable, "-c", program],
        env=env,
        capture_output=True,
        text=True,
        timeout=timeout_seconds,
    )
    assert result.returncode == 0, (
        "C tokenizer triggered allocator mismatch under PYTHONMALLOC=debug:\n"
        f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    )
Loading