diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2fb658b..db33602 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,7 +27,9 @@ jobs: steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - repository: google/oss-fuzz + # Temporary, for testing use my branch with the updated build scripts. + repository: stanfromireland/oss-fuzz + ref: merge-cpython3-python3-libraries path: oss-fuzz persist-credentials: false diff --git a/Makefile b/Makefile index 01df3e8..70bcb16 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,9 @@ -all: fuzzer-binascii fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo +C_FUZZ_TESTS := $(shell cat fuzz_tests.txt) + +all: fuzzer-binascii fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo $(C_FUZZ_TESTS) PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config +CFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags) CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags) LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed) $(CPYTHON_MODLIBS) -Wl,--allow-multiple-definition @@ -42,3 +45,8 @@ fuzzer-xml: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"xml.py\"" -ldl $(LDFLAGS) -o fuzzer-xml fuzzer-zoneinfo: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zoneinfo.py\"" -ldl $(LDFLAGS) -o fuzzer-zoneinfo + +# C fuzz targets +$(C_FUZZ_TESTS): %: fuzzer.c + clang $(CFLAGS) -D _Py_FUZZ_ONE -D _Py_FUZZ_$@ -c -Wno-unused-function fuzzer.c -o $@.o + clang++ $(CXXFLAGS) -rdynamic $@.o $(LIB_FUZZING_ENGINE) $(LDFLAGS) -o $@ diff --git a/dictionaries/fuzz_elementtree_parsewhole.dict b/dictionaries/fuzz_elementtree_parsewhole.dict new file mode 100644 index 0000000..e1b58cd --- /dev/null +++ b/dictionaries/fuzz_elementtree_parsewhole.dict @@ -0,0 +1,134 @@ +tok_1="<" +tok_2=">" +tok_3="/" +tok_4="" +tok_6="" +tok_7="version" +tok_8="encoding" +tok_9="UTF-8" +tok_9a="UTF-16" +tok_9b="ASCII" +tok_9c="LATIN-1" +tok_9d="UTF-32" +tok_9e="UTF-7" +tok_10="\"" +tok_11="&" +tok_11a="&#" +tok_11b=";" +tok_12="'" +tok_13="" +tok_14="" +tag_doctype="" +tag_open_close="" +tag_open_exclamation="" +tag_xml_q="" + +encoding_utf="UTF-" +encoding_iso1="ISO-8859" +encoding_iso3="ISO-10646-UCS" +encoding_iso5="ISO-LATIN-1" +encoding_jis="SHIFT_JIS" +encoding_utf7="UTF-7" +encoding_utf16le="UTF-16BE" +encoding_utf16le="UTF-16LE" +encoding_ascii="US-ASCII" +encoding_latin1="latin1" diff --git a/dictionaries/fuzz_json_loads.dict b/dictionaries/fuzz_json_loads.dict new file mode 100644 index 0000000..ad64917 --- /dev/null +++ b/dictionaries/fuzz_json_loads.dict @@ -0,0 +1,40 @@ +"0" +",0" +":0" +"0:" +"-1.2e+3" + +"true" +"false" +"null" + +"\"\"" +",\"\"" +":\"\"" +"\"\":" + +"{}" +",{}" +":{}" +"{\"\":0}" +"{{}}" + +"[]" +",[]" +":[]" +"[0]" +"[[]]" + +"''" +"\\" +"\\b" +"\\f" +"\\n" +"\\r" +"\\t" +"\\u0000" +"\\x00" +"\\0" +"\\uD800\\uDC00" +"\\uDBFF\\uDFFF" + diff --git a/dictionaries/fuzz_pycompile.dict b/dictionaries/fuzz_pycompile.dict new file mode 100644 index 0000000..322d818 --- /dev/null +++ b/dictionaries/fuzz_pycompile.dict @@ -0,0 +1,186 @@ +# bits of syntax +"( " +") " +"[ " +"] " +": " +", " +"; " +"{ " +"} " + +# operators +"+ " +"- " +"* " +"** " +"/ " +"// " +"| " +"& " +"< " +"> " +"= " +". " +"% " +"` " +"^ " +"~ " +"@ " +"== " +"!= " +"<> " +"<< " +"<= " +">= " +">> " +"+= " +"-= " +"*= " +"** " +"/= " +"//= " +"|= " +"%= " +"&= " +"^= " +"<<= " +">>= " +"**= " +":= " +"@= " + +# whitespace +" " +"\\t" +":\\n " +"\\\\n" + +# type signatures and functions +"-> " +": List[int]" +": Dict[int, str]" + +"# type:" +"# type: List[int]" +"# type: Dict[int, str]" + +", *" +", /" +", *args" +", **kwargs" +", x=42" + + +# literals +"0x0a" +"0b0000" +"42" +"0o70" +"42j" +"42.01" +"-5" +"+42e-3" +"0_0_0" +"1e1_0" +".1_4" + +"{}" + +# variable names +"x" +"y" +"_" +"*x" + +# strings +"r'x'" + +"b'x'" + +"rb\"x\"" + +"br\"x\"" + +"u\"x\"" + +"f'{x + 5}'" +"f\"{x + 5}\"" +"f'{s!r}'" +"f'{s!s}'" +"f'{s!a}'" +"f'{x=}'" + +"t'{s + 5}'" +"t\"{s + 5}\"" +"tr's'" +"rt\"s\"" + +"'''" +"\"\"\"" + +"\\N" +"\\u" +"\\x" + +# keywords +"def " +"del " +"pass " +"break " +"continue " +"return " +"raise " +"from " +"import " +".. " +"... " +"__future__ " +"as " +"global " +"nonlocal " +"assert " +"print " +"if " +"elif " +"else: " +"while " +"try: " +"except " +"except* " +"finally: " +"with " +"lambda " +"or " +"and " +"not " +"None " +"__peg_parser__" +"True " +"False " +"yield " +"async " +"await " +"for " +"in " +"is " +"class " +"match " +"case " +"type " +"lazy " + +# shebangs and encodings +"#!" +"# coding:" +"# coding=" +"# coding: latin-1" +"# coding=latin-1" +"# coding: utf-8" +"# coding=utf-8" +"# coding: ascii" +"# coding=ascii" +"# coding: cp860" +"# coding=cp860" +"# coding: gbk" +"# coding=gbk" diff --git a/dictionaries/fuzz_sre_compile.dict b/dictionaries/fuzz_sre_compile.dict new file mode 100644 index 0000000..961306a --- /dev/null +++ b/dictionaries/fuzz_sre_compile.dict @@ -0,0 +1,219 @@ +"?" +"abc" +"()" +"[]" +"abc|def" +"abc|def|ghi" +"^xxx$" +"ab\\b\\d\\bcd" +"\\w|\\d" +"a*?" +"abc+" +"abc+?" +"xyz?" +"xyz??" +"xyz{0,1}" +"xyz{0,1}?" +"xyz{93}" +"xyz{1,32}" +"xyz{1,32}?" +"xyz{1,}" +"xyz{1,}?" +"a\\fb\\nc\\rd\\te\\vf" +"a\\nb\\bc" +"(?:foo)" +"(?: foo )" +"foo|(bar|baz)|quux" +"foo(?=bar)baz" +"foo(?!bar)baz" +"foo(?<=bar)baz" +"foo(?)" +"(?.)" +"(?.)\\k" diff --git a/fuzz_tests.txt b/fuzz_tests.txt new file mode 100644 index 0000000..ea6f982 --- /dev/null +++ b/fuzz_tests.txt @@ -0,0 +1,11 @@ +fuzz_builtin_float +fuzz_builtin_int +fuzz_builtin_unicode +fuzz_json_loads +fuzz_sre_compile +fuzz_sre_match +fuzz_csv_reader +fuzz_struct_unpack +fuzz_ast_literal_eval +fuzz_elementtree_parsewhole +fuzz_pycompile diff --git a/fuzzer.c b/fuzzer.c new file mode 100644 index 0000000..02afb01 --- /dev/null +++ b/fuzzer.c @@ -0,0 +1,727 @@ +/* A fuzz test for CPython. + + The only exposed function is LLVMFuzzerTestOneInput, which is called by + fuzzers and by the _fuzz module for smoke tests. + + To build exactly one fuzz test, as when running in oss-fuzz etc., + build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_. e.g. to build + LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with + -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. + + See the source code for LLVMFuzzerTestOneInput for details. */ + +#ifndef Py_BUILD_CORE_MODULE +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include +#include +#include + +/* Fuzz PyFloat_FromString as a proxy for float(str). */ +static int fuzz_builtin_float(const char* data, size_t size) { + PyObject* s = PyBytes_FromStringAndSize(data, size); + if (s == NULL) return 0; + PyObject* f = PyFloat_FromString(s); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + + Py_XDECREF(f); + Py_DECREF(s); + return 0; +} + +#define MAX_INT_TEST_SIZE 0x10000 + +/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */ +static int fuzz_builtin_int(const char* data, size_t size) { + /* Ignore test cases with very long ints to avoid timeouts + int("9" * 1000000) is not a very interesting test caase */ + if (size < 1 || size > MAX_INT_TEST_SIZE) { + return 0; + } + // Use the first byte to pick a base + int base = ((unsigned char) data[0]) % 37; + if (base == 1) { + // 1 is the only number between 0 and 36 that is not a valid base. + base = 0; + } + + data += 1; + size -= 1; + + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (s == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + return 0; + } + PyObject* l = PyLong_FromUnicodeObject(s, base); + if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + PyErr_Clear(); + Py_XDECREF(l); + Py_DECREF(s); + return 0; +} + +/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */ +static int fuzz_builtin_unicode(const char* data, size_t size) { + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + Py_XDECREF(s); + return 0; +} + + +PyObject* struct_unpack_method = NULL; +PyObject* struct_error = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_struct_unpack(void) { + /* Import struct.unpack */ + PyObject* struct_module = PyImport_ImportModule("struct"); + if (struct_module == NULL) { + return 0; + } + struct_error = PyObject_GetAttrString(struct_module, "error"); + if (struct_error == NULL) { + return 0; + } + struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack"); + return struct_unpack_method != NULL; +} +/* Fuzz struct.unpack(x, y) */ +static int fuzz_struct_unpack(const char* data, size_t size) { + /* Everything up to the first null byte is considered the + format. Everything after is the buffer */ + const char* first_null = memchr(data, '\0', size); + if (first_null == NULL) { + return 0; + } + + size_t format_length = first_null - data; + size_t buffer_length = size - format_length - 1; + + PyObject* pattern = PyBytes_FromStringAndSize(data, format_length); + if (pattern == NULL) { + return 0; + } + PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length); + if (buffer == NULL) { + Py_DECREF(pattern); + return 0; + } + + PyObject* unpacked = PyObject_CallFunctionObjArgs( + struct_unpack_method, pattern, buffer, NULL); + /* Ignore any overflow errors, these are easily triggered accidentally */ + if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Clear(); + } + /* The pascal format string will throw a negative size when passing 0 + like: struct.unpack('0p', b'') */ + if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) { + PyErr_Clear(); + } + /* Ignore any ValueError, these are triggered by non-ASCII format. */ + if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + /* Ignore any struct.error exceptions, these can be caused by invalid + formats or incomplete buffers both of which are common. */ + if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) { + PyErr_Clear(); + } + + Py_XDECREF(unpacked); + Py_DECREF(pattern); + Py_DECREF(buffer); + return 0; +} + + +#define MAX_JSON_TEST_SIZE 0x100000 + +PyObject* json_loads_method = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_json_loads(void) { + /* Import json.loads */ + PyObject* json_module = PyImport_ImportModule("json"); + if (json_module == NULL) { + return 0; + } + json_loads_method = PyObject_GetAttrString(json_module, "loads"); + return json_loads_method != NULL; +} +/* Fuzz json.loads(x) */ +static int fuzz_json_loads(const char* data, size_t size) { + /* Since python supports arbitrarily large ints in JSON, + long inputs can lead to timeouts on boring inputs like + `json.loads("9" * 100000)` */ + if (size > MAX_JSON_TEST_SIZE) { + return 0; + } + PyObject* input_bytes = PyBytes_FromStringAndSize(data, size); + if (input_bytes == NULL) { + return 0; + } + PyObject* parsed = PyObject_CallOneArg(json_loads_method, input_bytes); + if (parsed == NULL) { + /* Ignore ValueError as the fuzzer will more than likely + generate some invalid json and values */ + if (PyErr_ExceptionMatches(PyExc_ValueError) || + /* Ignore RecursionError as the fuzzer generates long sequences of + arrays such as `[[[...` */ + PyErr_ExceptionMatches(PyExc_RecursionError) || + /* Ignore unicode errors, invalid byte sequences are common */ + PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) + ) { + PyErr_Clear(); + } + } + Py_DECREF(input_bytes); + Py_XDECREF(parsed); + return 0; +} + +#define MAX_RE_TEST_SIZE 0x10000 + +PyObject* re_compile_method = NULL; +PyObject* re_error_exception = NULL; +int RE_FLAG_DEBUG = 0; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_sre_compile(void) { + /* Import sre_compile.compile and sre.error */ + PyObject* re_module = PyImport_ImportModule("re"); + if (re_module == NULL) { + return 0; + } + re_compile_method = PyObject_GetAttrString(re_module, "compile"); + if (re_compile_method == NULL) { + return 0; + } + + re_error_exception = PyObject_GetAttrString(re_module, "error"); + if (re_error_exception == NULL) { + return 0; + } + PyObject* debug_flag = PyObject_GetAttrString(re_module, "DEBUG"); + if (debug_flag == NULL) { + return 0; + } + RE_FLAG_DEBUG = PyLong_AsLong(debug_flag); + return 1; +} +/* Fuzz re.compile(x) */ +static int fuzz_sre_compile(const char* data, size_t size) { + /* Ignore really long regex patterns that will timeout the fuzzer */ + if (size > MAX_RE_TEST_SIZE) { + return 0; + } + /* We treat the first 2 bytes of the input as a number for the flags */ + if (size < 2) { + return 0; + } + uint16_t flags = ((uint16_t*) data)[0]; + /* We remove the SRE_FLAG_DEBUG if present. This is because it + prints to stdout which greatly decreases fuzzing speed */ + flags &= ~RE_FLAG_DEBUG; + + /* Pull the pattern from the remaining bytes */ + PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2); + if (pattern_bytes == NULL) { + return 0; + } + PyObject* flags_obj = PyLong_FromUnsignedLong(flags); + if (flags_obj == NULL) { + Py_DECREF(pattern_bytes); + return 0; + } + + /* compiled = re.compile(data[2:], data[0:2] */ + PyObject* compiled = PyObject_CallFunctionObjArgs( + re_compile_method, pattern_bytes, flags_obj, NULL); + /* Ignore ValueError as the fuzzer will more than likely + generate some invalid combination of flags */ + if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + /* Ignore some common errors thrown by sre_parse: + Overflow, Assertion, Recursion and Index */ + if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) || + PyErr_ExceptionMatches(PyExc_AssertionError) || + PyErr_ExceptionMatches(PyExc_RecursionError) || + PyErr_ExceptionMatches(PyExc_IndexError)) + ) { + PyErr_Clear(); + } + /* Ignore re.error */ + if (compiled == NULL && PyErr_ExceptionMatches(re_error_exception)) { + PyErr_Clear(); + } + + Py_DECREF(pattern_bytes); + Py_DECREF(flags_obj); + Py_XDECREF(compiled); + return 0; +} + +/* Some random patterns used to test re.match. + Be careful not to add catostraphically slow regexes here, we want to + exercise the matching code without causing timeouts.*/ +static const char* regex_patterns[] = { + ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]", + "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?", + "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$", + "(?:a*)*", "a{1,2}?" +}; +const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]); +PyObject** compiled_patterns = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_sre_match(void) { + PyObject* re_module = PyImport_ImportModule("re"); + if (re_module == NULL) { + return 0; + } + compiled_patterns = (PyObject**) PyMem_RawMalloc( + sizeof(PyObject*) * NUM_PATTERNS); + if (compiled_patterns == NULL) { + PyErr_NoMemory(); + return 0; + } + + /* Precompile all the regex patterns on the first run for faster fuzzing */ + for (size_t i = 0; i < NUM_PATTERNS; i++) { + PyObject* compiled = PyObject_CallMethod( + re_module, "compile", "y", regex_patterns[i]); + /* Bail if any of the patterns fail to compile */ + if (compiled == NULL) { + return 0; + } + compiled_patterns[i] = compiled; + } + return 1; +} +/* Fuzz re.match(x) */ +static int fuzz_sre_match(const char* data, size_t size) { + if (size < 1 || size > MAX_RE_TEST_SIZE) { + return 0; + } + /* Use the first byte as a uint8_t specifying the index of the + regex to use */ + unsigned char idx = (unsigned char) data[0]; + idx = idx % NUM_PATTERNS; + + /* Pull the string to match from the remaining bytes */ + PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1); + if (to_match == NULL) { + return 0; + } + + PyObject* pattern = compiled_patterns[idx]; + PyObject* match_callable = PyObject_GetAttrString(pattern, "match"); + + PyObject* matches = PyObject_CallOneArg(match_callable, to_match); + + Py_XDECREF(matches); + Py_DECREF(match_callable); + Py_DECREF(to_match); + return 0; +} + +#define MAX_CSV_TEST_SIZE 0x100000 +PyObject* csv_module = NULL; +PyObject* csv_error = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_csv_reader(void) { + /* Import csv and csv.Error */ + csv_module = PyImport_ImportModule("csv"); + if (csv_module == NULL) { + return 0; + } + csv_error = PyObject_GetAttrString(csv_module, "Error"); + return csv_error != NULL; +} +/* Fuzz csv.reader([x]) */ +static int fuzz_csv_reader(const char* data, size_t size) { + if (size < 1 || size > MAX_CSV_TEST_SIZE) { + return 0; + } + /* Ignore non null-terminated strings since _csv can't handle + embedded nulls */ + if (memchr(data, '\0', size) == NULL) { + return 0; + } + + PyObject* s = PyUnicode_FromString(data); + /* Ignore exceptions until we have a valid string */ + if (s == NULL) { + PyErr_Clear(); + return 0; + } + + /* Split on \n so we can test multiple lines */ + PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n"); + if (lines == NULL) { + Py_DECREF(s); + return 0; + } + + PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines); + if (reader) { + /* Consume all of the reader as an iterator */ + PyObject* parsed_line; + while ((parsed_line = PyIter_Next(reader))) { + Py_DECREF(parsed_line); + } + } + + /* Ignore csv.Error because we're probably going to generate + some bad files (embedded new-lines, unterminated quotes etc) */ + if (PyErr_ExceptionMatches(csv_error)) { + PyErr_Clear(); + } + + Py_XDECREF(reader); + Py_DECREF(s); + return 0; +} + +#define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x100000 +PyObject* ast_literal_eval_method = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_ast_literal_eval(void) { + PyObject* ast_module = PyImport_ImportModule("ast"); + if (ast_module == NULL) { + return 0; + } + ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval"); + return ast_literal_eval_method != NULL; +} +/* Fuzz ast.literal_eval(x) */ +static int fuzz_ast_literal_eval(const char* data, size_t size) { + if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) { + return 0; + } + /* Ignore non null-terminated strings since ast can't handle + embedded nulls */ + if (memchr(data, '\0', size) == NULL) { + return 0; + } + + PyObject* s = PyUnicode_FromString(data); + /* Ignore exceptions until we have a valid string */ + if (s == NULL) { + PyErr_Clear(); + return 0; + } + + PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s); + /* Ignore some common errors thrown by ast.literal_eval */ + if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) || + PyErr_ExceptionMatches(PyExc_TypeError) || + PyErr_ExceptionMatches(PyExc_SyntaxError) || + PyErr_ExceptionMatches(PyExc_MemoryError) || + PyErr_ExceptionMatches(PyExc_OverflowError) || + PyErr_ExceptionMatches(PyExc_RecursionError)) + ) { + PyErr_Clear(); + } + + Py_XDECREF(literal); + Py_DECREF(s); + return 0; +} + +#define MAX_ELEMENTTREE_PARSEWHOLE_TEST_SIZE 0x100000 +PyObject* xmlparser_type = NULL; +PyObject* bytesio_type = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_elementtree_parsewhole(void) { + PyObject* elementtree_module = PyImport_ImportModule("_elementtree"); + if (elementtree_module == NULL) { + return 0; + } + xmlparser_type = PyObject_GetAttrString(elementtree_module, "XMLParser"); + Py_DECREF(elementtree_module); + if (xmlparser_type == NULL) { + return 0; + } + + + PyObject* io_module = PyImport_ImportModule("io"); + if (io_module == NULL) { + return 0; + } + bytesio_type = PyObject_GetAttrString(io_module, "BytesIO"); + Py_DECREF(io_module); + if (bytesio_type == NULL) { + return 0; + } + + return 1; +} +/* Fuzz _elementtree.XMLParser._parse_whole(x) */ +static int fuzz_elementtree_parsewhole(const char* data, size_t size) { + if (size > MAX_ELEMENTTREE_PARSEWHOLE_TEST_SIZE) { + return 0; + } + + PyObject *input = PyObject_CallFunction(bytesio_type, "y#", data, (Py_ssize_t)size); + if (input == NULL) { + assert(PyErr_Occurred()); + PyErr_Print(); + abort(); + } + + PyObject *xmlparser_instance = PyObject_CallObject(xmlparser_type, NULL); + if (xmlparser_instance == NULL) { + assert(PyErr_Occurred()); + PyErr_Print(); + abort(); + } + + PyObject *result = PyObject_CallMethod(xmlparser_instance, "_parse_whole", "O", input); + if (result == NULL) { + /* Ignore exception here, which can be caused by invalid XML input */ + PyErr_Clear(); + } else { + Py_DECREF(result); + } + + Py_DECREF(xmlparser_instance); + Py_DECREF(input); + + return 0; +} + +#define MAX_PYCOMPILE_TEST_SIZE 16384 + +static const int start_vals[] = {Py_eval_input, Py_single_input, Py_file_input}; +const size_t NUM_START_VALS = sizeof(start_vals) / sizeof(start_vals[0]); + +static const int optimize_vals[] = {-1, 0, 1, 2}; +const size_t NUM_OPTIMIZE_VALS = sizeof(optimize_vals) / sizeof(optimize_vals[0]); + +/* Fuzz `PyCompileStringExFlags` using a variety of input parameters. + * That function is essentially behind the `compile` builtin */ +static int fuzz_pycompile(const char* data, size_t size) { + // Ignore overly-large inputs, and account for a NUL terminator + if (size > MAX_PYCOMPILE_TEST_SIZE - 1) { + return 0; + } + + // Need 3 bytes for parameter selection + if (size < 3) { + return 0; + } + + // Use first byte to determine element of `start_vals` to use + unsigned char start_idx = (unsigned char) data[0]; + int start = start_vals[start_idx % NUM_START_VALS]; + + // Use second byte to determine element of `optimize_vals` to use + unsigned char optimize_idx = (unsigned char) data[1]; + int optimize = optimize_vals[optimize_idx % NUM_OPTIMIZE_VALS]; + + // Use third byte to determine compiler flags to use. + unsigned char flags_byte = (unsigned char) data[2]; + PyCompilerFlags flags = _PyCompilerFlags_INIT; + if (flags_byte & 0x01) { + flags.cf_flags |= PyCF_DONT_IMPLY_DEDENT; + } + if (flags_byte & 0x02) { + flags.cf_flags |= PyCF_ONLY_AST; + } + if (flags_byte & 0x04) { + flags.cf_flags |= PyCF_IGNORE_COOKIE; + } + if (flags_byte & 0x08) { + flags.cf_flags |= PyCF_TYPE_COMMENTS; + } + if (flags_byte & 0x10) { + flags.cf_flags |= PyCF_ALLOW_TOP_LEVEL_AWAIT; + } + if (flags_byte & 0x20) { + flags.cf_flags |= PyCF_ALLOW_INCOMPLETE_INPUT; + } + if (flags_byte & 0x40) { + flags.cf_flags |= PyCF_OPTIMIZED_AST; + } + + char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE]; + + // Create a NUL-terminated C string from the remaining input + memcpy(pycompile_scratch, data + 3, size - 3); + // Put a NUL terminator just after the copied data. (Space was reserved already.) + pycompile_scratch[size - 3] = '\0'; + + PyObject *result = Py_CompileStringExFlags(pycompile_scratch, "", start, &flags, optimize); + if (result == NULL) { + /* Compilation failed, most likely from a syntax error. If it was a + SystemError we abort. There's no non-bug reason to raise a + SystemError. */ + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SystemError)) { + PyErr_Print(); + abort(); + } + PyErr_Clear(); + } else { + Py_DECREF(result); + } + + return 0; +} + +/* Run fuzzer and abort on failure. */ +static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { + int rv = fuzzer((const char*) data, size); + if (PyErr_Occurred()) { + /* Fuzz tests should handle expected errors for themselves. + This is last-ditch check in case they didn't. */ + PyErr_Print(); + abort(); + } + /* Someday the return value might mean something, propagate it. */ + return rv; +} + +/* CPython generates a lot of leak warnings for whatever reason. */ +int __lsan_is_turned_off(void) { return 1; } + + +int LLVMFuzzerInitialize(int *argc, char ***argv) { + PyConfig config; + PyConfig_InitPythonConfig(&config); + config.install_signal_handlers = 0; + /* Raise the limit above the default allows exercising larger things + * now that we fall back to the _pylong module for large values. */ + config.int_max_str_digits = 8086; + PyStatus status; + status = PyConfig_SetBytesString(&config, &config.program_name, *argv[0]); + if (PyStatus_Exception(status)) { + goto fail; + } + + status = Py_InitializeFromConfig(&config); + if (PyStatus_Exception(status)) { + goto fail; + } + PyConfig_Clear(&config); + + return 0; + +fail: + PyConfig_Clear(&config); + Py_ExitStatusException(status); +} + +/* Fuzz test interface. + This returns the bitwise or of all fuzz test's return values. + + All fuzz tests must return 0, as all nonzero return codes are reserved for + future use -- we propagate the return values for that future case. + (And we bitwise or when running multiple tests to verify that normally we + only return 0.) */ +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + assert(Py_IsInitialized()); + + int rv = 0; + +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float) + rv |= _run_fuzz(data, size, fuzz_builtin_float); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int) + rv |= _run_fuzz(data, size, fuzz_builtin_int); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode) + rv |= _run_fuzz(data, size, fuzz_builtin_unicode); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack) + static int STRUCT_UNPACK_INITIALIZED = 0; + if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) { + PyErr_Print(); + abort(); + } else { + STRUCT_UNPACK_INITIALIZED = 1; + } + rv |= _run_fuzz(data, size, fuzz_struct_unpack); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads) + static int JSON_LOADS_INITIALIZED = 0; + if (!JSON_LOADS_INITIALIZED && !init_json_loads()) { + PyErr_Print(); + abort(); + } else { + JSON_LOADS_INITIALIZED = 1; + } + + rv |= _run_fuzz(data, size, fuzz_json_loads); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile) + static int SRE_COMPILE_INITIALIZED = 0; + if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) { + PyErr_Print(); + abort(); + } else { + SRE_COMPILE_INITIALIZED = 1; + } + + if (SRE_COMPILE_INITIALIZED) { + rv |= _run_fuzz(data, size, fuzz_sre_compile); + } +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match) + static int SRE_MATCH_INITIALIZED = 0; + if (!SRE_MATCH_INITIALIZED && !init_sre_match()) { + PyErr_Print(); + abort(); + } else { + SRE_MATCH_INITIALIZED = 1; + } + + rv |= _run_fuzz(data, size, fuzz_sre_match); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader) + static int CSV_READER_INITIALIZED = 0; + if (!CSV_READER_INITIALIZED && !init_csv_reader()) { + PyErr_Print(); + abort(); + } else { + CSV_READER_INITIALIZED = 1; + } + + rv |= _run_fuzz(data, size, fuzz_csv_reader); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval) + static int AST_LITERAL_EVAL_INITIALIZED = 0; + if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) { + PyErr_Print(); + abort(); + } else { + AST_LITERAL_EVAL_INITIALIZED = 1; + } + + rv |= _run_fuzz(data, size, fuzz_ast_literal_eval); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_elementtree_parsewhole) + static int ELEMENTTREE_PARSEWHOLE_INITIALIZED = 0; + if (!ELEMENTTREE_PARSEWHOLE_INITIALIZED && !init_elementtree_parsewhole()) { + PyErr_Print(); + abort(); + } else { + ELEMENTTREE_PARSEWHOLE_INITIALIZED = 1; + } + + rv |= _run_fuzz(data, size, fuzz_elementtree_parsewhole); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_pycompile) + rv |= _run_fuzz(data, size, fuzz_pycompile); +#endif + return rv; +}