From db1fb5b06d37d37d57d8bd6a00a6562386908a13 Mon Sep 17 00:00:00 2001 From: Aaran McGuire Date: Tue, 5 May 2026 19:55:43 +0100 Subject: [PATCH] Fix integer overflow and undefined behaviour in SAV reader Four arithmetic issues in spss/readstat_sav_read.c: 1. size*count multiplication overflow (sav_parse_records_pass1 and pass2): Two uint32_t values from the file are assigned to size_t then multiplied. On 32-bit platforms the product can overflow, producing a data_len of 0 which causes the parser to skip the record and continue reading at the wrong file position. 2. abs(n_missing_values) undefined behaviour (sav_skip_variable_record): When n_missing_values == INT_MIN, abs() is undefined. On two's- complement targets it typically returns INT_MIN (negative), which when used as a seek offset produces a large wrong seek. Replaced with an explicit conditional and a validity range check (> 3 is always invalid per SPSS spec). 3. uint32 label_len overflow (sav_read_variable_label): label_len near UINT32_MAX causes (label_len+3)/4*4 to wrap to 0, resulting in readstat_malloc(0). Added a 0xFFFF sanity bound (well above the SPSS limit of 255 characters). 4. uint32_t buffer length overflow in long-string value labels: value_buffer_len and label_buffer_len are uint32_t; value_len*4+1 overflows when value_len >= 0x40000000, producing a tiny realloc target. Changed to size_t with an explicit > 0xFFFFFF guard. --- src/spss/readstat_sav_read.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/spss/readstat_sav_read.c b/src/spss/readstat_sav_read.c index 7f494904..43b04e7d 100644 --- a/src/spss/readstat_sav_read.c +++ b/src/spss/readstat_sav_read.c @@ -188,8 +188,10 @@ static readstat_error_t sav_skip_variable_record(sav_ctx_t *ctx) { } if (variable.n_missing_values) { int n_missing_values = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values; - if (io->seek(abs(n_missing_values) * sizeof(double), READSTAT_SEEK_CUR, io->io_ctx) == -1) { - retval = READSTAT_ERROR_SEEK; + int n_mv_abs = (n_missing_values < 0) ? -n_missing_values : n_missing_values; + if (n_mv_abs > 3) { retval = READSTAT_ERROR_PARSE; goto cleanup; } + if (io->seek((off_t)n_mv_abs * sizeof(double), READSTAT_SEEK_CUR, io->io_ctx) == -1) { + retval = READSTAT_ERROR_READ; goto cleanup; } } @@ -212,6 +214,11 @@ static readstat_error_t sav_read_variable_label(spss_varinfo_t *info, sav_ctx_t if (label_len == 0) goto cleanup; + if (label_len > 0xFFFF) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } + label_capacity = (label_len + 3) / 4 * 4; if ((label_buf = readstat_malloc(label_capacity)) == NULL) { retval = READSTAT_ERROR_MALLOC; @@ -622,9 +629,9 @@ static readstat_error_t sav_skip_document_record(sav_ctx_t *ctx) { } if (ctx->bswap) n_lines = byteswap4(n_lines); - if (io->seek(n_lines * SPSS_DOC_LINE_SIZE, READSTAT_SEEK_CUR, io->io_ctx) == -1) { - retval = READSTAT_ERROR_SEEK; - goto cleanup; + if (n_lines > 0x3FFFFFF) { retval = READSTAT_ERROR_PARSE; goto cleanup; } + if (io->seek((off_t)n_lines * SPSS_DOC_LINE_SIZE, READSTAT_SEEK_CUR, io->io_ctx) == -1) { + retval = READSTAT_ERROR_READ; goto cleanup; } cleanup: @@ -1113,7 +1120,7 @@ static readstat_error_t sav_parse_long_string_value_labels_record(const void *da for (i=0; i data_end) { retval = READSTAT_ERROR_PARSE; @@ -1126,7 +1133,8 @@ static readstat_error_t sav_parse_long_string_value_labels_record(const void *da data_ptr += sizeof(uint32_t); - value_buffer_len = value_len*4+1; + if (value_len > 0xFFFFFF) { retval = READSTAT_ERROR_PARSE; goto cleanup; } + value_buffer_len = (size_t)value_len*4+1; value_buffer = readstat_realloc(value_buffer, value_buffer_len); if (value_buffer == NULL) { retval = READSTAT_ERROR_MALLOC; @@ -1155,7 +1163,8 @@ static readstat_error_t sav_parse_long_string_value_labels_record(const void *da data_ptr += sizeof(uint32_t); - label_buffer_len = label_len*4+1; + if (label_len > 0xFFFFFF) { retval = READSTAT_ERROR_PARSE; goto cleanup; } + label_buffer_len = (size_t)label_len*4+1; label_buffer = readstat_realloc(label_buffer, label_buffer_len); if (label_buffer == NULL) { retval = READSTAT_ERROR_MALLOC; @@ -1322,6 +1331,10 @@ static readstat_error_t sav_parse_records_pass1(sav_ctx_t *ctx) { uint32_t subtype = extra_info[0]; size_t size = extra_info[1]; size_t count = extra_info[2]; + if (size > 0 && count > 0x7FFFFFFF / size) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } data_len = size * count; if (subtype == SAV_RECORD_SUBTYPE_INTEGER_INFO) { if (data_len > sizeof(data_buf)) { @@ -1410,6 +1423,10 @@ static readstat_error_t sav_parse_records_pass2(sav_ctx_t *ctx) { uint32_t subtype = extra_info[0]; size_t size = extra_info[1]; size_t count = extra_info[2]; + if (size > 0 && count > 0x7FFFFFFF / size) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } data_len = size * count; if (data_buf_capacity < data_len) { if ((data_buf = readstat_realloc(data_buf, data_buf_capacity = data_len)) == NULL) {