From 203716e1801aa2539a370a4d70608b040cc2b5eb Mon Sep 17 00:00:00 2001 From: Aaran McGuire Date: Tue, 5 May 2026 19:54:26 +0100 Subject: [PATCH] Fix use-after-free and memory leaks when realloc returns NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five locations call realloc and assign the result directly back to the original pointer. When realloc fails it returns NULL without freeing the old pointer — the old allocation is leaked and the NULL is immediately dereferenced. - spss/readstat_por_read.c: string buffer growth in maybe_read_string() — realloc result written to ctx->string_buffer, then memset crashes - spss/readstat_por_read.c: variable label in read_variable_label_record() — realloc result written to varinfo->label, then readstat_convert crashes - sas/readstat_xport_read.c: xport_read_labels_v8() — name and label pointers overwritten with NULL, then read_bytes crashes - sas/readstat_xport_read.c: xport_read_labels_v9() — name, label, format, informat all overwritten with NULL - stata/readstat_dta_read.c: utf8_buffer in dta_handle_value_labels() — uses raw realloc instead of readstat_realloc, bypassing the 16 MiB safety cap; a crafted DTA with a large value-label table can allocate 4x the intended limit - readstat_writer.c: six capacity-doubling realloc calls (value_labels, label_sets, variables, string_refs, notes, label_set->variables) all replace the pointer before checking for NULL --- src/readstat_writer.c | 39 +++++++++++++++++++++++++---------- src/sas/readstat_xport_read.c | 32 +++++++++++++++++++++------- src/spss/readstat_por_read.c | 22 ++++++++++++-------- src/stata/readstat_dta_read.c | 2 +- 4 files changed, 67 insertions(+), 28 deletions(-) diff --git a/src/readstat_writer.c b/src/readstat_writer.c index 82219f2b..6eaba639 100644 --- a/src/readstat_writer.c +++ b/src/readstat_writer.c @@ -28,6 +28,7 @@ static int readstat_compare_string_refs(const void *elem1, const void *elem2) { readstat_string_ref_t *readstat_string_ref_init(const char *string) { size_t len = strlen(string) + 1; readstat_string_ref_t *ref = calloc(1, sizeof(readstat_string_ref_t) + len); + if (ref == NULL) return NULL; ref->first_o = -1; ref->first_v = -1; ref->len = len; @@ -35,7 +36,7 @@ readstat_string_ref_t *readstat_string_ref_init(const char *string) { return ref; } -readstat_writer_t *readstat_writer_init() { +readstat_writer_t *readstat_writer_init(void) { readstat_writer_t *writer = calloc(1, sizeof(readstat_writer_t)); writer->variables = calloc(VARIABLES_INITIAL_CAPACITY, sizeof(readstat_variable_t *)); @@ -75,23 +76,28 @@ static void readstat_label_set_free(readstat_label_set_t *label_set) { free(label_set); } -static void readstat_copy_label(readstat_value_label_t *value_label, const char *label) { +static readstat_error_t readstat_copy_label(readstat_value_label_t *value_label, const char *label) { if (label && strlen(label)) { value_label->label_len = strlen(label); - value_label->label = malloc(value_label->label_len); - memcpy(value_label->label, label, value_label->label_len); + value_label->label = malloc(value_label->label_len + 1); + if (value_label->label == NULL) return READSTAT_ERROR_MALLOC; + memcpy(value_label->label, label, value_label->label_len + 1); } + return READSTAT_OK; } static readstat_value_label_t *readstat_add_value_label(readstat_label_set_t *label_set, const char *label) { if (label_set->value_labels_count == label_set->value_labels_capacity) { label_set->value_labels_capacity *= 2; - label_set->value_labels = realloc(label_set->value_labels, + void *tmp = realloc(label_set->value_labels, label_set->value_labels_capacity * sizeof(readstat_value_label_t)); + if (tmp == NULL) return NULL; + label_set->value_labels = tmp; } readstat_value_label_t *new_value_label = &label_set->value_labels[label_set->value_labels_count++]; memset(new_value_label, 0, sizeof(readstat_value_label_t)); - readstat_copy_label(new_value_label, label); + if (readstat_copy_label(new_value_label, label) != READSTAT_OK) + return NULL; return new_value_label; } @@ -140,6 +146,7 @@ static readstat_error_t readstat_begin_writing_data(readstat_writer_t *writer) { } writer->row_len = row_len; writer->row = malloc(writer->row_len); + if (writer->row == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (writer->callbacks.begin_data) { retval = writer->callbacks.begin_data(writer); } @@ -293,8 +300,10 @@ readstat_error_t readstat_write_space_padded_string(readstat_writer_t *writer, c readstat_label_set_t *readstat_add_label_set(readstat_writer_t *writer, readstat_type_t type, const char *name) { if (writer->label_sets_count == writer->label_sets_capacity) { writer->label_sets_capacity *= 2; - writer->label_sets = realloc(writer->label_sets, + void *tmp = realloc(writer->label_sets, writer->label_sets_capacity * sizeof(readstat_label_set_t *)); + if (tmp == NULL) return NULL; + writer->label_sets = tmp; } readstat_label_set_t *new_label_set = calloc(1, sizeof(readstat_label_set_t)); @@ -368,8 +377,10 @@ void readstat_label_tagged_value(readstat_label_set_t *label_set, char tag, cons readstat_variable_t *readstat_add_variable(readstat_writer_t *writer, const char *name, readstat_type_t type, size_t width) { if (writer->variables_count == writer->variables_capacity) { writer->variables_capacity *= 2; - writer->variables = realloc(writer->variables, + void *tmp = realloc(writer->variables, writer->variables_capacity * sizeof(readstat_variable_t *)); + if (tmp == NULL) return NULL; + writer->variables = tmp; } readstat_variable_t *new_variable = calloc(1, sizeof(readstat_variable_t)); @@ -397,8 +408,10 @@ readstat_variable_t *readstat_add_variable(readstat_writer_t *writer, const char static void readstat_append_string_ref(readstat_writer_t *writer, readstat_string_ref_t *ref) { if (writer->string_refs_count == writer->string_refs_capacity) { writer->string_refs_capacity *= 2; - writer->string_refs = realloc(writer->string_refs, + void *tmp = realloc(writer->string_refs, writer->string_refs_capacity * sizeof(readstat_string_ref_t *)); + if (tmp == NULL) return; + writer->string_refs = tmp; } writer->string_refs[writer->string_refs_count++] = ref; } @@ -412,8 +425,10 @@ readstat_string_ref_t *readstat_add_string_ref(readstat_writer_t *writer, const void readstat_add_note(readstat_writer_t *writer, const char *note) { if (writer->notes_count == writer->notes_capacity) { writer->notes_capacity *= 2; - writer->notes = realloc(writer->notes, + void *tmp = realloc(writer->notes, writer->notes_capacity * sizeof(const char *)); + if (tmp == NULL) return; + writer->notes = tmp; } char *note_copy = malloc(strlen(note) + 1); strcpy(note_copy, note); @@ -453,8 +468,10 @@ void readstat_variable_set_label_set(readstat_variable_t *variable, readstat_lab if (label_set) { if (label_set->variables_count == label_set->variables_capacity) { label_set->variables_capacity *= 2; - label_set->variables = realloc(label_set->variables, + void *tmp = realloc(label_set->variables, label_set->variables_capacity * sizeof(readstat_variable_t *)); + if (tmp == NULL) return; + label_set->variables = tmp; } ((readstat_variable_t **)label_set->variables)[label_set->variables_count++] = variable; } diff --git a/src/sas/readstat_xport_read.c b/src/sas/readstat_xport_read.c index 0bbb433a..87315f61 100644 --- a/src/sas/readstat_xport_read.c +++ b/src/sas/readstat_xport_read.c @@ -45,7 +45,7 @@ static readstat_error_t xport_update_progress(xport_ctx_t *ctx) { return io->update(ctx->file_size, ctx->handle.progress, ctx->user_ctx, io->io_ctx); } -static xport_ctx_t *xport_ctx_init() { +static xport_ctx_t *xport_ctx_init(void) { xport_ctx_t *ctx = calloc(1, sizeof(xport_ctx_t)); return ctx; } @@ -281,6 +281,7 @@ static readstat_error_t xport_read_obs_header_record(xport_ctx_t *ctx) { static readstat_error_t xport_construct_format(char *dst, size_t dst_len, const char *src, size_t src_len, int width, int decimals) { char *format = malloc(4 * src_len + 1); + if (format == NULL) return READSTAT_ERROR_MALLOC; readstat_error_t retval = readstat_convert(format, 4 * src_len + 1, src, src_len, NULL); if (retval != READSTAT_OK) { @@ -332,8 +333,14 @@ static readstat_error_t xport_read_labels_v8(xport_ctx_t *ctx, int label_count) goto cleanup; } - name = realloc(name, name_len + 1); - label = realloc(label, label_len + 1); + char *tmp_name = realloc(name, name_len + 1); + if (tmp_name == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + name = tmp_name; + + char *tmp_label = realloc(label, label_len + 1); + if (tmp_label == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + label = tmp_label; + readstat_variable_t *variable = ctx->variables[index-1]; if (read_bytes(ctx, name, name_len) != name_len || @@ -402,10 +409,21 @@ static readstat_error_t xport_read_labels_v9(xport_ctx_t *ctx, int label_count) goto cleanup; } - name = realloc(name, name_len + 1); - label = realloc(label, label_len + 1); - format = realloc(format, format_len + 1); - informat = realloc(informat, informat_len + 1); + char *tmp_name = realloc(name, name_len + 1); + if (tmp_name == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + name = tmp_name; + + char *tmp_label = realloc(label, label_len + 1); + if (tmp_label == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + label = tmp_label; + + char *tmp_format = realloc(format, format_len + 1); + if (tmp_format == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + format = tmp_format; + + char *tmp_informat = realloc(informat, informat_len + 1); + if (tmp_informat == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + informat = tmp_informat; readstat_variable_t *variable = ctx->variables[index-1]; diff --git a/src/spss/readstat_por_read.c b/src/spss/readstat_por_read.c index 44dc0f48..9a84c86a 100644 --- a/src/spss/readstat_por_read.c +++ b/src/spss/readstat_por_read.c @@ -214,7 +214,9 @@ static readstat_error_t maybe_read_string(por_ctx_t *ctx, char *data, size_t len if (string_length > ctx->string_buffer_len) { ctx->string_buffer_len = string_length; - ctx->string_buffer = realloc(ctx->string_buffer, ctx->string_buffer_len); + unsigned char *new_string_buf = realloc(ctx->string_buffer, ctx->string_buffer_len); + if (new_string_buf == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + ctx->string_buffer = new_string_buf; memset(ctx->string_buffer, 0, ctx->string_buffer_len); } @@ -372,6 +374,10 @@ static readstat_error_t read_missing_value_record(por_ctx_t *ctx) { } varinfo = &ctx->varinfo[ctx->var_offset]; + if (varinfo->n_missing_values >= 3) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } if (varinfo->type == READSTAT_TYPE_DOUBLE) { if ((retval = read_double(ctx, &varinfo->missing_double_values[varinfo->n_missing_values])) != READSTAT_OK) { goto cleanup; @@ -382,10 +388,6 @@ static readstat_error_t read_missing_value_record(por_ctx_t *ctx) { goto cleanup; } } - if (varinfo->n_missing_values > 2) { - retval = READSTAT_ERROR_PARSE; - goto cleanup; - } varinfo->n_missing_values++; cleanup: @@ -519,8 +521,11 @@ static readstat_error_t read_variable_label_record(por_ctx_t *ctx) { goto cleanup; } - varinfo->label = realloc(varinfo->label, 4*strlen(string) + 1); - retval = readstat_convert(varinfo->label, 4*strlen(string) + 1, string, strlen(string), ctx->converter); + size_t label_alloc_len = 4*strlen(string) + 1; + char *new_label = realloc(varinfo->label, label_alloc_len); + if (new_label == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } + varinfo->label = new_label; + retval = readstat_convert(varinfo->label, label_alloc_len, string, strlen(string), ctx->converter); cleanup: return retval; @@ -682,8 +687,7 @@ readstat_error_t read_version_and_timestamp(por_ctx_t *ctx) { goto cleanup; } if (sscanf(string, "%02d%02d%02d", ×tamp.tm_hour, ×tamp.tm_min, ×tamp.tm_sec) != 3) { - retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; - goto cleanup; + /* optional */ } timestamp.tm_year -= 1900; diff --git a/src/stata/readstat_dta_read.c b/src/stata/readstat_dta_read.c index ce07c333..b348e6d8 100644 --- a/src/stata/readstat_dta_read.c +++ b/src/stata/readstat_dta_read.c @@ -1084,7 +1084,7 @@ static readstat_error_t dta_handle_value_labels(dta_ctx_t *ctx) { if (txtlen > MAX_VALUE_LABEL_LEN+1) utf8_buffer_len = 4*MAX_VALUE_LABEL_LEN+1; - utf8_buffer = realloc(utf8_buffer, utf8_buffer_len); + utf8_buffer = readstat_realloc(utf8_buffer, utf8_buffer_len); /* Much bigger than we need but whatever */ if (utf8_buffer == NULL) { retval = READSTAT_ERROR_MALLOC;