From 12108c497e673099952dfb14df141cb4bf532929 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Thu, 23 Apr 2026 18:12:42 -0700 Subject: [PATCH] diff: simplify line-range filter by classifying removals immediately The line-range filter buffered '-' (removal) lines in a pending_rm strbuf, deferring their classification until a '+' or ' ' line arrived to reveal the post-image position. This buffering is unnecessary. Removal lines are pre-image content that occupies no post-image space, so they do not advance lno_post. Within a hunk, xdiff emits removals before additions for each change, so a '-' line always arrives while lno_post is at the same position that the following '+' or ' ' line will occupy. Each line can therefore be classified against the tracked ranges as it arrives, without waiting for a non-removal line to confirm the position. The buffering also had a bug: flush_rhunk() unconditionally drained the pending buffer when the range hunk was active, even if lno_post had advanced past the tracked range. This caused deletions immediately after the tracked function to be incorrectly included in patch output. Remove the pending_rm buffer and classify '-' lines using the same in_range check applied to '+' and ' ' lines. This simplifies the filter, removes three struct fields and a helper function, and makes the flush_rhunk() bug impossible by construction. Signed-off-by: Michael Montalbo --- diff.c | 110 ++++++++++++--------------------------- t/t4211-line-log.sh | 124 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 77 deletions(-) diff --git a/diff.c b/diff.c index 397e38b41cc6fa..5662d080be6210 100644 --- a/diff.c +++ b/diff.c @@ -616,11 +616,6 @@ struct emit_callback { * the requested ranges. Contiguous in-range lines are collected into * range hunks and flushed with a synthetic @@ header so that * fn_out_consume() sees well-formed unified-diff fragments. - * - * Removal lines ('-') cannot be classified by post-image position, so - * they are buffered in pending_rm until the next '+' or ' ' line - * reveals whether they precede an in-range line (flush into range hunk) or - * an out-of-range line (discard). */ struct line_range_callback { xdiff_emit_line_fn orig_line_fn; @@ -646,11 +641,6 @@ struct line_range_callback { int rhunk_active; int rhunk_has_changes; /* any '+' or '-' lines? */ - /* Removal lines not yet known to be in-range */ - struct strbuf pending_rm; - int pending_rm_count; - long pending_rm_pre_begin; /* pre-image line of first pending */ - int ret; /* latched error from orig_line_fn */ }; @@ -2539,12 +2529,6 @@ static int quick_consume(void *priv, char *line UNUSED, unsigned long len UNUSED return 1; } -static void discard_pending_rm(struct line_range_callback *s) -{ - strbuf_reset(&s->pending_rm); - s->pending_rm_count = 0; -} - static void flush_rhunk(struct line_range_callback *s) { struct strbuf hdr = STRBUF_INIT; @@ -2553,14 +2537,6 @@ static void flush_rhunk(struct line_range_callback *s) if (!s->rhunk_active || s->ret) return; - /* Drain any pending removal lines into the range hunk */ - if (s->pending_rm_count) { - strbuf_addbuf(&s->rhunk, &s->pending_rm); - s->rhunk_old_count += s->pending_rm_count; - s->rhunk_has_changes = 1; - discard_pending_rm(s); - } - /* * Suppress context-only hunks: they contain no actual changes * and would just be noise. This can happen when the inflated @@ -2615,11 +2591,6 @@ static void line_range_hunk_fn(void *data, * When count > 0, begin is 1-based. When count == 0, begin is * adjusted down by 1 by xdl_emit_hunk_hdr(), but no lines of * that type will arrive, so the value is unused. - * - * Any pending removal lines from the previous xdiff hunk are - * intentionally left in pending_rm: the line callback will - * flush or discard them when the next content line reveals - * whether the removals precede in-range content. */ s->lno_post = new_begin; s->lno_pre = old_begin; @@ -2635,88 +2606,75 @@ static void line_range_hunk_fn(void *data, static int line_range_line_fn(void *priv, char *line, unsigned long len) { struct line_range_callback *s = priv; - const struct range *cur; - long lno_0, cur_pre; + long lno_0; + int in_range; if (s->ret) return s->ret; - if (line[0] == '-') { - if (!s->pending_rm_count) - s->pending_rm_pre_begin = s->lno_pre; - s->lno_pre++; - strbuf_add(&s->pending_rm, line, len); - s->pending_rm_count++; - return s->ret; - } - if (line[0] == '\\') { - if (s->pending_rm_count) - strbuf_add(&s->pending_rm, line, len); - else if (s->rhunk_active) + if (s->rhunk_active) strbuf_add(&s->rhunk, line, len); - /* otherwise outside tracked range; drop silently */ return s->ret; } - if (line[0] != '+' && line[0] != ' ') + if (line[0] != '+' && line[0] != ' ' && line[0] != '-') BUG("unexpected diff line type '%c'", line[0]); + /* + * Compute post-image position. '+' and ' ' lines advance + * lno_post; '-' lines do not (they occupy no post-image space). + */ lno_0 = s->lno_post - 1; - cur_pre = s->lno_pre; /* save before advancing for context lines */ - s->lno_post++; - if (line[0] == ' ') - s->lno_pre++; + if (line[0] != '-') + s->lno_post++; - /* Advance past ranges we've passed */ + /* + * Advance past any ranges we've moved beyond. Emit the + * accumulated range hunk for the range we're leaving. + */ while (s->cur_range < s->ranges->nr && lno_0 >= s->ranges->ranges[s->cur_range].end) { if (s->rhunk_active) flush_rhunk(s); - discard_pending_rm(s); s->cur_range++; } - /* Past all ranges */ - if (s->cur_range >= s->ranges->nr) { - discard_pending_rm(s); - return s->ret; - } - - cur = &s->ranges->ranges[s->cur_range]; + in_range = s->cur_range < s->ranges->nr && + lno_0 >= s->ranges->ranges[s->cur_range].start && + lno_0 < s->ranges->ranges[s->cur_range].end; - /* Before current range */ - if (lno_0 < cur->start) { - discard_pending_rm(s); + if (!in_range) { + if (line[0] != '+') + s->lno_pre++; return s->ret; } - /* In range so start a new range hunk if needed */ + /* Start a new range hunk if this is the first in-range line */ if (!s->rhunk_active) { s->rhunk_active = 1; s->rhunk_has_changes = 0; s->rhunk_new_begin = lno_0 + 1; - s->rhunk_old_begin = s->pending_rm_count - ? s->pending_rm_pre_begin : cur_pre; + s->rhunk_old_begin = s->lno_pre; s->rhunk_old_count = 0; s->rhunk_new_count = 0; strbuf_reset(&s->rhunk); } - /* Flush pending removals into range hunk */ - if (s->pending_rm_count) { - strbuf_addbuf(&s->rhunk, &s->pending_rm); - s->rhunk_old_count += s->pending_rm_count; - s->rhunk_has_changes = 1; - discard_pending_rm(s); - } - + /* Append line to the range hunk */ strbuf_add(&s->rhunk, line, len); - s->rhunk_new_count++; - if (line[0] == '+') + if (line[0] == '-') { + s->rhunk_old_count++; s->rhunk_has_changes = 1; - else + s->lno_pre++; + } else if (line[0] == '+') { + s->rhunk_new_count++; + s->rhunk_has_changes = 1; + } else { s->rhunk_old_count++; + s->rhunk_new_count++; + s->lno_pre++; + } return s->ret; } @@ -4072,7 +4030,6 @@ static void builtin_diff(const char *name_a, lr_state.orig_cb_data = &ecbdata; lr_state.ranges = line_ranges; strbuf_init(&lr_state.rhunk, 0); - strbuf_init(&lr_state.pending_rm, 0); /* * Inflate ctxlen so that all changes within @@ -4107,7 +4064,6 @@ static void builtin_diff(const char *name_a, die("unable to generate diff for %s", one->path); strbuf_release(&lr_state.rhunk); - strbuf_release(&lr_state.pending_rm); } else if (xdi_diff_outf(&mf1, &mf2, NULL, fn_out_consume, &ecbdata, &xpp, &xecfg)) die("unable to generate diff for %s", one->path); diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index aaf197d2edc4d8..2bff0e4c2655c1 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -711,4 +711,128 @@ test_expect_success '-L with -G filters to diff-text matches' ' grep "F2 + 2" actual ' +test_expect_success 'setup for trailing deletion test' ' + git checkout --orphan trailing-del && + git reset --hard && + cat >file.c <<-\EOF && + void tracked() + { + return 1; + } + // trailing comment + EOF + git add file.c && + test_tick && + git commit -m "add file with trailing comment" && + # Modify tracked() AND delete the trailing comment in + # one commit, so the commit touches the tracked range + # and is not filtered out by the revision walker. + cat >file.c <<-\EOF && + void tracked() + { + return 2; + } + EOF + git commit -a -m "modify tracked and delete trailing comment" +' + +test_expect_success '-L does not include deletions past end of tracked range' ' + git log -L:tracked:file.c --format= -1 -p >actual && + # The trailing comment deletion is outside the tracked + # range and should not appear in the patch output. + grep "return 2" actual && + ! grep "trailing comment" actual +' + +test_expect_success '-L includes leading deletions resolved by in-range line' ' + git checkout --orphan leading-del && + git reset --hard && + cat >file.c <<-\EOF && + // leading comment + void tracked() + { + return 1; + } + EOF + git add file.c && + test_tick && + git commit -m "add file with leading comment" && + cat >file.c <<-\EOF && + void tracked() + { + return 2; + } + EOF + git commit -a -m "modify tracked and delete leading comment" && + git log -L:tracked:file.c --format= -1 -p >actual && + # The leading comment deletion is resolved by the next + # non-removal line (void tracked), which is in range. + # Pending removals are attributed to the range they precede. + grep "return 2" actual && + grep "leading comment" actual +' + +test_expect_success 'setup for line-range filter edge cases' ' + git checkout --orphan filter-edge && + git reset --hard && + cat >file.c <<-\EOF && + void before() + { + return 0; + } + + void tracked() + { + int a = 1; + int b = 2; + int c = 3; + return a + b + c; + } + + void after() + { + return 9; + } + EOF + git add file.c && + test_tick && + git commit -m "initial" +' + +test_expect_success '-L change at exact first line of range' ' + git checkout filter-edge && + # Change the function signature (first line of range) + sed "s/void tracked/int tracked/" file.c >tmp && + mv tmp file.c && + git commit -a -m "change first line" && + git log -L:tracked:file.c -p --format=%s -1 >actual && + grep "change first line" actual && + grep "+int tracked" actual && + grep "\\-void tracked" actual +' + +test_expect_success '-L change at exact last line of range' ' + git checkout filter-edge && + git reset --hard HEAD~1 && + # Change the closing brace line (last line of range) + sed "s/^}$/} \/\/ end tracked/" file.c >tmp && + mv tmp file.c && + git commit -a -m "change last line" && + git log -L:tracked:file.c -p --format=%s -1 >actual && + grep "change last line" actual && + grep "end tracked" actual +' + +test_expect_success '-L pure deletion in range (no additions)' ' + git checkout filter-edge && + git reset --hard HEAD~1 && + # Delete a line inside tracked() without adding anything + sed "/int c/d" file.c >tmp && + mv tmp file.c && + git commit -a -m "pure deletion" && + git log -L:tracked:file.c -p --format=%s -1 >actual && + grep "pure deletion" actual && + grep "\\-.*int c" actual +' + test_done