Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions plugins/core/hooks/tests/test-verify-deliverables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ TX_NEGATION=$(write_transcript "negated" \
'{"type":"assistant","message":{"content":[{"type":"text","text":"Investigated src/foo.ts — not all tests pass yet."}]}}'
)

# Space-terminated pattern: "Implemented the fix in src/x.ts" should trigger.
# Fixture shape: the only tool call recorded is a Read of src/x.ts; the final
# assistant message then claims an implementation and names that same path.
# The matching run_case expects advisory text containing "made no file changes".
TX_IMPLEMENTED=$(write_transcript "implemented-space" \
'{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Read","input":{"file_path":"src/x.ts"}}]}}' \
'{"type":"assistant","message":{"content":[{"type":"text","text":"Implemented the fix in src/x.ts to resolve the issue."}]}}'
)

# Trailing boundary: "fixed itself" should not match "fixed it".
TX_FIXED_ITSELF=$(write_transcript "fixed-itself" \
'{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Read","input":{"file_path":"src/x.ts"}}]}}' \
Expand Down Expand Up @@ -197,6 +203,7 @@ run_case "alt payload shape (.tool_input.file_path)" "$(make_payload "$TX_ALT_SH
# Each case passes a label, a payload built from a transcript fixture, and an
# expectation. NOTE(review): the last argument looks like either the literal
# `silent` or text the advisory must contain — confirm against run_case.
run_case "pure analyst (no claim, no mutations)" "$(make_payload "$TX_ANALYST")" silent
run_case "trivial chatter (claim word but no file token)" "$(make_payload "$TX_TRIVIAL")" silent
run_case "negated 'not all tests pass' (no flag)" "$(make_payload "$TX_NEGATION")" silent
# Of the cases listed here, this is the only one expected to produce an advisory.
run_case "space-terminated 'implemented' triggers advisory" "$(make_payload "$TX_IMPLEMENTED")" "made no file changes"
run_case "'fixed itself' does not match 'fixed it'" "$(make_payload "$TX_FIXED_ITSELF")" silent
run_case "broken symlink counts as present" "$(make_payload "$TX_BROKEN_SYMLINK")" silent
run_case "long transcript (Write at line 1, msg after 60 Reads)" "$(make_payload "$TX_LONG")" silent
Expand Down
10 changes: 6 additions & 4 deletions plugins/core/hooks/verify-deliverables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,12 @@ ISSUES=""

if [ -n "$LAST_MSG" ] && [ "$MUTATING_CALLS" = "0" ]; then
# Conservative completion patterns. False positives are noisier than
# false negatives, so we only fire on strong signals. Trailing
# boundary keeps "fixed it" from matching inside "fixed itself"
# and "all tests pass" from matching inside "passenger".
if echo "$LAST_MSG" | grep -qiE '(^|[^a-z])(done\.|complete\.|completed\.|finished\.|implemented |fixed it|created the |wrote the |added the |saved the |all tests pass(ed)?)([^a-zA-Z]|$)'; then
# false negatives, so we only fire on strong signals.
# Two groups: (1) patterns needing a trailing boundary to avoid
# substring matches ("fixed it" vs "fixed itself"), and
# (2) space-terminated patterns where the space IS the boundary.
if echo "$LAST_MSG" | grep -qiE '(^|[^a-z])(done\.|complete\.|completed\.|finished\.|fixed it|all tests pass(ed)?)([^a-zA-Z]|$)' ||
echo "$LAST_MSG" | grep -qiE '(^|[^a-z])(implemented |created the |wrote the |added the |saved the )'; then
# Reject negated phrasing — "not all tests pass" should not trigger.
if ! echo "$LAST_MSG" | grep -qiE "(not|n't|cannot|couldn't|didn't|haven't|hasn't|wasn't)([[:space:]]+(yet|fully|quite|all|even))?[[:space:]]+(done|complete|completed|finished|implemented|fixed|added|wrote|created|saved|all tests pass)"; then
# Require a file-path-shaped token. Pure analysis output
Expand Down