From 6baf24c0639ebc735650a8a218ffd647bde3571d Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Sun, 9 Nov 2025 14:52:16 -0700 Subject: [PATCH 01/19] Added Capture to Main CodeChat --- .gitignore | 2 + dist-workspace.toml | 2 +- extensions/VSCode/src/extension.ts | 253 +++++++++++- server/log4rs.yml | 2 +- server/src/capture.rs | 628 +++++++++++++++++++++-------- server/src/webserver.rs | 89 +++- 6 files changed, 793 insertions(+), 183 deletions(-) diff --git a/.gitignore b/.gitignore index 1d695e01..e0eb371a 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ # # dist build output target/ +capture_config.json +server/capture_config.json diff --git a/dist-workspace.toml b/dist-workspace.toml index 0d75623d..9a5d4fca 100644 --- a/dist-workspace.toml +++ b/dist-workspace.toml @@ -25,7 +25,7 @@ members = ["cargo:server/"] # Config for 'dist' [dist] # The preferred dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.30.0" +cargo-dist-version = "0.30.2" # CI backends to support ci = "github" # The installers to generate for each app diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index cc6eaa76..a9ff0350 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -105,6 +105,135 @@ let codeChatEditorServer: CodeChatEditorServer | undefined; initServer(ext.extensionPath); } +// Types for talking to the Rust /capture endpoint. +// This mirrors `CaptureEventWire` in webserver.rs. +interface CaptureEventPayload { + user_id: string; + assignment_id?: string; + group_id?: string; + file_path?: string; + event_type: string; + data: any; // sent as JSON +} + +// TODO: replace these with something real (e.g., VS Code settings) +// For now, we hard-code to prove that the pipeline works end-to-end. 
+const CAPTURE_USER_ID = "test-user"; +const CAPTURE_ASSIGNMENT_ID = "demo-assignment"; +const CAPTURE_GROUP_ID = "demo-group"; + +// Base URL for the CodeChat server's /capture endpoint. +// NOTE: keep this in sync with whatever port your server actually uses. +const CAPTURE_SERVER_BASE = "http://127.0.0.1:8080"; + +// Simple classification of what the user is currently doing. +type ActivityKind = "doc" | "code" | "other"; + +// Language IDs that we treat as "documentation" for the dissertation metrics. +// You can refine this later if you want. +const DOC_LANG_IDS = new Set([ + "markdown", + "plaintext", + "latex", + "restructuredtext", +]); + +// Track the last activity kind and when a reflective-writing (doc) session started. +let lastActivityKind: ActivityKind = "other"; +let docSessionStart: number | null = null; + +// Heuristic: classify a document as documentation vs. code vs. other. +function classifyDocument( + doc: vscode.TextDocument | undefined, +): ActivityKind { + if (!doc) { + return "other"; + } + if (DOC_LANG_IDS.has(doc.languageId)) { + return "doc"; + } + // Everything else we treat as code for now. + return "code"; +} + +// Update activity state, emit switch + doc_session events as needed. +function noteActivity(kind: ActivityKind, filePath?: string) { + const now = Date.now(); + + // Handle entering / leaving a "doc" session. + if (kind === "doc") { + if (docSessionStart === null) { + // Starting a new reflective-writing session. + docSessionStart = now; + void sendCaptureEvent(CAPTURE_SERVER_BASE, "session_start", filePath, { + mode: "doc", + }); + } + } else { + if (docSessionStart !== null) { + // Ending a reflective-writing session. 
+ const durationMs = now - docSessionStart; + docSessionStart = null; + void sendCaptureEvent(CAPTURE_SERVER_BASE, "doc_session", filePath, { + duration_ms: durationMs, + duration_seconds: durationMs / 1000.0, + }); + void sendCaptureEvent(CAPTURE_SERVER_BASE, "session_end", filePath, { + mode: "doc", + }); + } + } + + // If we switched between doc and code, log a switch_pane event. + const docOrCode = (k: ActivityKind) => k === "doc" || k === "code"; + if ( + docOrCode(lastActivityKind) && + docOrCode(kind) && + kind !== lastActivityKind + ) { + void sendCaptureEvent(CAPTURE_SERVER_BASE, "switch_pane", filePath, { + from: lastActivityKind, + to: kind, + }); + } + + lastActivityKind = kind; +} + +// Helper to send a capture event to the Rust server. +async function sendCaptureEvent( + serverBaseUrl: string, // e.g. "http://127.0.0.1:8080" + eventType: string, + filePath?: string, + data: any = {} +): Promise { + const payload: CaptureEventPayload = { + user_id: CAPTURE_USER_ID, + assignment_id: CAPTURE_ASSIGNMENT_ID, + group_id: CAPTURE_GROUP_ID, + file_path: filePath, + event_type: eventType, + data, + }; + + try { + const resp = await fetch(`${serverBaseUrl}/capture`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(payload), + }); + + if (!resp.ok) { + console.error("Capture event failed:", resp.status, await resp.text()); + } + } catch (err) { + console.error("Error sending capture event:", err); + } +} + + // Activation/deactivation // ----------------------- // @@ -121,6 +250,18 @@ export const activate = (context: vscode.ExtensionContext) => { async () => { console_log("CodeChat Editor extension: starting."); + // CAPTURE: mark the start of an editor session. 
+ const active = vscode.window.activeTextEditor; + const startFilePath = active?.document.fileName; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "session_start", + startFilePath, + { + mode: "vscode_extension", + }, + ); + if (!subscribed) { subscribed = true; @@ -142,6 +283,31 @@ export const activate = (context: vscode.ExtensionContext) => { event.reason }, ${format_struct(event.contentChanges)}.`, ); + + // CAPTURE: classify this as documentation vs. code and log a write_* event. + const doc = event.document; + const kind = classifyDocument(doc); + const filePath = doc.fileName; + const charsTyped = event.contentChanges + .map((c) => c.text.length) + .reduce((a, b) => a + b, 0); + + if (kind === "doc") { + void sendCaptureEvent(CAPTURE_SERVER_BASE, "write_doc", filePath, { + chars_typed: charsTyped, + languageId: doc.languageId, + }); + } else if (kind === "code") { + void sendCaptureEvent(CAPTURE_SERVER_BASE, "write_code", filePath, { + chars_typed: charsTyped, + languageId: doc.languageId, + }); + } + + // Update our notion of current activity + doc session. + noteActivity(kind, filePath); + + // Existing behavior: trigger CodeChat render. send_update(true); }), ); @@ -158,22 +324,92 @@ export const activate = (context: vscode.ExtensionContext) => { ignore_active_editor_change = false; return; } + + // CAPTURE: update activity + possible switch_pane/doc_session. + const doc = event.document; + const kind = classifyDocument(doc); + const filePath = doc.fileName; + noteActivity(kind, filePath); + send_update(false); }), ); context.subscriptions.push( vscode.window.onDidChangeTextEditorSelection( - (_event) => { + (event) => { if (ignore_selection_change) { ignore_selection_change = false; return; } + + // CAPTURE: treat a selection change as "activity" in this document. 
+ const doc = event.textEditor.document; + const kind = classifyDocument(doc); + const filePath = doc.fileName; + noteActivity(kind, filePath); + send_update(false); }, ), ); - } + // Capture event: listen for file saves (dissertation instrumentation) + context.subscriptions.push( + vscode.workspace.onDidSaveTextDocument((doc) => { + // This is the first full end-to-end capture test. + // When a document is saved, send an event to the Rust server. + + void sendCaptureEvent( + "http://127.0.0.1:8080", // <-- update if your server uses a different port + "save", + doc.fileName, + { + reason: "manual_save", + languageId: doc.languageId, + lineCount: doc.lineCount, + }, + ); + }), + ); + + // Capture: start of a debug/run session. + context.subscriptions.push( + vscode.debug.onDidStartDebugSession((session) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "run", + filePath, + { + sessionName: session.name, + sessionType: session.type, + }, + ); + }), + ); + + // Capture: compile/build events via VS Code tasks. + context.subscriptions.push( + vscode.tasks.onDidStartTaskProcess((e) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + const task = e.execution.task; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "compile", + filePath, + { + taskName: task.name, + taskSource: task.source, + definition: task.definition, + processId: e.processId, + }, + ); + }), + ); + + } // if subscribed // Get the CodeChat Client's location from the VSCode // configuration. @@ -529,6 +765,19 @@ export const activate = (context: vscode.ExtensionContext) => { // On deactivation, close everything down. export const deactivate = async () => { console_log("CodeChat Editor extension: deactivating."); + + // CAPTURE: mark the end of an editor session. 
+ const active = vscode.window.activeTextEditor; + const endFilePath = active?.document.fileName; + await sendCaptureEvent( + CAPTURE_SERVER_BASE, + "session_end", + endFilePath, + { + mode: "vscode_extension", + }, + ); + await stop_client(); webview_panel?.dispose(); console_log("CodeChat Editor extension: deactivated."); diff --git a/server/log4rs.yml b/server/log4rs.yml index ff3752ed..0a36bfed 100644 --- a/server/log4rs.yml +++ b/server/log4rs.yml @@ -35,7 +35,7 @@ appenders: pattern: "{d} {l} {t} {L} - {m}{n}" root: - level: info + level: debug appenders: - console_appender - file_appender \ No newline at end of file diff --git a/server/src/capture.rs b/server/src/capture.rs index 3f8f7c15..174a4cbb 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -13,227 +13,503 @@ // You should have received a copy of the GNU General Public License along with // the CodeChat Editor. If not, see // [http://www.gnu.org/licenses](http://www.gnu.org/licenses). -/// # `Capture.rs` -- Capture CodeChat Editor Events -// ## Submodules -// -// ## Imports -// -// Standard library -use indoc::indoc; -use std::fs; + +/// `capture.rs` -- Capture CodeChat Editor Events +/// ============================================== +/// +/// This module provides an asynchronous event capture facility backed by a +/// PostgreSQL database. It is designed to support the dissertation study by +/// recording process-level data such as: +/// +/// * Frequency and timing of writing entries +/// * Edits to documentation and code +/// * Switches between documentation and coding activity +/// * Duration of engagement with reflective writing +/// * Save, compile, and run events +/// +/// Events are sent from the client (browser and/or VS Code extension) to the +/// server as JSON. The server enqueues events into an asynchronous worker which +/// performs batched inserts into the `events` table. 
+/// +/// Database schema +/// --------------- +/// +/// The following SQL statement creates the `events` table used by this module: +/// +/// ```sql +/// CREATE TABLE events ( +/// id SERIAL PRIMARY KEY, +/// user_id TEXT NOT NULL, +/// assignment_id TEXT, +/// group_id TEXT, +/// file_path TEXT, +/// event_type TEXT NOT NULL, +/// timestamp TEXT NOT NULL, +/// data TEXT +/// ); +/// ``` +/// +/// * `user_id` – participant identifier (student id, pseudonym, etc.). +/// * `assignment_id` – logical assignment / lab identifier. +/// * `group_id` – optional grouping (treatment / comparison, section). +/// * `file_path` – logical path of the file being edited. +/// * `event_type` – coarse event type (see `event_type` constants below). +/// * `timestamp` – RFC3339 timestamp (in UTC). +/// * `data` – JSON payload with event-specific details. + use std::io; -use std::path::Path; -use std::sync::Arc; -// Third-party -use chrono::Local; -use log::{error, info}; +use chrono::{DateTime, Utc}; +use log::{debug, error, info, warn}; use serde::{Deserialize, Serialize}; -use tokio::sync::Mutex; +use tokio::sync::mpsc; use tokio_postgres::{Client, NoTls}; -// Local - -/* ## The Event Structure: - - The `Event` struct represents an event to be stored in the database. - - Fields: - `user_id`: The ID of the user associated with the event. - - `event_type`: The type of event (e.g., "keystroke", "file_open"). - `data`: - Optional additional data associated with the event. +/// Canonical event type strings. Keep these stable for analysis. 
+pub mod event_types { + pub const WRITE_DOC: &str = "write_doc"; + pub const WRITE_CODE: &str = "write_code"; + pub const SWITCH_PANE: &str = "switch_pane"; + pub const DOC_SESSION: &str = "doc_session"; // duration of reflective writing + pub const SAVE: &str = "save"; + pub const COMPILE: &str = "compile"; + pub const RUN: &str = "run"; + pub const SESSION_START: &str = "session_start"; + pub const SESSION_END: &str = "session_end"; +} - ### Example +/// Configuration used to construct the PostgreSQL connection string. +/// +/// You can populate this from a JSON file or environment variables in +/// `main.rs`; this module stays agnostic. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CaptureConfig { + pub host: String, + pub user: String, + pub password: String, + pub dbname: String, + /// Optional: application-level identifier for this deployment (e.g., course + /// code or semester). Not stored in the DB directly; callers can embed this + /// in `data` if desired. + #[serde(default)] + pub app_id: Option, +} - let event = Event { user_id: "user123".to_string(), event_type: - "keystroke".to_string(), data: Some("Pressed key A".to_string()), }; -*/ +impl CaptureConfig { + /// Build a libpq-style connection string. + pub fn to_conn_str(&self) -> String { + format!( + "host={} user={} password={} dbname={}", + self.host, self.user, self.password, self.dbname + ) + } +} -#[derive(Deserialize, Debug)] -pub struct Event { +/// The in-memory representation of a single capture event. +#[derive(Debug, Clone)] +pub struct CaptureEvent { pub user_id: String, + pub assignment_id: Option, + pub group_id: Option, + pub file_path: Option, pub event_type: String, - pub data: Option, + /// When the event occurred, in UTC. + pub timestamp: DateTime, + /// Event-specific payload, stored as JSON text in the DB. 
+ pub data: serde_json::Value, } -/* - ## The Config Structure: - - The `Config` struct represents the database connection parameters read from - `config.json`. - - Fields: - `db_host`: The hostname or IP address of the database server. - - `db_user`: The username for the database connection. - `db_password`: The - password for the database connection. - `db_name`: The name of the database. - - let config = Config { db_host: "localhost".to_string(), db_user: - "your_db_user".to_string(), db_password: "your_db_password".to_string(), - db_name: "your_db_name".to_string(), }; -*/ +impl CaptureEvent { + /// Convenience constructor when the caller already has a timestamp. + pub fn new( + user_id: String, + assignment_id: Option, + group_id: Option, + file_path: Option, + event_type: impl Into, + timestamp: DateTime, + data: serde_json::Value, + ) -> Self { + Self { + user_id, + assignment_id, + group_id, + file_path, + event_type: event_type.into(), + timestamp, + data, + } + } -#[derive(Deserialize, Serialize, Debug)] -pub struct Config { - pub db_ip: String, - pub db_user: String, - pub db_password: String, - pub db_name: String, + /// Convenience constructor which uses the current time. + pub fn now( + user_id: String, + assignment_id: Option, + group_id: Option, + file_path: Option, + event_type: impl Into, + data: serde_json::Value, + ) -> Self { + Self::new( + user_id, + assignment_id, + group_id, + file_path, + event_type, + Utc::now(), + data, + ) + } } -/* - - ## The EventCapture Structure: - - The `EventCapture` struct provides methods to interact with the database. It -holds a `tokio_postgres::Client` for database operations. 
- -### Usage Example - -#\[tokio::main\] async fn main() -> Result<(), Box> { - -``` - // Create an instance of EventCapture using the configuration file - let event_capture = EventCapture::new("config.json").await?; - - // Create an event - let event = Event { - user_id: "user123".to_string(), - event_type: "keystroke".to_string(), - data: Some("Pressed key A".to_string()), - }; - - // Insert the event into the database - event_capture.insert_event(event).await?; - - Ok(()) -``` -} */ +/// Internal worker message. Identical to `CaptureEvent`, but separated in case +/// we later want to add batching / flush control signals. +type WorkerMsg = CaptureEvent; +/// Handle used by the rest of the server to record events. +/// +/// Cloning this handle is cheap: it only clones an `mpsc::UnboundedSender`. +#[derive(Clone)] pub struct EventCapture { - db_client: Arc>, + tx: mpsc::UnboundedSender, } -/* - ## The EventCapture Implementation -*/ - impl EventCapture { - /* - Creates a new `EventCapture` instance by reading the database connection parameters from the `config.json` file and connecting to the PostgreSQL database. - # Arguments - - config_path: The file path to the config.json file. - - # Returns - - A `Result` containing an `EventCapture` instance - */ - - pub async fn new>(config_path: P) -> Result { - // Read the configuration file - let config_content = fs::read_to_string(config_path).map_err(io::Error::other)?; - let config: Config = serde_json::from_str(&config_content) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - - // Build the connection string for the PostgreSQL database - let conn_str = format!( - "host={} user={} password={} dbname={}", - config.db_ip, config.db_user, config.db_password, config.db_name - ); - + /// Create a new `EventCapture` instance and spawn a background worker which + /// consumes events and inserts them into PostgreSQL. 
+ /// + /// This function is synchronous so it can be called from non-async server + /// setup code. It spawns an async task internally which performs the + /// database connection and event processing. + pub fn new(config: CaptureConfig) -> Result { + let conn_str = config.to_conn_str(); + + // High-level DB connection details (no password). info!( - "Attempting Capture Database Connection. IP:[{}] Username:[{}] Database Name:[{}]", - config.db_ip, config.db_user, config.db_name + "Capture: preparing PostgreSQL connection (host={}, dbname={}, user={}, app_id={:?})", + config.host, config.dbname, config.user, config.app_id ); + debug!("Capture: raw PostgreSQL connection string: {}", conn_str); - // Connect to the database asynchronously - let (client, connection) = tokio_postgres::connect(&conn_str, NoTls) - .await - .map_err(|e| io::Error::new(io::ErrorKind::ConnectionRefused, e))?; + let (tx, mut rx) = mpsc::unbounded_channel::(); - // Spawn a task to manage the database connection in the background + // Spawn a background task that will connect to PostgreSQL and then + // process events. This task runs on the Tokio/Actix runtime once the + // system starts, so the caller does not need to be async. tokio::spawn(async move { - if let Err(e) = connection.await { - error!("Database connection error: [{e}]"); + info!("Capture: attempting to connect to PostgreSQL…"); + + match tokio_postgres::connect(&conn_str, NoTls).await { + Ok((client, connection)) => { + info!("Capture: successfully connected to PostgreSQL."); + + // Drive the connection in its own task. + tokio::spawn(async move { + if let Err(err) = connection.await { + error!("Capture PostgreSQL connection error: {err}"); + } + }); + + // Main event loop: pull events off the channel and insert + // them into the database. 
+ while let Some(event) = rx.recv().await { + debug!( + "Capture: inserting event: type={}, user_id={}, assignment_id={:?}, group_id={:?}, file_path={:?}", + event.event_type, + event.user_id, + event.assignment_id, + event.group_id, + event.file_path + ); + + if let Err(err) = insert_event(&client, &event).await { + error!( + "Capture: FAILED to insert event (type={}, user_id={}): {err}", + event.event_type, event.user_id + ); + } else { + debug!("Capture: event insert successful."); + } + } + + info!("Capture: event channel closed; background worker exiting."); + } + Err(err) => { + // NOTE: we *don't* pass `err` twice here; `{err}` in the format + // string already grabs the local `err` binding. + error!( + "Capture: FAILED to connect to PostgreSQL (host={}, dbname={}, user={}): {err}", + config.host, + config.dbname, + config.user, + ); + // Drain and drop any events so we don't hold the sender. + warn!("Capture: draining pending events after failed DB connection."); + while rx.recv().await.is_some() {} + warn!("Capture: all pending events dropped due to connection failure."); + } } }); - info!( - "Connected to Database [{}] as User [{}]", - config.db_name, config.db_user + Ok(Self { tx }) + } + + /// Enqueue an event for insertion. This is non-blocking. + pub fn log(&self, event: CaptureEvent) { + debug!( + "Capture: queueing event: type={}, user_id={}, assignment_id={:?}, group_id={:?}, file_path={:?}", + event.event_type, + event.user_id, + event.assignment_id, + event.group_id, + event.file_path ); - Ok(EventCapture { - db_client: Arc::new(Mutex::new(client)), - }) + if let Err(err) = self.tx.send(event) { + error!("Capture: FAILED to enqueue capture event: {err}"); + } } +} - /* - Inserts an event into the database. +/// Insert a single event into the `events` table. 
+async fn insert_event(client: &Client, event: &CaptureEvent) -> Result { + let timestamp = event.timestamp.to_rfc3339(); + let data_text = event.data.to_string(); + + debug!( + "Capture: executing INSERT for user_id={}, event_type={}, timestamp={}", + event.user_id, event.event_type, timestamp + ); + + client + .execute( + "INSERT INTO events \ + (user_id, assignment_id, group_id, file_path, event_type, timestamp, data) \ + VALUES ($1, $2, $3, $4, $5, $6, $7)", + &[ + &event.user_id, + &event.assignment_id, + &event.group_id, + &event.file_path, + &event.event_type, + ×tamp, + &data_text, + ], + ) + .await +} - # Arguments - - `event`: An `Event` instance containing the event data to insert. +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn capture_config_to_conn_str_is_well_formed() { + let cfg = CaptureConfig { + host: "localhost".to_string(), + user: "alice".to_string(), + password: "secret".to_string(), + dbname: "codechat_capture".to_string(), + app_id: Some("spring25-study".to_string()), + }; + + let conn = cfg.to_conn_str(); + // Very simple checks: we don't care about ordering beyond what we format. + assert!(conn.contains("host=localhost")); + assert!(conn.contains("user=alice")); + assert!(conn.contains("password=secret")); + assert!(conn.contains("dbname=codechat_capture")); + } - # Returns - A `Result` indicating success or containing a `tokio_postgres::Error`. 
+ #[test] + fn capture_event_new_sets_all_fields() { + let ts = Utc::now(); + + let ev = CaptureEvent::new( + "user123".to_string(), + Some("lab1".to_string()), + Some("groupA".to_string()), + Some("/path/to/file.rs".to_string()), + "write_doc", + ts, + json!({ "chars_typed": 42 }), + ); - # Example - #[tokio::main] - async fn main() -> Result<(), Box> { - let event_capture = EventCapture::new("config.json").await?; + assert_eq!(ev.user_id, "user123"); + assert_eq!(ev.assignment_id.as_deref(), Some("lab1")); + assert_eq!(ev.group_id.as_deref(), Some("groupA")); + assert_eq!(ev.file_path.as_deref(), Some("/path/to/file.rs")); + assert_eq!(ev.event_type, "write_doc"); + assert_eq!(ev.timestamp, ts); + assert_eq!(ev.data, json!({ "chars_typed": 42 })); + } - let event = Event { - user_id: "user123".to_string(), - event_type: "keystroke".to_string(), - data: Some("Pressed key A".to_string()), - }; + #[test] + fn capture_event_now_uses_current_time_and_fields() { + let before = Utc::now(); + let ev = CaptureEvent::now( + "user123".to_string(), + None, + None, + None, + "save", + json!({ "reason": "manual" }), + ); + let after = Utc::now(); + + assert_eq!(ev.user_id, "user123"); + assert!(ev.assignment_id.is_none()); + assert!(ev.group_id.is_none()); + assert!(ev.file_path.is_none()); + assert_eq!(ev.event_type, "save"); + assert_eq!(ev.data, json!({ "reason": "manual" })); + + // Timestamp sanity check: it should be between before and after + assert!(ev.timestamp >= before); + assert!(ev.timestamp <= after); + } - event_capture.insert_event(event).await?; - Ok(()) - } - */ + #[test] + fn capture_config_json_round_trip() { + let json_text = r#" + { + "host": "db.example.com", + "user": "bob", + "password": "hunter2", + "dbname": "cc_events", + "app_id": "fall25" + } + "#; + + let cfg: CaptureConfig = serde_json::from_str(json_text).expect("JSON should parse"); + assert_eq!(cfg.host, "db.example.com"); + assert_eq!(cfg.user, "bob"); + assert_eq!(cfg.password, "hunter2"); 
+ assert_eq!(cfg.dbname, "cc_events"); + assert_eq!(cfg.app_id.as_deref(), Some("fall25")); + + // And it should serialize back to JSON without error + let _back = serde_json::to_string(&cfg).expect("Should serialize"); + } - pub async fn insert_event(&self, event: Event) -> Result<(), io::Error> { - let current_time = Local::now(); - let formatted_time = current_time.to_rfc3339(); + use std::fs; + use tokio::time::{sleep, Duration}; + + /// Integration-style test: verify that EventCapture actually inserts into the DB. + /// + /// Reads connection parameters from `capture_config.json` in the current working directory. + /// Logs the config and connection details via log4rs so you can confirm what is used. + /// + /// Run this test with: + /// cargo test event_capture_inserts_event_into_db -- --ignored --nocapture + /// + /// You must have a PostgreSQL database and a `capture_config.json` file such as: + /// { + /// "host": "localhost", + /// "user": "codechat_test_user", + /// "password": "codechat_test_password", + /// "dbname": "codechat_capture_test", + /// "app_id": "integration-test" + /// } + #[tokio::test] + #[ignore] + async fn event_capture_inserts_event_into_db() -> Result<(), Box> { + // Initialize logging for this test, using the same log4rs.yml as the server. + // If logging is already initialized, this will just return an error which we ignore. + let _ = log4rs::init_file("log4rs.yml", Default::default()); + + // 1. Load the capture configuration from file. + let cfg_text = fs::read_to_string("capture_config.json") + .expect("capture_config.json must exist in project root for this test"); + let cfg: CaptureConfig = + serde_json::from_str(&cfg_text).expect("capture_config.json must be valid JSON"); + + log::info!( + "TEST: Loaded DB config from capture_config.json: host={}, user={}, dbname={}, app_id={:?}", + cfg.host, + cfg.user, + cfg.dbname, + cfg.app_id + ); - // SQL statement to insert the event into the 'events' table - let stmt = indoc! 
{" - INSERT INTO events (user_id, event_type, timestamp, data) - VALUES ($1, $2, $3, $4) - "}; + // 2. Connect directly for setup + verification. + let conn_str = cfg.to_conn_str(); + log::info!("TEST: Attempting direct tokio_postgres connection for verification."); - // Acquire a lock on the database client for thread-safe access - let client = self.db_client.lock().await; + let (client, connection) = tokio_postgres::connect(&conn_str, NoTls).await?; + tokio::spawn(async move { + if let Err(e) = connection.await { + log::error!("TEST: direct connection error: {e}"); + } + }); - // Execute the SQL statement with the event data + // 3. Ensure the `events` table exists and is empty. client - .execute( - stmt, - &[ - &event.user_id, - &event.event_type, - &formatted_time, - &event.data, - ], + .batch_execute( + "CREATE TABLE IF NOT EXISTS events ( + id SERIAL PRIMARY KEY, + user_id TEXT NOT NULL, + assignment_id TEXT, + group_id TEXT, + file_path TEXT, + event_type TEXT NOT NULL, + timestamp TEXT NOT NULL, + data TEXT + ); + TRUNCATE TABLE events;", ) - .await - .map_err(io::Error::other)?; + .await?; + log::info!("TEST: events table ensured and truncated."); + + // 4. Start the EventCapture worker using the loaded config. + let capture = EventCapture::new(cfg.clone())?; + log::info!("TEST: EventCapture worker started."); + + // 5. Log a test event. + let expected_data = json!({ "chars_typed": 123 }); + let event = CaptureEvent::now( + "test-user".to_string(), + Some("hw1".to_string()), + Some("groupA".to_string()), + Some("/tmp/test.rs".to_string()), + event_types::WRITE_DOC, + expected_data.clone(), + ); + + log::info!("TEST: logging a test capture event."); + capture.log(event); - info!("Event inserted into database: {event:?}"); + // 6. Give the background worker time to insert the event. + sleep(Duration::from_millis(300)).await; + // 7. Verify the inserted record. 
+ let row = client + .query_one( + "SELECT user_id, assignment_id, group_id, file_path, event_type, data + FROM events + ORDER BY id DESC + LIMIT 1", + &[], + ) + .await?; + + let user_id: String = row.get(0); + let assignment_id: Option = row.get(1); + let group_id: Option = row.get(2); + let file_path: Option = row.get(3); + let event_type: String = row.get(4); + let data_text: String = row.get(5); + let data_value: serde_json::Value = serde_json::from_str(&data_text)?; + + assert_eq!(user_id, "test-user"); + assert_eq!(assignment_id.as_deref(), Some("hw1")); + assert_eq!(group_id.as_deref(), Some("groupA")); + assert_eq!(file_path.as_deref(), Some("/tmp/test.rs")); + assert_eq!(event_type, event_types::WRITE_DOC); + assert_eq!(data_value, expected_data); + + log::info!("✅ TEST: EventCapture integration test succeeded and wrote to database."); Ok(()) } } - -/* Database Schema (SQL DDL) - -The following SQL statement creates the `events` table used by this library: - -CREATE TABLE events ( id SERIAL PRIMARY KEY, user_id TEXT NOT NULL, -event_type TEXT NOT NULL, timestamp TEXT NOT NULL, data TEXT ); - -- **`id SERIAL PRIMARY KEY`**: Auto-incrementing primary key. -- **`user_id TEXT NOT NULL`**: The ID of the user associated with the event. -- **`event_type TEXT NOT NULL`**: The type of event. -- **`timestamp TEXT NOT NULL`**: The timestamp of the event. -- **`data TEXT`**: Optional additional data associated with the event. - **Note:** Ensure this table exists in your PostgreSQL database before using - the library. 
*/ diff --git a/server/src/webserver.rs b/server/src/webserver.rs index c178db35..3fbb8242 100644 --- a/server/src/webserver.rs +++ b/server/src/webserver.rs @@ -36,15 +36,17 @@ use std::{ // ### Third-party use actix_files; + use actix_web::{ App, HttpRequest, HttpResponse, HttpServer, dev::{Server, ServerHandle, ServiceFactory, ServiceRequest}, error::Error, - get, + get, post, http::header::{ContentType, DispositionType}, middleware, web::{self, Data}, }; + use actix_web_httpauth::{extractors::basic::BasicAuth, middleware::HttpAuthentication}; use actix_ws::AggregatedMessage; use bytes::Bytes; @@ -86,6 +88,10 @@ use crate::processing::{ CodeChatForWeb, TranslationResultsString, find_path_to_toc, source_to_codechat_for_web_string, }; +use crate::capture::{EventCapture, CaptureConfig, CaptureEvent}; + +use chrono::Utc; + // Data structures // --------------- // @@ -302,6 +308,8 @@ pub struct AppState { pub connection_id: Arc>>, /// The auth credentials if authentication is used. credentials: Option, + // Added to support capture - JDS - 11/2025 + pub capture: Option, } pub type WebAppState = web::Data; @@ -312,6 +320,20 @@ pub struct Credentials { pub password: String, } +/// JSON payload received from clients for capture events. +/// +/// The server will supply the timestamp; clients do not need to send it. +#[derive(Debug, Deserialize)] +pub struct CaptureEventWire { + pub user_id: String, + pub assignment_id: Option, + pub group_id: Option, + pub file_path: Option, + pub event_type: String, + /// Arbitrary event-specific data stored as JSON. + pub data: serde_json::Value, +} + // Macros // ------ /// Create a macro to report an error when enqueueing an item. 
@@ -495,6 +517,31 @@ async fn stop(app_state: WebAppState) -> HttpResponse { HttpResponse::NoContent().finish() } +#[post("/capture")] +async fn capture_endpoint( + app_state: WebAppState, + payload: web::Json, +) -> HttpResponse { + let wire = payload.into_inner(); + + if let Some(capture) = &app_state.capture { + let event = CaptureEvent { + user_id: wire.user_id, + assignment_id: wire.assignment_id, + group_id: wire.group_id, + file_path: wire.file_path, + event_type: wire.event_type, + // Server decides when the event is recorded. + timestamp: Utc::now(), + data: wire.data, + }; + + capture.log(event); + } + + HttpResponse::Ok().finish() +} + // Get the `mode` query parameter to determine `is_test_mode`; default to // `false`. pub fn get_test_mode(req: &HttpRequest) -> bool { @@ -1326,8 +1373,6 @@ pub fn setup_server( addr: &SocketAddr, credentials: Option, ) -> std::io::Result<(Server, Data)> { - // Connect to the Capture Database - //let _event_capture = EventCapture::new("config.json").await?; // Pre-load the bundled files before starting the webserver. let _ = &*BUNDLED_FILES_MAP; @@ -1411,6 +1456,42 @@ pub fn configure_logger(level: LevelFilter) -> Result<(), Box) -> WebAppState { + // Initialize event capture from a config file (optional). 
+ let capture: Option = { + // Build path: /capture_config.json + let mut config_path = ROOT_PATH.lock().unwrap().clone(); + config_path.push("capture_config.json"); + + match fs::read_to_string(&config_path) { + Ok(json) => { + match serde_json::from_str::(&json) { + Ok(cfg) => match EventCapture::new(cfg) { + Ok(ec) => { + eprintln!("Capture: enabled (config file: {config_path:?})"); + Some(ec) + } + Err(err) => { + eprintln!("Capture: failed to initialize from {config_path:?}: {err}"); + None + } + }, + Err(err) => { + eprintln!( + "Capture: invalid JSON in {config_path:?}: {err}" + ); + None + } + } + } + Err(err) => { + eprintln!( + "Capture: disabled (config file not found or unreadable: {config_path:?}: {err})" + ); + None + } + } + }; + web::Data::new(AppState { server_handle: Mutex::new(None), filewatcher_next_connection_id: Mutex::new(0), @@ -1421,6 +1502,7 @@ pub fn make_app_data(credentials: Option) -> WebAppState { client_queues: Arc::new(Mutex::new(HashMap::new())), connection_id: Arc::new(Mutex::new(HashSet::new())), credentials, + capture, }) } @@ -1450,6 +1532,7 @@ where .service(vscode_client_framework) .service(ping) .service(stop) + .service(capture_endpoint) // Reroute to the filewatcher filesystem for typical user-requested // URLs. .route("/", web::get().to(filewatcher_root_fs_redirect)) From d3f6b543870c896d88d74eba9ff4e6b7bd7c3d6a Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 12 Dec 2025 10:05:03 -0700 Subject: [PATCH 02/19] Merged capture code with latest extension --- extensions/VSCode/src/extension.ts | 571 +++++++++++++++++------------ 1 file changed, 340 insertions(+), 231 deletions(-) diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index 1e2bc9bb..c09dde45 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -3,7 +3,8 @@ // This file is part of the CodeChat Editor. 
The CodeChat Editor is free // software: you can redistribute it and/or modify it under the terms of the GNU // General Public License as published by the Free Software Foundation, either -// version 3 of the License, or (at your option) any later version. +// version 3 of the License, or (at your option) any later version of the GNU +// General Public License. // // The CodeChat Editor is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or @@ -110,6 +111,136 @@ let codeChatEditorServer: CodeChatEditorServer | undefined; initServer(ext.extensionPath); } +// ----------------------------------------------------------------------------- +// CAPTURE (Dissertation instrumentation) +// ----------------------------------------------------------------------------- + +// Types for talking to the Rust /capture endpoint. +// This mirrors `CaptureEventWire` in webserver.rs. +interface CaptureEventPayload { + user_id: string; + assignment_id?: string; + group_id?: string; + file_path?: string; + event_type: string; + data: any; // sent as JSON +} + +// TODO: replace these with something real (e.g., VS Code settings) +// For now, we hard-code to prove that the pipeline works end-to-end. +const CAPTURE_USER_ID = "test-user"; +const CAPTURE_ASSIGNMENT_ID = "demo-assignment"; +const CAPTURE_GROUP_ID = "demo-group"; + +// Base URL for the CodeChat server's /capture endpoint. +// NOTE: keep this in sync with whatever port your server actually uses. +const CAPTURE_SERVER_BASE = "http://127.0.0.1:8080"; + +// Simple classification of what the user is currently doing. +type ActivityKind = "doc" | "code" | "other"; + +// Language IDs that we treat as "documentation" for the dissertation metrics. +// You can refine this later if you want. 
+const DOC_LANG_IDS = new Set([ + "markdown", + "plaintext", + "latex", + "restructuredtext", +]); + +// Track the last activity kind and when a reflective-writing (doc) session started. +let lastActivityKind: ActivityKind = "other"; +let docSessionStart: number | null = null; + +// Heuristic: classify a document as documentation vs. code vs. other. +function classifyDocument(doc: vscode.TextDocument | undefined): ActivityKind { + if (!doc) { + return "other"; + } + if (DOC_LANG_IDS.has(doc.languageId)) { + return "doc"; + } + // Everything else we treat as code for now. + return "code"; +} + +// Helper to send a capture event to the Rust server. +async function sendCaptureEvent( + serverBaseUrl: string, // e.g. "http://127.0.0.1:8080" + eventType: string, + filePath?: string, + data: any = {}, +): Promise { + const payload: CaptureEventPayload = { + user_id: CAPTURE_USER_ID, + assignment_id: CAPTURE_ASSIGNMENT_ID, + group_id: CAPTURE_GROUP_ID, + file_path: filePath, + event_type: eventType, + data, + }; + + try { + const resp = await fetch(`${serverBaseUrl}/capture`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(payload), + }); + + if (!resp.ok) { + console.error( + "Capture event failed:", + resp.status, + await resp.text(), + ); + } + } catch (err) { + console.error("Error sending capture event:", err); + } +} + +// Update activity state, emit switch + doc_session events as needed. +function noteActivity(kind: ActivityKind, filePath?: string) { + const now = Date.now(); + + // Handle entering / leaving a "doc" session. + if (kind === "doc") { + if (docSessionStart === null) { + // Starting a new reflective-writing session. + docSessionStart = now; + void sendCaptureEvent(CAPTURE_SERVER_BASE, "session_start", filePath, { + mode: "doc", + }); + } + } else { + if (docSessionStart !== null) { + // Ending a reflective-writing session. 
+ const durationMs = now - docSessionStart; + docSessionStart = null; + void sendCaptureEvent(CAPTURE_SERVER_BASE, "doc_session", filePath, { + duration_ms: durationMs, + duration_seconds: durationMs / 1000.0, + }); + void sendCaptureEvent(CAPTURE_SERVER_BASE, "session_end", filePath, { + mode: "doc", + }); + } + } + + // If we switched between doc and code, log a switch_pane event. + const docOrCode = (k: ActivityKind) => k === "doc" || k === "code"; + if (docOrCode(lastActivityKind) && docOrCode(kind) && kind !== lastActivityKind) { + void sendCaptureEvent(CAPTURE_SERVER_BASE, "switch_pane", filePath, { + from: lastActivityKind, + to: kind, + }); + } + + lastActivityKind = kind; +} + // Activation/deactivation // ----------------------------------------------------------------------------- // @@ -126,6 +257,18 @@ export const activate = (context: vscode.ExtensionContext) => { async () => { console_log("CodeChat Editor extension: starting."); + // CAPTURE: mark the start of an editor session. + const active = vscode.window.activeTextEditor; + const startFilePath = active?.document.fileName; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "session_start", + startFilePath, + { + mode: "vscode_extension", + }, + ); + if (!subscribed) { subscribed = true; @@ -147,6 +290,40 @@ export const activate = (context: vscode.ExtensionContext) => { event.reason }, ${format_struct(event.contentChanges)}.`, ); + + // CAPTURE: classify this as documentation vs. code and log a write_* event. 
+ const doc = event.document; + const kind = classifyDocument(doc); + const filePath = doc.fileName; + const charsTyped = event.contentChanges + .map((c) => c.text.length) + .reduce((a, b) => a + b, 0); + + if (kind === "doc") { + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "write_doc", + filePath, + { + chars_typed: charsTyped, + languageId: doc.languageId, + }, + ); + } else if (kind === "code") { + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "write_code", + filePath, + { + chars_typed: charsTyped, + languageId: doc.languageId, + }, + ); + } + + // Update our notion of current activity + doc session. + noteActivity(kind, filePath); + send_update(true); }), ); @@ -163,48 +340,109 @@ export const activate = (context: vscode.ExtensionContext) => { ignore_active_editor_change = false; return; } + // Skip an update if we've already sent a `CurrentFile` for this editor. - if ( - current_editor === - vscode.window.activeTextEditor - ) { + if (current_editor === vscode.window.activeTextEditor) { return; } + + // CAPTURE: update activity + possible switch_pane/doc_session. + const doc = event.document; + const kind = classifyDocument(doc); + const filePath = doc.fileName; + noteActivity(kind, filePath); + send_update(true); }), ); context.subscriptions.push( - vscode.window.onDidChangeTextEditorSelection( - (_event) => { - if (ignore_selection_change) { - ignore_selection_change = false; - return; - } - console_log( - "CodeChat Editor extension: sending updated cursor/scroll position.", - ); - send_update(false); - }, - ), + vscode.window.onDidChangeTextEditorSelection((event) => { + if (ignore_selection_change) { + ignore_selection_change = false; + return; + } + + console_log( + "CodeChat Editor extension: sending updated cursor/scroll position.", + ); + + // CAPTURE: treat a selection change as "activity" in this document. 
+ const doc = event.textEditor.document; + const kind = classifyDocument(doc); + const filePath = doc.fileName; + noteActivity(kind, filePath); + + send_update(false); + }), + ); + + // CAPTURE: listen for file saves. + context.subscriptions.push( + vscode.workspace.onDidSaveTextDocument((doc) => { + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "save", + doc.fileName, + { + reason: "manual_save", + languageId: doc.languageId, + lineCount: doc.lineCount, + }, + ); + }), + ); + + // CAPTURE: start of a debug/run session. + context.subscriptions.push( + vscode.debug.onDidStartDebugSession((session) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "run", + filePath, + { + sessionName: session.name, + sessionType: session.type, + }, + ); + }), + ); + + // CAPTURE: compile/build events via VS Code tasks. + context.subscriptions.push( + vscode.tasks.onDidStartTaskProcess((e) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + const task = e.execution.task; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "compile", + filePath, + { + taskName: task.name, + taskSource: task.source, + definition: task.definition, + processId: e.processId, + }, + ); + }), ); } - // Get the CodeChat Client's location from the VSCode - // configuration. + // Get the CodeChat Client's location from the VSCode configuration. 
const codechat_client_location_str = vscode.workspace .getConfiguration("CodeChatEditor.Server") .get("ClientLocation"); assert(typeof codechat_client_location_str === "string"); switch (codechat_client_location_str) { case "html": - codechat_client_location = - CodeChatEditorClientLocation.html; + codechat_client_location = CodeChatEditorClientLocation.html; break; case "browser": - codechat_client_location = - CodeChatEditorClientLocation.browser; + codechat_client_location = CodeChatEditorClientLocation.browser; break; default: @@ -213,45 +451,24 @@ export const activate = (context: vscode.ExtensionContext) => { // Create or reveal the webview panel; if this is an external // browser, we'll open it after the client is created. - if ( - codechat_client_location === - CodeChatEditorClientLocation.html - ) { + if (codechat_client_location === CodeChatEditorClientLocation.html) { if (webview_panel !== undefined) { - // As below, don't take the focus when revealing. webview_panel.reveal(undefined, true); } else { - // Create a webview panel. webview_panel = vscode.window.createWebviewPanel( "CodeChat Editor", "CodeChat Editor", { - // Without this, the focus becomes this webview; - // setting this allows the code window open - // before this command was executed to retain - // the focus and be immediately rendered. preserveFocus: true, - // Put this in the a column beside the current - // column. viewColumn: vscode.ViewColumn.Beside, }, - // See - // [WebViewOptions](https://code.visualstudio.com/api/references/vscode-api#WebviewOptions). { enableScripts: true, - // Without this, the websocket connection is - // dropped when the panel is hidden. retainContextWhenHidden: true, }, ); webview_panel.onDidDispose(async () => { - // Shut down the render client when the webview - // panel closes. - console_log( - "CodeChat Editor extension: shut down webview.", - ); - // Closing the webview abruptly closes the Client, - // which produces an error. Don't report it. 
+ console_log("CodeChat Editor extension: shut down webview."); quiet_next_error = true; webview_panel = undefined; await stop_client(); @@ -259,13 +476,9 @@ export const activate = (context: vscode.ExtensionContext) => { } } - // Provide a simple status display while the CodeChat Editor - // Server is starting up. + // Provide a simple status display while the server is starting up. if (webview_panel !== undefined) { - // If we have an ID, then the GUI is already running; don't - // replace it. - webview_panel.webview.html = - "

<h1>CodeChat Editor</h1><p>Loading...</p>";
+                webview_panel.webview.html = "<h1>CodeChat Editor</h1><p>Loading...</p>
"; } else { vscode.window.showInformationMessage( "The CodeChat Editor is loading in an external browser...", @@ -277,19 +490,13 @@ export const activate = (context: vscode.ExtensionContext) => { codeChatEditorServer = new CodeChatEditorServer(); const hosted_in_ide = - codechat_client_location === - CodeChatEditorClientLocation.html; + codechat_client_location === CodeChatEditorClientLocation.html; console_log( `CodeChat Editor extension: sending message Opened(${hosted_in_ide}).`, ); await codeChatEditorServer.sendMessageOpened(hosted_in_ide); - // For the external browser, we can immediately send the - // `CurrentFile` message. For the WebView, we must first wait to - // receive the HTML for the WebView (the `ClientHtml` message). - if ( - codechat_client_location === - CodeChatEditorClientLocation.browser - ) { + + if (codechat_client_location === CodeChatEditorClientLocation.browser) { send_update(false); } @@ -299,10 +506,8 @@ export const activate = (context: vscode.ExtensionContext) => { console_log("CodeChat Editor extension: queue closed."); break; } - // Parse the data into a message. - const { id, message } = JSON.parse( - message_raw, - ) as EditorMessage; + + const { id, message } = JSON.parse(message_raw) as EditorMessage; console_log( `CodeChat Editor extension: Received data id = ${id}, message = ${format_struct( message, @@ -318,11 +523,9 @@ export const activate = (context: vscode.ExtensionContext) => { const key = keys[0]; const value = Object.values(message)[0]; - // Process this message. 
switch (key) { case "Update": { - const current_update = - value as UpdateMessageContents; + const current_update = value as UpdateMessageContents; const doc = get_document(current_update.file_path); if (doc === undefined) { await sendResult(id, { @@ -332,38 +535,25 @@ export const activate = (context: vscode.ExtensionContext) => { } if (current_update.contents !== undefined) { const source = current_update.contents.source; - // This will produce a change event, which we'll - // ignore. The change may also produce a - // selection change, which should also be - // ignored. + ignore_text_document_change = true; ignore_selection_change = true; - // Use a workspace edit, since calls to - // `TextEditor.edit` must be made to the active - // editor only. + const wse = new vscode.WorkspaceEdit(); - // Is this plain text, or a diff? + if ("Plain" in source) { wse.replace( doc.uri, doc.validateRange( - new vscode.Range( - 0, - 0, - doc.lineCount, - 0, - ), + new vscode.Range(0, 0, doc.lineCount, 0), ), source.Plain.doc, ); } else { assert("Diff" in source); - // If this diff was not made against the - // text we currently have, reject it. + if (source.Diff.version !== version) { await sendResult(id, "OutOfSync"); - // Send an `Update` with the full text to - // re-sync the Client. console_log( "CodeChat Editor extension: sending update because Client is out of sync.", ); @@ -372,26 +562,12 @@ export const activate = (context: vscode.ExtensionContext) => { } const diffs = source.Diff.doc; for (const diff of diffs) { - // Convert from character offsets from the - // beginning of the document to a - // `Position` (line, then offset on that - // line) needed by VSCode. const from = doc.positionAt(diff.from); if (diff.to === undefined) { - // This is an insert. - wse.insert( - doc.uri, - from, - diff.insert, - ); + wse.insert(doc.uri, from, diff.insert); } else { - // This is a replace or delete. 
const to = doc.positionAt(diff.to); - wse.replace( - doc.uri, - new Range(from, to), - diff.insert, - ); + wse.replace(doc.uri, new Range(from, to), diff.insert); } } } @@ -399,30 +575,17 @@ export const activate = (context: vscode.ExtensionContext) => { ignore_text_document_change = false; ignore_selection_change = false; - // Now that we've updated our text, update the - // associated version as well. version = current_update.contents.version; } - // Update the cursor and scroll position if - // provided. const editor = get_text_editor(doc); + let scroll_line = current_update.scroll_position; if (scroll_line !== undefined && editor) { ignore_selection_change = true; - const scroll_position = new vscode.Position( - // The VSCode line is zero-based; the - // CodeMirror line is one-based. - scroll_line - 1, - 0, - ); + const scroll_position = new vscode.Position(scroll_line - 1, 0); editor.revealRange( - new vscode.Range( - scroll_position, - scroll_position, - ), - // This is still not the top of the - // viewport, but a bit below it. + new vscode.Range(scroll_position, scroll_position), TextEditorRevealType.AtTop, ); } @@ -430,17 +593,9 @@ export const activate = (context: vscode.ExtensionContext) => { let cursor_line = current_update.cursor_position; if (cursor_line !== undefined && editor) { ignore_selection_change = true; - const cursor_position = new vscode.Position( - // The VSCode line is zero-based; the - // CodeMirror line is one-based. 
- cursor_line - 1, - 0, - ); + const cursor_position = new vscode.Position(cursor_line - 1, 0); editor.selections = [ - new vscode.Selection( - cursor_position, - cursor_position, - ), + new vscode.Selection(cursor_position, cursor_position), ]; } await sendResult(id); @@ -453,55 +608,33 @@ export const activate = (context: vscode.ExtensionContext) => { if (is_text) { let document; try { - document = - await vscode.workspace.openTextDocument( - current_file, - ); + document = await vscode.workspace.openTextDocument(current_file); } catch (e) { await sendResult(id, { - OpenFileFailed: [ - current_file, - (e as Error).toString(), - ], + OpenFileFailed: [current_file, (e as Error).toString()], }); continue; } ignore_active_editor_change = true; - current_editor = - await vscode.window.showTextDocument( - document, - current_editor?.viewColumn, - ); + current_editor = await vscode.window.showTextDocument( + document, + current_editor?.viewColumn, + ); ignore_active_editor_change = false; await sendResult(id); } else { - // TODO: open using a custom document editor. - // See - // [openCustomDocument](https://code.visualstudio.com/api/references/vscode-api#CustomEditorProvider.openCustomDocument), - // which can evidently be called - // [indirectly](https://stackoverflow.com/a/65101181/4374935). - // See also - // [Built-in Commands](https://code.visualstudio.com/api/references/commands). - // For now, simply respond with an OK, since the - // following doesn't work. if (false) { commands .executeCommand( "vscode.open", vscode.Uri.file(current_file), - { - viewColumn: - current_editor?.viewColumn, - }, + { viewColumn: current_editor?.viewColumn }, ) .then( async () => await sendResult(id), async (reason) => await sendResult(id, { - OpenFileFailed: [ - current_file, - reason, - ], + OpenFileFailed: [current_file, reason], }), ); } @@ -511,7 +644,6 @@ export const activate = (context: vscode.ExtensionContext) => { } case "Result": { - // Report if this was an error. 
const result_contents = value as MessageResult; if ("Err" in result_contents) { show_error( @@ -523,23 +655,15 @@ export const activate = (context: vscode.ExtensionContext) => { case "LoadFile": { const load_file = value as string; - // Look through all open documents to see if we have - // the requested file. const doc = get_document(load_file); const load_file_result: null | [string, number] = - doc === undefined - ? null - : [ - doc.getText(), - (version = Math.random()), - ]; + doc === undefined ? null : [doc.getText(), (version = rand())]; console_log( - `CodeChat Editor extension: Result(LoadFile(${format_struct(load_file_result)}))`, - ); - await codeChatEditorServer.sendResultLoadfile( - id, - load_file_result, + `CodeChat Editor extension: Result(LoadFile(${format_struct( + load_file_result, + )}))`, ); + await codeChatEditorServer.sendResultLoadfile(id, load_file_result); break; } @@ -548,17 +672,13 @@ export const activate = (context: vscode.ExtensionContext) => { assert(webview_panel !== undefined); webview_panel.webview.html = client_html; await sendResult(id); - // Now that the Client is loaded, send the editor's - // current file to the server. send_update(false); break; } default: console.error( - `Unhandled message ${key}(${format_struct( - value, - )}`, + `Unhandled message ${key}(${format_struct(value)}`, ); break; } @@ -571,6 +691,33 @@ export const activate = (context: vscode.ExtensionContext) => { // On deactivation, close everything down. export const deactivate = async () => { console_log("CodeChat Editor extension: deactivating."); + + // CAPTURE: if we were in a doc session, close it out so duration is recorded. 
+ if (docSessionStart !== null) { + const now = Date.now(); + const durationMs = now - docSessionStart; + docSessionStart = null; + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + + await sendCaptureEvent(CAPTURE_SERVER_BASE, "doc_session", filePath, { + duration_ms: durationMs, + duration_seconds: durationMs / 1000.0, + closed_by: "extension_deactivate", + }); + await sendCaptureEvent(CAPTURE_SERVER_BASE, "session_end", filePath, { + mode: "doc", + closed_by: "extension_deactivate", + }); + } + + // CAPTURE: mark the end of an editor session. + const active = vscode.window.activeTextEditor; + const endFilePath = active?.document.fileName; + await sendCaptureEvent(CAPTURE_SERVER_BASE, "session_end", endFilePath, { + mode: "vscode_extension", + }); + await stop_client(); webview_panel?.dispose(); console_log("CodeChat Editor extension: deactivated."); @@ -592,7 +739,9 @@ const format_struct = (complex_data_structure: any): string => const sendResult = async (id: number, result?: ResultErrTypes) => { assert(codeChatEditorServer); console_log( - `CodeChat Editor extension: sending Result(id = ${id}, ${format_struct(result)}).`, + `CodeChat Editor extension: sending Result(id = ${id}, ${format_struct( + result, + )}).`, ); try { await codeChatEditorServer.sendResult( @@ -610,56 +759,41 @@ const sendResult = async (id: number, result?: ResultErrTypes) => { const send_update = (this_is_dirty: boolean) => { is_dirty ||= this_is_dirty; if (can_render()) { - // Render after some inactivity: cancel any existing timer, then ... if (idle_timer !== undefined) { clearTimeout(idle_timer); } - // ... schedule a render after an autosave timeout. 
idle_timer = setTimeout(async () => { if (can_render()) { const ate = vscode.window.activeTextEditor; if (ate !== undefined && ate !== current_editor) { - // Send a new current file after a short delay; this allows - // the user to rapidly cycle through several editors without - // needing to reload the Client with each cycle. current_editor = ate; - const current_file = ate!.document.fileName; + const current_file = ate.document.fileName; console_log( `CodeChat Editor extension: sending CurrentFile(${current_file}}).`, ); try { - await codeChatEditorServer!.sendMessageCurrentFile( - current_file, - ); + await codeChatEditorServer!.sendMessageCurrentFile(current_file); } catch (e) { show_error(`Error sending CurrentFile message: ${e}.`); } - // Since we just requested a new file, the contents are - // clean by definition. is_dirty = false; - // Don't send an updated cursor position until this file is - // loaded. return; } - // The - // [Position](https://code.visualstudio.com/api/references/vscode-api#Position) - // encodes the line as a zero-based value. In contrast, - // CodeMirror - // [Text.line](https://codemirror.net/docs/ref/#state.Text.line) - // is 1-based. - const cursor_position = - current_editor!.selection.active.line + 1; + const cursor_position = current_editor!.selection.active.line + 1; const scroll_position = current_editor!.visibleRanges[0].start.line + 1; const file_path = current_editor!.document.fileName; - // Send contents only if necessary. + const option_contents: null | [string, number] = is_dirty ? 
[current_editor!.document.getText(), (version = rand())] : null; is_dirty = false; + console_log( - `CodeChat Editor extension: sending Update(${file_path}, ${cursor_position}, ${scroll_position}, ${format_struct(option_contents)})`, + `CodeChat Editor extension: sending Update(${file_path}, ${cursor_position}, ${scroll_position}, ${format_struct( + option_contents, + )})`, ); await codeChatEditorServer!.sendMessageUpdatePlain( file_path, @@ -672,8 +806,7 @@ const send_update = (this_is_dirty: boolean) => { } }; -// Gracefully shut down the render client if possible. Shut down the client as -// well. +// Gracefully shut down the render client if possible. Shut down the client as well. const stop_client = async () => { console_log("CodeChat Editor extension: stopping client."); if (codeChatEditorServer !== undefined) { @@ -682,8 +815,6 @@ const stop_client = async () => { codeChatEditorServer = undefined; } - // Shut the timer down after the client is undefined, to ensure it can't be - // started again by a call to `start_render()`. if (idle_timer !== undefined) { clearTimeout(idle_timer); idle_timer = undefined; @@ -700,10 +831,7 @@ const show_error = (message: string) => { } console.error(`CodeChat Editor extension: ${message}`); if (webview_panel !== undefined) { - // If the panel was displaying other content, reset it for errors. - if ( - !webview_panel.webview.html.startsWith("

<h1>CodeChat Editor</h1>
") - ) { + if (!webview_panel.webview.html.startsWith("

<h1>CodeChat Editor</h1>
")) { webview_panel.webview.html = "

<h1>CodeChat Editor</h1>
"; } webview_panel.webview.html += `

${escape( @@ -716,42 +844,23 @@ const show_error = (message: string) => { } }; -// Only render if the window and editor are active, we have a valid render -// client, and the webview is visible. +// Only render if the window and editor are active, we have a valid render client, +// and the webview is visible. const can_render = () => { return ( (vscode.window.activeTextEditor !== undefined || current_editor !== undefined) && codeChatEditorServer !== undefined && - // TODO: I don't think these matter -- the Server is in charge of - // sending output to the Client. (codechat_client_location === CodeChatEditorClientLocation.browser || webview_panel !== undefined) ); }; const get_document = (file_path: string) => { - // Look through all open documents to see if we have the requested file. for (const doc of vscode.workspace.textDocuments) { - // Make the possibly incorrect assumption that only Windows filesystems - // are case-insensitive; I don't know how to easily determine the - // case-sensitivity of the current filesystem without extra probing code - // (write a file in mixed case, try to open it in another mixed case.) - // Per - // [How to Work with Different Filesystems](https://nodejs.org/en/learn/manipulating-files/working-with-different-filesystems#filesystem-behavior), - // "Be wary of inferring filesystem behavior from `process.platform`. - // For example, do not assume that because your program is running on - // Darwin that you are therefore working on a case-insensitive - // filesystem (HFS+), as the user may be using a case-sensitive - // filesystem (HFSX)." - // - // The same article - // [recommends](https://nodejs.org/en/learn/manipulating-files/working-with-different-filesystems#be-prepared-for-slight-differences-in-comparison-functions) - // using `toUpperCase` for case-insensitive filename comparisons. 
if ( (!is_windows && doc.fileName === file_path) || - (is_windows && - doc.fileName.toUpperCase() === file_path.toUpperCase()) + (is_windows && doc.fileName.toUpperCase() === file_path.toUpperCase()) ) { return doc; } From 7f316c49d1cfb67e5c4a81348ad05f5b6aa35c50 Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Sun, 14 Dec 2025 12:26:43 -0700 Subject: [PATCH 03/19] Modified capture test --- server/src/capture.rs | 244 +++++++++++++++++++++++++++++------------- 1 file changed, 171 insertions(+), 73 deletions(-) diff --git a/server/src/capture.rs b/server/src/capture.rs index 174a4cbb..5096ffda 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -15,24 +15,24 @@ // [http://www.gnu.org/licenses](http://www.gnu.org/licenses). /// `capture.rs` -- Capture CodeChat Editor Events -/// ============================================== +/// ============================================================================ /// /// This module provides an asynchronous event capture facility backed by a /// PostgreSQL database. It is designed to support the dissertation study by /// recording process-level data such as: /// -/// * Frequency and timing of writing entries -/// * Edits to documentation and code -/// * Switches between documentation and coding activity -/// * Duration of engagement with reflective writing -/// * Save, compile, and run events +/// * Frequency and timing of writing entries +/// * Edits to documentation and code +/// * Switches between documentation and coding activity +/// * Duration of engagement with reflective writing +/// * Save, compile, and run events /// /// Events are sent from the client (browser and/or VS Code extension) to the /// server as JSON. The server enqueues events into an asynchronous worker which /// performs batched inserts into the `events` table. 
/// /// Database schema -/// --------------- +/// ---------------------------------------------------------------------------- /// /// The following SQL statement creates the `events` table used by this module: /// @@ -49,13 +49,13 @@ /// ); /// ``` /// -/// * `user_id` – participant identifier (student id, pseudonym, etc.). -/// * `assignment_id` – logical assignment / lab identifier. -/// * `group_id` – optional grouping (treatment / comparison, section). -/// * `file_path` – logical path of the file being edited. -/// * `event_type` – coarse event type (see `event_type` constants below). -/// * `timestamp` – RFC3339 timestamp (in UTC). -/// * `data` – JSON payload with event-specific details. +/// * `user_id` – participant identifier (student id, pseudonym, etc.). +/// * `assignment_id` – logical assignment / lab identifier. +/// * `group_id` – optional grouping (treatment / comparison, section). +/// * `file_path` – logical path of the file being edited. +/// * `event_type` – coarse event type (see `event_type` constants below). +/// * `timestamp` – RFC3339 timestamp (in UTC). +/// * `data` – JSON payload with event-specific details. use std::io; @@ -64,6 +64,7 @@ use log::{debug, error, info, warn}; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio_postgres::{Client, NoTls}; +use std::error::Error; /// Canonical event type strings. Keep these stable for analysis. pub mod event_types { @@ -234,20 +235,30 @@ impl EventCapture { info!("Capture: event channel closed; background worker exiting."); } - Err(err) => { - // NOTE: we *don't* pass `err` twice here; `{err}` in the format - // string already grabs the local `err` binding. - error!( - "Capture: FAILED to connect to PostgreSQL (host={}, dbname={}, user={}): {err}", - config.host, - config.dbname, - config.user, - ); - // Drain and drop any events so we don't hold the sender. 
- warn!("Capture: draining pending events after failed DB connection."); - while rx.recv().await.is_some() {} - warn!("Capture: all pending events dropped due to connection failure."); - } + +Err(err) => { + let ctx = format!( + "Capture: FAILED to connect to PostgreSQL (host={}, dbname={}, user={})", + config.host, config.dbname, config.user + ); + + log_pg_connect_error(&ctx, &err); + + // Drain and drop any events so we don't hold the sender. + warn!("Capture: draining pending events after failed DB connection."); + while rx.recv().await.is_some() {} + warn!("Capture: all pending events dropped due to connection failure."); +} + + // Err(err) => { // NOTE: we *don't* pass `err` twice here; + // `{err}` in the format // string already grabs the local `err` + // binding. error!( "Capture: FAILED to connect to PostgreSQL + // (host={}, dbname={}, user={}): {err}", config.host, + // config.dbname, config.user, ); // Drain and drop any events + // so we don't hold the sender. warn!("Capture: draining pending + // events after failed DB connection."); while + // rx.recv().await.is\_some() {} warn!("Capture: all pending + // events dropped due to connection failure."); } } }); @@ -271,6 +282,47 @@ impl EventCapture { } } +fn log_pg_connect_error(context: &str, err: &tokio_postgres::Error) { + // If Postgres returned a structured DbError, log it ONCE and bail. + if let Some(db) = err.as_db_error() { + // Example: 28P01 = invalid\_password + error!( + "{context}: PostgreSQL {} (SQLSTATE {})", + db.message(), + db.code().code() + ); + + if let Some(detail) = db.detail() { + error!("{context}: detail: {detail}"); + } + if let Some(hint) = db.hint() { + error!("{context}: hint: {hint}"); + } + return; + } + + // Otherwise, try to find an underlying std::io::Error (refused, timed out, + // DNS, etc.) 
+ let mut current: &(dyn Error + 'static) = err; + while let Some(source) = current.source() { + if let Some(ioe) = source.downcast_ref::() { + error!( + "{context}: I/O error kind={:?} raw_os_error={:?} msg={}", + ioe.kind(), + ioe.raw_os_error(), + ioe + ); + return; + } + current = source; + } + + // Fallback: log once (Display) + error!("{context}: {err}"); +} + + + /// Insert a single event into the `events` table. async fn insert_event(client: &Client, event: &CaptureEvent) -> Result { let timestamp = event.timestamp.to_rfc3339(); @@ -315,7 +367,8 @@ mod tests { }; let conn = cfg.to_conn_str(); - // Very simple checks: we don't care about ordering beyond what we format. + // Very simple checks: we don't care about ordering beyond what we + // format. assert!(conn.contains("host=localhost")); assert!(conn.contains("user=alice")); assert!(conn.contains("password=secret")); @@ -394,29 +447,29 @@ mod tests { } use std::fs; - use tokio::time::{sleep, Duration}; + //use tokio::time::{sleep, Duration}; - /// Integration-style test: verify that EventCapture actually inserts into the DB. + /// Integration-style test: verify that EventCapture actually inserts into + /// the DB. /// - /// Reads connection parameters from `capture_config.json` in the current working directory. - /// Logs the config and connection details via log4rs so you can confirm what is used. + /// Reads connection parameters from `capture_config.json` in the current + /// working directory. Logs the config and connection details via log4rs so + /// you can confirm what is used. 
/// - /// Run this test with: - /// cargo test event_capture_inserts_event_into_db -- --ignored --nocapture + /// Run this test with: cargo test event\_capture\_inserts\_event\_into\_db + /// -- --ignored --nocapture /// - /// You must have a PostgreSQL database and a `capture_config.json` file such as: - /// { - /// "host": "localhost", - /// "user": "codechat_test_user", - /// "password": "codechat_test_password", - /// "dbname": "codechat_capture_test", - /// "app_id": "integration-test" - /// } + /// You must have a PostgreSQL database and a `capture_config.json` file + /// such as: { "host": "localhost", "user": "codechat\_test\_user", + /// "password": "codechat\_test\_password", "dbname": + /// "codechat\_capture\_test", "app\_id": "integration-test" } #[tokio::test] #[ignore] async fn event_capture_inserts_event_into_db() -> Result<(), Box> { - // Initialize logging for this test, using the same log4rs.yml as the server. - // If logging is already initialized, this will just return an error which we ignore. + + // Initialize logging for this test, using the same log4rs.yml as the + // server. If logging is already initialized, this will just return an + // error which we ignore. let _ = log4rs::init_file("log4rs.yml", Default::default()); // 1. Load the capture configuration from file. @@ -444,23 +497,52 @@ mod tests { } }); - // 3. Ensure the `events` table exists and is empty. 
- client - .batch_execute( - "CREATE TABLE IF NOT EXISTS events ( - id SERIAL PRIMARY KEY, - user_id TEXT NOT NULL, - assignment_id TEXT, - group_id TEXT, - file_path TEXT, - event_type TEXT NOT NULL, - timestamp TEXT NOT NULL, - data TEXT - ); - TRUNCATE TABLE events;", + // Verify the events table already exists + let row = client + .query_one( + r#" + SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = 'public' + AND table_name = 'events' + ) AS exists + "#, + &[], ) .await?; - log::info!("TEST: events table ensured and truncated."); + + let exists: bool = row.get("exists"); + assert!( + exists, + "TEST SETUP ERROR: public.events table does not exist. \ + It must be created by a migration or admin step." + ); + + // Insert a single test row (this is what the app really needs) + let test_user_id = format!( + "TEST_USER_{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis() + ); + + let insert_row = client + .query_one( + r#" + INSERT INTO public.events + (user_id, assignment_id, group_id, file_path, event_type, timestamp, data) + VALUES + ($1, NULL, NULL, NULL, 'test_event', $2, '{"test":true}') + RETURNING id + "#, + &[&test_user_id, &format!("{:?}", std::time::SystemTime::now())], + ) + .await?; + + let inserted_id: i32 = insert_row.get("id"); + info!("TEST: inserted event id={}", inserted_id); // 4. Start the EventCapture worker using the loaded config. let capture = EventCapture::new(cfg.clone())?; @@ -480,19 +562,35 @@ mod tests { log::info!("TEST: logging a test capture event."); capture.log(event); - // 6. Give the background worker time to insert the event. - sleep(Duration::from_millis(300)).await; - - // 7. Verify the inserted record. - let row = client - .query_one( - "SELECT user_id, assignment_id, group_id, file_path, event_type, data - FROM events - ORDER BY id DESC - LIMIT 1", - &[], - ) - .await?; + // 6. 
Wait (deterministically) for the background worker to insert the event, + // then fetch THAT row (instead of "latest row in the table"). + use tokio::time::{sleep, Duration, Instant}; + + let deadline = Instant::now() + Duration::from_secs(2); + + let row = loop { + match client + .query_one( + r#" + SELECT user_id, assignment_id, group_id, file_path, event_type, data + FROM events + WHERE user_id = $1 AND event_type = $2 + ORDER BY id DESC + LIMIT 1 + "#, + &[&"test-user", &event_types::WRITE_DOC], + ) + .await + { + Ok(row) => break row, // found it + Err(_) => { + if Instant::now() >= deadline { + return Err("Timed out waiting for EventCapture insert".into()); + } + sleep(Duration::from_millis(50)).await; + } + } + }; let user_id: String = row.get(0); let assignment_id: Option = row.get(1); From a70d0d7c0d84a7e4bd0ada7db28de35f01a73c40 Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 19 Dec 2025 13:11:45 -0700 Subject: [PATCH 04/19] Ongoing Development --- extensions/VSCode/src/extension.ts | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index c09dde45..c791e6b8 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -52,6 +52,7 @@ import { MAX_MESSAGE_LENGTH, } from "../../../client/src/debug_enabled.mjs"; import { ResultErrTypes } from "../../../client/src/rust-types/ResultErrTypes"; +import * as os from "os"; // Globals // ----------------------------------------------------------------------------- @@ -128,7 +129,24 @@ interface CaptureEventPayload { // TODO: replace these with something real (e.g., VS Code settings) // For now, we hard-code to prove that the pipeline works end-to-end. 
-const CAPTURE_USER_ID = "test-user"; +const CAPTURE_USER_ID: string = (() => { + try { + const u = os.userInfo().username; + if (u && u.trim().length > 0) { + return u.trim(); + } + } catch (_) { + // fall through + } + + // Fallbacks (should rarely be needed) + return ( + process.env["USERNAME"] || + process.env["USER"] || + "unknown-user" + ); +})(); + const CAPTURE_ASSIGNMENT_ID = "demo-assignment"; const CAPTURE_GROUP_ID = "demo-group"; @@ -879,3 +897,16 @@ const console_log = (...args: any) => { console.log(...args); } }; + +function getCurrentUsername(): string { + try { + // Most reliable on Windows/macOS/Linux + const u = os.userInfo().username; + if (u && u.trim().length > 0) return u.trim(); + } catch (_) {} + + // Fallbacks + const envUser = process.env["USERNAME"] || process.env["USER"]; + return (envUser && envUser.trim().length > 0) ? envUser.trim() : "unknown-user"; +} + From 3e02ceec3928c042868578c7cb053969634f5fbf Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 13 Feb 2026 10:26:34 -0700 Subject: [PATCH 05/19] Capture Changes --- extensions/VSCode/src/extension.ts | 116 ++++++++++++++++++++++++++++- server/src/capture.rs | 2 + server/src/webserver.rs | 56 ++++++++++---- 3 files changed, 156 insertions(+), 18 deletions(-) diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index cd110e11..6b34aedb 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -55,6 +55,8 @@ import { import { ResultErrTypes } from "../../../client/src/rust-types/ResultErrTypes.js"; import * as os from "os"; +import * as crypto from "crypto"; + // Globals // ----------------------------------------------------------------------------- enum CodeChatEditorClientLocation { @@ -62,6 +64,9 @@ enum CodeChatEditorClientLocation { browser, } +// Create a unique session ID for logging +const CAPTURE_SESSION_ID = crypto.randomUUID(); + // True on Windows, false on 
OS X / Linux. const is_windows = process.platform === "win32"; @@ -117,6 +122,59 @@ let codeChatEditorServer: CodeChatEditorServer | undefined; // CAPTURE (Dissertation instrumentation) // ----------------------------------------------------------------------------- +function isInMarkdownCodeFence(doc: vscode.TextDocument, line: number): boolean { + // Very simple fence tracker: toggles when encountering ``` or ~~~ at start of line. + // Good enough for dissertation instrumentation; refine later if needed. + let inFence = false; + for (let i = 0; i <= line; i++) { + const t = doc.lineAt(i).text.trim(); + if (t.startsWith("```") || t.startsWith("~~~")) { + inFence = !inFence; + } + } + return inFence; +} + +function isInRstCodeBlock(doc: vscode.TextDocument, line: number): boolean { + // Heuristic: find the most recent ".. code-block::" (or "::") and see if we're in its indented region. + // This won’t be perfect, but it’s far better than file-level classification. + let blockLine = -1; + for (let i = line; i >= 0; i--) { + const t = doc.lineAt(i).text; + const tt = t.trim(); + if (tt.startsWith(".. code-block::") || tt === "::") { + blockLine = i; + break; + } + // If we hit a non-indented line after searching upward too far, keep going; rst blocks can be separated by blank lines. + } + if (blockLine < 0) return false; + + // RST code block content usually begins after optional blank line(s), indented. + // Determine whether current line is indented relative to block directive line. + const cur = doc.lineAt(line).text; + if (cur.trim().length === 0) return false; + + // If it's indented at least one space/tab, treat it as inside block. + return /^\s+/.test(cur); +} + +function classifyAtPosition(doc: vscode.TextDocument, pos: vscode.Position): ActivityKind { + if (DOC_LANG_IDS.has(doc.languageId)) { + if (doc.languageId === "markdown") { + return isInMarkdownCodeFence(doc, pos.line) ? 
"code" : "doc"; + } + if (doc.languageId === "restructuredtext") { + return isInRstCodeBlock(doc, pos.line) ? "code" : "doc"; + } + // Other doc types: default to doc + return "doc"; + } + return "code"; +} + + + // Types for talking to the Rust /capture endpoint. // This mirrors `CaptureEventWire` in webserver.rs. interface CaptureEventPayload { @@ -196,7 +254,12 @@ async function sendCaptureEvent( group_id: CAPTURE_GROUP_ID, file_path: filePath, event_type: eventType, - data, + data: { + ...data, + session_id: CAPTURE_SESSION_ID, + client_timestamp_ms: Date.now(), + client_tz_offset_min: new Date().getTimezoneOffset(), + }, }; try { @@ -312,7 +375,11 @@ export const activate = (context: vscode.ExtensionContext) => { // CAPTURE: classify this as documentation vs. code and log a write_* event. const doc = event.document; - const kind = classifyDocument(doc); +// const kind = classifyDocument(doc); + const firstChange = event.contentChanges[0]; + const pos = firstChange.range.start; + const kind = classifyAtPosition(doc, pos); + const filePath = doc.fileName; const charsTyped = event.contentChanges .map((c) => c.text.length) @@ -370,7 +437,10 @@ export const activate = (context: vscode.ExtensionContext) => { // CAPTURE: update activity + possible switch_pane/doc_session. const doc = event.document; - const kind = classifyDocument(doc); + // const kind = classifyDocument(doc); + const pos = event.selection?.active ?? new vscode.Position(0, 0); + const kind = classifyAtPosition(doc, pos); + const filePath = doc.fileName; noteActivity(kind, filePath); @@ -391,7 +461,9 @@ export const activate = (context: vscode.ExtensionContext) => { // CAPTURE: treat a selection change as "activity" in this document. const doc = event.textEditor.document; - const kind = classifyDocument(doc); + // const kind = classifyDocument(doc); + const pos = event.selections?.[0]?.active ?? 
event.textEditor.selection.active; + const kind = classifyAtPosition(doc, pos); const filePath = doc.fileName; noteActivity(kind, filePath); @@ -399,6 +471,42 @@ export const activate = (context: vscode.ExtensionContext) => { }), ); + // CAPTURE: end of a debug/run session. + context.subscriptions.push( + vscode.debug.onDidTerminateDebugSession((session) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "run_end", + filePath, + { + sessionName: session.name, + sessionType: session.type, + }, + ); + }), + ); + + // CAPTURE: compile/build end events via VS Code tasks. + context.subscriptions.push( + vscode.tasks.onDidEndTaskProcess((e) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + const task = e.execution.task; + void sendCaptureEvent( + CAPTURE_SERVER_BASE, + "compile_end", + filePath, + { + taskName: task.name, + taskSource: task.source, + exitCode: e.exitCode, + }, + ); + }), + ); + // CAPTURE: listen for file saves. context.subscriptions.push( vscode.workspace.onDidSaveTextDocument((doc) => { diff --git a/server/src/capture.rs b/server/src/capture.rs index 5096ffda..fd7eede9 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -77,6 +77,8 @@ pub mod event_types { pub const RUN: &str = "run"; pub const SESSION_START: &str = "session_start"; pub const SESSION_END: &str = "session_end"; + pub const COMPILE_END: &str = "compile_end"; + pub const RUN_END: &str = "run_end"; } /// Configuration used to construct the PostgreSQL connection string. diff --git a/server/src/webserver.rs b/server/src/webserver.rs index c876d778..c963bab7 100644 --- a/server/src/webserver.rs +++ b/server/src/webserver.rs @@ -406,10 +406,18 @@ pub struct CaptureEventWire { pub group_id: Option, pub file_path: Option, pub event_type: String, - /// Arbitrary event-specific data stored as JSON. 
- pub data: serde_json::Value, + + /// Optional client-side timestamp (milliseconds since Unix epoch). + pub client_timestamp_ms: Option, + + /// Optional client timezone offset in minutes (JS Date().getTimezoneOffset()). + pub client_tz_offset_min: Option, + + /// Arbitrary event-specific data stored as JSON (optional). + pub data: Option, } + // Macros // ----------------------------------------------------------------------------- /// Create a macro to report an error when enqueueing an item. @@ -600,21 +608,41 @@ async fn capture_endpoint( ) -> HttpResponse { let wire = payload.into_inner(); - if let Some(capture) = &app_state.capture { - let event = CaptureEvent { - user_id: wire.user_id, - assignment_id: wire.assignment_id, - group_id: wire.group_id, - file_path: wire.file_path, - event_type: wire.event_type, - // Server decides when the event is recorded. - timestamp: Utc::now(), - data: wire.data, - }; + if let Some(capture) = &app_state.capture { + // Default missing data to empty object + let mut data = wire.data.unwrap_or_else(|| serde_json::json!({})); + + // Ensure data is an object so we can attach fields + if !data.is_object() { + data = serde_json::json!({ "value": data }); + } - capture.log(event); + // Add client timestamp fields if present (even if extension also sends them; + // overwriting is fine and consistent). + if let serde_json::Value::Object(map) = &mut data { + if let Some(ms) = wire.client_timestamp_ms { + map.insert("client_timestamp_ms".to_string(), serde_json::json!(ms)); + } + if let Some(tz) = wire.client_tz_offset_min { + map.insert("client_tz_offset_min".to_string(), serde_json::json!(tz)); + } } + let event = CaptureEvent { + user_id: wire.user_id, + assignment_id: wire.assignment_id, + group_id: wire.group_id, + file_path: wire.file_path, + event_type: wire.event_type, + // Server decides when the event is recorded. 
+ timestamp: Utc::now(), + data, + }; + + capture.log(event); +} + + HttpResponse::Ok().finish() } From 8ad17f6c2189b729a4393789b1394e4ae6407779 Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Sat, 14 Feb 2026 15:30:07 -0700 Subject: [PATCH 06/19] Capture Integration Updates --- client/src/CodeMirror-integration.mts | 8 +-- extensions/VSCode/src/extension.ts | 86 ++++++++++++++++----------- 2 files changed, 55 insertions(+), 39 deletions(-) diff --git a/client/src/CodeMirror-integration.mts b/client/src/CodeMirror-integration.mts index a1f7e83b..eefa5e26 100644 --- a/client/src/CodeMirror-integration.mts +++ b/client/src/CodeMirror-integration.mts @@ -46,7 +46,7 @@ // 5. Define a set of StateEffects to add/update/etc. doc blocks. // // Imports -// ----------------------------------------------------------------------------- +// ------- // // ### Third-party import { basicSetup } from "codemirror"; @@ -104,7 +104,7 @@ import { assert } from "./assert.mjs"; import { show_toast } from "./show_toast.mjs"; // Globals -// ----------------------------------------------------------------------------- +// ------- let current_view: EditorView; // This indicates that a call to `on_dirty` is scheduled, but hasn't run yet. 
let on_dirty_scheduled = false; @@ -137,7 +137,7 @@ const exceptionSink = EditorView.exceptionSink.of((exception) => { }); // Doc blocks in CodeMirror -// ----------------------------------------------------------------------------- +// ------------------------ // // The goal: given a [Range](https://codemirror.net/docs/ref/#state.Range) of // lines containing a doc block (a delimiter, indent, and contents) residing at @@ -825,7 +825,7 @@ export const DocBlockPlugin = ViewPlugin.fromClass( ); // UI -// ----------------------------------------------------------------------------- +// -- // // There doesn't seem to be any tracking of a dirty/clean flag built into // CodeMirror v6 (although diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index 6b34aedb..e2933241 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -16,13 +16,13 @@ // [http://www.gnu.org/licenses](http://www.gnu.org/licenses). // // `extension.ts` - The CodeChat Editor Visual Studio Code extension -// ============================================================================= +// ================================================================= // // This extension creates a webview, then uses a websocket connection to the // CodeChat Editor Server and Client to render editor text in that webview. 
// // Imports -// ----------------------------------------------------------------------------- +// ------- // // ### Node.js packages import assert from "assert"; @@ -58,7 +58,7 @@ import * as os from "os"; import * as crypto from "crypto"; // Globals -// ----------------------------------------------------------------------------- +// ------- enum CodeChatEditorClientLocation { html, browser, @@ -118,13 +118,15 @@ let codeChatEditorServer: CodeChatEditorServer | undefined; initServer(ext.extensionPath); } -// ----------------------------------------------------------------------------- +// --- +// // CAPTURE (Dissertation instrumentation) -// ----------------------------------------------------------------------------- +// -------------------------------------- function isInMarkdownCodeFence(doc: vscode.TextDocument, line: number): boolean { - // Very simple fence tracker: toggles when encountering ``` or ~~~ at start of line. - // Good enough for dissertation instrumentation; refine later if needed. + // Very simple fence tracker: toggles when encountering \`\`\` or ~~~ at + // start of line. Good enough for dissertation instrumentation; refine later + // if needed. let inFence = false; for (let i = 0; i <= line; i++) { const t = doc.lineAt(i).text.trim(); @@ -136,8 +138,9 @@ function isInMarkdownCodeFence(doc: vscode.TextDocument, line: number): boolean } function isInRstCodeBlock(doc: vscode.TextDocument, line: number): boolean { - // Heuristic: find the most recent ".. code-block::" (or "::") and see if we're in its indented region. - // This won’t be perfect, but it’s far better than file-level classification. + // Heuristic: find the most recent ".. code-block::" (or "::") and see if + // we're in its indented region. This won’t be perfect, but it’s far better + // than file-level classification. 
let blockLine = -1; for (let i = line; i >= 0; i--) { const t = doc.lineAt(i).text; @@ -146,12 +149,14 @@ function isInRstCodeBlock(doc: vscode.TextDocument, line: number): boolean { blockLine = i; break; } - // If we hit a non-indented line after searching upward too far, keep going; rst blocks can be separated by blank lines. + // If we hit a non-indented line after searching upward too far, keep + // going; rst blocks can be separated by blank lines. } if (blockLine < 0) return false; - // RST code block content usually begins after optional blank line(s), indented. - // Determine whether current line is indented relative to block directive line. + // RST code block content usually begins after optional blank line(s), + // indented. Determine whether current line is indented relative to block + // directive line. const cur = doc.lineAt(line).text; if (cur.trim().length === 0) return false; @@ -175,8 +180,8 @@ function classifyAtPosition(doc: vscode.TextDocument, pos: vscode.Position): Act -// Types for talking to the Rust /capture endpoint. -// This mirrors `CaptureEventWire` in webserver.rs. +// Types for talking to the Rust /capture endpoint. This mirrors +// `CaptureEventWire` in webserver.rs. interface CaptureEventPayload { user_id: string; assignment_id?: string; @@ -186,8 +191,8 @@ interface CaptureEventPayload { data: any; // sent as JSON } -// TODO: replace these with something real (e.g., VS Code settings) -// For now, we hard-code to prove that the pipeline works end-to-end. +// TODO: replace these with something real (e.g., VS Code settings) For now, we +// hard-code to prove that the pipeline works end-to-end. const CAPTURE_USER_ID: string = (() => { try { const u = os.userInfo().username; @@ -209,8 +214,8 @@ const CAPTURE_USER_ID: string = (() => { const CAPTURE_ASSIGNMENT_ID = "demo-assignment"; const CAPTURE_GROUP_ID = "demo-group"; -// Base URL for the CodeChat server's /capture endpoint. 
-// NOTE: keep this in sync with whatever port your server actually uses. +// Base URL for the CodeChat server's /capture endpoint. NOTE: keep this in sync +// with whatever port your server actually uses. const CAPTURE_SERVER_BASE = "http://127.0.0.1:8080"; // Simple classification of what the user is currently doing. @@ -225,7 +230,8 @@ const DOC_LANG_IDS = new Set([ "restructuredtext", ]); -// Track the last activity kind and when a reflective-writing (doc) session started. +// Track the last activity kind and when a reflective-writing (doc) session +// started. let lastActivityKind: ActivityKind = "other"; let docSessionStart: number | null = null; @@ -283,7 +289,7 @@ async function sendCaptureEvent( } } -// Update activity state, emit switch + doc_session events as needed. +// Update activity state, emit switch + doc\_session events as needed. function noteActivity(kind: ActivityKind, filePath?: string) { const now = Date.now(); @@ -311,7 +317,7 @@ function noteActivity(kind: ActivityKind, filePath?: string) { } } - // If we switched between doc and code, log a switch_pane event. + // If we switched between doc and code, log a switch\_pane event. const docOrCode = (k: ActivityKind) => k === "doc" || k === "code"; if (docOrCode(lastActivityKind) && docOrCode(kind) && kind !== lastActivityKind) { void sendCaptureEvent(CAPTURE_SERVER_BASE, "switch_pane", filePath, { @@ -324,7 +330,7 @@ function noteActivity(kind: ActivityKind, filePath?: string) { } // Activation/deactivation -// ----------------------------------------------------------------------------- +// ----------------------- // // This is invoked when the extension is activated. It either creates a new // CodeChat Editor Server instance or reveals the currently running one. @@ -373,9 +379,12 @@ export const activate = (context: vscode.ExtensionContext) => { }, ${format_struct(event.contentChanges)}.`, ); - // CAPTURE: classify this as documentation vs. code and log a write_* event. 
+ // CAPTURE: classify this as documentation vs. code + // and log a write\_\* event. const doc = event.document; -// const kind = classifyDocument(doc); +// ``` +// const kind = classifyDocument(doc); +// ``` const firstChange = event.contentChanges[0]; const pos = firstChange.range.start; const kind = classifyAtPosition(doc, pos); @@ -407,7 +416,8 @@ export const activate = (context: vscode.ExtensionContext) => { ); } - // Update our notion of current activity + doc session. + // Update our notion of current activity + doc + // session. noteActivity(kind, filePath); send_update(true); @@ -435,7 +445,8 @@ export const activate = (context: vscode.ExtensionContext) => { return; } - // CAPTURE: update activity + possible switch_pane/doc_session. + // CAPTURE: update activity + possible + // switch\_pane/doc\_session. const doc = event.document; // const kind = classifyDocument(doc); const pos = event.selection?.active ?? new vscode.Position(0, 0); @@ -459,7 +470,8 @@ export const activate = (context: vscode.ExtensionContext) => { "CodeChat Editor extension: sending updated cursor/scroll position.", ); - // CAPTURE: treat a selection change as "activity" in this document. + // CAPTURE: treat a selection change as "activity" + // in this document. const doc = event.textEditor.document; // const kind = classifyDocument(doc); const pos = event.selections?.[0]?.active ?? event.textEditor.selection.active; @@ -561,7 +573,8 @@ export const activate = (context: vscode.ExtensionContext) => { ); } - // Get the CodeChat Client's location from the VSCode configuration. + // Get the CodeChat Client's location from the VSCode + // configuration. const codechat_client_location_str = vscode.workspace .getConfiguration("CodeChatEditor.Server") .get("ClientLocation"); @@ -606,7 +619,8 @@ export const activate = (context: vscode.ExtensionContext) => { } } - // Provide a simple status display while the server is starting up. 
+ // Provide a simple status display while the server is starting + // up. if (webview_panel !== undefined) { webview_panel.webview.html = "

CodeChat Editor

Loading...

"; } else { @@ -866,7 +880,8 @@ export const activate = (context: vscode.ExtensionContext) => { export const deactivate = async () => { console_log("CodeChat Editor extension: deactivating."); - // CAPTURE: if we were in a doc session, close it out so duration is recorded. + // CAPTURE: if we were in a doc session, close it out so duration is + // recorded. if (docSessionStart !== null) { const now = Date.now(); const durationMs = now - docSessionStart; @@ -898,7 +913,7 @@ export const deactivate = async () => { }; // Supporting functions -// ----------------------------------------------------------------------------- +// -------------------- // // Format a complex data structure as a string when in debug mode. /*eslint-disable-next-line @typescript-eslint/no-explicit-any */ @@ -981,7 +996,8 @@ const send_update = (this_is_dirty: boolean) => { } }; -// Gracefully shut down the render client if possible. Shut down the client as well. +// Gracefully shut down the render client if possible. Shut down the client as +// well. const stop_client = async () => { console_log("CodeChat Editor extension: stopping client."); if (codeChatEditorServer !== undefined) { @@ -1019,8 +1035,8 @@ const show_error = (message: string) => { } }; -// Only render if the window and editor are active, we have a valid render client, -// and the webview is visible. +// Only render if the window and editor are active, we have a valid render +// client, and the webview is visible. 
const can_render = () => { return ( (vscode.window.activeTextEditor !== undefined || @@ -1032,7 +1048,7 @@ const can_render = () => { }; const get_document = (file_path: string) => { - for (const doc of vscode.workspace.textDocuments) { + for ( const doc of vscode.workspace.textDocuments) { if ( (!is_windows && doc.fileName === file_path) || (is_windows && doc.fileName.toUpperCase() === file_path.toUpperCase()) From 1edd6dd9c99b5f9642ac99a234a8b038c1e7d36d Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:31:43 -0600 Subject: [PATCH 07/19] Minor statrup fix --- extensions/VSCode/.gitignore | 2 +- server/src/capture.rs | 130 ++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 65 deletions(-) diff --git a/extensions/VSCode/.gitignore b/extensions/VSCode/.gitignore index 8c5160c5..3780ba9f 100644 --- a/extensions/VSCode/.gitignore +++ b/extensions/VSCode/.gitignore @@ -33,5 +33,5 @@ src/index.d.ts src/index.js src/codechat-editor-client.win32-x64-msvc.node .windows/ - +*.log # CodeChat Editor lexer: python. See TODO. diff --git a/server/src/capture.rs b/server/src/capture.rs index fd7eede9..842b5815 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -57,7 +57,7 @@ /// * `timestamp` – RFC3339 timestamp (in UTC). /// * `data` – JSON payload with event-specific details. -use std::io; +use std::{io, thread}; use chrono::{DateTime, Utc}; use log::{debug, error, info, warn}; @@ -196,73 +196,75 @@ impl EventCapture { let (tx, mut rx) = mpsc::unbounded_channel::(); - // Spawn a background task that will connect to PostgreSQL and then - // process events. This task runs on the Tokio/Actix runtime once the - // system starts, so the caller does not need to be async. 
- tokio::spawn(async move { - info!("Capture: attempting to connect to PostgreSQL…"); - - match tokio_postgres::connect(&conn_str, NoTls).await { - Ok((client, connection)) => { - info!("Capture: successfully connected to PostgreSQL."); - - // Drive the connection in its own task. - tokio::spawn(async move { - if let Err(err) = connection.await { - error!("Capture PostgreSQL connection error: {err}"); - } - }); - - // Main event loop: pull events off the channel and insert - // them into the database. - while let Some(event) = rx.recv().await { - debug!( - "Capture: inserting event: type={}, user_id={}, assignment_id={:?}, group_id={:?}, file_path={:?}", - event.event_type, - event.user_id, - event.assignment_id, - event.group_id, - event.file_path - ); - - if let Err(err) = insert_event(&client, &event).await { - error!( - "Capture: FAILED to insert event (type={}, user_id={}): {err}", - event.event_type, event.user_id - ); - } else { - debug!("Capture: event insert successful."); + // Create a dedicated runtime so capture can be started from sync code + // before the Actix/Tokio server runtime exists. + thread::Builder::new() + .name("codechat-capture".to_string()) + .spawn(move || { + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .expect("Capture: failed to build Tokio runtime"); + + runtime.block_on(async move { + info!("Capture: attempting to connect to PostgreSQL…"); + + match tokio_postgres::connect(&conn_str, NoTls).await { + Ok((client, connection)) => { + info!("Capture: successfully connected to PostgreSQL."); + + // Drive the connection in its own task. + tokio::spawn(async move { + if let Err(err) = connection.await { + error!("Capture PostgreSQL connection error: {err}"); + } + }); + + // Main event loop: pull events off the channel and insert + // them into the database. 
+ while let Some(event) = rx.recv().await { + debug!( + "Capture: inserting event: type={}, user_id={}, assignment_id={:?}, group_id={:?}, file_path={:?}", + event.event_type, + event.user_id, + event.assignment_id, + event.group_id, + event.file_path + ); + + if let Err(err) = insert_event(&client, &event).await { + error!( + "Capture: FAILED to insert event (type={}, user_id={}): {err}", + event.event_type, event.user_id + ); + } else { + debug!("Capture: event insert successful."); + } + } + + info!("Capture: event channel closed; background worker exiting."); } - } - - info!("Capture: event channel closed; background worker exiting."); - } -Err(err) => { - let ctx = format!( - "Capture: FAILED to connect to PostgreSQL (host={}, dbname={}, user={})", - config.host, config.dbname, config.user - ); - - log_pg_connect_error(&ctx, &err); + Err(err) => { + let ctx = format!( + "Capture: FAILED to connect to PostgreSQL (host={}, dbname={}, user={})", + config.host, config.dbname, config.user + ); - // Drain and drop any events so we don't hold the sender. - warn!("Capture: draining pending events after failed DB connection."); - while rx.recv().await.is_some() {} - warn!("Capture: all pending events dropped due to connection failure."); -} + log_pg_connect_error(&ctx, &err); - // Err(err) => { // NOTE: we *don't* pass `err` twice here; - // `{err}` in the format // string already grabs the local `err` - // binding. error!( "Capture: FAILED to connect to PostgreSQL - // (host={}, dbname={}, user={}): {err}", config.host, - // config.dbname, config.user, ); // Drain and drop any events - // so we don't hold the sender. warn!("Capture: draining pending - // events after failed DB connection."); while - // rx.recv().await.is\_some() {} warn!("Capture: all pending - // events dropped due to connection failure."); } - } - }); + // Drain and drop any events so we don't hold the sender. 
+ warn!("Capture: draining pending events after failed DB connection."); + while rx.recv().await.is_some() {} + warn!("Capture: all pending events dropped due to connection failure."); + } + } + }); + }) + .map_err(|err| { + io::Error::other(format!("Capture: failed to start worker thread: {err}")) + })?; Ok(Self { tx }) } From fe4abbc52ad7e0e4fac2efaaeab57f69ea1df521 Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Tue, 21 Apr 2026 22:07:29 -0600 Subject: [PATCH 08/19] Code Review Workoff --- .gitignore | 3 + extensions/VSCode/src/extension.ts | 148 +++++++++---------- extensions/VSCode/src/lib.rs | 7 + server/capture_config.json | 9 -- server/src/capture.rs | 2 +- server/src/ide.rs | 8 ++ server/src/ide/filewatcher.rs | 1 + server/src/translation.rs | 222 +++++++++++++++++++++++++++-- server/src/webserver.rs | 117 +++++++-------- 9 files changed, 368 insertions(+), 149 deletions(-) delete mode 100644 server/capture_config.json diff --git a/.gitignore b/.gitignore index 7f3f076b..2b4f8c30 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,7 @@ # dist build output target/ +# The runtime capture config is resolved from the repository root. +server/capture_config.json + # CodeChat Editor lexer: python. See TODO. diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index e2933241..1f4fc7c4 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -180,7 +180,7 @@ function classifyAtPosition(doc: vscode.TextDocument, pos: vscode.Position): Act -// Types for talking to the Rust /capture endpoint. This mirrors +// Types for sending capture events to the Rust server. This mirrors // `CaptureEventWire` in webserver.rs. 
interface CaptureEventPayload { user_id: string; @@ -188,6 +188,8 @@ interface CaptureEventPayload { group_id?: string; file_path?: string; event_type: string; + client_timestamp_ms?: number; + client_tz_offset_min?: number; data: any; // sent as JSON } @@ -214,9 +216,10 @@ const CAPTURE_USER_ID: string = (() => { const CAPTURE_ASSIGNMENT_ID = "demo-assignment"; const CAPTURE_GROUP_ID = "demo-group"; -// Base URL for the CodeChat server's /capture endpoint. NOTE: keep this in sync -// with whatever port your server actually uses. -const CAPTURE_SERVER_BASE = "http://127.0.0.1:8080"; +let capture_output_channel: vscode.OutputChannel | undefined; +let captureFailureLogged = false; +let captureTransportReady = false; +let extensionCaptureSessionStarted = false; // Simple classification of what the user is currently doing. type ActivityKind = "doc" | "code" | "other"; @@ -249,7 +252,6 @@ function classifyDocument(doc: vscode.TextDocument | undefined): ActivityKind { // Helper to send a capture event to the Rust server. async function sendCaptureEvent( - serverBaseUrl: string, // e.g. 
"http://127.0.0.1:8080" eventType: string, filePath?: string, data: any = {}, @@ -260,33 +262,60 @@ async function sendCaptureEvent( group_id: CAPTURE_GROUP_ID, file_path: filePath, event_type: eventType, + client_timestamp_ms: Date.now(), + client_tz_offset_min: new Date().getTimezoneOffset(), data: { ...data, session_id: CAPTURE_SESSION_ID, - client_timestamp_ms: Date.now(), - client_tz_offset_min: new Date().getTimezoneOffset(), }, }; - try { - const resp = await fetch(`${serverBaseUrl}/capture`, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify(payload), - }); + if (codeChatEditorServer === undefined) { + reportCaptureFailure("CodeChat server is not running"); + return; + } + if (!captureTransportReady) { + capture_output_channel?.appendLine( + `${new Date().toISOString()} capture skipped before server handshake: ${JSON.stringify(payload)}`, + ); + return; + } - if (!resp.ok) { - console.error( - "Capture event failed:", - resp.status, - await resp.text(), - ); - } + logCaptureEvent(payload); + + try { + await codeChatEditorServer.sendCaptureEvent(JSON.stringify(payload)); + captureFailureLogged = false; } catch (err) { - console.error("Error sending capture event:", err); + reportCaptureFailure(err instanceof Error ? 
err.message : String(err)); + } +} + +function logCaptureEvent(payload: CaptureEventPayload) { + capture_output_channel?.appendLine( + `${new Date().toISOString()} ${JSON.stringify(payload)}`, + ); +} + +function reportCaptureFailure(message: string) { + capture_output_channel?.appendLine( + `${new Date().toISOString()} capture send failed: ${message}`, + ); + if (captureFailureLogged) { + return; + } + captureFailureLogged = true; + console.warn(`CodeChat capture event was not queued: ${message}`); +} + +async function startExtensionCaptureSession(filePath?: string) { + if (extensionCaptureSessionStarted) { + return; } + extensionCaptureSessionStarted = true; + await sendCaptureEvent("session_start", filePath, { + mode: "vscode_extension", + }); } // Update activity state, emit switch + doc\_session events as needed. @@ -298,7 +327,7 @@ function noteActivity(kind: ActivityKind, filePath?: string) { if (docSessionStart === null) { // Starting a new reflective-writing session. docSessionStart = now; - void sendCaptureEvent(CAPTURE_SERVER_BASE, "session_start", filePath, { + void sendCaptureEvent("session_start", filePath, { mode: "doc", }); } @@ -307,11 +336,11 @@ function noteActivity(kind: ActivityKind, filePath?: string) { // Ending a reflective-writing session. const durationMs = now - docSessionStart; docSessionStart = null; - void sendCaptureEvent(CAPTURE_SERVER_BASE, "doc_session", filePath, { + void sendCaptureEvent("doc_session", filePath, { duration_ms: durationMs, duration_seconds: durationMs / 1000.0, }); - void sendCaptureEvent(CAPTURE_SERVER_BASE, "session_end", filePath, { + void sendCaptureEvent("session_end", filePath, { mode: "doc", }); } @@ -320,7 +349,7 @@ function noteActivity(kind: ActivityKind, filePath?: string) { // If we switched between doc and code, log a switch\_pane event. 
const docOrCode = (k: ActivityKind) => k === "doc" || k === "code"; if (docOrCode(lastActivityKind) && docOrCode(kind) && kind !== lastActivityKind) { - void sendCaptureEvent(CAPTURE_SERVER_BASE, "switch_pane", filePath, { + void sendCaptureEvent("switch_pane", filePath, { from: lastActivityKind, to: kind, }); @@ -335,6 +364,9 @@ function noteActivity(kind: ActivityKind, filePath?: string) { // This is invoked when the extension is activated. It either creates a new // CodeChat Editor Server instance or reveals the currently running one. export const activate = (context: vscode.ExtensionContext) => { + capture_output_channel = vscode.window.createOutputChannel("CodeChat Capture"); + context.subscriptions.push(capture_output_channel); + context.subscriptions.push( vscode.commands.registerCommand( "extension.codeChatEditorDeactivate", @@ -345,18 +377,6 @@ export const activate = (context: vscode.ExtensionContext) => { async () => { console_log("CodeChat Editor extension: starting."); - // CAPTURE: mark the start of an editor session. - const active = vscode.window.activeTextEditor; - const startFilePath = active?.document.fileName; - void sendCaptureEvent( - CAPTURE_SERVER_BASE, - "session_start", - startFilePath, - { - mode: "vscode_extension", - }, - ); - if (!subscribed) { subscribed = true; @@ -379,8 +399,8 @@ export const activate = (context: vscode.ExtensionContext) => { }, ${format_struct(event.contentChanges)}.`, ); - // CAPTURE: classify this as documentation vs. code - // and log a write\_\* event. + // CAPTURE: update session/switch state. The server + // classifies write_* events after parsing. 
const doc = event.document; // ``` // const kind = classifyDocument(doc); @@ -390,31 +410,6 @@ export const activate = (context: vscode.ExtensionContext) => { const kind = classifyAtPosition(doc, pos); const filePath = doc.fileName; - const charsTyped = event.contentChanges - .map((c) => c.text.length) - .reduce((a, b) => a + b, 0); - - if (kind === "doc") { - void sendCaptureEvent( - CAPTURE_SERVER_BASE, - "write_doc", - filePath, - { - chars_typed: charsTyped, - languageId: doc.languageId, - }, - ); - } else if (kind === "code") { - void sendCaptureEvent( - CAPTURE_SERVER_BASE, - "write_code", - filePath, - { - chars_typed: charsTyped, - languageId: doc.languageId, - }, - ); - } // Update our notion of current activity + doc // session. @@ -489,7 +484,6 @@ export const activate = (context: vscode.ExtensionContext) => { const active = vscode.window.activeTextEditor; const filePath = active?.document.fileName; void sendCaptureEvent( - CAPTURE_SERVER_BASE, "run_end", filePath, { @@ -507,7 +501,6 @@ export const activate = (context: vscode.ExtensionContext) => { const filePath = active?.document.fileName; const task = e.execution.task; void sendCaptureEvent( - CAPTURE_SERVER_BASE, "compile_end", filePath, { @@ -523,7 +516,6 @@ export const activate = (context: vscode.ExtensionContext) => { context.subscriptions.push( vscode.workspace.onDidSaveTextDocument((doc) => { void sendCaptureEvent( - CAPTURE_SERVER_BASE, "save", doc.fileName, { @@ -541,7 +533,6 @@ export const activate = (context: vscode.ExtensionContext) => { const active = vscode.window.activeTextEditor; const filePath = active?.document.fileName; void sendCaptureEvent( - CAPTURE_SERVER_BASE, "run", filePath, { @@ -559,7 +550,6 @@ export const activate = (context: vscode.ExtensionContext) => { const filePath = active?.document.fileName; const task = e.execution.task; void sendCaptureEvent( - CAPTURE_SERVER_BASE, "compile", filePath, { @@ -632,6 +622,9 @@ export const activate = (context: 
vscode.ExtensionContext) => { // Start the server. console_log("CodeChat Editor extension: starting server."); codeChatEditorServer = new CodeChatEditorServer(); + captureFailureLogged = false; + captureTransportReady = false; + extensionCaptureSessionStarted = false; const hosted_in_ide = codechat_client_location === CodeChatEditorClientLocation.html; @@ -641,6 +634,9 @@ export const activate = (context: vscode.ExtensionContext) => { await codeChatEditorServer.sendMessageOpened(hosted_in_ide); if (codechat_client_location === CodeChatEditorClientLocation.browser) { + captureTransportReady = true; + const active = vscode.window.activeTextEditor; + void startExtensionCaptureSession(active?.document.fileName); send_update(false); } @@ -860,6 +856,9 @@ export const activate = (context: vscode.ExtensionContext) => { assert(webview_panel !== undefined); webview_panel.webview.html = client_html; await sendResult(id); + captureTransportReady = true; + const active = vscode.window.activeTextEditor; + void startExtensionCaptureSession(active?.document.fileName); send_update(false); break; } @@ -889,12 +888,12 @@ export const deactivate = async () => { const active = vscode.window.activeTextEditor; const filePath = active?.document.fileName; - await sendCaptureEvent(CAPTURE_SERVER_BASE, "doc_session", filePath, { + await sendCaptureEvent("doc_session", filePath, { duration_ms: durationMs, duration_seconds: durationMs / 1000.0, closed_by: "extension_deactivate", }); - await sendCaptureEvent(CAPTURE_SERVER_BASE, "session_end", filePath, { + await sendCaptureEvent("session_end", filePath, { mode: "doc", closed_by: "extension_deactivate", }); @@ -903,7 +902,7 @@ export const deactivate = async () => { // CAPTURE: mark the end of an editor session. 
const active = vscode.window.activeTextEditor; const endFilePath = active?.document.fileName; - await sendCaptureEvent(CAPTURE_SERVER_BASE, "session_end", endFilePath, { + await sendCaptureEvent("session_end", endFilePath, { mode: "vscode_extension", }); @@ -1005,6 +1004,7 @@ const stop_client = async () => { await codeChatEditorServer.stopServer(); codeChatEditorServer = undefined; } + captureTransportReady = false; if (idle_timer !== undefined) { clearTimeout(idle_timer); diff --git a/extensions/VSCode/src/lib.rs b/extensions/VSCode/src/lib.rs index bef47679..b869640a 100644 --- a/extensions/VSCode/src/lib.rs +++ b/extensions/VSCode/src/lib.rs @@ -80,6 +80,13 @@ impl CodeChatEditorServer { self.0.send_message_opened(hosted_in_ide).await } + #[napi] + pub async fn send_capture_event(&self, capture_event_json: String) -> std::io::Result { + let capture_event = serde_json::from_str(&capture_event_json) + .map_err(|err| std::io::Error::other(err.to_string()))?; + self.0.send_capture_event(capture_event).await + } + #[napi] pub async fn send_message_current_file(&self, url: String) -> std::io::Result { self.0.send_message_current_file(url).await diff --git a/server/capture_config.json b/server/capture_config.json deleted file mode 100644 index 574f1477..00000000 --- a/server/capture_config.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "host": "3.146.138.182", - "port": 5432, - "dbname": "CodeChatCaptureDB", - "user": "CodeChatCaptureUser", - "password": "OB3yc8Hk9SuVjzXMdUDr0C7w4PqLQisn", - "max_connections": 5, - "timeout_seconds": 30 -} diff --git a/server/src/capture.rs b/server/src/capture.rs index 842b5815..bd7a73e0 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -48,7 +48,7 @@ /// data TEXT /// ); /// ``` -/// +/// New comment /// * `user_id` – participant identifier (student id, pseudonym, etc.). /// * `assignment_id` – logical assignment / lab identifier. /// * `group_id` – optional grouping (treatment / comparison, section). 
diff --git a/server/src/ide.rs b/server/src/ide.rs index 21037d95..92141707 100644 --- a/server/src/ide.rs +++ b/server/src/ide.rs @@ -237,6 +237,14 @@ impl CodeChatEditorServer { .await } + pub async fn send_capture_event( + &self, + capture_event: webserver::CaptureEventWire, + ) -> std::io::Result { + self.send_message_timeout(EditorMessageContents::Capture(capture_event)) + .await + } + // Send a `CurrentFile` message. The other parameter (true if text/false if // binary/None if ignored) is ignored by the server, so it's always sent as // `None`. diff --git a/server/src/ide/filewatcher.rs b/server/src/ide/filewatcher.rs index 93b2829a..3068ca23 100644 --- a/server/src/ide/filewatcher.rs +++ b/server/src/ide/filewatcher.rs @@ -668,6 +668,7 @@ async fn processing_task( EditorMessageContents::Opened(_) | EditorMessageContents::OpenUrl(_) | + EditorMessageContents::Capture(_) | EditorMessageContents::ClientHtml(_) | EditorMessageContents::RequestClose => { let err = ResultErrTypes::ClientIllegalMessage; diff --git a/server/src/translation.rs b/server/src/translation.rs index 141d822b..9a20c4e2 100644 --- a/server/src/translation.rs +++ b/server/src/translation.rs @@ -221,6 +221,7 @@ use tokio::{ // ### Local use crate::{ + capture::event_types, lexer::supported_languages::MARKDOWN_MODE, processing::{ CodeChatForWeb, CodeMirror, CodeMirrorDiff, CodeMirrorDiffable, CodeMirrorDocBlock, @@ -230,11 +231,11 @@ use crate::{ }, queue_send, queue_send_func, webserver::{ - EditorMessage, EditorMessageContents, INITIAL_MESSAGE_ID, MESSAGE_ID_INCREMENT, - ProcessingTaskHttpRequest, ProcessingTaskHttpRequestFlags, ResultErrTypes, ResultOkTypes, - SimpleHttpResponse, SimpleHttpResponseError, UpdateMessageContents, WebAppState, - WebsocketQueues, file_to_response, path_to_url, send_response, try_canonicalize, - try_read_as_text, url_to_path, + CaptureEventWire, EditorMessage, EditorMessageContents, INITIAL_MESSAGE_ID, + MESSAGE_ID_INCREMENT, ProcessingTaskHttpRequest, 
ProcessingTaskHttpRequestFlags, + ResultErrTypes, ResultOkTypes, SimpleHttpResponse, SimpleHttpResponseError, + UpdateMessageContents, WebAppState, WebsocketQueues, file_to_response, log_capture_event, + path_to_url, send_response, try_canonicalize, try_read_as_text, url_to_path, }, }; @@ -384,6 +385,7 @@ pub fn create_translation_queues( /// allows factoring out lengthy contents in the loop into subfunctions. struct TranslationTask { // These parameters are passed to us. + app_state: WebAppState, connection_id_raw: String, prefix: &'static [&'static str], allow_source_diffs: bool, @@ -432,6 +434,69 @@ struct TranslationTask { /// Has the full (non-diff) version of the current file been sent? Don't /// send diffs until this is sent. sent_full: bool, + capture_context: CaptureContext, +} + +#[derive(Clone, Debug, Default)] +struct CaptureContext { + user_id: Option, + assignment_id: Option, + group_id: Option, + session_id: Option, + client_tz_offset_min: Option, +} + +impl CaptureContext { + fn update_from_wire(&mut self, wire: &CaptureEventWire) { + if !wire.user_id.trim().is_empty() { + self.user_id = Some(wire.user_id.clone()); + } + if let Some(assignment_id) = &wire.assignment_id { + self.assignment_id = Some(assignment_id.clone()); + } + if let Some(group_id) = &wire.group_id { + self.group_id = Some(group_id.clone()); + } + if let Some(client_tz_offset_min) = wire.client_tz_offset_min { + self.client_tz_offset_min = Some(client_tz_offset_min); + } + if let Some(serde_json::Value::Object(data)) = &wire.data + && let Some(session_id) = data.get("session_id").and_then(serde_json::Value::as_str) + { + self.session_id = Some(session_id.to_string()); + } + } + + fn capture_event( + &self, + event_type: &str, + file_path: Option, + data: serde_json::Value, + ) -> Option { + let mut data = match data { + serde_json::Value::Object(map) => map, + other => { + let mut map = serde_json::Map::new(); + map.insert("value".to_string(), other); + map + } + }; + if let 
Some(session_id) = &self.session_id { + data.entry("session_id".to_string()) + .or_insert_with(|| serde_json::json!(session_id)); + } + + Some(CaptureEventWire { + user_id: self.user_id.clone()?, + assignment_id: self.assignment_id.clone(), + group_id: self.group_id.clone(), + file_path, + event_type: event_type.to_string(), + client_timestamp_ms: None, + client_tz_offset_min: self.client_tz_offset_min, + data: Some(serde_json::Value::Object(data)), + }) + } } /// This is the processing task for the Visual Studio Code IDE. It handles all @@ -463,6 +528,7 @@ pub async fn translation_task( let mut continue_loop = true; let mut tt = TranslationTask { + app_state: app_state.clone(), connection_id_raw, prefix, allow_source_diffs, @@ -486,6 +552,7 @@ pub async fn translation_task( version: 0.0, // Don't send diffs until this is sent. sent_full: false, + capture_context: CaptureContext::default(), }; while continue_loop { select! { @@ -512,6 +579,11 @@ pub async fn translation_task( EditorMessageContents::Result(_) => continue_loop = tt.ide_result(ide_message).await, EditorMessageContents::Update(_) => continue_loop = tt.ide_update(ide_message).await, + EditorMessageContents::Capture(capture_event) => { + tt.capture_context.update_from_wire(&capture_event); + log_capture_event(&app_state, capture_event); + send_response(&tt.to_ide_tx, ide_message.id, Ok(ResultOkTypes::Void)).await; + }, // Update the current file; translate it to a URL then // pass it to the Client. @@ -607,6 +679,11 @@ pub async fn translation_task( }, EditorMessageContents::Update(_) => continue_loop = tt.client_update(client_message).await, + EditorMessageContents::Capture(capture_event) => { + tt.capture_context.update_from_wire(&capture_event); + log_capture_event(&app_state, capture_event); + send_response(&tt.to_client_tx, client_message.id, Ok(ResultOkTypes::Void)).await; + }, // Update the current file; translate it to a URL then // pass it to the IDE. 
@@ -697,6 +774,103 @@ pub async fn translation_task( // These provide translation for messages passing through the Server. impl TranslationTask { + fn capture_file_path(file_path: &std::path::Path) -> Option { + file_path.to_str().map(str::to_string) + } + + fn log_server_capture_event( + &self, + event_type: &str, + file_path: &std::path::Path, + data: serde_json::Value, + ) { + let Some(capture_event) = self.capture_context.capture_event( + event_type, + Self::capture_file_path(file_path), + data, + ) else { + debug!("Skipping server-classified capture event; capture identity is not known yet."); + return; + }; + log_capture_event(&self.app_state, capture_event); + } + + fn log_raw_write_event(&self, file_path: &std::path::Path, before: &str, after: &str) { + if before == after { + return; + } + self.log_server_capture_event( + event_types::WRITE_CODE, + file_path, + serde_json::json!({ + "source": "server_translation", + "classification_basis": "raw_text", + "diff": diff_str(before, after), + }), + ); + } + + fn log_code_mirror_write_events( + &self, + file_path: &std::path::Path, + metadata: &SourceFileMetadata, + before_doc: &str, + before_doc_blocks: Option<&CodeMirrorDocBlockVec>, + after: &CodeMirror, + source: &str, + ) { + if metadata.mode == MARKDOWN_MODE { + if !compare_html(before_doc, &after.doc) { + self.log_server_capture_event( + event_types::WRITE_DOC, + file_path, + serde_json::json!({ + "source": source, + "classification_basis": "markdown_document", + "mode": metadata.mode, + "diff": diff_str(before_doc, &after.doc), + }), + ); + } + return; + } + + if before_doc != after.doc { + self.log_server_capture_event( + event_types::WRITE_CODE, + file_path, + serde_json::json!({ + "source": source, + "classification_basis": "codemirror_code_text", + "mode": metadata.mode, + "diff": diff_str(before_doc, &after.doc), + }), + ); + } + + let doc_blocks_changed = match before_doc_blocks { + Some(before) => !doc_block_compare(before, &after.doc_blocks), + 
None => !after.doc_blocks.is_empty(), + }; + if doc_blocks_changed { + let doc_block_diff = before_doc_blocks.map(|before| { + serde_json::json!(diff_code_mirror_doc_blocks(before, &after.doc_blocks)) + }); + self.log_server_capture_event( + event_types::WRITE_DOC, + file_path, + serde_json::json!({ + "source": source, + "classification_basis": "codemirror_doc_blocks", + "mode": metadata.mode, + "doc_block_count_before": before_doc_blocks.map_or(0, Vec::len), + "doc_block_count_after": after.doc_blocks.len(), + "doc_block_diff": doc_block_diff, + }), + ); + } + } + // Pass a `Result` message to the Client, unless it's a `LoadFile` result. async fn ide_result(&mut self, ide_message: EditorMessage) -> bool { let EditorMessageContents::Result(ref result) = ide_message.message else { @@ -892,6 +1066,16 @@ impl TranslationTask { else { panic!("Unexpected diff value."); }; + if self.sent_full { + self.log_code_mirror_write_events( + &clean_file_path, + &ccfw.metadata, + &self.code_mirror_doc, + self.code_mirror_doc_blocks.as_ref(), + code_mirror_translated, + "ide", + ); + } // Send a diff if possible. let client_contents = if self.sent_full { self.diff_code_mirror( @@ -937,6 +1121,13 @@ impl TranslationTask { Err(ResultErrTypes::TodoBinarySupport) } TranslationResultsString::Unknown => { + if self.sent_full { + self.log_raw_write_event( + &clean_file_path, + &self.source_code, + &code_mirror.doc, + ); + } // Send the new raw contents. 
debug!("Sending translated contents to Client."); queue_send_func!(self.to_client_tx.send(EditorMessage { @@ -953,13 +1144,16 @@ impl TranslationTask { mode: "".to_string(), }, source: CodeMirrorDiffable::Plain(CodeMirror { - doc: code_mirror.doc, + doc: code_mirror.doc.clone(), doc_blocks: vec![] }), version: contents.version }), }), })); + self.source_code = code_mirror.doc; + self.code_mirror_doc = self.source_code.clone(); + self.code_mirror_doc_blocks = Some(vec![]); Ok(ResultOkTypes::Void) } TranslationResultsString::Toc(_) => { @@ -1042,12 +1236,22 @@ impl TranslationTask { // what we just received. This must be updated // before we can translate back to check for changes // (the next step). - let CodeMirrorDiffable::Plain(code_mirror) = cfw.source else { + let CodeMirrorDiffable::Plain(ref code_mirror) = cfw.source else { // TODO: support diffable! panic!("Diff not supported."); }; - self.code_mirror_doc = code_mirror.doc; - self.code_mirror_doc_blocks = Some(code_mirror.doc_blocks); + if self.sent_full { + self.log_code_mirror_write_events( + &clean_file_path, + &cfw.metadata, + &self.code_mirror_doc, + self.code_mirror_doc_blocks.as_ref(), + code_mirror, + "client", + ); + } + self.code_mirror_doc = code_mirror.doc.clone(); + self.code_mirror_doc_blocks = Some(code_mirror.doc_blocks.clone()); // We may need to change this version if we send a // diff back to the Client. 
let mut cfw_version = cfw.version; diff --git a/server/src/webserver.rs b/server/src/webserver.rs index c963bab7..ef533981 100644 --- a/server/src/webserver.rs +++ b/server/src/webserver.rs @@ -42,9 +42,9 @@ use actix_web::{ App, HttpRequest, HttpResponse, HttpServer, dev::{Server, ServerHandle, ServiceFactory, ServiceRequest}, error::Error, - get, post, + get, http::header::{ContentType, DispositionType}, - middleware, + middleware, post, web::{self, Data}, }; @@ -95,7 +95,7 @@ use crate::{ }, }; -use crate::capture::{EventCapture, CaptureConfig, CaptureEvent}; +use crate::capture::{CaptureConfig, CaptureEvent, EventCapture}; use chrono::Utc; @@ -204,6 +204,8 @@ pub enum EditorMessageContents { // Server will determine the value if needed. Option, ), + /// Record an instrumentation event. Valid destinations: Server. + Capture(CaptureEventWire), // #### These messages may only be sent by the IDE. /// This is the first message sent when the IDE starts up. It may only be @@ -385,7 +387,7 @@ pub struct AppState { /// The auth credentials if authentication is used. credentials: Option, // Added to support capture - JDS - 11/2025 - pub capture: Option, + pub capture: Option, } pub type WebAppState = web::Data; @@ -399,25 +401,32 @@ pub struct Credentials { /// JSON payload received from clients for capture events. /// /// The server will supply the timestamp; clients do not need to send it. -#[derive(Debug, Deserialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq, TS)] +#[ts(export, optional_fields)] pub struct CaptureEventWire { pub user_id: String, + #[serde(skip_serializing_if = "Option::is_none")] pub assignment_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub group_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub file_path: Option, pub event_type: String, /// Optional client-side timestamp (milliseconds since Unix epoch). 
+ #[serde(skip_serializing_if = "Option::is_none")] pub client_timestamp_ms: Option, /// Optional client timezone offset in minutes (JS Date().getTimezoneOffset()). + #[serde(skip_serializing_if = "Option::is_none")] pub client_tz_offset_min: Option, /// Arbitrary event-specific data stored as JSON (optional). + #[serde(skip_serializing_if = "Option::is_none")] + #[ts(type = "unknown")] pub data: Option, } - // Macros // ----------------------------------------------------------------------------- /// Create a macro to report an error when enqueueing an item. @@ -606,44 +615,45 @@ async fn capture_endpoint( app_state: WebAppState, payload: web::Json, ) -> HttpResponse { - let wire = payload.into_inner(); - - if let Some(capture) = &app_state.capture { - // Default missing data to empty object - let mut data = wire.data.unwrap_or_else(|| serde_json::json!({})); + log_capture_event(&app_state, payload.into_inner()); + HttpResponse::Ok().finish() +} - // Ensure data is an object so we can attach fields - if !data.is_object() { - data = serde_json::json!({ "value": data }); - } +/// Log a capture event if capture is enabled. +pub fn log_capture_event(app_state: &WebAppState, wire: CaptureEventWire) { + if let Some(capture) = &app_state.capture { + // Default missing data to empty object + let mut data = wire.data.unwrap_or_else(|| serde_json::json!({})); - // Add client timestamp fields if present (even if extension also sends them; - // overwriting is fine and consistent). 
- if let serde_json::Value::Object(map) = &mut data { - if let Some(ms) = wire.client_timestamp_ms { - map.insert("client_timestamp_ms".to_string(), serde_json::json!(ms)); - } - if let Some(tz) = wire.client_tz_offset_min { - map.insert("client_tz_offset_min".to_string(), serde_json::json!(tz)); + // Ensure data is an object so we can attach fields + if !data.is_object() { + data = serde_json::json!({ "value": data }); } - } - - let event = CaptureEvent { - user_id: wire.user_id, - assignment_id: wire.assignment_id, - group_id: wire.group_id, - file_path: wire.file_path, - event_type: wire.event_type, - // Server decides when the event is recorded. - timestamp: Utc::now(), - data, - }; - capture.log(event); -} + // Add client timestamp fields if present (even if extension also sends them; + // overwriting is fine and consistent). + if let serde_json::Value::Object(map) = &mut data { + if let Some(ms) = wire.client_timestamp_ms { + map.insert("client_timestamp_ms".to_string(), serde_json::json!(ms)); + } + if let Some(tz) = wire.client_tz_offset_min { + map.insert("client_tz_offset_min".to_string(), serde_json::json!(tz)); + } + } + let event = CaptureEvent { + user_id: wire.user_id, + assignment_id: wire.assignment_id, + group_id: wire.group_id, + file_path: wire.file_path, + event_type: wire.event_type, + // Server decides when the event is recorded. + timestamp: Utc::now(), + data, + }; - HttpResponse::Ok().finish() + capture.log(event); + } } // Get the `mode` query parameter to determine `is_test_mode`; default to @@ -1489,7 +1499,6 @@ pub fn setup_server( addr: &SocketAddr, credentials: Option, ) -> std::io::Result<(Server, Data)> { - // Pre-load the bundled files before starting the webserver. 
let _ = &*BUNDLED_FILES_MAP; let app_data = make_app_data(credentials); @@ -1576,26 +1585,22 @@ pub fn make_app_data(credentials: Option) -> WebAppState { config_path.push("capture_config.json"); match fs::read_to_string(&config_path) { - Ok(json) => { - match serde_json::from_str::(&json) { - Ok(cfg) => match EventCapture::new(cfg) { - Ok(ec) => { - eprintln!("Capture: enabled (config file: {config_path:?})"); - Some(ec) - } - Err(err) => { - eprintln!("Capture: failed to initialize from {config_path:?}: {err}"); - None - } - }, + Ok(json) => match serde_json::from_str::(&json) { + Ok(cfg) => match EventCapture::new(cfg) { + Ok(ec) => { + eprintln!("Capture: enabled (config file: {config_path:?})"); + Some(ec) + } Err(err) => { - eprintln!( - "Capture: invalid JSON in {config_path:?}: {err}" - ); + eprintln!("Capture: failed to initialize from {config_path:?}: {err}"); None } + }, + Err(err) => { + eprintln!("Capture: invalid JSON in {config_path:?}: {err}"); + None } - } + }, Err(err) => { eprintln!( "Capture: disabled (config file not found or unreadable: {config_path:?}: {err})" @@ -1645,7 +1650,7 @@ where .service(vscode_client_framework) .service(ping) .service(stop) - .service(capture_endpoint) + .service(capture_endpoint) // Reroute to the filewatcher filesystem for typical user-requested // URLs. .route("/", web::get().to(filewatcher_root_fs_redirect)) From 54cfac70f9401baa28c585004f1d38c40c9bc3ae Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:14:05 -0600 Subject: [PATCH 09/19] Fix capture lint failure --- server/src/capture.rs | 102 ++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/server/src/capture.rs b/server/src/capture.rs index bd7a73e0..c8904a75 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -14,57 +14,57 @@ // the CodeChat Editor. If not, see // [http://www.gnu.org/licenses](http://www.gnu.org/licenses). 
-/// `capture.rs` -- Capture CodeChat Editor Events -/// ============================================================================ -/// -/// This module provides an asynchronous event capture facility backed by a -/// PostgreSQL database. It is designed to support the dissertation study by -/// recording process-level data such as: -/// -/// * Frequency and timing of writing entries -/// * Edits to documentation and code -/// * Switches between documentation and coding activity -/// * Duration of engagement with reflective writing -/// * Save, compile, and run events -/// -/// Events are sent from the client (browser and/or VS Code extension) to the -/// server as JSON. The server enqueues events into an asynchronous worker which -/// performs batched inserts into the `events` table. -/// -/// Database schema -/// ---------------------------------------------------------------------------- -/// -/// The following SQL statement creates the `events` table used by this module: -/// -/// ```sql -/// CREATE TABLE events ( -/// id SERIAL PRIMARY KEY, -/// user_id TEXT NOT NULL, -/// assignment_id TEXT, -/// group_id TEXT, -/// file_path TEXT, -/// event_type TEXT NOT NULL, -/// timestamp TEXT NOT NULL, -/// data TEXT -/// ); -/// ``` -/// New comment -/// * `user_id` – participant identifier (student id, pseudonym, etc.). -/// * `assignment_id` – logical assignment / lab identifier. -/// * `group_id` – optional grouping (treatment / comparison, section). -/// * `file_path` – logical path of the file being edited. -/// * `event_type` – coarse event type (see `event_type` constants below). -/// * `timestamp` – RFC3339 timestamp (in UTC). -/// * `data` – JSON payload with event-specific details. +// `capture.rs` -- Capture CodeChat Editor Events +// ============================================================================ +// +// This module provides an asynchronous event capture facility backed by a +// PostgreSQL database. 
It is designed to support the dissertation study by +// recording process-level data such as: +// +// * Frequency and timing of writing entries +// * Edits to documentation and code +// * Switches between documentation and coding activity +// * Duration of engagement with reflective writing +// * Save, compile, and run events +// +// Events are sent from the client (browser and/or VS Code extension) to the +// server as JSON. The server enqueues events into an asynchronous worker which +// performs batched inserts into the `events` table. +// +// Database schema +// ---------------------------------------------------------------------------- +// +// The following SQL statement creates the `events` table used by this module: +// +// ```sql +// CREATE TABLE events ( +// id SERIAL PRIMARY KEY, +// user_id TEXT NOT NULL, +// assignment_id TEXT, +// group_id TEXT, +// file_path TEXT, +// event_type TEXT NOT NULL, +// timestamp TEXT NOT NULL, +// data TEXT +// ); +// ``` +// +// * `user_id` – participant identifier (student id, pseudonym, etc.). +// * `assignment_id` – logical assignment / lab identifier. +// * `group_id` – optional grouping (treatment / comparison, section). +// * `file_path` – logical path of the file being edited. +// * `event_type` – coarse event type (see `event_type` constants below). +// * `timestamp` – RFC3339 timestamp (in UTC). +// * `data` – JSON payload with event-specific details. use std::{io, thread}; use chrono::{DateTime, Utc}; use log::{debug, error, info, warn}; use serde::{Deserialize, Serialize}; +use std::error::Error; use tokio::sync::mpsc; use tokio_postgres::{Client, NoTls}; -use std::error::Error; /// Canonical event type strings. Keep these stable for analysis. 
pub mod event_types { @@ -273,11 +273,7 @@ impl EventCapture { pub fn log(&self, event: CaptureEvent) { debug!( "Capture: queueing event: type={}, user_id={}, assignment_id={:?}, group_id={:?}, file_path={:?}", - event.event_type, - event.user_id, - event.assignment_id, - event.group_id, - event.file_path + event.event_type, event.user_id, event.assignment_id, event.group_id, event.file_path ); if let Err(err) = self.tx.send(event) { @@ -325,8 +321,6 @@ fn log_pg_connect_error(context: &str, err: &tokio_postgres::Error) { error!("{context}: {err}"); } - - /// Insert a single event into the `events` table. async fn insert_event(client: &Client, event: &CaptureEvent) -> Result { let timestamp = event.timestamp.to_rfc3339(); @@ -470,7 +464,6 @@ mod tests { #[tokio::test] #[ignore] async fn event_capture_inserts_event_into_db() -> Result<(), Box> { - // Initialize logging for this test, using the same log4rs.yml as the // server. If logging is already initialized, this will just return an // error which we ignore. @@ -541,7 +534,10 @@ mod tests { ($1, NULL, NULL, NULL, 'test_event', $2, '{"test":true}') RETURNING id "#, - &[&test_user_id, &format!("{:?}", std::time::SystemTime::now())], + &[ + &test_user_id, + &format!("{:?}", std::time::SystemTime::now()), + ], ) .await?; @@ -568,7 +564,7 @@ mod tests { // 6. Wait (deterministically) for the background worker to insert the event, // then fetch THAT row (instead of "latest row in the table"). 
- use tokio::time::{sleep, Duration, Instant}; + use tokio::time::{Duration, Instant, sleep}; let deadline = Instant::now() + Duration::from_secs(2); From 63ca1c175bebfecd6d788323e9b78ca5e283fdf3 Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:27:15 -0600 Subject: [PATCH 10/19] Update rustls-webpki for audit advisory --- server/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/Cargo.lock b/server/Cargo.lock index f93653c3..dd33246d 100644 --- a/server/Cargo.lock +++ b/server/Cargo.lock @@ -3368,9 +3368,9 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" [[package]] name = "rustls-webpki" -version = "0.103.12" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "aws-lc-rs", "ring", From e0f2b380743fc752506674deb58148fde3622356 Mon Sep 17 00:00:00 2001 From: John Spahn <44337821+jspahn80134@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:23:18 -0600 Subject: [PATCH 11/19] Fix VS Code extension lint --- extensions/VSCode/src/extension.ts | 290 +++++++++++++++-------------- 1 file changed, 155 insertions(+), 135 deletions(-) diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index 266e02ba..03c053bf 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -123,7 +123,10 @@ let codeChatEditorServer: CodeChatEditorServer | undefined; // CAPTURE (Dissertation instrumentation) // -------------------------------------- -function isInMarkdownCodeFence(doc: vscode.TextDocument, line: number): boolean { +function isInMarkdownCodeFence( + doc: vscode.TextDocument, + line: number, +): boolean { // Very simple fence tracker: toggles when encountering \`\`\` or ~~~ at 
// start of line. Good enough for dissertation instrumentation; refine later // if needed. @@ -164,7 +167,10 @@ function isInRstCodeBlock(doc: vscode.TextDocument, line: number): boolean { return /^\s+/.test(cur); } -function classifyAtPosition(doc: vscode.TextDocument, pos: vscode.Position): ActivityKind { +function classifyAtPosition( + doc: vscode.TextDocument, + pos: vscode.Position, +): ActivityKind { if (DOC_LANG_IDS.has(doc.languageId)) { if (doc.languageId === "markdown") { return isInMarkdownCodeFence(doc, pos.line) ? "code" : "doc"; @@ -178,10 +184,10 @@ function classifyAtPosition(doc: vscode.TextDocument, pos: vscode.Position): Act return "code"; } - - // Types for sending capture events to the Rust server. This mirrors // `CaptureEventWire` in webserver.rs. +type CaptureEventData = Record; + interface CaptureEventPayload { user_id: string; assignment_id?: string; @@ -190,7 +196,7 @@ interface CaptureEventPayload { event_type: string; client_timestamp_ms?: number; client_tz_offset_min?: number; - data: any; // sent as JSON + data: CaptureEventData; } // TODO: replace these with something real (e.g., VS Code settings) For now, we @@ -206,11 +212,7 @@ const CAPTURE_USER_ID: string = (() => { } // Fallbacks (should rarely be needed) - return ( - process.env["USERNAME"] || - process.env["USER"] || - "unknown-user" - ); + return process.env["USERNAME"] || process.env["USER"] || "unknown-user"; })(); const CAPTURE_ASSIGNMENT_ID = "demo-assignment"; @@ -238,23 +240,11 @@ const DOC_LANG_IDS = new Set([ let lastActivityKind: ActivityKind = "other"; let docSessionStart: number | null = null; -// Heuristic: classify a document as documentation vs. code vs. other. -function classifyDocument(doc: vscode.TextDocument | undefined): ActivityKind { - if (!doc) { - return "other"; - } - if (DOC_LANG_IDS.has(doc.languageId)) { - return "doc"; - } - // Everything else we treat as code for now. - return "code"; -} - // Helper to send a capture event to the Rust server. 
async function sendCaptureEvent( eventType: string, filePath?: string, - data: any = {}, + data: CaptureEventData = {}, ): Promise { const payload: CaptureEventPayload = { user_id: CAPTURE_USER_ID, @@ -348,7 +338,11 @@ function noteActivity(kind: ActivityKind, filePath?: string) { // If we switched between doc and code, log a switch\_pane event. const docOrCode = (k: ActivityKind) => k === "doc" || k === "code"; - if (docOrCode(lastActivityKind) && docOrCode(kind) && kind !== lastActivityKind) { + if ( + docOrCode(lastActivityKind) && + docOrCode(kind) && + kind !== lastActivityKind + ) { void sendCaptureEvent("switch_pane", filePath, { from: lastActivityKind, to: kind, @@ -364,7 +358,8 @@ function noteActivity(kind: ActivityKind, filePath?: string) { // This is invoked when the extension is activated. It either creates a new // CodeChat Editor Server instance or reveals the currently running one. export const activate = (context: vscode.ExtensionContext) => { - capture_output_channel = vscode.window.createOutputChannel("CodeChat Capture"); + capture_output_channel = + vscode.window.createOutputChannel("CodeChat Capture"); context.subscriptions.push(capture_output_channel); context.subscriptions.push( @@ -402,9 +397,6 @@ export const activate = (context: vscode.ExtensionContext) => { // CAPTURE: update session/switch state. The server // classifies write_* events after parsing. const doc = event.document; -// ``` -// const kind = classifyDocument(doc); -// ``` const firstChange = event.contentChanges[0]; const pos = firstChange.range.start; const kind = classifyAtPosition(doc, pos); @@ -443,8 +435,9 @@ export const activate = (context: vscode.ExtensionContext) => { // CAPTURE: update activity + possible // switch\_pane/doc\_session. const doc = event.document; - // const kind = classifyDocument(doc); - const pos = event.selection?.active ?? new vscode.Position(0, 0); + const pos = + event.selection?.active ?? 
+ new vscode.Position(0, 0); const kind = classifyAtPosition(doc, pos); const filePath = doc.fileName; @@ -455,27 +448,30 @@ export const activate = (context: vscode.ExtensionContext) => { ); context.subscriptions.push( - vscode.window.onDidChangeTextEditorSelection((event) => { - if (ignore_selection_change) { - ignore_selection_change = false; - return; - } - - console_log( - "CodeChat Editor extension: sending updated cursor/scroll position.", - ); + vscode.window.onDidChangeTextEditorSelection( + (event) => { + if (ignore_selection_change) { + ignore_selection_change = false; + return; + } - // CAPTURE: treat a selection change as "activity" - // in this document. - const doc = event.textEditor.document; - // const kind = classifyDocument(doc); - const pos = event.selections?.[0]?.active ?? event.textEditor.selection.active; - const kind = classifyAtPosition(doc, pos); - const filePath = doc.fileName; - noteActivity(kind, filePath); + console_log( + "CodeChat Editor extension: sending updated cursor/scroll position.", + ); - send_update(false); - }), + // CAPTURE: treat a selection change as "activity" + // in this document. + const doc = event.textEditor.document; + const pos = + event.selections?.[0]?.active ?? + event.textEditor.selection.active; + const kind = classifyAtPosition(doc, pos); + const filePath = doc.fileName; + noteActivity(kind, filePath); + + send_update(false); + }, + ), ); // CAPTURE: end of a debug/run session. 
@@ -483,14 +479,10 @@ export const activate = (context: vscode.ExtensionContext) => { vscode.debug.onDidTerminateDebugSession((session) => { const active = vscode.window.activeTextEditor; const filePath = active?.document.fileName; - void sendCaptureEvent( - "run_end", - filePath, - { - sessionName: session.name, - sessionType: session.type, - }, - ); + void sendCaptureEvent("run_end", filePath, { + sessionName: session.name, + sessionType: session.type, + }); }), ); @@ -500,30 +492,22 @@ export const activate = (context: vscode.ExtensionContext) => { const active = vscode.window.activeTextEditor; const filePath = active?.document.fileName; const task = e.execution.task; - void sendCaptureEvent( - "compile_end", - filePath, - { - taskName: task.name, - taskSource: task.source, - exitCode: e.exitCode, - }, - ); + void sendCaptureEvent("compile_end", filePath, { + taskName: task.name, + taskSource: task.source, + exitCode: e.exitCode, + }); }), ); // CAPTURE: listen for file saves. context.subscriptions.push( vscode.workspace.onDidSaveTextDocument((doc) => { - void sendCaptureEvent( - "save", - doc.fileName, - { - reason: "manual_save", - languageId: doc.languageId, - lineCount: doc.lineCount, - }, - ); + void sendCaptureEvent("save", doc.fileName, { + reason: "manual_save", + languageId: doc.languageId, + lineCount: doc.lineCount, + }); }), ); @@ -532,14 +516,10 @@ export const activate = (context: vscode.ExtensionContext) => { vscode.debug.onDidStartDebugSession((session) => { const active = vscode.window.activeTextEditor; const filePath = active?.document.fileName; - void sendCaptureEvent( - "run", - filePath, - { - sessionName: session.name, - sessionType: session.type, - }, - ); + void sendCaptureEvent("run", filePath, { + sessionName: session.name, + sessionType: session.type, + }); }), ); @@ -549,16 +529,12 @@ export const activate = (context: vscode.ExtensionContext) => { const active = vscode.window.activeTextEditor; const filePath = 
active?.document.fileName; const task = e.execution.task; - void sendCaptureEvent( - "compile", - filePath, - { - taskName: task.name, - taskSource: task.source, - definition: task.definition, - processId: e.processId, - }, - ); + void sendCaptureEvent("compile", filePath, { + taskName: task.name, + taskSource: task.source, + definition: task.definition, + processId: e.processId, + }); }), ); } @@ -571,11 +547,13 @@ export const activate = (context: vscode.ExtensionContext) => { assert(typeof codechat_client_location_str === "string"); switch (codechat_client_location_str) { case "html": - codechat_client_location = CodeChatEditorClientLocation.html; + codechat_client_location = + CodeChatEditorClientLocation.html; break; case "browser": - codechat_client_location = CodeChatEditorClientLocation.browser; + codechat_client_location = + CodeChatEditorClientLocation.browser; break; default: @@ -584,7 +562,10 @@ export const activate = (context: vscode.ExtensionContext) => { // Create or reveal the webview panel; if this is an external // browser, we'll open it after the client is created. - if (codechat_client_location === CodeChatEditorClientLocation.html) { + if ( + codechat_client_location === + CodeChatEditorClientLocation.html + ) { if (webview_panel !== undefined) { webview_panel.reveal(undefined, true); } else { @@ -601,7 +582,9 @@ export const activate = (context: vscode.ExtensionContext) => { }, ); webview_panel.onDidDispose(async () => { - console_log("CodeChat Editor extension: shut down webview."); + console_log( + "CodeChat Editor extension: shut down webview.", + ); quiet_next_error = true; webview_panel = undefined; await stop_client(); @@ -612,7 +595,8 @@ export const activate = (context: vscode.ExtensionContext) => { // Provide a simple status display while the server is starting // up. if (webview_panel !== undefined) { - webview_panel.webview.html = "

CodeChat Editor

Loading...

"; + webview_panel.webview.html = + "

CodeChat Editor

Loading...

"; } else { vscode.window.showInformationMessage( "The CodeChat Editor is loading in an external browser...", @@ -627,16 +611,22 @@ export const activate = (context: vscode.ExtensionContext) => { extensionCaptureSessionStarted = false; const hosted_in_ide = - codechat_client_location === CodeChatEditorClientLocation.html; + codechat_client_location === + CodeChatEditorClientLocation.html; console_log( `CodeChat Editor extension: sending message Opened(${hosted_in_ide}).`, ); await codeChatEditorServer.sendMessageOpened(hosted_in_ide); - if (codechat_client_location === CodeChatEditorClientLocation.browser) { + if ( + codechat_client_location === + CodeChatEditorClientLocation.browser + ) { captureTransportReady = true; const active = vscode.window.activeTextEditor; - void startExtensionCaptureSession(active?.document.fileName); + void startExtensionCaptureSession( + active?.document.fileName, + ); send_update(false); } @@ -647,7 +637,9 @@ export const activate = (context: vscode.ExtensionContext) => { break; } - const { id, message } = JSON.parse(message_raw) as EditorMessage; + const { id, message } = JSON.parse( + message_raw, + ) as EditorMessage; console_log( `CodeChat Editor extension: Received data id = ${id}, message = ${format_struct( message, @@ -666,7 +658,8 @@ export const activate = (context: vscode.ExtensionContext) => { switch (key) { case "Update": { - const current_update = value as UpdateMessageContents; + const current_update = + value as UpdateMessageContents; const doc = get_document(current_update.file_path); if (doc === undefined) { await sendResult(id, { @@ -686,7 +679,12 @@ export const activate = (context: vscode.ExtensionContext) => { wse.replace( doc.uri, doc.validateRange( - new vscode.Range(0, 0, doc.lineCount, 0), + new vscode.Range( + 0, + 0, + doc.lineCount, + 0, + ), ), source.Plain.doc, ); @@ -712,10 +710,18 @@ export const activate = (context: vscode.ExtensionContext) => { for (const diff of diffs) { const from = 
doc.positionAt(diff.from); if (diff.to === undefined) { - wse.insert(doc.uri, from, diff.insert); + wse.insert( + doc.uri, + from, + diff.insert, + ); } else { const to = doc.positionAt(diff.to); - wse.replace(doc.uri, new Range(from, to), diff.insert); + wse.replace( + doc.uri, + new Range(from, to), + diff.insert, + ); } } } @@ -740,7 +746,10 @@ export const activate = (context: vscode.ExtensionContext) => { 0, ); editor.revealRange( - new vscode.Range(scroll_position, scroll_position), + new vscode.Range( + scroll_position, + scroll_position, + ), TextEditorRevealType.AtTop, ); } @@ -748,9 +757,15 @@ export const activate = (context: vscode.ExtensionContext) => { const cursor_line = current_update.cursor_position; if (cursor_line !== undefined && editor) { ignore_selection_change = true; - const cursor_position = new vscode.Position(cursor_line - 1, 0); + const cursor_position = new vscode.Position( + cursor_line - 1, + 0, + ); editor.selections = [ - new vscode.Selection(cursor_position, cursor_position), + new vscode.Selection( + cursor_position, + cursor_position, + ), ]; // I'd prefer to set `ignore_selection_change = // false` here, but even doing so after a @@ -769,18 +784,25 @@ export const activate = (context: vscode.ExtensionContext) => { if (is_text) { let document; try { - document = await vscode.workspace.openTextDocument(current_file); + document = + await vscode.workspace.openTextDocument( + current_file, + ); } catch (e) { await sendResult(id, { - OpenFileFailed: [current_file, (e as Error).toString()], + OpenFileFailed: [ + current_file, + (e as Error).toString(), + ], }); continue; } ignore_active_editor_change = true; - current_editor = await vscode.window.showTextDocument( - document, - current_editor?.viewColumn, - ); + current_editor = + await vscode.window.showTextDocument( + document, + current_editor?.viewColumn, + ); ignore_active_editor_change = false; await sendResult(id); } else { @@ -857,7 +879,10 @@ export const activate = (context: 
vscode.ExtensionContext) => { console_log( `CodeChat Editor extension: Result(LoadFile(id = ${id}, ${format_struct(load_file_result)}))`, ); - await codeChatEditorServer.sendResultLoadfile(id, load_file_result); + await codeChatEditorServer.sendResultLoadfile( + id, + load_file_result, + ); break; } @@ -868,7 +893,9 @@ export const activate = (context: vscode.ExtensionContext) => { await sendResult(id); captureTransportReady = true; const active = vscode.window.activeTextEditor; - void startExtensionCaptureSession(active?.document.fileName); + void startExtensionCaptureSession( + active?.document.fileName, + ); send_update(false); break; } @@ -971,7 +998,9 @@ const send_update = (this_is_dirty: boolean) => { `CodeChat Editor extension: sending CurrentFile(${current_file}}).`, ); try { - await codeChatEditorServer!.sendMessageCurrentFile(current_file); + await codeChatEditorServer!.sendMessageCurrentFile( + current_file, + ); } catch (e) { show_error(`Error sending CurrentFile message: ${e}.`); } @@ -979,7 +1008,8 @@ const send_update = (this_is_dirty: boolean) => { return; } - const cursor_position = current_editor!.selection.active.line + 1; + const cursor_position = + current_editor!.selection.active.line + 1; const scroll_position = current_editor!.visibleRanges[0].start.line + 1; const file_path = current_editor!.document.fileName; @@ -1032,7 +1062,9 @@ const show_error = (message: string) => { } console.error(`CodeChat Editor extension: ${message}`); if (webview_panel !== undefined) { - if (!webview_panel.webview.html.startsWith("

CodeChat Editor

")) { + if ( + !webview_panel.webview.html.startsWith("

CodeChat Editor

") + ) { webview_panel.webview.html = "

CodeChat Editor

"; } webview_panel.webview.html += `

${escape( @@ -1058,10 +1090,11 @@ const can_render = () => { }; const get_document = (file_path: string) => { - for ( const doc of vscode.workspace.textDocuments) { + for (const doc of vscode.workspace.textDocuments) { if ( (!is_windows && doc.fileName === file_path) || - (is_windows && doc.fileName.toUpperCase() === file_path.toUpperCase()) + (is_windows && + doc.fileName.toUpperCase() === file_path.toUpperCase()) ) { return doc; } @@ -1081,16 +1114,3 @@ const console_log = (...args: any) => { console.log(...args); } }; - -function getCurrentUsername(): string { - try { - // Most reliable on Windows/macOS/Linux - const u = os.userInfo().username; - if (u && u.trim().length > 0) return u.trim(); - } catch (_) {} - - // Fallbacks - const envUser = process.env["USERNAME"] || process.env["USER"]; - return (envUser && envUser.trim().length > 0) ? envUser.trim() : "unknown-user"; -} - From a2c5e88ab0d7faf3133a5cb16332ae9e5ffe0830 Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Sun, 3 May 2026 09:07:05 -0600 Subject: [PATCH 12/19] Ongoing development --- .gitignore | 8 +- capture_config.example.json | 9 + capture_config.json | 9 - extensions/VSCode/package.json | 109 +++++- extensions/VSCode/src/extension.ts | 354 ++++++++++++++++- extensions/VSCode/src/lib.rs | 6 + server/scripts/export_capture_metrics.py | 469 +++++++++++++++++++++++ server/src/capture.rs | 280 +++++++++++++- server/src/ide.rs | 6 + server/src/main.rs | 11 +- server/src/translation.rs | 31 ++ server/src/webserver.rs | 166 ++++++-- 12 files changed, 1378 insertions(+), 80 deletions(-) create mode 100644 capture_config.example.json delete mode 100644 capture_config.json create mode 100644 server/scripts/export_capture_metrics.py diff --git a/.gitignore b/.gitignore index 2b4f8c30..867dfce6 100644 --- a/.gitignore +++ b/.gitignore @@ -21,8 +21,14 @@ # # dist build output target/ +/server/bindings/ -# The runtime capture config is resolved from 
the repository root. +# Runtime capture configuration and local fallback capture logs. +/capture_config.json +/capture-events-fallback.jsonl +/capture-metrics-*.csv +/server/scripts/output +/server/scripts/capture-metrics-*.csv server/capture_config.json # CodeChat Editor lexer: python. See TODO. diff --git a/capture_config.example.json b/capture_config.example.json new file mode 100644 index 00000000..22980ee2 --- /dev/null +++ b/capture_config.example.json @@ -0,0 +1,9 @@ +{ + "host": "your-aws-rds-endpoint.amazonaws.com", + "port": 5432, + "user": "your-db-user", + "password": "your-db-password", + "dbname": "your-db-name", + "app_id": "dissertation", + "fallback_path": "capture-events-fallback.jsonl" +} diff --git a/capture_config.json b/capture_config.json deleted file mode 100644 index 574f1477..00000000 --- a/capture_config.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "host": "3.146.138.182", - "port": 5432, - "dbname": "CodeChatCaptureDB", - "user": "CodeChatCaptureUser", - "password": "OB3yc8Hk9SuVjzXMdUDr0C7w4PqLQisn", - "max_connections": 5, - "timeout_seconds": 30 -} diff --git a/extensions/VSCode/package.json b/extensions/VSCode/package.json index 4d1805d9..9ceabfd4 100644 --- a/extensions/VSCode/package.json +++ b/extensions/VSCode/package.json @@ -44,7 +44,15 @@ "version": "0.1.54", "activationEvents": [ "onCommand:extension.codeChatEditorActivate", - "onCommand:extension.codeChatEditorDeactivate" + "onCommand:extension.codeChatEditorDeactivate", + "onCommand:extension.codeChatCaptureStatus", + "onCommand:extension.codeChatInsertReflectionPrompt", + "onCommand:extension.codeChatCaptureTaskStart", + "onCommand:extension.codeChatCaptureTaskSubmit", + "onCommand:extension.codeChatCaptureDebugTaskStart", + "onCommand:extension.codeChatCaptureDebugTaskSubmit", + "onCommand:extension.codeChatCaptureHandoffStart", + "onCommand:extension.codeChatCaptureHandoffEnd" ], "contributes": { "configuration": { @@ -62,6 +70,73 @@ "In the default external web browser" ], 
"markdownDescription": "Select the location of the CodeChat Editor Client. After changing this value, you **must** close then restart the CodeChat Editor extension." + }, + "CodeChatEditor.Capture.Enabled": { + "type": "boolean", + "default": false, + "markdownDescription": "Enable dissertation instrumentation capture." + }, + "CodeChatEditor.Capture.ConsentEnabled": { + "type": "boolean", + "default": false, + "markdownDescription": "Allow capture after participant consent is recorded for the current study session." + }, + "CodeChatEditor.Capture.ParticipantId": { + "type": "string", + "default": "", + "markdownDescription": "Pseudonymous participant identifier used as the capture user_id." + }, + "CodeChatEditor.Capture.AssignmentId": { + "type": "string", + "default": "", + "markdownDescription": "Assignment, lab, or task identifier attached to capture events." + }, + "CodeChatEditor.Capture.GroupId": { + "type": "string", + "default": "", + "markdownDescription": "Study group, section, or team identifier attached to capture events." + }, + "CodeChatEditor.Capture.CourseId": { + "type": "string", + "default": "", + "markdownDescription": "Course or deployment identifier attached to capture events." + }, + "CodeChatEditor.Capture.TaskId": { + "type": "string", + "default": "", + "markdownDescription": "Current study task identifier attached to capture events." + }, + "CodeChatEditor.Capture.Mode": { + "type": "string", + "default": "treatment", + "enum": [ + "treatment", + "comparison", + "capture-only" + ], + "enumDescriptions": [ + "Capture events and enable reflective prompt commands.", + "Capture events without reflective prompt scaffolding.", + "Capture events only for baseline or pilot instrumentation." + ], + "markdownDescription": "Study condition/mode attached to capture events." 
+ }, + "CodeChatEditor.Capture.HashFilePaths": { + "type": "boolean", + "default": true, + "markdownDescription": "Hash local file paths before they are sent to capture storage." + }, + "CodeChatEditor.Capture.PromptTemplates": { + "type": "array", + "default": [ + "What changed in your understanding of this code?", + "What assumption are you making, and how could you test it?", + "What would another developer need to know before maintaining this?" + ], + "items": { + "type": "string" + }, + "markdownDescription": "Reflective writing prompts available in treatment mode." } } }, @@ -73,6 +148,38 @@ { "command": "extension.codeChatEditorDeactivate", "title": "Disable the CodeChat Editor" + }, + { + "command": "extension.codeChatCaptureStatus", + "title": "Show CodeChat Capture Status" + }, + { + "command": "extension.codeChatInsertReflectionPrompt", + "title": "CodeChat: Insert Reflection Prompt" + }, + { + "command": "extension.codeChatCaptureTaskStart", + "title": "CodeChat Capture: Task Start" + }, + { + "command": "extension.codeChatCaptureTaskSubmit", + "title": "CodeChat Capture: Task Submit" + }, + { + "command": "extension.codeChatCaptureDebugTaskStart", + "title": "CodeChat Capture: Debug Task Start" + }, + { + "command": "extension.codeChatCaptureDebugTaskSubmit", + "title": "CodeChat Capture: Debug Task Submit" + }, + { + "command": "extension.codeChatCaptureHandoffStart", + "title": "CodeChat Capture: Handoff Start" + }, + { + "command": "extension.codeChatCaptureHandoffEnd", + "title": "CodeChat Capture: Handoff End" } ] }, diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index 03c053bf..64268504 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -53,7 +53,6 @@ import { MAX_MESSAGE_LENGTH, } from "../../../client/src/debug_enabled.mjs"; import { ResultErrTypes } from "../../../client/src/rust-types/ResultErrTypes.js"; -import * as os from "os"; import * as crypto from "crypto"; 
@@ -189,9 +188,18 @@ function classifyAtPosition( type CaptureEventData = Record; interface CaptureEventPayload { + event_id?: string; + sequence_number?: number; + schema_version?: number; user_id: string; assignment_id?: string; group_id?: string; + condition?: string; + course_id?: string; + task_id?: string; + event_source?: string; + language_id?: string; + file_hash?: string; file_path?: string; event_type: string; client_timestamp_ms?: number; @@ -199,29 +207,47 @@ interface CaptureEventPayload { data: CaptureEventData; } -// TODO: replace these with something real (e.g., VS Code settings) For now, we -// hard-code to prove that the pipeline works end-to-end. -const CAPTURE_USER_ID: string = (() => { - try { - const u = os.userInfo().username; - if (u && u.trim().length > 0) { - return u.trim(); - } - } catch (_) { - // fall through - } +type CaptureMode = "treatment" | "comparison" | "capture-only"; + +interface StudySettings { + enabled: boolean; + consentEnabled: boolean; + participantId: string; + assignmentId?: string; + groupId?: string; + condition: CaptureMode; + courseId?: string; + taskId?: string; + hashFilePaths: boolean; + promptTemplates: string[]; +} - // Fallbacks (should rarely be needed) - return process.env["USERNAME"] || process.env["USER"] || "unknown-user"; -})(); +interface CaptureStatus { + enabled: boolean; + state: string; + queued_events: number; + persisted_events: number; + fallback_events: number; + failed_events: number; + last_error?: string | null; + fallback_path?: string | null; +} -const CAPTURE_ASSIGNMENT_ID = "demo-assignment"; -const CAPTURE_GROUP_ID = "demo-group"; +const CAPTURE_SCHEMA_VERSION = 2; +const CAPTURE_EVENT_SOURCE = "vscode_extension"; +const DEFAULT_REFLECTION_PROMPTS = [ + "What changed in your understanding of this code?", + "What assumption are you making, and how could you test it?", + "What would another developer need to know before maintaining this?", +]; let capture_output_channel: 
vscode.OutputChannel | undefined; let captureFailureLogged = false; let captureTransportReady = false; let extensionCaptureSessionStarted = false; +let captureSequenceNumber = 0; +let capture_status_bar_item: vscode.StatusBarItem | undefined; +let capture_status_timer: NodeJS.Timeout | undefined; // Simple classification of what the user is currently doing. type ActivityKind = "doc" | "code" | "other"; @@ -240,23 +266,114 @@ const DOC_LANG_IDS = new Set([ let lastActivityKind: ActivityKind = "other"; let docSessionStart: number | null = null; +function optionalString(value: unknown): string | undefined { + return typeof value === "string" && value.trim().length > 0 + ? value.trim() + : undefined; +} + +function loadStudySettings(): StudySettings { + const config = vscode.workspace.getConfiguration("CodeChatEditor.Capture"); + const modeValue = optionalString(config.get("Mode")); + const condition: CaptureMode = + modeValue === "comparison" || modeValue === "capture-only" + ? modeValue + : "treatment"; + const promptTemplates = config.get("PromptTemplates"); + + return { + enabled: config.get("Enabled", false), + consentEnabled: config.get("ConsentEnabled", false), + participantId: optionalString(config.get("ParticipantId")) ?? "", + assignmentId: optionalString(config.get("AssignmentId")), + groupId: optionalString(config.get("GroupId")), + condition, + courseId: optionalString(config.get("CourseId")), + taskId: optionalString(config.get("TaskId")), + hashFilePaths: config.get("HashFilePaths", true), + promptTemplates: + Array.isArray(promptTemplates) && promptTemplates.length > 0 + ? 
promptTemplates + .filter( + (prompt): prompt is string => + typeof prompt === "string", + ) + .map((prompt) => prompt.trim()) + .filter((prompt) => prompt.length > 0) + : DEFAULT_REFLECTION_PROMPTS, + }; +} + +function captureDisabledReason(settings: StudySettings): string | undefined { + if (!settings.enabled) { + return "disabled in settings"; + } + if (!settings.consentEnabled) { + return "waiting for consent"; + } + if (settings.participantId.length === 0) { + return "participant id is not configured"; + } + return undefined; +} + +function hashText(value: string): string { + return crypto.createHash("sha256").update(value).digest("hex"); +} + +function buildFileFields( + filePath: string | undefined, + settings: StudySettings, +): Pick { + const document = + filePath === undefined + ? vscode.window.activeTextEditor?.document + : get_document(filePath); + if (filePath === undefined) { + return { + language_id: document?.languageId, + }; + } + return { + file_path: settings.hashFilePaths ? undefined : filePath, + file_hash: settings.hashFilePaths ? hashText(filePath) : undefined, + language_id: document?.languageId, + }; +} + // Helper to send a capture event to the Rust server. 
async function sendCaptureEvent( eventType: string, filePath?: string, data: CaptureEventData = {}, ): Promise { + const settings = loadStudySettings(); + const disabledReason = captureDisabledReason(settings); + if (disabledReason !== undefined) { + updateCaptureStatusBar(`Capture: ${disabledReason}`, disabledReason); + return; + } + const fileFields = buildFileFields(filePath, settings); const payload: CaptureEventPayload = { - user_id: CAPTURE_USER_ID, - assignment_id: CAPTURE_ASSIGNMENT_ID, - group_id: CAPTURE_GROUP_ID, - file_path: filePath, + event_id: crypto.randomUUID(), + sequence_number: ++captureSequenceNumber, + schema_version: CAPTURE_SCHEMA_VERSION, + user_id: settings.participantId, + assignment_id: settings.assignmentId, + group_id: settings.groupId, + condition: settings.condition, + course_id: settings.courseId, + task_id: settings.taskId, + event_source: CAPTURE_EVENT_SOURCE, + ...fileFields, event_type: eventType, client_timestamp_ms: Date.now(), client_tz_offset_min: new Date().getTimezoneOffset(), data: { ...data, session_id: CAPTURE_SESSION_ID, + capture_mode: settings.condition, + path_privacy: settings.hashFilePaths ? "sha256" : "plain", }, }; @@ -276,6 +393,7 @@ async function sendCaptureEvent( try { await codeChatEditorServer.sendCaptureEvent(JSON.stringify(payload)); captureFailureLogged = false; + void refreshCaptureStatus(); } catch (err) { reportCaptureFailure(err instanceof Error ? 
err.message : String(err)); } @@ -291,6 +409,7 @@ function reportCaptureFailure(message: string) { capture_output_channel?.appendLine( `${new Date().toISOString()} capture send failed: ${message}`, ); + updateCaptureStatusBar("Capture: Error", message); if (captureFailureLogged) { return; } @@ -298,6 +417,140 @@ function reportCaptureFailure(message: string) { console.warn(`CodeChat capture event was not queued: ${message}`); } +function updateCaptureStatusBar(text: string, tooltip?: string) { + if (capture_status_bar_item === undefined) { + return; + } + capture_status_bar_item.text = text; + capture_status_bar_item.tooltip = tooltip; + capture_status_bar_item.show(); +} + +async function refreshCaptureStatus(): Promise { + const settings = loadStudySettings(); + const disabledReason = captureDisabledReason(settings); + if (disabledReason !== undefined) { + updateCaptureStatusBar(`Capture: ${disabledReason}`, disabledReason); + return; + } + if (codeChatEditorServer === undefined) { + updateCaptureStatusBar( + "Capture: Waiting", + "CodeChat server is not running", + ); + return; + } + + try { + const status = JSON.parse( + codeChatEditorServer.getCaptureStatus(), + ) as CaptureStatus; + const label = + status.state === "database" + ? "Capture: DB" + : status.state === "fallback" + ? "Capture: Fallback" + : status.state === "starting" + ? "Capture: Starting" + : "Capture: Off"; + updateCaptureStatusBar( + label, + [ + `state=${status.state}`, + `queued=${status.queued_events}`, + `db=${status.persisted_events}`, + `fallback=${status.fallback_events}`, + `failed=${status.failed_events}`, + status.last_error ? `last_error=${status.last_error}` : "", + status.fallback_path + ? `fallback_path=${status.fallback_path}` + : "", + ] + .filter((line) => line.length > 0) + .join("\n"), + ); + } catch (err) { + updateCaptureStatusBar( + "Capture: Error", + err instanceof Error ? 
err.message : String(err), + ); + } +} + +async function showCaptureStatus(): Promise { + await refreshCaptureStatus(); + const tooltip = capture_status_bar_item?.tooltip; + vscode.window.showInformationMessage( + typeof tooltip === "string" + ? tooltip + : (tooltip?.value ?? "Capture status unavailable"), + ); +} + +async function recordStudyLifecycleEvent(eventType: string): Promise { + const active = vscode.window.activeTextEditor; + await sendCaptureEvent(eventType, active?.document.fileName, { + command: eventType, + languageId: active?.document.languageId, + }); +} + +function reflectionPromptText(languageId: string, prompt: string): string { + if (languageId === "markdown") { + return `\n\n### Reflection\n\n${prompt}\n\n`; + } + if (languageId === "restructuredtext") { + return `\n.. ${prompt}\n`; + } + if (languageId === "plaintext" || languageId === "latex") { + return `\n${prompt}\n`; + } + const commentPrefix = + languageId === "python" || + languageId === "shellscript" || + languageId === "powershell" || + languageId === "ruby" + ? 
"#" + : "//"; + return `\n${commentPrefix} Reflection: ${prompt}\n`; +} + +async function insertReflectionPrompt(): Promise { + const settings = loadStudySettings(); + if (settings.condition !== "treatment") { + vscode.window.showInformationMessage( + "Reflection prompts are disabled for this capture mode.", + ); + return; + } + const editor = vscode.window.activeTextEditor; + if (editor === undefined) { + vscode.window.showInformationMessage("Open a text editor first."); + return; + } + const prompt = await vscode.window.showQuickPick(settings.promptTemplates, { + placeHolder: "Select a reflection prompt", + }); + if (prompt === undefined) { + return; + } + + await editor.insertSnippet( + new vscode.SnippetString( + reflectionPromptText(editor.document.languageId, prompt), + ), + ); + await sendCaptureEvent( + "reflection_prompt_inserted", + editor.document.fileName, + { + prompt_hash: hashText(prompt), + prompt_length: prompt.length, + languageId: editor.document.languageId, + }, + ); +} + async function startExtensionCaptureSession(filePath?: string) { if (extensionCaptureSessionStarted) { return; @@ -361,8 +614,65 @@ export const activate = (context: vscode.ExtensionContext) => { capture_output_channel = vscode.window.createOutputChannel("CodeChat Capture"); context.subscriptions.push(capture_output_channel); + capture_status_bar_item = vscode.window.createStatusBarItem( + vscode.StatusBarAlignment.Left, + 100, + ); + capture_status_bar_item.command = "extension.codeChatCaptureStatus"; + context.subscriptions.push(capture_status_bar_item); + capture_status_timer = setInterval(() => { + void refreshCaptureStatus(); + }, 5000); + context.subscriptions.push({ + dispose: () => { + if (capture_status_timer !== undefined) { + clearInterval(capture_status_timer); + capture_status_timer = undefined; + } + }, + }); + context.subscriptions.push( + vscode.workspace.onDidChangeConfiguration((event) => { + if (event.affectsConfiguration("CodeChatEditor.Capture")) { + void 
refreshCaptureStatus(); + } + }), + ); + void refreshCaptureStatus(); context.subscriptions.push( + vscode.commands.registerCommand( + "extension.codeChatCaptureStatus", + showCaptureStatus, + ), + vscode.commands.registerCommand( + "extension.codeChatInsertReflectionPrompt", + insertReflectionPrompt, + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureTaskStart", + () => recordStudyLifecycleEvent("task_start"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureTaskSubmit", + () => recordStudyLifecycleEvent("task_submit"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureDebugTaskStart", + () => recordStudyLifecycleEvent("debug_task_start"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureDebugTaskSubmit", + () => recordStudyLifecycleEvent("debug_task_submit"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureHandoffStart", + () => recordStudyLifecycleEvent("handoff_start"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureHandoffEnd", + () => recordStudyLifecycleEvent("handoff_end"), + ), vscode.commands.registerCommand( "extension.codeChatEditorDeactivate", deactivate, @@ -609,6 +919,7 @@ export const activate = (context: vscode.ExtensionContext) => { captureFailureLogged = false; captureTransportReady = false; extensionCaptureSessionStarted = false; + void refreshCaptureStatus(); const hosted_in_ide = codechat_client_location === @@ -1045,6 +1356,7 @@ const stop_client = async () => { codeChatEditorServer = undefined; } captureTransportReady = false; + void refreshCaptureStatus(); if (idle_timer !== undefined) { clearTimeout(idle_timer); diff --git a/extensions/VSCode/src/lib.rs b/extensions/VSCode/src/lib.rs index e94472c3..aa8169fa 100644 --- a/extensions/VSCode/src/lib.rs +++ b/extensions/VSCode/src/lib.rs @@ -87,6 +87,12 @@ impl CodeChatEditorServer { self.0.send_capture_event(capture_event).await } + #[napi] + pub fn get_capture_status(&self) -> 
Result { + serde_json::to_string(&self.0.capture_status()) + .map_err(|err| Error::new(Status::GenericFailure, err.to_string())) + } + #[napi] pub async fn send_message_current_file(&self, url: String) -> std::io::Result { self.0.send_message_current_file(url).await diff --git a/server/scripts/export_capture_metrics.py b/server/scripts/export_capture_metrics.py new file mode 100644 index 00000000..d95a7c66 --- /dev/null +++ b/server/scripts/export_capture_metrics.py @@ -0,0 +1,469 @@ +#!/usr/bin/env python3 +"""Export dissertation-oriented metrics from CodeChat capture events. + +Default use pulls events directly from PostgreSQL using `capture_config.json` +or the `CODECHAT_CAPTURE_*` environment variables: + + python server/scripts/export_capture_metrics.py --out capture-metrics.csv + +The optional positional `input` is only for fallback JSONL logs: + + python server/scripts/export_capture_metrics.py capture-events-fallback.jsonl --out capture-metrics.csv +""" + +from __future__ import annotations + +import argparse +import csv +import json +import os +import re +import shutil +import subprocess +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Iterable, Iterator + + +EVENT_FIELDS = [ + "write_doc", + "write_code", + "doc_session", + "switch_pane", + "save", + "compile", + "compile_end", + "run", + "run_end", + "task_start", + "task_submit", + "debug_task_start", + "debug_task_submit", + "handoff_start", + "handoff_end", + "reflection_prompt_inserted", +] + + +@dataclass(frozen=True) +class DbConfig: + host: str + user: str + password: str + dbname: str + port: int | None = None + + +@dataclass +class MetricRow: + user_id: str + assignment_id: str + group_id: str + session_id: str + condition: str + course_id: str + task_id: str + event_count: int = 0 + first_event_at: str = "" + last_event_at: str = "" + doc_session_seconds: float = 0.0 + 
counts: dict[str, int] = field(default_factory=lambda: defaultdict(int)) + + +def parse_timestamp(value: Any) -> datetime | None: + if isinstance(value, datetime): + return value + if not isinstance(value, str) or not value: + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + + +def as_data(value: Any) -> dict[str, Any]: + if isinstance(value, dict): + return value + if isinstance(value, str): + try: + parsed = json.loads(value) + except json.JSONDecodeError: + return {} + return parsed if isinstance(parsed, dict) else {} + return {} + + +def iter_jsonl_events(path: Path) -> Iterator[dict[str, Any]]: + with path.open("r", encoding="utf-8") as input_file: + for line_number, line in enumerate(input_file, start=1): + line = line.strip() + if not line: + continue + try: + record = json.loads(line) + except json.JSONDecodeError as err: + raise SystemExit(f"{path}:{line_number}: invalid JSON: {err}") from err + event = record.get("event", record) + if not isinstance(event, dict): + continue + event["data"] = as_data(event.get("data")) + yield event + + +def load_db_config(config_path: Path) -> DbConfig: + env_config = db_config_from_env() + if env_config is not None: + return env_config + + config_path = resolve_config_path(config_path) + try: + config = json.loads(config_path.read_text(encoding="utf-8")) + except FileNotFoundError as err: + searched = "\n ".join(str(path) for path in config_search_paths(config_path)) + raise SystemExit( + "No DB config found. 
Create a local capture_config.json, set " + "CODECHAT_CAPTURE_* env vars, or pass a fallback JSONL input file.\n" + f"Searched:\n {searched}" + ) from err + except json.JSONDecodeError as err: + raise SystemExit(f"{config_path}: invalid JSON: {err}") from err + + missing = [name for name in ["host", "user", "password", "dbname"] if not config.get(name)] + if missing: + raise SystemExit(f"{config_path}: missing required DB field(s): {', '.join(missing)}") + + return DbConfig( + host=str(config["host"]), + user=str(config["user"]), + password=str(config["password"]), + dbname=str(config["dbname"]), + port=int(config["port"]) if config.get("port") is not None else None, + ) + + +def resolve_config_path(config_path: Path) -> Path: + for candidate in config_search_paths(config_path): + if candidate.exists(): + return candidate + return config_path + + +def config_search_paths(config_path: Path) -> list[Path]: + if config_path.is_absolute(): + return [config_path] + + script_repo_root = Path(__file__).resolve().parents[2] + paths = [Path.cwd() / config_path, script_repo_root / config_path] + + unique_paths: list[Path] = [] + for path in paths: + if path not in unique_paths: + unique_paths.append(path) + return unique_paths + + +def db_config_from_env() -> DbConfig | None: + host = env_value("CODECHAT_CAPTURE_HOST") + if host is None: + return None + missing = [ + name + for name in [ + "CODECHAT_CAPTURE_USER", + "CODECHAT_CAPTURE_PASSWORD", + "CODECHAT_CAPTURE_DBNAME", + ] + if env_value(name) is None + ] + if missing: + raise SystemExit( + "Missing required capture DB environment variable(s): " + ", ".join(missing) + ) + + port_text = env_value("CODECHAT_CAPTURE_PORT") + return DbConfig( + host=host, + user=env_value("CODECHAT_CAPTURE_USER") or "", + password=env_value("CODECHAT_CAPTURE_PASSWORD") or "", + dbname=env_value("CODECHAT_CAPTURE_DBNAME") or "", + port=int(port_text) if port_text is not None else None, + ) + + +def env_value(name: str) -> str | None: + value 
= os.environ.get(name) + if value is None: + return None + value = value.strip() + return value or None + + +def sql_identifier(identifier: str) -> str: + parts = identifier.split(".") + for part in parts: + if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", part): + raise SystemExit(f"Invalid SQL identifier: {identifier!r}") + return ".".join(f'"{part}"' for part in parts) + + +def iter_db_events(config: DbConfig, table: str) -> Iterator[dict[str, Any]]: + try: + import psycopg + except ImportError: + yield from iter_db_events_with_psql(config, table) + return + + connect_kwargs = { + "host": config.host, + "user": config.user, + "password": config.password, + "dbname": config.dbname, + } + if config.port is not None: + connect_kwargs["port"] = config.port + + query = ( + "SELECT user_id, assignment_id, group_id, file_path, event_type, timestamp, data " + f"FROM {sql_identifier(table)} " + "ORDER BY timestamp" + ) + with psycopg.connect(**connect_kwargs) as conn: + with conn.cursor() as cursor: + cursor.execute(query) + for ( + user_id, + assignment_id, + group_id, + file_path, + event_type, + timestamp, + data, + ) in cursor: + yield { + "user_id": user_id, + "assignment_id": assignment_id, + "group_id": group_id, + "file_path": file_path, + "event_type": event_type, + "timestamp": timestamp, + "data": as_data(data), + } + + +def iter_db_events_with_psql(config: DbConfig, table: str) -> Iterator[dict[str, Any]]: + psql_path = find_psql() + if psql_path is None: + raise SystemExit( + "PostgreSQL export needs a local PostgreSQL client to connect to the AWS DB.\n" + "The AWS PostgreSQL server is remote; it cannot provide Python's local DB driver.\n" + "Install one of these on this Windows machine:\n" + " python -m pip install \"psycopg[binary]\"\n" + "or install PostgreSQL command-line tools so psql.exe is available on PATH." 
+ ) + + env = os.environ.copy() + env["PGPASSWORD"] = config.password + command = [ + psql_path, + "--no-password", + "--no-align", + "--tuples-only", + "--quiet", + "--set", + "ON_ERROR_STOP=1", + "--host", + config.host, + "--username", + config.user, + "--dbname", + config.dbname, + "--command", + psql_json_query(table), + ] + if config.port is not None: + command.extend(["--port", str(config.port)]) + + result = subprocess.run( + command, + env=env, + check=False, + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise SystemExit( + "psql failed while querying the AWS PostgreSQL DB:\n" + f"{result.stderr.strip() or result.stdout.strip()}" + ) + + for line_number, line in enumerate(result.stdout.splitlines(), start=1): + line = line.strip() + if not line: + continue + try: + event = json.loads(line) + except json.JSONDecodeError as err: + raise SystemExit(f"psql output line {line_number}: invalid JSON: {err}") from err + event["data"] = as_data(event.get("data")) + yield event + + +def find_psql() -> str | None: + psql_path = shutil.which("psql") + if psql_path is not None: + return psql_path + + program_files = Path(os.environ.get("ProgramFiles", r"C:\Program Files")) + candidates = sorted(program_files.glob(r"PostgreSQL/*/bin/psql.exe"), reverse=True) + return str(candidates[0]) if candidates else None + + +def psql_json_query(table: str) -> str: + return ( + "SELECT json_build_object(" + "'user_id', user_id, " + "'assignment_id', assignment_id, " + "'group_id', group_id, " + "'file_path', file_path, " + "'event_type', event_type, " + "'timestamp', timestamp, " + "'data', data" + ")::text " + f"FROM {sql_identifier(table)} " + "ORDER BY timestamp" + ) + + +def key_for_event(event: dict[str, Any]) -> tuple[str, str, str, str, str, str, str]: + data = event["data"] + return ( + str(event.get("user_id") or ""), + str(event.get("assignment_id") or ""), + str(event.get("group_id") or ""), + str(data.get("session_id") or ""), + 
str(data.get("condition") or ""), + str(data.get("course_id") or ""), + str(data.get("task_id") or ""), + ) + + +def update_row(row: MetricRow, event: dict[str, Any]) -> None: + event_type = str(event.get("event_type") or "") + data = event["data"] + row.event_count += 1 + if event_type in EVENT_FIELDS: + row.counts[event_type] += 1 + if event_type == "doc_session": + duration = data.get("duration_seconds") + if isinstance(duration, (int, float)): + row.doc_session_seconds += float(duration) + + parsed_timestamp = parse_timestamp(event.get("timestamp")) + if parsed_timestamp is not None: + timestamp_text = parsed_timestamp.isoformat() + if not row.first_event_at or timestamp_text < row.first_event_at: + row.first_event_at = timestamp_text + if not row.last_event_at or timestamp_text > row.last_event_at: + row.last_event_at = timestamp_text + + +def export_metrics(events: Iterable[dict[str, Any]], output_path: Path) -> None: + rows: dict[tuple[str, str, str, str, str, str, str], MetricRow] = {} + for event in events: + key = key_for_event(event) + row = rows.setdefault(key, MetricRow(*key)) + update_row(row, event) + + fieldnames = [ + "user_id", + "assignment_id", + "group_id", + "session_id", + "condition", + "course_id", + "task_id", + "event_count", + "first_event_at", + "last_event_at", + "doc_session_seconds", + *[f"{event_type}_events" for event_type in EVENT_FIELDS], + ] + with output_path.open("w", encoding="utf-8", newline="") as output_file: + writer = csv.DictWriter(output_file, fieldnames=fieldnames) + writer.writeheader() + for row in sorted(rows.values(), key=lambda r: (r.user_id, r.session_id)): + writer.writerow( + { + "user_id": row.user_id, + "assignment_id": row.assignment_id, + "group_id": row.group_id, + "session_id": row.session_id, + "condition": row.condition, + "course_id": row.course_id, + "task_id": row.task_id, + "event_count": row.event_count, + "first_event_at": row.first_event_at, + "last_event_at": row.last_event_at, + 
"doc_session_seconds": f"{row.doc_session_seconds:.3f}", + **{ + f"{event_type}_events": row.counts[event_type] + for event_type in EVENT_FIELDS + }, + } + ) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "input", + nargs="?", + type=Path, + help="Optional capture JSONL fallback file. Omit to read PostgreSQL.", + ) + parser.add_argument( + "--out", + type=Path, + default=None, + help="Output CSV file. Defaults to a timestamped capture-metrics-YYYYMMDD-HHMMSS.csv file.", + ) + parser.add_argument( + "--db", + action="store_true", + help="Read PostgreSQL. This is the default when no JSONL input is supplied.", + ) + parser.add_argument( + "--config", + type=Path, + default=Path("capture_config.json"), + help="Capture DB config JSON path. Ignored when CODECHAT_CAPTURE_* env vars are set.", + ) + parser.add_argument( + "--table", + default="events", + help='Capture events table name. Defaults to "events".', + ) + args = parser.parse_args() + + if args.db and args.input is not None: + parser.error("do not pass a JSONL input path with --db") + + events = ( + iter_jsonl_events(args.input) + if args.input is not None + else iter_db_events(load_db_config(args.config), args.table) + ) + output_path = args.out or default_output_path() + export_metrics(events, output_path) + print(f"Wrote {output_path}") + + +def default_output_path() -> Path: + timestamp = datetime.now(timezone.utc).astimezone().strftime("%Y%m%d-%H%M%S") + return Path(f"capture-metrics-{timestamp}.csv") + + +if __name__ == "__main__": + main() diff --git a/server/src/capture.rs b/server/src/capture.rs index c8904a75..0b317038 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -57,7 +57,14 @@ // * `timestamp` – RFC3339 timestamp (in UTC). // * `data` – JSON payload with event-specific details. 
-use std::{io, thread}; +use std::{ + env, + fs::{self, OpenOptions}, + io::{self, Write}, + path::{Path, PathBuf}, + sync::{Arc, Mutex}, + thread, +}; use chrono::{DateTime, Utc}; use log::{debug, error, info, warn}; @@ -79,6 +86,13 @@ pub mod event_types { pub const SESSION_END: &str = "session_end"; pub const COMPILE_END: &str = "compile_end"; pub const RUN_END: &str = "run_end"; + pub const TASK_START: &str = "task_start"; + pub const TASK_SUBMIT: &str = "task_submit"; + pub const DEBUG_TASK_START: &str = "debug_task_start"; + pub const DEBUG_TASK_SUBMIT: &str = "debug_task_submit"; + pub const HANDOFF_START: &str = "handoff_start"; + pub const HANDOFF_END: &str = "handoff_end"; + pub const REFLECTION_PROMPT_INSERTED: &str = "reflection_prompt_inserted"; } /// Configuration used to construct the PostgreSQL connection string. @@ -88,6 +102,8 @@ pub mod event_types { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CaptureConfig { pub host: String, + #[serde(default)] + pub port: Option, pub user: String, pub password: String, pub dbname: String, @@ -96,16 +112,111 @@ pub struct CaptureConfig { /// in `data` if desired. #[serde(default)] pub app_id: Option, + /// Local JSONL file used when PostgreSQL is unavailable. + #[serde(default)] + pub fallback_path: Option, } impl CaptureConfig { /// Build a libpq-style connection string. pub fn to_conn_str(&self) -> String { + let mut parts = vec![ + format!("host={}", self.host), + format!("user={}", self.user), + format!("password={}", self.password), + format!("dbname={}", self.dbname), + ]; + if let Some(port) = self.port { + parts.push(format!("port={port}")); + } + parts.join(" ") + } + + /// Return a human-readable summary that never includes the password. 
+ pub fn redacted_summary(&self) -> String { format!( - "host={} user={} password={} dbname={}", - self.host, self.user, self.password, self.dbname + "host={}, port={:?}, user={}, dbname={}, app_id={:?}, fallback_path={:?}", + self.host, self.port, self.user, self.dbname, self.app_id, self.fallback_path ) } + + /// Build capture configuration from environment variables. If no capture + /// host is configured, return `Ok(None)` so callers can fall back to a file. + pub fn from_env() -> Result, String> { + let Some(host) = env_var_trimmed("CODECHAT_CAPTURE_HOST") else { + return Ok(None); + }; + + let port = match env_var_trimmed("CODECHAT_CAPTURE_PORT") { + Some(port) => Some(port.parse::().map_err(|err| { + format!("CODECHAT_CAPTURE_PORT must be a valid port number: {err}") + })?), + None => None, + }; + + Ok(Some(Self { + host, + port, + user: required_env_var("CODECHAT_CAPTURE_USER")?, + password: required_env_var("CODECHAT_CAPTURE_PASSWORD")?, + dbname: required_env_var("CODECHAT_CAPTURE_DBNAME")?, + app_id: env_var_trimmed("CODECHAT_CAPTURE_APP_ID"), + fallback_path: env_var_trimmed("CODECHAT_CAPTURE_FALLBACK_PATH").map(PathBuf::from), + })) + } +} + +fn env_var_trimmed(name: &str) -> Option { + env::var(name) + .ok() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) +} + +fn required_env_var(name: &str) -> Result { + env_var_trimmed(name).ok_or_else(|| format!("{name} is required when capture env is used")) +} + +/// Capture worker health, exposed through `/capture/status` and the VS Code +/// status item. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct CaptureStatus { + pub enabled: bool, + pub state: String, + pub queued_events: u64, + pub persisted_events: u64, + pub fallback_events: u64, + pub failed_events: u64, + pub last_error: Option, + pub fallback_path: Option, +} + +impl CaptureStatus { + pub fn disabled() -> Self { + Self { + enabled: false, + state: "disabled".to_string(), + queued_events: 0, + persisted_events: 0, + fallback_events: 0, + failed_events: 0, + last_error: None, + fallback_path: None, + } + } + + fn starting(fallback_path: Option) -> Self { + Self { + enabled: true, + state: "starting".to_string(), + queued_events: 0, + persisted_events: 0, + fallback_events: 0, + failed_events: 0, + last_error: None, + fallback_path, + } + } } /// The in-memory representation of a single capture event. @@ -175,6 +286,7 @@ type WorkerMsg = CaptureEvent; #[derive(Clone)] pub struct EventCapture { tx: mpsc::UnboundedSender, + status: Arc>, } impl EventCapture { @@ -184,17 +296,24 @@ impl EventCapture { /// This function is synchronous so it can be called from non-async server /// setup code. It spawns an async task internally which performs the /// database connection and event processing. - pub fn new(config: CaptureConfig) -> Result { + pub fn new(mut config: CaptureConfig) -> Result { + let fallback_path = config + .fallback_path + .get_or_insert_with(|| PathBuf::from("capture-events-fallback.jsonl")) + .clone(); let conn_str = config.to_conn_str(); + let status = Arc::new(Mutex::new(CaptureStatus::starting(Some( + fallback_path.clone(), + )))); // High-level DB connection details (no password). 
info!( - "Capture: preparing PostgreSQL connection (host={}, dbname={}, user={}, app_id={:?})", - config.host, config.dbname, config.user, config.app_id + "Capture: preparing PostgreSQL connection ({})", + config.redacted_summary() ); - debug!("Capture: raw PostgreSQL connection string: {}", conn_str); let (tx, mut rx) = mpsc::unbounded_channel::(); + let status_worker = status.clone(); // Create a dedicated runtime so capture can be started from sync code // before the Actix/Tokio server runtime exists. @@ -208,16 +327,27 @@ impl EventCapture { .expect("Capture: failed to build Tokio runtime"); runtime.block_on(async move { - info!("Capture: attempting to connect to PostgreSQL…"); + info!("Capture: attempting to connect to PostgreSQL."); match tokio_postgres::connect(&conn_str, NoTls).await { Ok((client, connection)) => { info!("Capture: successfully connected to PostgreSQL."); + update_status(&status_worker, |status| { + status.state = "database".to_string(); + status.last_error = None; + }); // Drive the connection in its own task. 
+ let status_connection = status_worker.clone(); tokio::spawn(async move { if let Err(err) = connection.await { error!("Capture PostgreSQL connection error: {err}"); + update_status(&status_connection, |status| { + status.state = "fallback".to_string(); + status.last_error = Some(format!( + "PostgreSQL connection error: {err}" + )); + }); } }); @@ -238,7 +368,25 @@ impl EventCapture { "Capture: FAILED to insert event (type={}, user_id={}): {err}", event.event_type, event.user_id ); + update_status(&status_worker, |status| { + status.state = "fallback".to_string(); + status.last_error = Some(format!( + "PostgreSQL insert failed: {err}" + )); + }); + write_event_to_fallback( + &fallback_path, + &event, + &status_worker, + Some(format!("PostgreSQL insert failed: {err}")), + ); } else { + update_status(&status_worker, |status| { + status.persisted_events += 1; + if status.state != "database" { + status.state = "database".to_string(); + } + }); debug!("Capture: event insert successful."); } } @@ -254,10 +402,26 @@ impl EventCapture { log_pg_connect_error(&ctx, &err); - // Drain and drop any events so we don't hold the sender. 
- warn!("Capture: draining pending events after failed DB connection."); - while rx.recv().await.is_some() {} - warn!("Capture: all pending events dropped due to connection failure."); + update_status(&status_worker, |status| { + status.state = "fallback".to_string(); + status.last_error = Some(format!( + "PostgreSQL connection failed: {err}" + )); + }); + + warn!( + "Capture: writing pending events to fallback JSONL at {:?}.", + fallback_path + ); + while let Some(event) = rx.recv().await { + write_event_to_fallback( + &fallback_path, + &event, + &status_worker, + Some("PostgreSQL connection unavailable".to_string()), + ); + } + warn!("Capture: event channel closed; fallback worker exiting."); } } }); @@ -266,7 +430,7 @@ impl EventCapture { io::Error::other(format!("Capture: failed to start worker thread: {err}")) })?; - Ok(Self { tx }) + Ok(Self { tx, status }) } /// Enqueue an event for insertion. This is non-blocking. @@ -278,10 +442,87 @@ impl EventCapture { if let Err(err) = self.tx.send(event) { error!("Capture: FAILED to enqueue capture event: {err}"); + update_status(&self.status, |status| { + status.failed_events += 1; + status.last_error = Some(format!("Failed to enqueue capture event: {err}")); + }); + } else { + update_status(&self.status, |status| { + status.queued_events += 1; + }); + } + } + + pub fn status(&self) -> CaptureStatus { + self.status + .lock() + .map(|status| status.clone()) + .unwrap_or_else(|_| { + let mut status = CaptureStatus::disabled(); + status.last_error = Some("Capture status lock is poisoned".to_string()); + status + }) + } +} + +fn update_status(status: &Arc>, f: impl FnOnce(&mut CaptureStatus)) { + match status.lock() { + Ok(mut guard) => f(&mut guard), + Err(err) => error!("Capture: unable to update status: {err}"), + } +} + +fn write_event_to_fallback( + fallback_path: &Path, + event: &CaptureEvent, + status: &Arc>, + last_error: Option, +) { + match append_fallback_event(fallback_path, event) { + Ok(()) => 
update_status(status, |status| { + status.fallback_events += 1; + status.last_error = last_error; + }), + Err(err) => { + error!( + "Capture: FAILED to write fallback event to {:?}: {err}", + fallback_path + ); + update_status(status, |status| { + status.failed_events += 1; + status.last_error = Some(format!("Fallback write failed: {err}")); + }); } } } +fn append_fallback_event(fallback_path: &Path, event: &CaptureEvent) -> io::Result<()> { + if let Some(parent) = fallback_path.parent() + && !parent.as_os_str().is_empty() + { + fs::create_dir_all(parent)?; + } + + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(fallback_path)?; + let record = serde_json::json!({ + "fallback_timestamp": Utc::now().to_rfc3339(), + "event": { + "user_id": event.user_id, + "assignment_id": event.assignment_id, + "group_id": event.group_id, + "file_path": event.file_path, + "event_type": event.event_type, + "timestamp": event.timestamp.to_rfc3339(), + "data": event.data, + } + }); + writeln!(file, "{record}")?; + Ok(()) +} + fn log_pg_connect_error(context: &str, err: &tokio_postgres::Error) { // If Postgres returned a structured DbError, log it ONCE and bail. 
if let Some(db) = err.as_db_error() { @@ -358,10 +599,12 @@ mod tests { fn capture_config_to_conn_str_is_well_formed() { let cfg = CaptureConfig { host: "localhost".to_string(), + port: Some(5432), user: "alice".to_string(), password: "secret".to_string(), dbname: "codechat_capture".to_string(), app_id: Some("spring25-study".to_string()), + fallback_path: Some(PathBuf::from("capture-events-fallback.jsonl")), }; let conn = cfg.to_conn_str(); @@ -371,6 +614,8 @@ mod tests { assert!(conn.contains("user=alice")); assert!(conn.contains("password=secret")); assert!(conn.contains("dbname=codechat_capture")); + assert!(conn.contains("port=5432")); + assert!(!cfg.redacted_summary().contains("secret")); } #[test] @@ -427,18 +672,25 @@ mod tests { { "host": "db.example.com", "user": "bob", + "port": 5433, "password": "hunter2", "dbname": "cc_events", - "app_id": "fall25" + "app_id": "fall25", + "fallback_path": "capture-events-fallback.jsonl" } "#; let cfg: CaptureConfig = serde_json::from_str(json_text).expect("JSON should parse"); assert_eq!(cfg.host, "db.example.com"); + assert_eq!(cfg.port, Some(5433)); assert_eq!(cfg.user, "bob"); assert_eq!(cfg.password, "hunter2"); assert_eq!(cfg.dbname, "cc_events"); assert_eq!(cfg.app_id.as_deref(), Some("fall25")); + assert_eq!( + cfg.fallback_path.as_deref(), + Some(std::path::Path::new("capture-events-fallback.jsonl")) + ); // And it should serialize back to JSON without error let _back = serde_json::to_string(&cfg).expect("Should serialize"); diff --git a/server/src/ide.rs b/server/src/ide.rs index 181bde33..bdd5147a 100644 --- a/server/src/ide.rs +++ b/server/src/ide.rs @@ -93,6 +93,7 @@ async fn start_server( // Provide a class to start and stop the server. All its fields are opaque, // since only Rust should use them. 
pub struct CodeChatEditorServer { + app_state: WebAppState, server_handle: ServerHandle, from_ide_tx: Sender, to_ide_rx: Arc>>, @@ -141,6 +142,7 @@ impl CodeChatEditorServer { let (expired_messages_tx, expired_messages_rx) = mpsc::channel(100); Ok(CodeChatEditorServer { + app_state, server_handle, from_ide_tx: websocket_queues.from_websocket_tx, to_ide_rx: Arc::new(Mutex::new(websocket_queues.to_websocket_rx)), @@ -259,6 +261,10 @@ impl CodeChatEditorServer { .await } + pub fn capture_status(&self) -> crate::capture::CaptureStatus { + webserver::capture_status(&self.app_state) + } + // Send a `CurrentFile` message. The other parameter (true if text/false if // binary/None if ignored) is ignored by the server, so it's always sent as // `None`. diff --git a/server/src/main.rs b/server/src/main.rs index bc443035..f98d4c96 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -332,10 +332,15 @@ fn port_in_range(s: &str) -> Result { fn parse_credentials(s: &str) -> Result { // For simplicity, require a username to have no colons. 
- let split: Vec<_> = s.splitn(2, ":").collect(); + let Some((username, password)) = s.split_once(':') else { + return Err("auth must use the form username:password".to_string()); + }; + if username.is_empty() { + return Err("auth username may not be empty".to_string()); + } Ok(Credentials { - username: split[0].to_string(), - password: split[1].to_string(), + username: username.to_string(), + password: password.to_string(), }) } diff --git a/server/src/translation.rs b/server/src/translation.rs index f1116ccf..46ca7b9d 100644 --- a/server/src/translation.rs +++ b/server/src/translation.rs @@ -443,8 +443,13 @@ struct CaptureContext { user_id: Option, assignment_id: Option, group_id: Option, + condition: Option, + course_id: Option, + task_id: Option, + event_source: Option, session_id: Option, client_tz_offset_min: Option, + schema_version: Option, } impl CaptureContext { @@ -458,6 +463,21 @@ impl CaptureContext { if let Some(group_id) = &wire.group_id { self.group_id = Some(group_id.clone()); } + if let Some(condition) = &wire.condition { + self.condition = Some(condition.clone()); + } + if let Some(course_id) = &wire.course_id { + self.course_id = Some(course_id.clone()); + } + if let Some(task_id) = &wire.task_id { + self.task_id = Some(task_id.clone()); + } + if let Some(event_source) = &wire.event_source { + self.event_source = Some(event_source.clone()); + } + if let Some(schema_version) = wire.schema_version { + self.schema_version = Some(schema_version); + } if let Some(client_tz_offset_min) = wire.client_tz_offset_min { self.client_tz_offset_min = Some(client_tz_offset_min); } @@ -486,11 +506,22 @@ impl CaptureContext { data.entry("session_id".to_string()) .or_insert_with(|| serde_json::json!(session_id)); } + data.entry("source".to_string()) + .or_insert_with(|| serde_json::json!("server_translation")); Some(CaptureEventWire { + event_id: None, + sequence_number: None, + schema_version: self.schema_version, user_id: self.user_id.clone()?, assignment_id: 
self.assignment_id.clone(), group_id: self.group_id.clone(), + condition: self.condition.clone(), + course_id: self.course_id.clone(), + task_id: self.task_id.clone(), + event_source: self.event_source.clone(), + language_id: None, + file_hash: None, file_path, event_type: event_type.to_string(), client_timestamp_ms: None, diff --git a/server/src/webserver.rs b/server/src/webserver.rs index fbeb86e6..e9d4e214 100644 --- a/server/src/webserver.rs +++ b/server/src/webserver.rs @@ -97,7 +97,7 @@ use crate::{ }, }; -use crate::capture::{CaptureConfig, CaptureEvent, EventCapture}; +use crate::capture::{CaptureConfig, CaptureEvent, CaptureStatus, EventCapture}; use chrono::Utc; @@ -407,12 +407,30 @@ pub struct Credentials { #[derive(Debug, Serialize, Deserialize, PartialEq, TS)] #[ts(export, optional_fields)] pub struct CaptureEventWire { + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub sequence_number: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub schema_version: Option, pub user_id: String, #[serde(skip_serializing_if = "Option::is_none")] pub assignment_id: Option, #[serde(skip_serializing_if = "Option::is_none")] pub group_id: Option, #[serde(skip_serializing_if = "Option::is_none")] + pub condition: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub course_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub task_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub event_source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub language_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub file_hash: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub file_path: Option, pub event_type: String, @@ -623,13 +641,23 @@ async fn capture_endpoint( app_state: WebAppState, payload: web::Json, ) -> HttpResponse { - log_capture_event(&app_state, payload.into_inner()); - 
HttpResponse::Ok().finish() + let status = log_capture_event(&app_state, payload.into_inner()); + if status.enabled { + HttpResponse::Accepted().json(status) + } else { + HttpResponse::ServiceUnavailable().json(status) + } +} + +#[get("/capture/status")] +async fn capture_status_endpoint(app_state: WebAppState) -> HttpResponse { + HttpResponse::Ok().json(capture_status(&app_state)) } /// Log a capture event if capture is enabled. -pub fn log_capture_event(app_state: &WebAppState, wire: CaptureEventWire) { +pub fn log_capture_event(app_state: &WebAppState, wire: CaptureEventWire) -> CaptureStatus { if let Some(capture) = &app_state.capture { + let server_timestamp = Utc::now(); // Default missing data to empty object let mut data = wire.data.unwrap_or_else(|| serde_json::json!({})); @@ -641,12 +669,49 @@ pub fn log_capture_event(app_state: &WebAppState, wire: CaptureEventWire) { // Add client timestamp fields if present (even if extension also sends them; // overwriting is fine and consistent). 
if let serde_json::Value::Object(map) = &mut data { + if let Some(event_id) = &wire.event_id { + map.insert("event_id".to_string(), serde_json::json!(event_id)); + } + if let Some(sequence_number) = wire.sequence_number { + map.insert( + "sequence_number".to_string(), + serde_json::json!(sequence_number), + ); + } + if let Some(schema_version) = wire.schema_version { + map.insert( + "schema_version".to_string(), + serde_json::json!(schema_version), + ); + } + if let Some(condition) = &wire.condition { + map.insert("condition".to_string(), serde_json::json!(condition)); + } + if let Some(course_id) = &wire.course_id { + map.insert("course_id".to_string(), serde_json::json!(course_id)); + } + if let Some(task_id) = &wire.task_id { + map.insert("task_id".to_string(), serde_json::json!(task_id)); + } + if let Some(event_source) = &wire.event_source { + map.insert("event_source".to_string(), serde_json::json!(event_source)); + } + if let Some(language_id) = &wire.language_id { + map.insert("language_id".to_string(), serde_json::json!(language_id)); + } + if let Some(file_hash) = &wire.file_hash { + map.insert("file_hash".to_string(), serde_json::json!(file_hash)); + } if let Some(ms) = wire.client_timestamp_ms { map.insert("client_timestamp_ms".to_string(), serde_json::json!(ms)); } if let Some(tz) = wire.client_tz_offset_min { map.insert("client_tz_offset_min".to_string(), serde_json::json!(tz)); } + map.insert( + "server_timestamp_ms".to_string(), + serde_json::json!(server_timestamp.timestamp_millis()), + ); } let event = CaptureEvent { @@ -656,14 +721,25 @@ pub fn log_capture_event(app_state: &WebAppState, wire: CaptureEventWire) { file_path: wire.file_path, event_type: wire.event_type, // Server decides when the event is recorded. 
- timestamp: Utc::now(), + timestamp: server_timestamp, data, }; capture.log(event); + capture.status() + } else { + CaptureStatus::disabled() } } +pub fn capture_status(app_state: &WebAppState) -> CaptureStatus { + app_state + .capture + .as_ref() + .map(EventCapture::status) + .unwrap_or_else(CaptureStatus::disabled) +} + // Get the `mode` query parameter to determine `is_test_mode`; default to // `false`. pub fn get_test_mode(req: &HttpRequest) -> bool { @@ -1588,36 +1664,19 @@ pub fn configure_logger(level: LevelFilter) -> Result<(), Box) -> WebAppState { // Initialize event capture from a config file (optional). - let capture: Option = { - // Build path: /capture_config.json - let mut config_path = ROOT_PATH.lock().unwrap().clone(); - config_path.push("capture_config.json"); - - match fs::read_to_string(&config_path) { - Ok(json) => match serde_json::from_str::(&json) { - Ok(cfg) => match EventCapture::new(cfg) { - Ok(ec) => { - eprintln!("Capture: enabled (config file: {config_path:?})"); - Some(ec) - } - Err(err) => { - eprintln!("Capture: failed to initialize from {config_path:?}: {err}"); - None - } - }, - Err(err) => { - eprintln!("Capture: invalid JSON in {config_path:?}: {err}"); - None - } - }, + let capture: Option = load_capture_config().and_then(|cfg| { + let summary = cfg.redacted_summary(); + match EventCapture::new(cfg) { + Ok(ec) => { + eprintln!("Capture: enabled ({summary})"); + Some(ec) + } Err(err) => { - eprintln!( - "Capture: disabled (config file not found or unreadable: {config_path:?}: {err})" - ); + eprintln!("Capture: failed to initialize ({summary}): {err}"); None } } - }; + }); web::Data::new(AppState { server_handle: Mutex::new(None), @@ -1633,6 +1692,50 @@ pub fn make_app_data(credentials: Option) -> WebAppState { }) } +fn load_capture_config() -> Option { + match CaptureConfig::from_env() { + Ok(Some(cfg)) => return Some(with_default_capture_fallback_path(cfg)), + Ok(None) => {} + Err(err) => { + eprintln!("Capture: invalid 
environment configuration: {err}"); + return None; + } + } + + let mut config_path = ROOT_PATH.lock().unwrap().clone(); + config_path.push("capture_config.json"); + + match fs::read_to_string(&config_path) { + Ok(json) => match serde_json::from_str::(&json) { + Ok(cfg) => Some(with_default_capture_fallback_path(cfg)), + Err(err) => { + eprintln!("Capture: invalid JSON in {config_path:?}: {err}"); + None + } + }, + Err(err) => { + eprintln!( + "Capture: disabled (no CODECHAT_CAPTURE_* env and no readable config at {config_path:?}: {err})" + ); + None + } + } +} + +fn with_default_capture_fallback_path(mut cfg: CaptureConfig) -> CaptureConfig { + let root_path = ROOT_PATH.lock().unwrap().clone(); + match &cfg.fallback_path { + Some(path) if path.is_relative() => { + cfg.fallback_path = Some(root_path.join(path)); + } + Some(_) => {} + None => { + cfg.fallback_path = Some(root_path.join("capture-events-fallback.jsonl")); + } + } + cfg +} + // Configure the web application. I'd like to make this return an // `App`, but `AppEntry` is a private module. pub fn configure_app(app: App, app_data: &WebAppState) -> App @@ -1660,6 +1763,7 @@ where .service(ping) .service(stop) .service(capture_endpoint) + .service(capture_status_endpoint) // Reroute to the filewatcher filesystem for typical user-requested // URLs. 
.route("/", web::get().to(filewatcher_root_fs_redirect)) From 3db0ef35a083ba036b6ef47eea2214f14c8c5c86 Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Sun, 3 May 2026 09:34:07 -0600 Subject: [PATCH 13/19] Fix capture enum clippy warning --- server/src/ide.rs | 2 +- server/src/translation.rs | 4 ++-- server/src/webserver.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/ide.rs b/server/src/ide.rs index bdd5147a..f7b4e8ee 100644 --- a/server/src/ide.rs +++ b/server/src/ide.rs @@ -257,7 +257,7 @@ impl CodeChatEditorServer { &self, capture_event: webserver::CaptureEventWire, ) -> std::io::Result { - self.send_message_timeout(EditorMessageContents::Capture(capture_event)) + self.send_message_timeout(EditorMessageContents::Capture(Box::new(capture_event))) .await } diff --git a/server/src/translation.rs b/server/src/translation.rs index 46ca7b9d..97ab61b4 100644 --- a/server/src/translation.rs +++ b/server/src/translation.rs @@ -613,7 +613,7 @@ pub async fn translation_task( EditorMessageContents::Update(_) => continue_loop = tt.ide_update(ide_message).await, EditorMessageContents::Capture(capture_event) => { tt.capture_context.update_from_wire(&capture_event); - log_capture_event(&app_state, capture_event); + log_capture_event(&app_state, *capture_event); send_response(&tt.to_ide_tx, ide_message.id, Ok(ResultOkTypes::Void)).await; }, @@ -713,7 +713,7 @@ pub async fn translation_task( EditorMessageContents::Update(_) => continue_loop = tt.client_update(client_message).await, EditorMessageContents::Capture(capture_event) => { tt.capture_context.update_from_wire(&capture_event); - log_capture_event(&app_state, capture_event); + log_capture_event(&app_state, *capture_event); send_response(&tt.to_client_tx, client_message.id, Ok(ResultOkTypes::Void)).await; }, diff --git a/server/src/webserver.rs b/server/src/webserver.rs index e9d4e214..43a93031 100644 --- 
a/server/src/webserver.rs +++ b/server/src/webserver.rs @@ -208,7 +208,7 @@ pub enum EditorMessageContents { Option, ), /// Record an instrumentation event. Valid destinations: Server. - Capture(CaptureEventWire), + Capture(Box), // #### These messages may only be sent by the IDE. /// This is the first message sent when the IDE starts up. It may only be From bf6e60d5df96f73f241a19e58397d42fc70bff7a Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 8 May 2026 11:19:46 -0600 Subject: [PATCH 14/19] Add rich capture analysis dataset and schema --- .gitignore | 2 + server/scripts/capture_events_schema.sql | 184 ++++ server/scripts/export_capture_metrics.py | 1261 +++++++++++++++++++--- server/src/capture.rs | 176 ++- 4 files changed, 1474 insertions(+), 149 deletions(-) create mode 100644 server/scripts/capture_events_schema.sql diff --git a/.gitignore b/.gitignore index 867dfce6..d5a73a07 100644 --- a/.gitignore +++ b/.gitignore @@ -27,8 +27,10 @@ target/ /capture_config.json /capture-events-fallback.jsonl /capture-metrics-*.csv +/capture-analysis-*/ /server/scripts/output /server/scripts/capture-metrics-*.csv +/server/scripts/capture-analysis-*/ server/capture_config.json # CodeChat Editor lexer: python. See TODO. diff --git a/server/scripts/capture_events_schema.sql b/server/scripts/capture_events_schema.sql new file mode 100644 index 00000000..8e211e9c --- /dev/null +++ b/server/scripts/capture_events_schema.sql @@ -0,0 +1,184 @@ +-- CodeChat capture event schema for dissertation analysis. +-- +-- This script is safe to run on an existing legacy `events` table. It adds the +-- richer typed metadata columns, converts `timestamp` and `data` to analysis- +-- friendly PostgreSQL types, and backfills typed metadata from existing JSON +-- payloads where possible. 
+ +BEGIN; + +CREATE TABLE IF NOT EXISTS public.events ( + id BIGSERIAL PRIMARY KEY, + event_id TEXT, + sequence_number BIGINT, + schema_version INTEGER, + user_id TEXT NOT NULL, + assignment_id TEXT, + group_id TEXT, + condition TEXT, + course_id TEXT, + task_id TEXT, + session_id TEXT, + event_source TEXT, + language_id TEXT, + file_hash TEXT, + file_path TEXT, + path_privacy TEXT, + capture_mode TEXT, + event_type TEXT NOT NULL, + "timestamp" TIMESTAMPTZ NOT NULL DEFAULT now(), + client_timestamp_ms BIGINT, + client_tz_offset_min INTEGER, + server_timestamp_ms BIGINT, + data JSONB NOT NULL DEFAULT '{}'::jsonb, + inserted_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS event_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS sequence_number BIGINT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS schema_version INTEGER; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS condition TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS course_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS task_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS session_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS event_source TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS language_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS file_hash TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS path_privacy TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS capture_mode TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS client_timestamp_ms BIGINT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS client_tz_offset_min INTEGER; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS server_timestamp_ms BIGINT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS inserted_at TIMESTAMPTZ NOT NULL DEFAULT now(); + +DO $$ +DECLARE + current_type TEXT; +BEGIN + SELECT data_type INTO current_type + FROM information_schema.columns + WHERE table_schema = 
'public' + AND table_name = 'events' + AND column_name = 'timestamp'; + + IF current_type IS DISTINCT FROM 'timestamp with time zone' THEN + ALTER TABLE public.events + ALTER COLUMN "timestamp" TYPE TIMESTAMPTZ + USING COALESCE(NULLIF("timestamp"::text, '')::timestamptz, now()); + END IF; +END $$; + +DO $$ +DECLARE + current_type TEXT; +BEGIN + SELECT data_type INTO current_type + FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'events' + AND column_name = 'data'; + + IF current_type IS DISTINCT FROM 'jsonb' THEN + ALTER TABLE public.events + ALTER COLUMN data TYPE JSONB + USING CASE + WHEN data IS NULL OR btrim(data::text) = '' THEN '{}'::jsonb + ELSE data::jsonb + END; + END IF; +END $$; + +UPDATE public.events +SET data = '{}'::jsonb +WHERE data IS NULL; + +ALTER TABLE public.events ALTER COLUMN data SET DEFAULT '{}'::jsonb; +ALTER TABLE public.events ALTER COLUMN data SET NOT NULL; +ALTER TABLE public.events ALTER COLUMN "timestamp" SET DEFAULT now(); +ALTER TABLE public.events ALTER COLUMN "timestamp" SET NOT NULL; + +UPDATE public.events +SET + event_id = COALESCE(event_id, NULLIF(data->>'event_id', '')), + sequence_number = COALESCE( + sequence_number, + CASE + WHEN data->>'sequence_number' ~ '^-?[0-9]+$' + THEN (data->>'sequence_number')::bigint + END + ), + schema_version = COALESCE( + schema_version, + CASE + WHEN data->>'schema_version' ~ '^-?[0-9]+$' + THEN (data->>'schema_version')::integer + END + ), + condition = COALESCE(condition, NULLIF(data->>'condition', '')), + course_id = COALESCE(course_id, NULLIF(data->>'course_id', '')), + task_id = COALESCE(task_id, NULLIF(data->>'task_id', '')), + session_id = COALESCE(session_id, NULLIF(data->>'session_id', '')), + event_source = COALESCE(event_source, NULLIF(data->>'event_source', '')), + language_id = COALESCE( + language_id, + NULLIF(data->>'language_id', ''), + NULLIF(data->>'languageId', '') + ), + file_hash = COALESCE(file_hash, NULLIF(data->>'file_hash', '')), + 
path_privacy = COALESCE(path_privacy, NULLIF(data->>'path_privacy', '')), + capture_mode = COALESCE(capture_mode, NULLIF(data->>'capture_mode', '')), + client_timestamp_ms = COALESCE( + client_timestamp_ms, + CASE + WHEN data->>'client_timestamp_ms' ~ '^-?[0-9]+$' + THEN (data->>'client_timestamp_ms')::bigint + END + ), + client_tz_offset_min = COALESCE( + client_tz_offset_min, + CASE + WHEN data->>'client_tz_offset_min' ~ '^-?[0-9]+$' + THEN (data->>'client_tz_offset_min')::integer + END + ), + server_timestamp_ms = COALESCE( + server_timestamp_ms, + CASE + WHEN data->>'server_timestamp_ms' ~ '^-?[0-9]+$' + THEN (data->>'server_timestamp_ms')::bigint + ELSE floor(extract(epoch from "timestamp") * 1000)::bigint + END + ); + +CREATE INDEX IF NOT EXISTS events_timestamp_idx + ON public.events ("timestamp"); + +CREATE INDEX IF NOT EXISTS events_type_timestamp_idx + ON public.events (event_type, "timestamp"); + +CREATE INDEX IF NOT EXISTS events_participant_session_idx + ON public.events (user_id, assignment_id, session_id, task_id); + +CREATE INDEX IF NOT EXISTS events_file_hash_idx + ON public.events (file_hash) + WHERE file_hash IS NOT NULL; + +CREATE INDEX IF NOT EXISTS events_event_id_idx + ON public.events (event_id) + WHERE event_id IS NOT NULL; + +CREATE INDEX IF NOT EXISTS events_data_gin_idx + ON public.events USING GIN (data); + +COMMENT ON TABLE public.events IS + 'CodeChat dissertation capture events with typed analysis metadata and event-specific JSONB payloads.'; +COMMENT ON COLUMN public.events.user_id IS 'Participant identifier supplied by capture settings.'; +COMMENT ON COLUMN public.events.assignment_id IS 'Assignment or lab identifier supplied by capture settings.'; +COMMENT ON COLUMN public.events.group_id IS 'Study group, section, or cohort identifier.'; +COMMENT ON COLUMN public.events.condition IS 'Study condition, such as treatment, comparison, or capture-only.'; +COMMENT ON COLUMN public.events.session_id IS 'Capture session UUID emitted by 
the VS Code extension.'; +COMMENT ON COLUMN public.events.file_hash IS 'SHA-256 hash of the file path when path hashing is enabled.'; +COMMENT ON COLUMN public.events.file_path IS 'Raw captured file path; NULL when path hashing is enabled.'; +COMMENT ON COLUMN public.events.data IS 'Event-specific JSON payload. Duplicates typed metadata for portable fallback exports.'; + +COMMIT; diff --git a/server/scripts/export_capture_metrics.py b/server/scripts/export_capture_metrics.py index d95a7c66..e2ceea22 100644 --- a/server/scripts/export_capture_metrics.py +++ b/server/scripts/export_capture_metrics.py @@ -6,6 +6,10 @@ python server/scripts/export_capture_metrics.py --out capture-metrics.csv +To produce the richer analysis dataset: + + python server/scripts/export_capture_metrics.py --dataset-dir capture-analysis + The optional positional `input` is only for fallback JSONL logs: python server/scripts/export_capture_metrics.py capture-events-fallback.jsonl --out capture-metrics.csv @@ -15,19 +19,22 @@ import argparse import csv +import hashlib import json import os import re import shutil import subprocess -from collections import defaultdict -from dataclasses import dataclass, field +from collections import Counter, defaultdict, deque +from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path from typing import Any, Iterable, Iterator EVENT_FIELDS = [ + "session_start", + "session_end", "write_doc", "write_code", "doc_session", @@ -46,6 +53,292 @@ "reflection_prompt_inserted", ] +IDENTITY_FIELDS = [ + "user_id", + "assignment_id", + "group_id", + "session_id", + "condition", + "course_id", + "task_id", +] + +EVENT_ROW_FIELDS = [ + "event_index", + *IDENTITY_FIELDS, + "event_id", + "sequence_number", + "schema_version", + "event_source", + "event_type", + "timestamp", + "client_timestamp_ms", + "server_timestamp_ms", + "client_tz_offset_min", + "client_server_latency_ms", + "elapsed_session_seconds", + "gap_seconds", + "file_id", 
+ "file_hash", + "path_privacy", + "language_id", + "capture_mode", + "classification_basis", + "write_source", + "mode", + "activity_from", + "activity_to", + "duration_seconds", + "duration_ms", + "line_count", + "prompt_hash", + "prompt_length", + "command", + "task_name", + "task_source", + "exit_code", + "run_session_name", + "run_session_type", + "save_reason", + "doc_block_count_before", + "doc_block_count_after", + "diff_hunks", + "diff_inserted_chars", + "diff_deleted_units", + "diff_replacement_hunks", + "doc_block_transactions", + "doc_block_diff_hunks", + "doc_block_inserted_chars", + "doc_block_deleted_units", +] + +SESSION_SUMMARY_FIELDS = [ + *IDENTITY_FIELDS, + "event_count", + "first_event_at", + "last_event_at", + "active_span_seconds", + "events_per_minute", + "mean_gap_seconds", + "max_gap_seconds", + "doc_session_seconds", + "doc_session_share_of_span", + "write_events", + "doc_write_share", + *[f"{event_type}_events" for event_type in EVENT_FIELDS], + "doc_to_code_switches", + "code_to_doc_switches", + "compile_success_events", + "compile_failure_events", + "total_prompt_chars", + "unique_file_count", + "unique_language_count", + "file_ids", + "language_ids", + "event_sources", + "diff_hunks", + "diff_inserted_chars", + "diff_deleted_units", + "doc_block_transactions", + "doc_block_diff_hunks", + "doc_block_inserted_chars", + "doc_block_deleted_units", + "client_server_latency_ms_mean", + "client_server_latency_ms_max", + "first_sequence_number", + "last_sequence_number", + "missing_sequence_gaps", + "duplicate_event_ids", + "data_quality_notes", +] + +FILE_SUMMARY_FIELDS = [ + *IDENTITY_FIELDS, + "file_id", + "file_hash", + "language_id", + "path_privacy", + "event_count", + "first_event_at", + "last_event_at", + "active_span_seconds", + "doc_session_seconds", + "write_doc_events", + "write_code_events", + "save_events", + "compile_events", + "compile_end_events", + "run_events", + "run_end_events", + "switch_pane_events", + 
"line_count_first", + "line_count_last", + "line_count_max", + "doc_block_count_before_min", + "doc_block_count_after_last", + "classification_bases", + "write_sources", + "diff_hunks", + "diff_inserted_chars", + "diff_deleted_units", + "doc_block_transactions", + "doc_block_diff_hunks", + "doc_block_inserted_chars", + "doc_block_deleted_units", +] + +TASK_LIFECYCLE_FIELDS = [ + *IDENTITY_FIELDS, + "lifecycle_kind", + "lifecycle_index", + "completed", + "start_event_type", + "end_event_type", + "start_at", + "end_at", + "duration_seconds", + "start_event_id", + "end_event_id", + "start_file_id", + "end_file_id", + "language_id", + "command", + "data_quality_notes", +] + +LIFECYCLE_PAIRS = { + "task_start": ("task", "task_submit"), + "debug_task_start": ("debug_task", "debug_task_submit"), + "handoff_start": ("handoff", "handoff_end"), +} + +LIFECYCLE_END_TYPES = { + end_type: (kind, start_type) + for start_type, (kind, end_type) in LIFECYCLE_PAIRS.items() +} + +RAW_FILE_PATH_FIELD = "file_path" + +DB_METADATA_FIELDS = [ + "event_id", + "sequence_number", + "schema_version", + "condition", + "course_id", + "task_id", + "session_id", + "event_source", + "language_id", + "file_hash", + "path_privacy", + "capture_mode", + "client_timestamp_ms", + "client_tz_offset_min", + "server_timestamp_ms", +] + +FIELD_DESCRIPTIONS = { + "event_index": "One-based event order after sorting by timestamp and sequence number.", + "user_id": "Participant identifier supplied by capture settings.", + "assignment_id": "Assignment or lab identifier supplied by capture settings.", + "group_id": "Study group, section, or cohort identifier supplied by capture settings.", + "session_id": "Capture session UUID emitted by the VS Code extension.", + "condition": "Study condition, such as treatment, comparison, or capture-only.", + "course_id": "Course or deployment identifier supplied by capture settings.", + "task_id": "Task identifier supplied by capture settings.", + "event_id": 
"Client-generated UUID when available.", + "sequence_number": "Client-side monotonically increasing sequence number when available.", + "schema_version": "Capture payload schema version.", + "event_source": "Capture source, such as vscode_extension.", + "event_type": "Canonical CodeChat capture event type.", + "timestamp": "Server-recorded event timestamp.", + "client_timestamp_ms": "Client-side timestamp in milliseconds since Unix epoch.", + "server_timestamp_ms": "Server-side timestamp in milliseconds since Unix epoch.", + "client_tz_offset_min": "Client timezone offset from JavaScript Date().getTimezoneOffset().", + "client_server_latency_ms": "Approximate server timestamp minus client timestamp.", + "elapsed_session_seconds": "Seconds since the first event in the same participant/session/task row.", + "gap_seconds": "Seconds since the prior event in the same participant/session/task row.", + "file_id": "Privacy-preserving file identifier. Uses captured file hash when available, otherwise a SHA-256 hash of the captured path.", + "file_hash": "Captured SHA-256 file path hash when the extension supplied one.", + "file_path": "Raw captured file path. 
Only exported with --include-file-paths.", + "path_privacy": "Path privacy mode reported by capture settings.", + "language_id": "VS Code language identifier when available.", + "capture_mode": "Capture mode reported in event data.", + "classification_basis": "Server-side write-classification basis when available.", + "write_source": "Write event source, such as server_translation or CodeMirror update path.", + "mode": "Event-specific mode or CodeChat lexer mode.", + "activity_from": "Previous activity kind for switch_pane events.", + "activity_to": "New activity kind for switch_pane events.", + "duration_seconds": "Event-specific duration in seconds.", + "duration_ms": "Event-specific duration in milliseconds.", + "line_count": "Document line count captured on save events.", + "prompt_hash": "SHA-256 hash of the inserted reflection prompt.", + "prompt_length": "Length of the inserted reflection prompt.", + "command": "Lifecycle command name recorded by the extension.", + "task_name": "VS Code task name for compile/build events.", + "task_source": "VS Code task source for compile/build events.", + "exit_code": "Compile/build process exit code when available.", + "run_session_name": "VS Code debug/run session name.", + "run_session_type": "VS Code debug/run session type.", + "save_reason": "Save reason reported by the extension.", + "doc_block_count_before": "Documentation block count before a classified doc-block edit.", + "doc_block_count_after": "Documentation block count after a classified doc-block edit.", + "diff_hunks": "Number of text diff hunks in the event payload.", + "diff_inserted_chars": "Characters inserted across text diff hunks.", + "diff_deleted_units": "UTF-16 code units removed across text diff hunks.", + "diff_replacement_hunks": "Text diff hunks that both removed and inserted content.", + "doc_block_transactions": "Number of doc-block add/update/delete transactions.", + "doc_block_diff_hunks": "Nested text diff hunks inside doc-block 
transactions.", + "doc_block_inserted_chars": "Characters inserted inside doc-block transaction diffs.", + "doc_block_deleted_units": "UTF-16 code units removed inside doc-block transaction diffs.", + "event_count": "Number of events in the aggregate row.", + "first_event_at": "Earliest event timestamp in the aggregate row.", + "last_event_at": "Latest event timestamp in the aggregate row.", + "active_span_seconds": "Seconds between first and last event in the aggregate row.", + "events_per_minute": "Event count divided by active span in minutes.", + "mean_gap_seconds": "Mean within-session gap between consecutive timestamped events.", + "max_gap_seconds": "Largest within-session gap between consecutive timestamped events.", + "doc_session_seconds": "Total duration of doc_session events.", + "doc_session_share_of_span": "doc_session_seconds divided by active_span_seconds.", + "write_events": "write_doc_events plus write_code_events.", + "doc_write_share": "write_doc_events divided by all write events.", + "doc_to_code_switches": "switch_pane events moving from documentation to code.", + "code_to_doc_switches": "switch_pane events moving from code to documentation.", + "compile_success_events": "compile_end events with exit_code equal to 0.", + "compile_failure_events": "compile_end events with nonzero exit_code.", + "total_prompt_chars": "Sum of reflection prompt lengths.", + "unique_file_count": "Number of distinct file_id values in the aggregate row.", + "unique_language_count": "Number of distinct language_id values in the aggregate row.", + "file_ids": "Semicolon-delimited file_id values in the aggregate row.", + "language_ids": "Semicolon-delimited language_id values in the aggregate row.", + "event_sources": "Semicolon-delimited event_source values in the aggregate row.", + "client_server_latency_ms_mean": "Mean approximate client-to-server timestamp delta.", + "client_server_latency_ms_max": "Largest approximate client-to-server timestamp delta.", + 
"first_sequence_number": "Smallest captured sequence number in the aggregate row.", + "last_sequence_number": "Largest captured sequence number in the aggregate row.", + "missing_sequence_gaps": "Count of missing sequence-number slots within the aggregate row.", + "duplicate_event_ids": "Number of repeated event_id values in the aggregate row.", + "data_quality_notes": "Semicolon-delimited notes about missing or suspicious capture metadata.", + "line_count_first": "First observed line count for the file aggregate.", + "line_count_last": "Last observed line count for the file aggregate.", + "line_count_max": "Maximum observed line count for the file aggregate.", + "doc_block_count_before_min": "Minimum observed doc block count before edits.", + "doc_block_count_after_last": "Last observed doc block count after edits.", + "classification_bases": "Semicolon-delimited write classification bases.", + "write_sources": "Semicolon-delimited write event sources.", + "lifecycle_kind": "Lifecycle family: task, debug_task, or handoff.", + "lifecycle_index": "One-based lifecycle index within participant/session/task/kind.", + "completed": "1 when a lifecycle end event was observed, otherwise 0.", + "start_event_type": "Lifecycle start event type.", + "end_event_type": "Lifecycle end/submit event type.", + "start_at": "Lifecycle start timestamp.", + "end_at": "Lifecycle end timestamp.", + "start_event_id": "event_id for the lifecycle start event.", + "end_event_id": "event_id for the lifecycle end event.", + "start_file_id": "file_id associated with the lifecycle start event.", + "end_file_id": "file_id associated with the lifecycle end event.", +} + @dataclass(frozen=True) class DbConfig: @@ -56,22 +349,6 @@ class DbConfig: port: int | None = None -@dataclass -class MetricRow: - user_id: str - assignment_id: str - group_id: str - session_id: str - condition: str - course_id: str - task_id: str - event_count: int = 0 - first_event_at: str = "" - last_event_at: str = "" - 
doc_session_seconds: float = 0.0 - counts: dict[str, int] = field(default_factory=lambda: defaultdict(int)) - - def parse_timestamp(value: Any) -> datetime | None: if isinstance(value, datetime): return value @@ -95,6 +372,24 @@ def as_data(value: Any) -> dict[str, Any]: return {} +def normalize_db_record(record: dict[str, Any]) -> dict[str, Any]: + data = as_data(record.get("data")) + for field_name in DB_METADATA_FIELDS: + value = record.get(field_name) + if value is not None and data.get(field_name) is None: + data[field_name] = value + + return { + "user_id": record.get("user_id"), + "assignment_id": record.get("assignment_id"), + "group_id": record.get("group_id"), + "file_path": record.get("file_path"), + "event_type": record.get("event_type"), + "timestamp": record.get("timestamp"), + "data": data, + } + + def iter_jsonl_events(path: Path) -> Iterator[dict[str, Any]]: with path.open("r", encoding="utf-8") as input_file: for line_number, line in enumerate(input_file, start=1): @@ -224,32 +519,12 @@ def iter_db_events(config: DbConfig, table: str) -> Iterator[dict[str, Any]]: if config.port is not None: connect_kwargs["port"] = config.port - query = ( - "SELECT user_id, assignment_id, group_id, file_path, event_type, timestamp, data " - f"FROM {sql_identifier(table)} " - "ORDER BY timestamp" - ) + query = psql_json_query(table) with psycopg.connect(**connect_kwargs) as conn: with conn.cursor() as cursor: cursor.execute(query) - for ( - user_id, - assignment_id, - group_id, - file_path, - event_type, - timestamp, - data, - ) in cursor: - yield { - "user_id": user_id, - "assignment_id": assignment_id, - "group_id": group_id, - "file_path": file_path, - "event_type": event_type, - "timestamp": timestamp, - "data": as_data(data), - } + for (record_text,) in cursor: + yield normalize_db_record(json.loads(record_text)) def iter_db_events_with_psql(config: DbConfig, table: str) -> Iterator[dict[str, Any]]: @@ -303,11 +578,10 @@ def iter_db_events_with_psql(config: 
DbConfig, table: str) -> Iterator[dict[str, if not line: continue try: - event = json.loads(line) + record = json.loads(line) except json.JSONDecodeError as err: raise SystemExit(f"psql output line {line_number}: invalid JSON: {err}") from err - event["data"] = as_data(event.get("data")) - yield event + yield normalize_db_record(record) def find_psql() -> str | None: @@ -322,97 +596,777 @@ def find_psql() -> str | None: def psql_json_query(table: str) -> str: return ( - "SELECT json_build_object(" - "'user_id', user_id, " - "'assignment_id', assignment_id, " - "'group_id', group_id, " - "'file_path', file_path, " - "'event_type', event_type, " - "'timestamp', timestamp, " - "'data', data" - ")::text " - f"FROM {sql_identifier(table)} " - "ORDER BY timestamp" + "SELECT to_jsonb(events_row)::text " + f"FROM {sql_identifier(table)} AS events_row " + 'ORDER BY events_row."timestamp"' ) -def key_for_event(event: dict[str, Any]) -> tuple[str, str, str, str, str, str, str]: - data = event["data"] - return ( - str(event.get("user_id") or ""), - str(event.get("assignment_id") or ""), - str(event.get("group_id") or ""), - str(data.get("session_id") or ""), - str(data.get("condition") or ""), - str(data.get("course_id") or ""), - str(data.get("task_id") or ""), +def text_value(value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value + if isinstance(value, (dict, list)): + return json.dumps(value, sort_keys=True, separators=(",", ":")) + return str(value) + + +def first_data_value(data: dict[str, Any], *names: str) -> Any: + for name in names: + if name in data and data[name] is not None: + return data[name] + return None + + +def data_text(data: dict[str, Any], *names: str) -> str: + return text_value(first_data_value(data, *names)) + + +def int_value(value: Any) -> int | None: + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + if isinstance(value, float) and value.is_integer(): + return 
int(value) + if isinstance(value, str): + value = value.strip() + if not value: + return None + try: + return int(value) + except ValueError: + return None + return None + + +def float_value(value: Any) -> float | None: + if isinstance(value, bool): + return float(value) + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + value = value.strip() + if not value: + return None + try: + return float(value) + except ValueError: + return None + return None + + +def int_or_blank(value: Any) -> int | str: + number = int_value(value) + return number if number is not None else "" + + +def float_or_blank(value: Any) -> float | str: + number = float_value(value) + return number if number is not None else "" + + +def csv_value(value: Any) -> str | int: + if value is None: + return "" + if isinstance(value, float): + return f"{value:.3f}" + if isinstance(value, bool): + return "1" if value else "0" + return value + + +def write_csv(path: Path, fieldnames: list[str], rows: Iterable[dict[str, Any]]) -> None: + if path.parent != Path("."): + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8", newline="") as output_file: + writer = csv.DictWriter(output_file, fieldnames=fieldnames, extrasaction="ignore") + writer.writeheader() + for row in rows: + writer.writerow({field: csv_value(row.get(field, "")) for field in fieldnames}) + + +def aware_datetime(value: datetime | None) -> datetime | None: + if value is None: + return None + if value.tzinfo is None: + return value.replace(tzinfo=timezone.utc) + return value.astimezone(timezone.utc) + + +def seconds_between(start: datetime | None, end: datetime | None) -> float | None: + if start is None or end is None: + return None + return (end - start).total_seconds() + + +def timestamp_for_csv(value: datetime | None, fallback: Any = "") -> str: + if value is not None: + return value.isoformat() + return text_value(fallback) + + +def sha256_text(value: str) -> str: + 
return hashlib.sha256(value.encode("utf-8")).hexdigest() + + +def file_id_for(file_path: str, file_hash: str) -> str: + if file_hash: + return file_hash + if file_path: + return sha256_text(file_path) + return "" + + +def fields_with_optional_file_path( + fieldnames: list[str], include_file_paths: bool +) -> list[str]: + if not include_file_paths or RAW_FILE_PATH_FIELD in fieldnames: + return fieldnames + fields = list(fieldnames) + insert_after = "file_hash" if "file_hash" in fields else "file_id" + fields.insert(fields.index(insert_after) + 1, RAW_FILE_PATH_FIELD) + return fields + + +def identity_key(row: dict[str, Any]) -> tuple[str, ...]: + return tuple(text_value(row.get(field)) for field in IDENTITY_FIELDS) + + +def semicolon_join(values: Iterable[str]) -> str: + return ";".join(sorted(value for value in values if value)) + + +def add_number(acc: list[float], value: Any) -> None: + number = float_value(value) + if number is not None: + acc.append(number) + + +def string_diff_stats(value: Any) -> Counter[str]: + stats: Counter[str] = Counter() + if isinstance(value, list): + for item in value: + stats.update(string_diff_stats(item)) + return stats + if not isinstance(value, dict): + return stats + + if "from" in value and "insert" in value: + from_value = int_value(value.get("from")) or 0 + to_value = int_value(value.get("to")) + removed_units = max(0, (to_value or from_value) - from_value) + inserted_chars = len(text_value(value.get("insert"))) + stats["hunks"] += 1 + stats["inserted_chars"] += inserted_chars + stats["deleted_units"] += removed_units + if removed_units > 0 and inserted_chars > 0: + stats["replacement_hunks"] += 1 + return stats + + for child in value.values(): + stats.update(string_diff_stats(child)) + return stats + + +def doc_block_contents(value: Any) -> str: + if isinstance(value, list) and len(value) >= 5: + return text_value(value[4]) + if isinstance(value, dict): + return text_value(value.get("contents")) + return "" + + +def 
doc_block_diff_stats(value: Any) -> Counter[str]: + stats: Counter[str] = Counter() + if not isinstance(value, list): + return stats + + for transaction in value: + stats["transactions"] += 1 + if not isinstance(transaction, dict): + continue + if "Add" in transaction: + stats["inserted_chars"] += len(doc_block_contents(transaction["Add"])) + elif "Update" in transaction: + update_stats = string_diff_stats(transaction["Update"]) + stats["hunks"] += update_stats["hunks"] + stats["inserted_chars"] += update_stats["inserted_chars"] + stats["deleted_units"] += update_stats["deleted_units"] + elif "Delete" in transaction: + continue + else: + update_stats = string_diff_stats(transaction) + stats["hunks"] += update_stats["hunks"] + stats["inserted_chars"] += update_stats["inserted_chars"] + stats["deleted_units"] += update_stats["deleted_units"] + return stats + + +def normalize_event_rows( + events: Iterable[dict[str, Any]], include_file_paths: bool = False +) -> list[dict[str, Any]]: + sortable_rows: list[dict[str, Any]] = [] + for original_index, event in enumerate(events, start=1): + data = as_data(event.get("data")) + timestamp = aware_datetime(parse_timestamp(event.get("timestamp"))) + file_path = text_value(event.get("file_path")) + file_hash = data_text(data, "file_hash") + client_timestamp_ms = int_value(data.get("client_timestamp_ms")) + server_timestamp_ms = int_value(data.get("server_timestamp_ms")) + latency_ms = ( + server_timestamp_ms - client_timestamp_ms + if client_timestamp_ms is not None and server_timestamp_ms is not None + else "" + ) + diff_stats = string_diff_stats(data.get("diff")) + doc_block_stats = doc_block_diff_stats(data.get("doc_block_diff")) + + row: dict[str, Any] = { + "event_index": original_index, + "user_id": text_value(event.get("user_id")), + "assignment_id": text_value(event.get("assignment_id")), + "group_id": text_value(event.get("group_id")), + "session_id": data_text(data, "session_id"), + "condition": data_text(data, 
"condition"), + "course_id": data_text(data, "course_id"), + "task_id": data_text(data, "task_id"), + "event_id": data_text(data, "event_id"), + "sequence_number": int_or_blank(data.get("sequence_number")), + "schema_version": int_or_blank(data.get("schema_version")), + "event_source": data_text(data, "event_source"), + "event_type": text_value(event.get("event_type")), + "timestamp": timestamp_for_csv(timestamp, event.get("timestamp")), + "client_timestamp_ms": client_timestamp_ms + if client_timestamp_ms is not None + else "", + "server_timestamp_ms": server_timestamp_ms + if server_timestamp_ms is not None + else "", + "client_tz_offset_min": int_or_blank(data.get("client_tz_offset_min")), + "client_server_latency_ms": latency_ms, + "elapsed_session_seconds": "", + "gap_seconds": "", + "file_id": file_id_for(file_path, file_hash), + "file_hash": file_hash, + "path_privacy": data_text(data, "path_privacy"), + "language_id": data_text(data, "language_id", "languageId"), + "capture_mode": data_text(data, "capture_mode"), + "classification_basis": data_text(data, "classification_basis"), + "write_source": data_text(data, "source"), + "mode": data_text(data, "mode"), + "activity_from": data_text(data, "from"), + "activity_to": data_text(data, "to"), + "duration_seconds": float_or_blank(data.get("duration_seconds")), + "duration_ms": float_or_blank(data.get("duration_ms")), + "line_count": int_or_blank(first_data_value(data, "lineCount", "line_count")), + "prompt_hash": data_text(data, "prompt_hash"), + "prompt_length": int_or_blank(data.get("prompt_length")), + "command": data_text(data, "command"), + "task_name": data_text(data, "taskName", "task_name"), + "task_source": data_text(data, "taskSource", "task_source"), + "exit_code": int_or_blank(first_data_value(data, "exitCode", "exit_code")), + "run_session_name": data_text(data, "sessionName", "session_name"), + "run_session_type": data_text(data, "sessionType", "session_type"), + "save_reason": data_text(data, 
"reason"), + "doc_block_count_before": int_or_blank(data.get("doc_block_count_before")), + "doc_block_count_after": int_or_blank(data.get("doc_block_count_after")), + "diff_hunks": diff_stats["hunks"], + "diff_inserted_chars": diff_stats["inserted_chars"], + "diff_deleted_units": diff_stats["deleted_units"], + "diff_replacement_hunks": diff_stats["replacement_hunks"], + "doc_block_transactions": doc_block_stats["transactions"], + "doc_block_diff_hunks": doc_block_stats["hunks"], + "doc_block_inserted_chars": doc_block_stats["inserted_chars"], + "doc_block_deleted_units": doc_block_stats["deleted_units"], + } + if include_file_paths: + row[RAW_FILE_PATH_FIELD] = file_path + + sortable_rows.append( + { + "row": row, + "timestamp": timestamp, + "sequence_number": int_value(row["sequence_number"]), + "original_index": original_index, + } + ) + + max_timestamp = datetime.max.replace(tzinfo=timezone.utc) + sortable_rows.sort( + key=lambda item: ( + item["timestamp"] is None, + item["timestamp"] or max_timestamp, + item["sequence_number"] if item["sequence_number"] is not None else 10**18, + item["original_index"], + ) ) + first_by_session: dict[tuple[str, ...], datetime] = {} + previous_by_session: dict[tuple[str, ...], datetime] = {} + for event_index, item in enumerate(sortable_rows, start=1): + row = item["row"] + row["event_index"] = event_index + timestamp = item["timestamp"] + if timestamp is None: + continue + key = identity_key(row) + first = first_by_session.setdefault(key, timestamp) + row["elapsed_session_seconds"] = seconds_between(first, timestamp) or 0.0 + previous = previous_by_session.get(key) + if previous is not None: + row["gap_seconds"] = seconds_between(previous, timestamp) or 0.0 + previous_by_session[key] = timestamp + + return [item["row"] for item in sortable_rows] + + +def new_session_acc(row: dict[str, Any]) -> dict[str, Any]: + return { + "identity": {field: text_value(row.get(field)) for field in IDENTITY_FIELDS}, + "counts": Counter(), + 
"event_count": 0, + "first_dt": None, + "last_dt": None, + "gaps": [], + "doc_session_seconds": 0.0, + "doc_to_code_switches": 0, + "code_to_doc_switches": 0, + "compile_success_events": 0, + "compile_failure_events": 0, + "total_prompt_chars": 0, + "file_ids": set(), + "language_ids": set(), + "event_sources": set(), + "latencies": [], + "sequence_numbers": [], + "event_ids": Counter(), + "diff_hunks": 0, + "diff_inserted_chars": 0, + "diff_deleted_units": 0, + "doc_block_transactions": 0, + "doc_block_diff_hunks": 0, + "doc_block_inserted_chars": 0, + "doc_block_deleted_units": 0, + "missing_timestamp_count": 0, + "missing_session_count": 0, + "missing_schema_count": 0, + } -def update_row(row: MetricRow, event: dict[str, Any]) -> None: - event_type = str(event.get("event_type") or "") - data = event["data"] - row.event_count += 1 - if event_type in EVENT_FIELDS: - row.counts[event_type] += 1 + +def update_time_acc(acc: dict[str, Any], row: dict[str, Any]) -> None: + timestamp = aware_datetime(parse_timestamp(row.get("timestamp"))) + if timestamp is None: + acc["missing_timestamp_count"] += 1 + return + if acc["first_dt"] is None or timestamp < acc["first_dt"]: + acc["first_dt"] = timestamp + if acc["last_dt"] is None or timestamp > acc["last_dt"]: + acc["last_dt"] = timestamp + + +def update_session_acc(acc: dict[str, Any], row: dict[str, Any]) -> None: + event_type = text_value(row.get("event_type")) + acc["event_count"] += 1 + acc["counts"][event_type] += 1 + update_time_acc(acc, row) + + add_number(acc["gaps"], row.get("gap_seconds")) if event_type == "doc_session": - duration = data.get("duration_seconds") - if isinstance(duration, (int, float)): - row.doc_session_seconds += float(duration) - - parsed_timestamp = parse_timestamp(event.get("timestamp")) - if parsed_timestamp is not None: - timestamp_text = parsed_timestamp.isoformat() - if not row.first_event_at or timestamp_text < row.first_event_at: - row.first_event_at = timestamp_text - if not 
row.last_event_at or timestamp_text > row.last_event_at: - row.last_event_at = timestamp_text - - -def export_metrics(events: Iterable[dict[str, Any]], output_path: Path) -> None: - rows: dict[tuple[str, str, str, str, str, str, str], MetricRow] = {} - for event in events: - key = key_for_event(event) - row = rows.setdefault(key, MetricRow(*key)) - update_row(row, event) - - fieldnames = [ - "user_id", - "assignment_id", - "group_id", - "session_id", - "condition", - "course_id", - "task_id", - "event_count", - "first_event_at", - "last_event_at", - "doc_session_seconds", - *[f"{event_type}_events" for event_type in EVENT_FIELDS], + duration = float_value(row.get("duration_seconds")) + if duration is not None: + acc["doc_session_seconds"] += duration + if event_type == "switch_pane": + if row.get("activity_from") == "doc" and row.get("activity_to") == "code": + acc["doc_to_code_switches"] += 1 + if row.get("activity_from") == "code" and row.get("activity_to") == "doc": + acc["code_to_doc_switches"] += 1 + if event_type == "compile_end": + exit_code = int_value(row.get("exit_code")) + if exit_code == 0: + acc["compile_success_events"] += 1 + elif exit_code is not None: + acc["compile_failure_events"] += 1 + + acc["total_prompt_chars"] += int_value(row.get("prompt_length")) or 0 + if row.get("file_id"): + acc["file_ids"].add(text_value(row.get("file_id"))) + if row.get("language_id"): + acc["language_ids"].add(text_value(row.get("language_id"))) + if row.get("event_source"): + acc["event_sources"].add(text_value(row.get("event_source"))) + add_number(acc["latencies"], row.get("client_server_latency_ms")) + + sequence_number = int_value(row.get("sequence_number")) + if sequence_number is not None: + acc["sequence_numbers"].append(sequence_number) + event_id = text_value(row.get("event_id")) + if event_id: + acc["event_ids"][event_id] += 1 + + for field in [ + "diff_hunks", + "diff_inserted_chars", + "diff_deleted_units", + "doc_block_transactions", + 
"doc_block_diff_hunks", + "doc_block_inserted_chars", + "doc_block_deleted_units", + ]: + acc[field] += int_value(row.get(field)) or 0 + if not row.get("session_id"): + acc["missing_session_count"] += 1 + if not row.get("schema_version"): + acc["missing_schema_count"] += 1 + + +def finalize_session_acc(acc: dict[str, Any]) -> dict[str, Any]: + first_dt = acc["first_dt"] + last_dt = acc["last_dt"] + active_span = seconds_between(first_dt, last_dt) or 0.0 + counts = acc["counts"] + write_events = counts["write_doc"] + counts["write_code"] + sequence_numbers = sorted(set(acc["sequence_numbers"])) + missing_sequence_gaps = sum( + max(0, current - previous - 1) + for previous, current in zip(sequence_numbers, sequence_numbers[1:]) + ) + duplicate_event_ids = sum( + count - 1 for count in acc["event_ids"].values() if count > 1 + ) + notes = [] + if acc["missing_timestamp_count"]: + notes.append(f"missing_timestamp:{acc['missing_timestamp_count']}") + if acc["missing_session_count"]: + notes.append(f"missing_session_id:{acc['missing_session_count']}") + if acc["missing_schema_count"]: + notes.append(f"missing_schema_version:{acc['missing_schema_count']}") + if missing_sequence_gaps: + notes.append(f"missing_sequence_slots:{missing_sequence_gaps}") + if duplicate_event_ids: + notes.append(f"duplicate_event_ids:{duplicate_event_ids}") + + row = { + **acc["identity"], + "event_count": acc["event_count"], + "first_event_at": timestamp_for_csv(first_dt), + "last_event_at": timestamp_for_csv(last_dt), + "active_span_seconds": active_span, + "events_per_minute": (acc["event_count"] / (active_span / 60.0)) + if active_span > 0 + else "", + "mean_gap_seconds": sum(acc["gaps"]) / len(acc["gaps"]) if acc["gaps"] else "", + "max_gap_seconds": max(acc["gaps"]) if acc["gaps"] else "", + "doc_session_seconds": acc["doc_session_seconds"], + "doc_session_share_of_span": acc["doc_session_seconds"] / active_span + if active_span > 0 + else "", + "write_events": write_events, + 
"doc_write_share": counts["write_doc"] / write_events if write_events else "", + **{f"{event_type}_events": counts[event_type] for event_type in EVENT_FIELDS}, + "doc_to_code_switches": acc["doc_to_code_switches"], + "code_to_doc_switches": acc["code_to_doc_switches"], + "compile_success_events": acc["compile_success_events"], + "compile_failure_events": acc["compile_failure_events"], + "total_prompt_chars": acc["total_prompt_chars"], + "unique_file_count": len(acc["file_ids"]), + "unique_language_count": len(acc["language_ids"]), + "file_ids": semicolon_join(acc["file_ids"]), + "language_ids": semicolon_join(acc["language_ids"]), + "event_sources": semicolon_join(acc["event_sources"]), + "diff_hunks": acc["diff_hunks"], + "diff_inserted_chars": acc["diff_inserted_chars"], + "diff_deleted_units": acc["diff_deleted_units"], + "doc_block_transactions": acc["doc_block_transactions"], + "doc_block_diff_hunks": acc["doc_block_diff_hunks"], + "doc_block_inserted_chars": acc["doc_block_inserted_chars"], + "doc_block_deleted_units": acc["doc_block_deleted_units"], + "client_server_latency_ms_mean": sum(acc["latencies"]) / len(acc["latencies"]) + if acc["latencies"] + else "", + "client_server_latency_ms_max": max(acc["latencies"]) if acc["latencies"] else "", + "first_sequence_number": sequence_numbers[0] if sequence_numbers else "", + "last_sequence_number": sequence_numbers[-1] if sequence_numbers else "", + "missing_sequence_gaps": missing_sequence_gaps, + "duplicate_event_ids": duplicate_event_ids, + "data_quality_notes": ";".join(notes), + } + return row + + +def session_summary_rows(event_rows: Iterable[dict[str, Any]]) -> list[dict[str, Any]]: + accs: dict[tuple[str, ...], dict[str, Any]] = {} + for row in event_rows: + key = identity_key(row) + acc = accs.setdefault(key, new_session_acc(row)) + update_session_acc(acc, row) + return [ + finalize_session_acc(acc) + for _, acc in sorted(accs.items(), key=lambda item: item[0]) ] - with output_path.open("w", 
encoding="utf-8", newline="") as output_file: - writer = csv.DictWriter(output_file, fieldnames=fieldnames) - writer.writeheader() - for row in sorted(rows.values(), key=lambda r: (r.user_id, r.session_id)): - writer.writerow( + + +def new_file_acc(row: dict[str, Any], include_file_paths: bool) -> dict[str, Any]: + acc = { + "identity": {field: text_value(row.get(field)) for field in IDENTITY_FIELDS}, + "file_id": text_value(row.get("file_id")), + "file_hash": text_value(row.get("file_hash")), + "language_id": text_value(row.get("language_id")), + "path_privacy": text_value(row.get("path_privacy")), + "counts": Counter(), + "event_count": 0, + "first_dt": None, + "last_dt": None, + "doc_session_seconds": 0.0, + "line_count_first": "", + "line_count_last": "", + "line_count_max": "", + "doc_block_count_before_min": "", + "doc_block_count_after_last": "", + "classification_bases": set(), + "write_sources": set(), + "diff_hunks": 0, + "diff_inserted_chars": 0, + "diff_deleted_units": 0, + "doc_block_transactions": 0, + "doc_block_diff_hunks": 0, + "doc_block_inserted_chars": 0, + "doc_block_deleted_units": 0, + } + if include_file_paths: + acc[RAW_FILE_PATH_FIELD] = text_value(row.get(RAW_FILE_PATH_FIELD)) + return acc + + +def update_file_acc(acc: dict[str, Any], row: dict[str, Any]) -> None: + event_type = text_value(row.get("event_type")) + acc["event_count"] += 1 + acc["counts"][event_type] += 1 + update_time_acc(acc, row) + + if event_type == "doc_session": + duration = float_value(row.get("duration_seconds")) + if duration is not None: + acc["doc_session_seconds"] += duration + + line_count = int_value(row.get("line_count")) + if line_count is not None: + if acc["line_count_first"] == "": + acc["line_count_first"] = line_count + acc["line_count_last"] = line_count + acc["line_count_max"] = max(int_value(acc["line_count_max"]) or 0, line_count) + + before_count = int_value(row.get("doc_block_count_before")) + if before_count is not None: + current_min = 
int_value(acc["doc_block_count_before_min"]) + acc["doc_block_count_before_min"] = ( + before_count if current_min is None else min(current_min, before_count) + ) + after_count = int_value(row.get("doc_block_count_after")) + if after_count is not None: + acc["doc_block_count_after_last"] = after_count + + if row.get("classification_basis"): + acc["classification_bases"].add(text_value(row.get("classification_basis"))) + if row.get("write_source"): + acc["write_sources"].add(text_value(row.get("write_source"))) + + for field in [ + "diff_hunks", + "diff_inserted_chars", + "diff_deleted_units", + "doc_block_transactions", + "doc_block_diff_hunks", + "doc_block_inserted_chars", + "doc_block_deleted_units", + ]: + acc[field] += int_value(row.get(field)) or 0 + + +def finalize_file_acc(acc: dict[str, Any], include_file_paths: bool) -> dict[str, Any]: + first_dt = acc["first_dt"] + last_dt = acc["last_dt"] + row = { + **acc["identity"], + "file_id": acc["file_id"], + "file_hash": acc["file_hash"], + "language_id": acc["language_id"], + "path_privacy": acc["path_privacy"], + "event_count": acc["event_count"], + "first_event_at": timestamp_for_csv(first_dt), + "last_event_at": timestamp_for_csv(last_dt), + "active_span_seconds": seconds_between(first_dt, last_dt) or 0.0, + "doc_session_seconds": acc["doc_session_seconds"], + "write_doc_events": acc["counts"]["write_doc"], + "write_code_events": acc["counts"]["write_code"], + "save_events": acc["counts"]["save"], + "compile_events": acc["counts"]["compile"], + "compile_end_events": acc["counts"]["compile_end"], + "run_events": acc["counts"]["run"], + "run_end_events": acc["counts"]["run_end"], + "switch_pane_events": acc["counts"]["switch_pane"], + "line_count_first": acc["line_count_first"], + "line_count_last": acc["line_count_last"], + "line_count_max": acc["line_count_max"], + "doc_block_count_before_min": acc["doc_block_count_before_min"], + "doc_block_count_after_last": acc["doc_block_count_after_last"], + 
"classification_bases": semicolon_join(acc["classification_bases"]), + "write_sources": semicolon_join(acc["write_sources"]), + "diff_hunks": acc["diff_hunks"], + "diff_inserted_chars": acc["diff_inserted_chars"], + "diff_deleted_units": acc["diff_deleted_units"], + "doc_block_transactions": acc["doc_block_transactions"], + "doc_block_diff_hunks": acc["doc_block_diff_hunks"], + "doc_block_inserted_chars": acc["doc_block_inserted_chars"], + "doc_block_deleted_units": acc["doc_block_deleted_units"], + } + if include_file_paths: + row[RAW_FILE_PATH_FIELD] = acc.get(RAW_FILE_PATH_FIELD, "") + return row + + +def file_summary_rows( + event_rows: Iterable[dict[str, Any]], include_file_paths: bool +) -> list[dict[str, Any]]: + accs: dict[tuple[str, ...], dict[str, Any]] = {} + for row in event_rows: + key = ( + *identity_key(row), + text_value(row.get("file_id")), + text_value(row.get("language_id")), + ) + acc = accs.setdefault(key, new_file_acc(row, include_file_paths)) + update_file_acc(acc, row) + return [ + finalize_file_acc(acc, include_file_paths) + for _, acc in sorted(accs.items(), key=lambda item: item[0]) + ] + + +def lifecycle_row( + kind: str, + lifecycle_index: int, + start: dict[str, Any] | None, + end: dict[str, Any] | None, +) -> dict[str, Any]: + source = start or end or {} + start_dt = aware_datetime(parse_timestamp(start.get("timestamp") if start else None)) + end_dt = aware_datetime(parse_timestamp(end.get("timestamp") if end else None)) + notes = [] + if start is None: + notes.append("missing_start") + if end is None: + notes.append("missing_end") + return { + **{field: text_value(source.get(field)) for field in IDENTITY_FIELDS}, + "lifecycle_kind": kind, + "lifecycle_index": lifecycle_index, + "completed": 1 if end is not None else 0, + "start_event_type": text_value(start.get("event_type")) if start else "", + "end_event_type": text_value(end.get("event_type")) if end else "", + "start_at": timestamp_for_csv(start_dt), + "end_at": 
timestamp_for_csv(end_dt), + "duration_seconds": seconds_between(start_dt, end_dt) + if start_dt is not None and end_dt is not None + else "", + "start_event_id": text_value(start.get("event_id")) if start else "", + "end_event_id": text_value(end.get("event_id")) if end else "", + "start_file_id": text_value(start.get("file_id")) if start else "", + "end_file_id": text_value(end.get("file_id")) if end else "", + "language_id": text_value(source.get("language_id")), + "command": text_value(source.get("command")), + "data_quality_notes": ";".join(notes), + } + + +def task_lifecycle_rows(event_rows: Iterable[dict[str, Any]]) -> list[dict[str, Any]]: + starts: dict[tuple[tuple[str, ...], str], deque[dict[str, Any]]] = defaultdict(deque) + lifecycle_indexes: Counter[tuple[tuple[str, ...], str]] = Counter() + rows: list[dict[str, Any]] = [] + + for row in event_rows: + event_type = text_value(row.get("event_type")) + if event_type in LIFECYCLE_PAIRS: + kind, _ = LIFECYCLE_PAIRS[event_type] + starts[(identity_key(row), kind)].append(row) + continue + if event_type not in LIFECYCLE_END_TYPES: + continue + + kind, _start_type = LIFECYCLE_END_TYPES[event_type] + key = (identity_key(row), kind) + start = starts[key].popleft() if starts[key] else None + lifecycle_indexes[key] += 1 + rows.append(lifecycle_row(kind, lifecycle_indexes[key], start, row)) + + for key, queue in sorted(starts.items(), key=lambda item: item[0]): + identity, kind = key + while queue: + start = queue.popleft() + lifecycle_indexes[(identity, kind)] += 1 + rows.append(lifecycle_row(kind, lifecycle_indexes[(identity, kind)], start, None)) + + rows.sort( + key=lambda row: ( + row["user_id"], + row["assignment_id"], + row["session_id"], + row["task_id"], + row["lifecycle_kind"], + row["lifecycle_index"], + ) + ) + return rows + + +def data_dictionary_rows(fieldsets: dict[str, list[str]]) -> list[dict[str, str]]: + rows = [] + for dataset, fields in fieldsets.items(): + for field in fields: + rows.append( { 
- "user_id": row.user_id, - "assignment_id": row.assignment_id, - "group_id": row.group_id, - "session_id": row.session_id, - "condition": row.condition, - "course_id": row.course_id, - "task_id": row.task_id, - "event_count": row.event_count, - "first_event_at": row.first_event_at, - "last_event_at": row.last_event_at, - "doc_session_seconds": f"{row.doc_session_seconds:.3f}", - **{ - f"{event_type}_events": row.counts[event_type] - for event_type in EVENT_FIELDS - }, + "dataset": dataset, + "column": field, + "description": FIELD_DESCRIPTIONS.get(field, ""), } ) + return rows + + +def export_metrics(event_rows: list[dict[str, Any]], output_path: Path) -> None: + write_csv(output_path, SESSION_SUMMARY_FIELDS, session_summary_rows(event_rows)) + + +def export_analysis_dataset( + event_rows: list[dict[str, Any]], dataset_dir: Path, include_file_paths: bool +) -> list[Path]: + dataset_dir.mkdir(parents=True, exist_ok=True) + event_fields = fields_with_optional_file_path(EVENT_ROW_FIELDS, include_file_paths) + file_fields = fields_with_optional_file_path(FILE_SUMMARY_FIELDS, include_file_paths) + fieldsets = { + "events.csv": event_fields, + "session_summary.csv": SESSION_SUMMARY_FIELDS, + "file_summary.csv": file_fields, + "task_lifecycle.csv": TASK_LIFECYCLE_FIELDS, + } + outputs = [ + dataset_dir / "events.csv", + dataset_dir / "session_summary.csv", + dataset_dir / "file_summary.csv", + dataset_dir / "task_lifecycle.csv", + dataset_dir / "data_dictionary.csv", + ] + write_csv(outputs[0], event_fields, event_rows) + write_csv(outputs[1], SESSION_SUMMARY_FIELDS, session_summary_rows(event_rows)) + write_csv(outputs[2], file_fields, file_summary_rows(event_rows, include_file_paths)) + write_csv(outputs[3], TASK_LIFECYCLE_FIELDS, task_lifecycle_rows(event_rows)) + write_csv( + outputs[4], + ["dataset", "column", "description"], + data_dictionary_rows(fieldsets), + ) + return outputs def main() -> None: @@ -427,7 +1381,31 @@ def main() -> None: "--out", type=Path, 
default=None, - help="Output CSV file. Defaults to a timestamped capture-metrics-YYYYMMDD-HHMMSS.csv file.", + help=( + "Output session-summary CSV file. Defaults to a timestamped " + "capture-metrics-YYYYMMDD-HHMMSS.csv file when --dataset-dir is omitted." + ), + ) + parser.add_argument( + "--dataset-dir", + nargs="?", + const=Path("__DEFAULT_CAPTURE_ANALYSIS_DIR__"), + type=Path, + default=None, + help=( + "Write a richer analysis dataset directory containing events.csv, " + "session_summary.csv, file_summary.csv, task_lifecycle.csv, and " + "data_dictionary.csv. If no path is supplied, defaults to " + "capture-analysis-YYYYMMDD-HHMMSS." + ), + ) + parser.add_argument( + "--include-file-paths", + action="store_true", + help=( + "Include raw captured file paths in event/file exports. By default, " + "the dataset uses file_id/file_hash only." + ), ) parser.add_argument( "--db", @@ -455,9 +1433,31 @@ def main() -> None: if args.input is not None else iter_db_events(load_db_config(args.config), args.table) ) - output_path = args.out or default_output_path() - export_metrics(events, output_path) - print(f"Wrote {output_path}") + event_rows = normalize_event_rows(events, include_file_paths=args.include_file_paths) + + wrote_outputs = False + if args.out is not None or args.dataset_dir is None: + output_path = args.out or default_output_path() + export_metrics(event_rows, output_path) + print(f"Wrote {output_path}") + wrote_outputs = True + + if args.dataset_dir is not None: + dataset_dir = ( + default_dataset_dir() + if args.dataset_dir == Path("__DEFAULT_CAPTURE_ANALYSIS_DIR__") + else args.dataset_dir + ) + output_paths = export_analysis_dataset( + event_rows, dataset_dir, args.include_file_paths + ) + print(f"Wrote analysis dataset to {dataset_dir}") + for output_path in output_paths: + print(f" {output_path.name}") + wrote_outputs = True + + if not wrote_outputs: + raise SystemExit("No outputs were requested.") def default_output_path() -> Path: @@ -465,5 +1465,10 
@@ def default_output_path() -> Path: return Path(f"capture-metrics-{timestamp}.csv") +def default_dataset_dir() -> Path: + timestamp = datetime.now(timezone.utc).astimezone().strftime("%Y%m%d-%H%M%S") + return Path(f"capture-analysis-{timestamp}") + + if __name__ == "__main__": main() diff --git a/server/src/capture.rs b/server/src/capture.rs index 0b317038..e461be64 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -34,28 +34,28 @@ // Database schema // ---------------------------------------------------------------------------- // -// The following SQL statement creates the `events` table used by this module: +// The canonical schema and migration DDL lives in +// `server/scripts/capture_events_schema.sql`. The important analysis columns +// are: // // ```sql -// CREATE TABLE events ( -// id SERIAL PRIMARY KEY, -// user_id TEXT NOT NULL, -// assignment_id TEXT, -// group_id TEXT, -// file_path TEXT, -// event_type TEXT NOT NULL, -// timestamp TEXT NOT NULL, -// data TEXT -// ); +// event_id, sequence_number, schema_version, +// user_id, assignment_id, group_id, condition, course_id, task_id, session_id, +// event_source, language_id, file_hash, file_path, path_privacy, capture_mode, +// event_type, timestamp, client_timestamp_ms, client_tz_offset_min, +// server_timestamp_ms, data // ``` // // * `user_id` – participant identifier (student id, pseudonym, etc.). -// * `assignment_id` – logical assignment / lab identifier. -// * `group_id` – optional grouping (treatment / comparison, section). +// * `assignment_id`, `task_id` – logical assignment / task identifiers. +// * `group_id`, `condition`, `course_id` – study grouping metadata. +// * `session_id`, `event_id`, `sequence_number`, `schema_version` – event +// integrity and versioning metadata. // * `file_path` – logical path of the file being edited. +// * `file_hash` – privacy-preserving SHA-256 hash of the file path. // * `event_type` – coarse event type (see `event_type` constants below). 
// * `timestamp` – RFC3339 timestamp (in UTC).
-// * `data` – JSON payload with event-specific details.
+// * `data` – JSONB payload with event-specific details.
 
 use std::{
     env,
@@ -562,13 +562,127 @@ fn log_pg_connect_error(context: &str, err: &tokio_postgres::Error) {
     error!("{context}: {err}");
 }
 
+fn capture_data_str(data: &serde_json::Value, names: &[&str]) -> Option<String> {
+    names.iter().find_map(|name| {
+        data.get(*name)
+            .and_then(serde_json::Value::as_str)
+            .map(str::trim)
+            .filter(|value| !value.is_empty())
+            .map(str::to_string)
+    })
+}
+
+fn capture_data_i64(data: &serde_json::Value, name: &str) -> Option<i64> {
+    let value = data.get(name)?;
+    value
+        .as_i64()
+        .or_else(|| value.as_str()?.trim().parse::<i64>().ok())
+}
+
+fn capture_data_i32(data: &serde_json::Value, name: &str) -> Option<i32> {
+    capture_data_i64(data, name).and_then(|value| i32::try_from(value).ok())
+}
+
+fn should_retry_legacy_insert(err: &tokio_postgres::Error) -> bool {
+    matches!(
+        err.code().map(|code| code.code()),
+        Some("42703" | "42P01" | "42804")
+    )
+}
+
 /// Insert a single event into the `events` table.
 async fn insert_event(client: &Client, event: &CaptureEvent) -> Result<u64, tokio_postgres::Error> {
+    match insert_rich_event(client, event).await {
+        Ok(rows) => Ok(rows),
+        Err(err) if should_retry_legacy_insert(&err) => {
+            warn!(
+                "Capture: rich events insert failed against the current schema; retrying legacy insert: {err}"
+            );
+            insert_legacy_event(client, event).await
+        }
+        Err(err) => Err(err),
+    }
+}
+
+async fn insert_rich_event(
+    client: &Client,
+    event: &CaptureEvent,
+) -> Result<u64, tokio_postgres::Error> {
     let timestamp = event.timestamp.to_rfc3339();
+    let event_id = capture_data_str(&event.data, &["event_id"]);
+    let sequence_number = capture_data_i64(&event.data, "sequence_number");
+    let schema_version = capture_data_i32(&event.data, "schema_version");
+    let condition = capture_data_str(&event.data, &["condition"]);
+    let course_id = capture_data_str(&event.data, &["course_id"]);
+    let task_id = capture_data_str(&event.data, &["task_id"]);
+    let session_id = capture_data_str(&event.data, &["session_id"]);
+    let event_source = capture_data_str(&event.data, &["event_source"]);
+    let language_id = capture_data_str(&event.data, &["language_id", "languageId"]);
+    let file_hash = capture_data_str(&event.data, &["file_hash"]);
+    let path_privacy = capture_data_str(&event.data, &["path_privacy"]);
+    let capture_mode = capture_data_str(&event.data, &["capture_mode"]);
+    let client_timestamp_ms = capture_data_i64(&event.data, "client_timestamp_ms");
+    let client_tz_offset_min = capture_data_i32(&event.data, "client_tz_offset_min");
+    let server_timestamp_ms = capture_data_i64(&event.data, "server_timestamp_ms")
+        .unwrap_or_else(|| event.timestamp.timestamp_millis());
     let data_text = event.data.to_string();
 
     debug!(
-        "Capture: executing INSERT for user_id={}, event_type={}, timestamp={}",
+        "Capture: executing rich INSERT for user_id={}, event_type={}, timestamp={}",
+        event.user_id, event.event_type, timestamp
+    );
+
+    client
+        .execute(
+            "INSERT INTO events \
+                 (event_id, sequence_number, 
schema_version, \ + user_id, assignment_id, group_id, condition, course_id, task_id, session_id, \ + event_source, language_id, file_hash, file_path, path_privacy, capture_mode, \ + event_type, timestamp, client_timestamp_ms, client_tz_offset_min, \ + server_timestamp_ms, data) \ + VALUES \ + ($1, $2, $3, \ + $4, $5, $6, $7, $8, $9, $10, \ + $11, $12, $13, $14, $15, $16, \ + $17, $18::timestamptz, $19, $20, \ + $21, $22::jsonb)", + &[ + &event_id, + &sequence_number, + &schema_version, + &event.user_id, + &event.assignment_id, + &event.group_id, + &condition, + &course_id, + &task_id, + &session_id, + &event_source, + &language_id, + &file_hash, + &event.file_path, + &path_privacy, + &capture_mode, + &event.event_type, + ×tamp, + &client_timestamp_ms, + &client_tz_offset_min, + &server_timestamp_ms, + &data_text, + ], + ) + .await +} + +async fn insert_legacy_event( + client: &Client, + event: &CaptureEvent, +) -> Result { + let timestamp = event.timestamp.to_rfc3339(); + let data_text = event.data.to_string(); + + debug!( + "Capture: executing legacy INSERT for user_id={}, event_type={}, timestamp={}", event.user_id, event.event_type, timestamp ); @@ -666,6 +780,29 @@ mod tests { assert!(ev.timestamp <= after); } + #[test] + fn capture_metadata_helpers_extract_typed_values() { + let data = json!({ + "event_id": "abc-123", + "sequence_number": "42", + "schema_version": 2, + "languageId": "rust", + "client_tz_offset_min": "-360" + }); + + assert_eq!( + capture_data_str(&data, &["language_id", "languageId"]).as_deref(), + Some("rust") + ); + assert_eq!( + capture_data_str(&data, &["event_id"]).as_deref(), + Some("abc-123") + ); + assert_eq!(capture_data_i64(&data, "sequence_number"), Some(42)); + assert_eq!(capture_data_i32(&data, "schema_version"), Some(2)); + assert_eq!(capture_data_i32(&data, "client_tz_offset_min"), Some(-360)); + } + #[test] fn capture_config_json_round_trip() { let json_text = r#" @@ -783,13 +920,10 @@ mod tests { INSERT INTO public.events 
(user_id, assignment_id, group_id, file_path, event_type, timestamp, data) VALUES - ($1, NULL, NULL, NULL, 'test_event', $2, '{"test":true}') + ($1, NULL, NULL, NULL, 'test_event', $2::timestamptz, '{"test":true}'::jsonb) RETURNING id "#, - &[ - &test_user_id, - &format!("{:?}", std::time::SystemTime::now()), - ], + &[&test_user_id, &Utc::now().to_rfc3339()], ) .await?; @@ -824,7 +958,7 @@ mod tests { match client .query_one( r#" - SELECT user_id, assignment_id, group_id, file_path, event_type, data + SELECT user_id, assignment_id, group_id, file_path, event_type, data::text FROM events WHERE user_id = $1 AND event_type = $2 ORDER BY id DESC From c11054606ecf8c579377e7831f2c4aaafb30dc7c Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 8 May 2026 15:54:25 -0600 Subject: [PATCH 15/19] Verify rich capture schema writes Update the ignored PostgreSQL integration test to assert the rich events schema columns and fix timestamp/JSONB parameter casts used by the capture insert path. Verified against the AWS PostgreSQL database with event_capture_inserts_rich_schema_event_into_db. 
--- server/src/capture.rs | 194 +++++++++++++++++++++++++++--------------- 1 file changed, 126 insertions(+), 68 deletions(-) diff --git a/server/src/capture.rs b/server/src/capture.rs index e461be64..5d41d869 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -644,8 +644,8 @@ async fn insert_rich_event( ($1, $2, $3, \ $4, $5, $6, $7, $8, $9, $10, \ $11, $12, $13, $14, $15, $16, \ - $17, $18::timestamptz, $19, $20, \ - $21, $22::jsonb)", + $17, $18::text::timestamptz, $19, $20, \ + $21, $22::text::jsonb)", &[ &event_id, &sequence_number, @@ -690,7 +690,7 @@ async fn insert_legacy_event( .execute( "INSERT INTO events \ (user_id, assignment_id, group_id, file_path, event_type, timestamp, data) \ - VALUES ($1, $2, $3, $4, $5, $6, $7)", + VALUES ($1, $2, $3, $4, $5, $6::text::timestamptz, $7::text::jsonb)", &[ &event.user_id, &event.assignment_id, @@ -836,14 +836,15 @@ mod tests { use std::fs; //use tokio::time::{sleep, Duration}; - /// Integration-style test: verify that EventCapture actually inserts into - /// the DB. + /// Integration-style test: verify that EventCapture inserts into the rich + /// capture schema used by dissertation analysis. /// /// Reads connection parameters from `capture_config.json` in the current /// working directory. Logs the config and connection details via log4rs so /// you can confirm what is used. 
/// - /// Run this test with: cargo test event\_capture\_inserts\_event\_into\_db + /// Run this test with: + /// cargo test event\_capture\_inserts\_rich_schema\_event\_into\_db /// -- --ignored --nocapture /// /// You must have a PostgreSQL database and a `capture_config.json` file @@ -852,7 +853,8 @@ mod tests { /// "codechat\_capture\_test", "app\_id": "integration-test" } #[tokio::test] #[ignore] - async fn event_capture_inserts_event_into_db() -> Result<(), Box> { + async fn event_capture_inserts_rich_schema_event_into_db() + -> Result<(), Box> { // Initialize logging for this test, using the same log4rs.yml as the // server. If logging is already initialized, this will just return an // error which we ignore. @@ -860,7 +862,10 @@ mod tests { // 1. Load the capture configuration from file. let cfg_text = fs::read_to_string("capture_config.json") - .expect("capture_config.json must exist in project root for this test"); + .or_else(|_| fs::read_to_string("../capture_config.json")) + .expect( + "capture_config.json must exist in the server directory or repo root for this test", + ); let cfg: CaptureConfig = serde_json::from_str(&cfg_text).expect("capture_config.json must be valid JSON"); @@ -883,64 +888,83 @@ mod tests { } }); - // Verify the events table already exists - let row = client - .query_one( - r#" - SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables + let required_columns = [ + "event_id", + "sequence_number", + "schema_version", + "condition", + "course_id", + "task_id", + "session_id", + "event_source", + "language_id", + "file_hash", + "path_privacy", + "capture_mode", + "client_timestamp_ms", + "client_tz_offset_min", + "server_timestamp_ms", + ]; + for column in required_columns { + let row = client + .query_one( + r#" + SELECT data_type + FROM information_schema.columns WHERE table_schema = 'public' - AND table_name = 'events' - ) AS exists - "#, - &[], - ) - .await?; - - let exists: bool = row.get("exists"); - assert!( - exists, - 
"TEST SETUP ERROR: public.events table does not exist. \ - It must be created by a migration or admin step." - ); - - // Insert a single test row (this is what the app really needs) - let test_user_id = format!( - "TEST_USER_{}", - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_millis() - ); - - let insert_row = client - .query_one( - r#" - INSERT INTO public.events - (user_id, assignment_id, group_id, file_path, event_type, timestamp, data) - VALUES - ($1, NULL, NULL, NULL, 'test_event', $2::timestamptz, '{"test":true}'::jsonb) - RETURNING id - "#, - &[&test_user_id, &Utc::now().to_rfc3339()], - ) - .await?; - - let inserted_id: i32 = insert_row.get("id"); - info!("TEST: inserted event id={}", inserted_id); + AND table_name = 'events' + AND column_name = $1 + "#, + &[&column], + ) + .await + .map_err(|err| { + format!( + "TEST SETUP ERROR: missing public.events.{column}; \ + run server/scripts/capture_events_schema.sql first: {err}" + ) + })?; + let data_type: String = row.get(0); + info!("TEST: public.events.{column} type={data_type}"); + } // 4. Start the EventCapture worker using the loaded config. let capture = EventCapture::new(cfg.clone())?; log::info!("TEST: EventCapture worker started."); - // 5. Log a test event. - let expected_data = json!({ "chars_typed": 123 }); + // 5. Log a schema-v2 test event with all typed analysis metadata. 
+ let test_suffix = Utc::now().timestamp_millis().to_string(); + let expected_event_id = format!("TEST_EVENT_{test_suffix}"); + let expected_user_id = format!("TEST_USER_{test_suffix}"); + let expected_session_id = format!("TEST_SESSION_{test_suffix}"); + let expected_file_hash = format!("TEST_FILE_HASH_{test_suffix}"); + let event_timestamp = Utc::now(); + let expected_server_timestamp_ms = event_timestamp.timestamp_millis(); + let expected_client_timestamp_ms = expected_server_timestamp_ms - 50; + let expected_data = json!({ + "event_id": expected_event_id, + "sequence_number": 42, + "schema_version": 2, + "condition": "treatment", + "course_id": "ece-integration", + "task_id": "capture-schema-test", + "session_id": expected_session_id, + "event_source": "integration_test", + "language_id": "rust", + "file_hash": expected_file_hash, + "path_privacy": "sha256", + "capture_mode": "treatment", + "client_timestamp_ms": expected_client_timestamp_ms, + "client_tz_offset_min": 360, + "server_timestamp_ms": expected_server_timestamp_ms, + "chars_typed": 123, + "classification_basis": "integration_test" + }); let event = CaptureEvent::now( - "test-user".to_string(), - Some("hw1".to_string()), - Some("groupA".to_string()), - Some("/tmp/test.rs".to_string()), + expected_user_id.clone(), + Some("hw-integration".to_string()), + Some("group-integration".to_string()), + None, event_types::WRITE_DOC, expected_data.clone(), ); @@ -958,13 +982,17 @@ mod tests { match client .query_one( r#" - SELECT user_id, assignment_id, group_id, file_path, event_type, data::text + SELECT user_id, assignment_id, group_id, file_path, event_type, + event_id, sequence_number, schema_version, condition, course_id, + task_id, session_id, event_source, language_id, file_hash, + path_privacy, capture_mode, client_timestamp_ms, + client_tz_offset_min, server_timestamp_ms, data::text FROM events - WHERE user_id = $1 AND event_type = $2 + WHERE event_id = $1 ORDER BY id DESC LIMIT 1 "#, - &[&"test-user", 
&event_types::WRITE_DOC], + &[&expected_event_id], ) .await { @@ -978,19 +1006,49 @@ mod tests { } }; - let user_id: String = row.get(0); + let user_id: String = row.get("user_id"); let assignment_id: Option = row.get(1); let group_id: Option = row.get(2); let file_path: Option = row.get(3); let event_type: String = row.get(4); - let data_text: String = row.get(5); + let event_id: Option = row.get(5); + let sequence_number: Option = row.get(6); + let schema_version: Option = row.get(7); + let condition: Option = row.get(8); + let course_id: Option = row.get(9); + let task_id: Option = row.get(10); + let session_id: Option = row.get(11); + let event_source: Option = row.get(12); + let language_id: Option = row.get(13); + let file_hash: Option = row.get(14); + let path_privacy: Option = row.get(15); + let capture_mode: Option = row.get(16); + let client_timestamp_ms: Option = row.get(17); + let client_tz_offset_min: Option = row.get(18); + let server_timestamp_ms: Option = row.get(19); + let data_text: String = row.get(20); let data_value: serde_json::Value = serde_json::from_str(&data_text)?; - assert_eq!(user_id, "test-user"); - assert_eq!(assignment_id.as_deref(), Some("hw1")); - assert_eq!(group_id.as_deref(), Some("groupA")); - assert_eq!(file_path.as_deref(), Some("/tmp/test.rs")); + assert_eq!(user_id, expected_user_id); + assert_eq!(assignment_id.as_deref(), Some("hw-integration")); + assert_eq!(group_id.as_deref(), Some("group-integration")); + assert!(file_path.is_none()); assert_eq!(event_type, event_types::WRITE_DOC); + assert_eq!(event_id.as_deref(), Some(expected_event_id.as_str())); + assert_eq!(sequence_number, Some(42)); + assert_eq!(schema_version, Some(2)); + assert_eq!(condition.as_deref(), Some("treatment")); + assert_eq!(course_id.as_deref(), Some("ece-integration")); + assert_eq!(task_id.as_deref(), Some("capture-schema-test")); + assert_eq!(session_id.as_deref(), Some(expected_session_id.as_str())); + assert_eq!(event_source.as_deref(), 
Some("integration_test")); + assert_eq!(language_id.as_deref(), Some("rust")); + assert_eq!(file_hash.as_deref(), Some(expected_file_hash.as_str())); + assert_eq!(path_privacy.as_deref(), Some("sha256")); + assert_eq!(capture_mode.as_deref(), Some("treatment")); + assert_eq!(client_timestamp_ms, Some(expected_client_timestamp_ms)); + assert_eq!(client_tz_offset_min, Some(360)); + assert_eq!(server_timestamp_ms, Some(expected_server_timestamp_ms)); assert_eq!(data_value, expected_data); log::info!("✅ TEST: EventCapture integration test succeeded and wrote to database."); From 980b6eb24a49d97f35bbb40db8d374eda72d584f Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 8 May 2026 16:34:43 -0600 Subject: [PATCH 16/19] Stabilize WebDriver message timeout macOS CI occasionally delivered the loadfile acknowledgement just after the old two-second harness timeout. Increase the shared browser message wait to five seconds so test_client does not fail on that timing edge. --- server/tests/overall_common/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/tests/overall_common/mod.rs b/server/tests/overall_common/mod.rs index 894a4e40..f6fe9539 100644 --- a/server/tests/overall_common/mod.rs +++ b/server/tests/overall_common/mod.rs @@ -123,8 +123,9 @@ impl ExpectedMessages { } } -// Time to wait for `ExpectedMessages`. -pub const TIMEOUT: Duration = Duration::from_millis(2000); +// Time to wait for browser/WebDriver-backed client-server messages. macOS CI can +// take a little over two seconds to return loadfile acknowledgements. 
+pub const TIMEOUT: Duration = Duration::from_millis(5000); // ### Test harness // From f6ccbca13dc741dd02de654f719e65a987992bf1 Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 8 May 2026 16:50:10 -0600 Subject: [PATCH 17/19] Match WebDriver test wait to client timeout The first CI rerun passed the original test_client wait but exposed the same timing issue in test_client_updates while waiting for the autosave content update. Use the client response window as the shared browser test wait budget. --- server/tests/overall_common/mod.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/tests/overall_common/mod.rs b/server/tests/overall_common/mod.rs index f6fe9539..49dd644b 100644 --- a/server/tests/overall_common/mod.rs +++ b/server/tests/overall_common/mod.rs @@ -123,9 +123,10 @@ impl ExpectedMessages { } } -// Time to wait for browser/WebDriver-backed client-server messages. macOS CI can -// take a little over two seconds to return loadfile acknowledgements. -pub const TIMEOUT: Duration = Duration::from_millis(5000); +// Time to wait for browser/WebDriver-backed client-server messages. This +// matches the client-side response window and gives CI enough room for autosave +// and loadfile acknowledgements under matrix load. +pub const TIMEOUT: Duration = Duration::from_millis(15000); // ### Test harness // From c7c885b12bd66405188526eeda412d5896a92995 Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 8 May 2026 17:10:23 -0600 Subject: [PATCH 18/19] Serialize WebDriver integration tests The overall browser tests share one WebDriver endpoint and were running concurrently inside the same test binary. This was causing test_client_updates to miss its autosave content update on CI, especially macOS/Safari. Guard the harness with a shared async mutex so each browser session runs in isolation. 
--- server/tests/overall_common/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/tests/overall_common/mod.rs b/server/tests/overall_common/mod.rs index 49dd644b..383a52c5 100644 --- a/server/tests/overall_common/mod.rs +++ b/server/tests/overall_common/mod.rs @@ -128,6 +128,11 @@ impl ExpectedMessages { // and loadfile acknowledgements under matrix load. pub const TIMEOUT: Duration = Duration::from_millis(15000); +// Browser-backed tests share a single WebDriver endpoint. Safari on macOS CI is +// unreliable with overlapping sessions, so serialize the harness. +pub(crate) static WEB_DRIVER_TEST_LOCK: tokio::sync::Mutex<()> = + tokio::sync::Mutex::const_new(()); + // ### Test harness // // A test harness. It runs the webdriver, the Server, opens the Client, then @@ -152,6 +157,7 @@ macro_rules! harness { // The output from calling `prep_test_dir!()`. prep_test_dir: (TempDir, PathBuf), ) -> Result<(), Box> { + let _webdriver_test_lock = $crate::overall_common::WEB_DRIVER_TEST_LOCK.lock().await; let (temp_dir, test_dir) = prep_test_dir; // The logger gets configured by (I think) // `start_webdriver_process`, which delegates to `selenium-manager`. From 686196519635df54b3ec978d2b3e0346de37911a Mon Sep 17 00:00:00 2001 From: "JDS-WORKSTATION\\jspah" <44337821+jspahn80134@users.noreply.github.com> Date: Fri, 8 May 2026 17:15:47 -0600 Subject: [PATCH 19/19] Format WebDriver test lock --- server/tests/overall_common/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/tests/overall_common/mod.rs b/server/tests/overall_common/mod.rs index 383a52c5..9fa25b0e 100644 --- a/server/tests/overall_common/mod.rs +++ b/server/tests/overall_common/mod.rs @@ -130,8 +130,7 @@ pub const TIMEOUT: Duration = Duration::from_millis(15000); // Browser-backed tests share a single WebDriver endpoint. Safari on macOS CI is // unreliable with overlapping sessions, so serialize the harness. 
-pub(crate) static WEB_DRIVER_TEST_LOCK: tokio::sync::Mutex<()> = - tokio::sync::Mutex::const_new(()); +pub(crate) static WEB_DRIVER_TEST_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(()); // ### Test harness //