diff --git a/examples/advanced_training.py b/examples/advanced_training.py index 82b5364..fca9559 100644 --- a/examples/advanced_training.py +++ b/examples/advanced_training.py @@ -122,6 +122,7 @@ def main(): patience_early_stopping=5, num_workers=0, trainer_params={"deterministic": True}, + raw_labels=False, # no encoding needed, labels are already integers ) classifier.train( @@ -162,6 +163,8 @@ def main(): patience_early_stopping=7, num_workers=0, trainer_params=advanced_trainer_params, + raw_labels=False, # no encoding needed, labels are already integers + ) advanced_classifier.train( @@ -196,6 +199,8 @@ def main(): patience_early_stopping=3, num_workers=0, # No multiprocessing for CPU trainer_params={"deterministic": True, "accelerator": "cpu"}, + raw_labels=False, # no encoding needed, labels are already integers + ) cpu_classifier.train( @@ -225,7 +230,6 @@ def main(): "max_epochs": 25, "enable_progress_bar": True, "log_every_n_steps": 1, - "check_val_every_n_epoch": 2, # Validate every 2 epochs "enable_checkpointing": True, "enable_model_summary": True, "deterministic": True, @@ -238,6 +242,7 @@ def main(): patience_early_stopping=8, num_workers=0, trainer_params=custom_trainer_params, + raw_labels=False, # no encoding needed, labels are already integers ) custom_classifier.train( diff --git a/examples/basic_classification.py b/examples/basic_classification.py index 9da2362..a73718b 100644 --- a/examples/basic_classification.py +++ b/examples/basic_classification.py @@ -125,6 +125,7 @@ def main(): lr=1e-3, patience_early_stopping=5, num_workers=0, # Use 0 for simple examples to avoid multiprocessing issues + raw_labels=False # no encoding needed, labels are already integers ) classifier.train( X_train, y_train, training_config=training_config, X_val=X_val, y_val=y_val, verbose=True diff --git a/examples/multiclass_classification.py b/examples/multiclass_classification.py index e50063f..20a007c 100644 --- a/examples/multiclass_classification.py +++ b/examples/multiclass_classification.py @@ -142,6 +142,7 @@ def main(): patience_early_stopping=7, num_workers=0, trainer_params={"deterministic": True}, + raw_labels=False, # no encoding needed, labels are already integers ) classifier.train( X_train, y_train, training_config=training_config, X_val=X_val, y_val=y_val, verbose=True diff --git a/examples/simple_explainability_example.py b/examples/simple_explainability_example.py index 1febde1..df8464b 100644 --- a/examples/simple_explainability_example.py +++ b/examples/simple_explainability_example.py @@ -150,6 +150,7 @@ def main(): patience_early_stopping=5, num_workers=0, trainer_params={"deterministic": True}, + raw_labels=False # no encoding needed, labels are already integers ) classifier.train( X_train, y_train, training_config=training_config, X_val=X_val, y_val=y_val, verbose=True @@ -279,7 +280,6 @@ def main(): # Extract attributions and mapping info attributions = result["attributions"][0][0] # shape: (seq_len,) offset_mapping = result["offset_mapping"][0] # List of (start, end) tuples - word_ids = result["word_ids"][0] # List of word IDs for each token # Map token-level attributions to character-level (for ASCII visualization) char_attributions = map_attributions_to_char( diff --git a/examples/using_additional_features.py b/examples/using_additional_features.py index c740510..520d508 100644 --- a/examples/using_additional_features.py +++ b/examples/using_additional_features.py @@ -156,6 +156,8 @@ def train_and_evaluate_model(X, y, model_name, use_categorical=False, use_simple patience_early_stopping=3, num_workers=0, trainer_params={"enable_progress_bar": True, "deterministic": True}, + raw_labels=False, # no encoding needed, labels are already integers + raw_categorical_inputs=False, # no encoding needed, categorical inputs are already integers ) # Create and build model @@ -172,7 +174,7 @@ def train_and_evaluate_model(X, y, model_name, use_categorical=False, use_simple if use_categorical: print(" ✅ Running validation for text-with-categorical-variables model...") try: - result = classifier.predict(X_test) + result = classifier.predict(X_test, raw_categorical_inputs=False) predictions = result["prediction"].squeeze().numpy() test_accuracy = (predictions == y_test).mean() print(f" Test accuracy: {test_accuracy:.3f}") diff --git a/torchTextClassifiers/torchTextClassifiers.py b/torchTextClassifiers/torchTextClassifiers.py index 3b05477..65949f8 100644 --- a/torchTextClassifiers/torchTextClassifiers.py +++ b/torchTextClassifiers/torchTextClassifiers.py @@ -375,9 +375,10 @@ def train( if y_val is not None: assert X_val is not None, "X_val must be provided if y_val is provided." - X_val: Optional[Dict[str, Any]] = None + X_val_checked: Optional[Dict[str, Any]] = None if X_val is not None and y_val is not None: - X_val, y_val = self._check_XY(X_val, y_val) + X_val_checked, y_val = self._check_XY(X_val, y_val, training_config.raw_categorical_inputs, training_config.raw_labels) + X_val = X_val_checked if ( (X_train["categorical_variables"] is not None)