%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

import json
import logging
import os
import sys
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from IPython.display import Image, Markdown, display
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    auc,
    average_precision_score,
    confusion_matrix,
    f1_score,
    precision_recall_curve,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import (
    GridSearchCV,
    train_test_split,
)
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
)
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1_l2

from windows_malware_classifier.visualization.models_plots import (
    evaluate_model_performance,
    plot_model_feature_importance,
)

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)
warnings.filterwarnings("ignore")

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)

logger = logging.getLogger(__name__)

# This logger should be for models_plots, not modeling
module_logger = logging.getLogger(
    "windows_malware_classifier.visualization.models_plots"
)
module_logger.setLevel(logging.INFO)

train_df = pd.read_parquet("../data/engineered/train_df_engineered.parquet")
test_df = pd.read_parquet("../data/engineered/test_df_engineered.parquet")

logger.info(f"Training set shape: {train_df.shape}")
logger.info(f"Test set shape: {test_df.shape}")

2025-05-18 17:35:34,542 - __main__ - INFO - Training set shape: (18952, 486)
2025-05-18 17:35:34,542 - __main__ - INFO - Test set shape: (4716, 486)

logger.info(
    f"Original class distribution in training set: {dict(train_df['is_malicious'].value_counts(normalize=True) * 100)}"
)
logger.info(
    f"Original class distribution in test set: {dict(test_df['is_malicious'].value_counts(normalize=True) * 100)}"
)

combined_df = pd.concat([train_df, test_df])
x_combined = combined_df.drop("is_malicious", axis=1)
y_combined = combined_df["is_malicious"]

x_train, x_temp, y_train, y_temp = train_test_split(
    x_combined, y_combined, test_size=0.4, random_state=RANDOM_SEED, stratify=y_combined
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=RANDOM_SEED, stratify=y_temp
)

logger.info(
    f"Class distribution after 3-way split - Training: {dict(pd.Series(y_train).value_counts(normalize=True) * 100)}, Validation: {dict(pd.Series(y_val).value_counts(normalize=True) * 100)}, Test: {dict(pd.Series(y_test).value_counts(normalize=True) * 100)}"
)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_val_scaled = scaler.transform(x_val)
x_test_scaled = scaler.transform(x_test)

logger.info(
    f"Features shape after scaling - Training: {x_train_scaled.shape}, Validation: {x_val_scaled.shape}, Test: {x_test_scaled.shape}"
)

class_weights = compute_class_weight("balanced", classes=np.unique(y_train), y=y_train)
class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}
logger.info(f"Class weights for handling class imbalance: {class_weight_dict}")

2025-05-18 17:35:34,650 - __main__ - INFO - Original class distribution in training set: {1.0: 61.924862811312785, 0.0: 38.07513718868721}
2025-05-18 17:35:34,651 - __main__ - INFO - Original class distribution in test set: {1.0: 61.57760814249363, 0.0: 38.42239185750636}
2025-05-18 17:35:34,797 - __main__ - INFO - Class distribution after 3-way split - Training: {1.0: 61.859154929577464, 0.0: 38.140845070422536}, Validation: {1.0: 61.85044359949303, 0.0: 38.149556400506974}, Test: {1.0: 61.85044359949303, 0.0: 38.149556400506974}
2025-05-18 17:35:34,887 - __main__ - INFO - Features shape after scaling - Training: (14200, 485), Validation: (4734, 485), Test: (4734, 485)
2025-05-18 17:35:34,889 - __main__ - INFO - Class weights for handling class imbalance: {0: 1.3109305760709011, 1: 0.808287795992714}

dummy_clf = DummyClassifier(strategy="stratified", random_state=RANDOM_SEED)
dummy_clf.fit(x_train_scaled, y_train)

y_dummy_pred = dummy_clf.predict(x_test_scaled)
y_dummy_prob = dummy_clf.predict_proba(x_test_scaled)[:, 1]

dummy_results = evaluate_model_performance(
    y_test, y_dummy_pred, y_dummy_prob, "Dummy Classifier (Random Baseline)"
)

2025-05-18 17:35:35,093 - windows_malware_classifier.visualization.models_plots - INFO - Dummy Classifier (Random Baseline) Performance Metrics:
2025-05-18 17:35:35,093 - windows_malware_classifier.visualization.models_plots - INFO - Accuracy: 0.5241
2025-05-18 17:35:35,093 - windows_malware_classifier.visualization.models_plots - INFO - Precision: 0.6153 (indicates false positive control)
2025-05-18 17:35:35,093 - windows_malware_classifier.visualization.models_plots - INFO - Recall: 0.6151
2025-05-18 17:35:35,093 - windows_malware_classifier.visualization.models_plots - INFO - F1 Score: 0.6152
2025-05-18 17:35:35,093 - windows_malware_classifier.visualization.models_plots - INFO - False Positive Rate: 0.6235
2025-05-18 17:35:35,094 - windows_malware_classifier.visualization.models_plots - INFO - True Positive Rate: 0.6151
2025-05-18 17:35:35,094 - windows_malware_classifier.visualization.models_plots - INFO - AUC: 0.4958

rf_base = RandomForestClassifier(
    class_weight="balanced",
    random_state=RANDOM_SEED,
    n_jobs=-1,
)

param_grid = {
    "n_estimators": [100, 200],
    "max_depth": [8, 10, 12],
    "min_samples_split": [10, 15],
    "min_samples_leaf": [4, 5],
}

train_idx = np.arange(len(x_train_scaled))
val_idx = np.arange(len(x_train_scaled), len(x_train_scaled) + len(x_val_scaled))
manual_split = [(train_idx, val_idx)]

x_train_val = np.vstack((x_train_scaled, x_val_scaled))
y_train_val = np.concatenate((y_train, y_val))

logger.info("Starting Random Forest grid search...")
rf_grid = GridSearchCV(
    rf_base,
    param_grid,
    cv=manual_split,
    scoring="precision",
    n_jobs=-1,
    verbose=0,
)

rf_grid.fit(x_train_val, y_train_val)

rf_clf = rf_grid.best_estimator_
logger.info(f"Best Random Forest parameters: {rf_grid.best_params_}")

y_rf_pred = rf_clf.predict(x_test_scaled)
y_rf_prob = rf_clf.predict_proba(x_test_scaled)[:, 1]

rf_results = {
    "model": "Random Forest (Grid Search)",
    "accuracy": accuracy_score(y_test, y_rf_pred),
    "precision": precision_score(y_test, y_rf_pred),
    "recall": recall_score(y_test, y_rf_pred),
    "f1": f1_score(y_test, y_rf_pred),
}

cm = confusion_matrix(y_test, y_rf_pred)
rf_tn, rf_fp, rf_fn, rf_tp = cm.ravel()
rf_results.update(
    {
        "true_negatives": rf_tn,
        "false_positives": rf_fp,
        "false_negatives": rf_fn,
        "true_positives": rf_tp,
        "fpr": rf_fp / (rf_fp + rf_tn),
        "tpr": rf_tp / (rf_tp + rf_fn),
    }
)

if y_rf_prob is not None:
    rf_results["auc"] = roc_auc_score(y_test, y_rf_prob)

xgb_clf = xgb.XGBClassifier(
    n_estimators=500,
    max_depth=6,
    learning_rate=0.1,
    objective="binary:logistic",
    scale_pos_weight=class_weight_dict[1] / class_weight_dict[0],
    random_state=RANDOM_SEED,
    n_jobs=-1,
)

logger.info("Training XGBoost with early stopping...")
xgb_clf.fit(
    x_train_scaled,
    y_train,
    eval_set=[(x_train_scaled, y_train), (x_val_scaled, y_val)],
    verbose=False,
)

logger.info("XGBoost training completed with full iterations")

y_xgb_val_pred = xgb_clf.predict(x_val_scaled)
y_xgb_val_prob = xgb_clf.predict_proba(x_val_scaled)[:, 1]

2025-05-18 17:35:35,229 - __main__ - INFO - Starting Random Forest grid search...
2025-05-18 17:35:54,378 - __main__ - INFO - Best Random Forest parameters: {'max_depth': 12, 'min_samples_leaf': 5, 'min_samples_split': 15, 'n_estimators': 100}
2025-05-18 17:35:54,431 - __main__ - INFO - Training XGBoost with early stopping...
2025-05-18 17:35:59,037 - __main__ - INFO - XGBoost training completed with full iterations

xgb_val_results = evaluate_model_performance(
    y_val, y_xgb_val_pred, y_xgb_val_prob, "XGBoost (Validation)"
)

y_xgb_pred = xgb_clf.predict(x_test_scaled)
y_xgb_prob = xgb_clf.predict_proba(x_test_scaled)[:, 1]

xgb_results = evaluate_model_performance(
    y_test,
    y_xgb_pred,
    y_xgb_prob,
    "XGBoost",
    compare_with=[rf_results],
    save_path="../images/modeling/xgb_rf_evaluation.png",
)

fig = plot_model_feature_importance(
    xgb_clf,
    max_features=20,
    model_name="XGBoost",
    compare_with=[{"model": rf_clf, "name": "Random Forest"}],
    save_path="../images/modeling/xgb_rf_feature_importance.png",
)

2025-05-18 17:35:59,384 - windows_malware_classifier.visualization.models_plots - INFO - XGBoost (Validation) Performance Metrics:
2025-05-18 17:35:59,384 - windows_malware_classifier.visualization.models_plots - INFO - Accuracy: 0.9715
2025-05-18 17:35:59,384 - windows_malware_classifier.visualization.models_plots - INFO - Precision: 0.9804 (indicates false positive control)
2025-05-18 17:35:59,385 - windows_malware_classifier.visualization.models_plots - INFO - Recall: 0.9734
2025-05-18 17:35:59,385 - windows_malware_classifier.visualization.models_plots - INFO - F1 Score: 0.9769
2025-05-18 17:35:59,385 - windows_malware_classifier.visualization.models_plots - INFO - False Positive Rate: 0.0316
2025-05-18 17:35:59,385 - windows_malware_classifier.visualization.models_plots - INFO - True Positive Rate: 0.9734
2025-05-18 17:35:59,386 - windows_malware_classifier.visualization.models_plots - INFO - AUC: 0.9973
2025-05-18 17:35:59,470 - windows_malware_classifier.visualization.models_plots - INFO - XGBoost Performance Metrics:
2025-05-18 17:35:59,470 - windows_malware_classifier.visualization.models_plots - INFO - Accuracy: 0.9793
2025-05-18 17:35:59,471 - windows_malware_classifier.visualization.models_plots - INFO - Precision: 0.9849 (indicates false positive control)
2025-05-18 17:35:59,471 - windows_malware_classifier.visualization.models_plots - INFO - Recall: 0.9816
2025-05-18 17:35:59,472 - windows_malware_classifier.visualization.models_plots - INFO - F1 Score: 0.9832
2025-05-18 17:35:59,472 - windows_malware_classifier.visualization.models_plots - INFO - False Positive Rate: 0.0244
2025-05-18 17:35:59,472 - windows_malware_classifier.visualization.models_plots - INFO - True Positive Rate: 0.9816
2025-05-18 17:35:59,472 - windows_malware_classifier.visualization.models_plots - INFO - AUC: 0.9978

baseline_comparison = pd.DataFrame(
    [
        {
            "Model": "Dummy Classifier",
            "Accuracy": dummy_results["accuracy"],
            "Precision": dummy_results["precision"],
            "Recall": dummy_results["recall"],
            "F1": dummy_results["f1"],
            "FPR": dummy_results["fpr"],
        },
        {
            "Model": "Random Forest",
            "Accuracy": rf_results["accuracy"],
            "Precision": rf_results["precision"],
            "Recall": rf_results["recall"],
            "F1": rf_results["f1"],
            "FPR": rf_results["fpr"],
        },
        {
            "Model": "XGBoost",
            "Accuracy": xgb_results["accuracy"],
            "Precision": xgb_results["precision"],
            "Recall": xgb_results["recall"],
            "F1": xgb_results["f1"],
            "FPR": xgb_results["fpr"],
        },
    ]
)

display(Markdown("### Baseline Models Summary:"))
display(baseline_comparison)

Image(filename="../images/modeling/xgb_rf_evaluation.png")

fig = plot_model_feature_importance(
    xgb_clf,
    max_features=20,
    model_name="XGBoost",
    compare_with=[{"model": rf_clf, "name": "Random Forest"}],
    save_path="../images/modeling/xgb_rf_feature_importance.png",
)

Image(filename="../images/modeling/xgb_rf_feature_importance.png")

top_features = (
    pd.DataFrame(
        {"Feature": x_train.columns, "Importance": xgb_clf.feature_importances_}
    )
    .sort_values("Importance", ascending=False)
    .head(20)
)

logger.info("Top 20 features by importance:")
for _, row in top_features.iterrows():
    logger.info(f"  {row['Feature']}: {row['Importance']:.4f}")

2025-05-18 17:37:01,073 - __main__ - INFO - Top 20 features by importance:
2025-05-18 17:37:01,074 - __main__ - INFO -   avg_string_len * image_base: 0.5538
2025-05-18 17:37:01,074 - __main__ - INFO -   subsystem: 0.0721
2025-05-18 17:37:01,074 - __main__ - INFO -   is_exe: 0.0335
2025-05-18 17:37:01,075 - __main__ - INFO -   1 / image_base: 0.0268
2025-05-18 17:37:01,075 - __main__ - INFO -   version_composite: 0.0157
2025-05-18 17:37:01,075 - __main__ - INFO -   machine_type: 0.0116
2025-05-18 17:37:01,076 - __main__ - INFO -   major_os_version: 0.0103
2025-05-18 17:37:01,076 - __main__ - INFO -   image_base + is_signature_clean: 0.0089
2025-05-18 17:37:01,076 - __main__ - INFO -   section_2_chars: 0.0080
2025-05-18 17:37:01,077 - __main__ - INFO -   section_0_size: 0.0070
2025-05-18 17:37:01,078 - __main__ - INFO -   avg_string_len + num_sections: 0.0066
2025-05-18 17:37:01,078 - __main__ - INFO -   1 / avg_string_len: 0.0064
2025-05-18 17:37:01,079 - __main__ - INFO -   avg_string_len * num_sections: 0.0054
2025-05-18 17:37:01,079 - __main__ - INFO -   entry_point: 0.0051
2025-05-18 17:37:01,079 - __main__ - INFO -   num_urls: 0.0049
2025-05-18 17:37:01,079 - __main__ - INFO -   num_file_paths: 0.0048
2025-05-18 17:37:01,080 - __main__ - INFO -   has_debug: 0.0047
2025-05-18 17:37:01,080 - __main__ - INFO -   num_exports: 0.0045
2025-05-18 17:37:01,081 - __main__ - INFO -   dll_characteristics: 0.0044
2025-05-18 17:37:01,081 - __main__ - INFO -   1 / num_sections: 0.0043

def extract_feature_interactions(xgb_model, feature_names_list, max_interactions=30):
    """Extract important feature interactions from XGBoost model.

    Args:
        xgb_model: Trained XGBoost model
        feature_names_list: List of feature names
        max_interactions: Maximum number of interactions to return

    Returns:
        List of tuples containing (feature_idx1, feature_idx2) and interaction count
    """
    booster = xgb_model.get_booster()

    trees = booster.get_dump()

    interaction_counts = {}

    for tree_str in trees:
        lines = tree_str.strip().split("\n")

        nodes = {}
        for line in lines:
            if ":leaf=" in line:
                continue

            parts = line.split(":")
            node_id = int(parts[0])

            import re

            match = re.search(r"f(\d+)<", parts[1])
            if match:
                feature_idx = int(match.group(1))

                nodes[node_id] = feature_idx

        for feat_i in nodes.values():
            for feat_j in nodes.values():
                if feat_i < feat_j:
                    pair = (feat_i, feat_j)
                    interaction_counts[pair] = interaction_counts.get(pair, 0) + 1

    sorted_interactions = sorted(
        interaction_counts.items(), key=lambda x: x[1], reverse=True
    )

    top_interactions = sorted_interactions[:max_interactions]

    print(f"Top {min(5, len(top_interactions))} feature interactions:")
    for (f1, f2), count in top_interactions[:5]:
        f1_name = (
            feature_names_list[f1] if f1 < len(feature_names_list) else f"Feature_{f1}"
        )
        f2_name = (
            feature_names_list[f2] if f2 < len(feature_names_list) else f"Feature_{f2}"
        )
        print(f"  {f1_name} × {f2_name}: {count} occurrences")

    return top_interactions

def create_interaction_features(x_train_data, x_test_data, interactions_list):
    """Create new features based on interactions."""
    x_train_enhanced = x_train_data.copy()
    x_test_enhanced = x_test_data.copy()

    added_count = 0
    for (f1, f2), _ in interactions_list:
        if f1 >= x_train_data.shape[1] or f2 >= x_train_data.shape[1]:
            continue

        new_feature_train = x_train_data[:, f1] * x_train_data[:, f2]
        x_train_enhanced = np.column_stack([x_train_enhanced, new_feature_train])

        new_feature_test = x_test_data[:, f1] * x_test_data[:, f2]
        x_test_enhanced = np.column_stack([x_test_enhanced, new_feature_test])
        added_count += 1

    print(f"Added {added_count} interaction features")
    print(f"New feature dimensions: {x_train_enhanced.shape}")

    return x_train_enhanced, x_test_enhanced

column_names = x_train.columns
detected_interactions = extract_feature_interactions(xgb_clf, column_names)

x_train_with_interactions, x_test_with_interactions = create_interaction_features(
    x_train_scaled, x_test_scaled, detected_interactions
)

interaction_scaler = StandardScaler()
interaction_features = x_train_with_interactions[:, x_train_scaled.shape[1] :]
x_train_with_interactions[:, x_train_scaled.shape[1] :] = (
    interaction_scaler.fit_transform(interaction_features)
)
x_test_with_interactions[:, x_test_scaled.shape[1] :] = interaction_scaler.transform(
    x_test_with_interactions[:, x_test_scaled.shape[1] :]
)

x_train_scaled = x_train_with_interactions
x_test_scaled = x_test_with_interactions

Top 5 feature interactions:
  num_urls × timestamp_year: 518 occurrences
  num_urls × string_density: 394 occurrences
  timestamp_year × string_density: 358 occurrences
  timestamp_hour × timestamp_year: 334 occurrences
  timestamp_hour × num_urls: 329 occurrences
Added 30 interaction features
New feature dimensions: (14200, 515)

def focal_loss(gamma=2.0, alpha=0.75):
    """Focal Loss implementation for better handling of hard-to-detect malware.

    Args:
        gamma: Focusing parameter (higher values focus more on hard examples)
        alpha: Class balancing parameter (higher values focus more on malware)

    Returns:
        A loss function to use in model compilation
    """

    def loss_function(y_true, y_pred):
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)

        bce = -(y_true * tf.math.log(y_pred) + (1 - y_true) * tf.math.log(1 - y_pred))

        pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
        focal_weight = tf.pow(1 - pt, gamma)

        class_weight = y_true * alpha + (1 - y_true) * (1 - alpha)

        weighted_loss = focal_weight * class_weight * bce

        return tf.reduce_mean(weighted_loss)

    return loss_function


def build_malware_detection_nn(
    input_dimension,
    learning_rate=0.001,
    hidden_layers_scale=(1.0, 0.5, 0.25),
    dropout_rates=(0.4, 0.3, 0.2, 0.1),
    fp_weight=2.5,
):
    """Build a neural network model for malware detection.

    Creates a neural network with architecture matched to feature dimensions,
    including regularization and a custom loss function to penalize false positives.

    Args:
        input_dimension: Number of input features.
        learning_rate: Learning rate for the optimizer.
        hidden_layers_scale: Scaling factors for hidden layer sizes relative to input_dimension.
        dropout_rates: Dropout rates for each layer.
        fp_weight: False positive weight for the loss function.

    Returns:
        A compiled tf.keras.Model for malware detection.
    """
    layer_sizes = [int(input_dimension * scale) for scale in hidden_layers_scale]

    if len(dropout_rates) > len(layer_sizes) + 1:
        dropout_rates = dropout_rates[: len(layer_sizes) + 1]

    model = Sequential()

    model.add(
        Dense(
            layer_sizes[0],
            activation="relu",
            input_shape=(input_dimension,),
            kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4),
        )
    )
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rates[0]))

    for layer_idx in range(1, len(layer_sizes)):
        model.add(
            Dense(
                layer_sizes[layer_idx],
                activation="relu",
                kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4),
            )
        )
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rates[layer_idx]))

    model.add(Dense(1, activation="sigmoid"))

    # Using the fp_weight parameter in the loss function
    def custom_weighted_focal_loss(y_true, y_pred):
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)

        bce = -(y_true * tf.math.log(y_pred) + (1 - y_true) * tf.math.log(1 - y_pred))

        pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
        focal_weight = tf.pow(1 - pt, 2.0)

        weights = y_true + fp_weight * (1.0 - y_true)
        weighted_loss = weights * focal_weight * bce

        return tf.reduce_mean(weighted_loss)

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss=custom_weighted_focal_loss,
        metrics=[
            "accuracy",
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="auc"),
        ],
    )

    return model

input_dimensions = x_train_scaled.shape[1]
nn_model = build_malware_detection_nn(input_dimensions)

nn_model.summary()

Model: "sequential"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense_7 (Dense)                 │ (None, 515)            │       265,740 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_6           │ (None, 515)            │         2,060 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_5 (Dropout)             │ (None, 515)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_8 (Dense)                 │ (None, 257)            │       132,612 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_7           │ (None, 257)            │         1,028 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_6 (Dropout)             │ (None, 257)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_9 (Dense)                 │ (None, 128)            │        33,024 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_8           │ (None, 128)            │           512 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_7 (Dropout)             │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_10 (Dense)                │ (None, 1)              │           129 │
└─────────────────────────────────┴────────────────────────┴───────────────┘

 Total params: 435,105 (1.66 MB)

 Trainable params: 433,305 (1.65 MB)

 Non-trainable params: 1,800 (7.03 KB)

class FalsePositiveMonitor(tf.keras.callbacks.Callback):
    """Monitors false positive rate during model training.

    This callback calculates and tracks the false positive rate after each epoch
    using the provided validation data, and optionally prints the rate at specified
    intervals.

    Attributes:
        x_val: Validation features.
        y_val: Validation labels.
        print_freq: Integer specifying how often (in epochs) to print the FPR.
        false_positive_rates: List tracking FPR history across epochs.
    """

    def __init__(self, validation_data, print_freq=5):
        """Initializes the FalsePositiveMonitor.

        Args:
            validation_data: Tuple of (x_val, y_val) containing validation data.
            print_freq: Integer specifying how often (in epochs) to print the FPR.
        """
        super(FalsePositiveMonitor, self).__init__()
        self.x_val, self.y_val = validation_data
        self.print_freq = print_freq
        self.false_positive_rates = []

    def on_epoch_end(self, epoch, _=None):
        """Calculates FPR at the end of each epoch.

        Args:
            epoch: Integer, index of the current epoch.
            _: Dict, unused parameter for compatibility with Keras callback API.
        """
        y_pred = np.array(self.model.predict(self.x_val) > 0.5, dtype=np.int32)
        monitor_tn, monitor_fp, monitor_fn, monitor_tp = confusion_matrix(
            self.y_val, y_pred
        ).ravel()
        monitor_fpr = monitor_fp / (monitor_fp + monitor_tn)
        self.false_positive_rates.append(monitor_fpr)

        if (epoch + 1) % self.print_freq == 0:
            print(f"Epoch {epoch + 1}: False Positive Rate = {monitor_fpr:.4f}")


x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
    x_train_scaled,
    y_train,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=y_train,
)

callbacks = [
    EarlyStopping(
        monitor="val_loss", patience=20, restore_best_weights=True, verbose=1
    ),
    ModelCheckpoint(
        filepath="../models/nn_malware_detector.keras",
        monitor="val_precision",
        mode="max",
        save_best_only=True,
        verbose=1,
    ),
    ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=10, min_lr=1e-6, verbose=1
    ),
    FalsePositiveMonitor(validation_data=(x_val_split, y_val_split), print_freq=5),
]

history = nn_model.fit(
    x_train_split,
    y_train_split,
    epochs=100,
    batch_size=128,
    validation_data=(x_val_split, y_val_split),
    class_weight=class_weight_dict,
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - accuracy: 0.8708 - auc: 0.9403 - loss: 0.4382 - precision: 0.9322 - recall: 0.8528
Epoch 1: val_precision improved from -inf to 0.97468, saving model to ../models/nn_malware_detector.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 12s 54ms/step - accuracy: 0.8711 - auc: 0.9405 - loss: 0.4374 - precision: 0.9324 - recall: 0.8531 - val_accuracy: 0.9359 - val_auc: 0.9806 - val_loss: 0.2510 - val_precision: 0.9747 - val_recall: 0.9203 - learning_rate: 0.0010
Epoch 2/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step - accuracy: 0.9126 - auc: 0.9678 - loss: 0.2846 - precision: 0.9501 - recall: 0.9061
Epoch 2: val_precision did not improve from 0.97468
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 36ms/step - accuracy: 0.9126 - auc: 0.9678 - loss: 0.2846 - precision: 0.9502 - recall: 0.9061 - val_accuracy: 0.9289 - val_auc: 0.9811 - val_loss: 0.2449 - val_precision: 0.9709 - val_recall: 0.9124 - learning_rate: 0.0010
Epoch 3/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step - accuracy: 0.9136 - auc: 0.9736 - loss: 0.2609 - precision: 0.9571 - recall: 0.9005
Epoch 3: val_precision improved from 0.97468 to 0.97530, saving model to ../models/nn_malware_detector.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 4s 41ms/step - accuracy: 0.9136 - auc: 0.9736 - loss: 0.2609 - precision: 0.9572 - recall: 0.9005 - val_accuracy: 0.9370 - val_auc: 0.9817 - val_loss: 0.2360 - val_precision: 0.9753 - val_recall: 0.9215 - learning_rate: 0.0010
Epoch 4/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step - accuracy: 0.9218 - auc: 0.9757 - loss: 0.2469 - precision: 0.9567 - recall: 0.9149
Epoch 4: val_precision improved from 0.97530 to 0.97687, saving model to ../models/nn_malware_detector.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - accuracy: 0.9218 - auc: 0.9757 - loss: 0.2469 - precision: 0.9568 - recall: 0.9148 - val_accuracy: 0.9331 - val_auc: 0.9835 - val_loss: 0.2282 - val_precision: 0.9769 - val_recall: 0.9135 - learning_rate: 0.0010
Epoch 5/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step - accuracy: 0.9194 - auc: 0.9751 - loss: 0.2410 - precision: 0.9574 - recall: 0.9101
Epoch 5: val_precision improved from 0.97687 to 0.97738, saving model to ../models/nn_malware_detector.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Epoch 5: False Positive Rate = 0.0342
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.9194 - auc: 0.9752 - loss: 0.2409 - precision: 0.9574 - recall: 0.9101 - val_accuracy: 0.9313 - val_auc: 0.9834 - val_loss: 0.2233 - val_precision: 0.9774 - val_recall: 0.9101 - learning_rate: 0.0010
Epoch 6/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step - accuracy: 0.9222 - auc: 0.9757 - loss: 0.2339 - precision: 0.9610 - recall: 0.9110
Epoch 6: val_precision did not improve from 0.97738
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.9222 - auc: 0.9756 - loss: 0.2339 - precision: 0.9610 - recall: 0.9110 - val_accuracy: 0.9303 - val_auc: 0.9830 - val_loss: 0.2187 - val_precision: 0.9756 - val_recall: 0.9101 - learning_rate: 0.0010
Epoch 7/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step - accuracy: 0.9232 - auc: 0.9767 - loss: 0.2265 - precision: 0.9576 - recall: 0.9162
Epoch 7: val_precision improved from 0.97738 to 0.97753, saving model to ../models/nn_malware_detector.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 4s 40ms/step - accuracy: 0.9231 - auc: 0.9766 - loss: 0.2266 - precision: 0.9577 - recall: 0.9160 - val_accuracy: 0.9352 - val_auc: 0.9829 - val_loss: 0.2123 - val_precision: 0.9775 - val_recall: 0.9163 - learning_rate: 0.0010
Epoch 8/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step - accuracy: 0.9164 - auc: 0.9720 - loss: 0.2296 - precision: 0.9554 - recall: 0.9070
Epoch 8: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - accuracy: 0.9164 - auc: 0.9720 - loss: 0.2296 - precision: 0.9554 - recall: 0.9070 - val_accuracy: 0.9327 - val_auc: 0.9832 - val_loss: 0.2087 - val_precision: 0.9763 - val_recall: 0.9135 - learning_rate: 0.0010
Epoch 9/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - accuracy: 0.9217 - auc: 0.9767 - loss: 0.2180 - precision: 0.9585 - recall: 0.9127
Epoch 9: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 31ms/step - accuracy: 0.9217 - auc: 0.9767 - loss: 0.2180 - precision: 0.9585 - recall: 0.9127 - val_accuracy: 0.9342 - val_auc: 0.9832 - val_loss: 0.2042 - val_precision: 0.9769 - val_recall: 0.9152 - learning_rate: 0.0010
Epoch 10/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9207 - auc: 0.9757 - loss: 0.2163 - precision: 0.9577 - recall: 0.9118
Epoch 10: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Epoch 10: False Positive Rate = 0.0351
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - accuracy: 0.9207 - auc: 0.9757 - loss: 0.2163 - precision: 0.9578 - recall: 0.9118 - val_accuracy: 0.9338 - val_auc: 0.9827 - val_loss: 0.2024 - val_precision: 0.9769 - val_recall: 0.9146 - learning_rate: 0.0010
Epoch 11/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9213 - auc: 0.9759 - loss: 0.2139 - precision: 0.9572 - recall: 0.9135
Epoch 11: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - accuracy: 0.9213 - auc: 0.9758 - loss: 0.2139 - precision: 0.9572 - recall: 0.9134 - val_accuracy: 0.9338 - val_auc: 0.9831 - val_loss: 0.2002 - val_precision: 0.9757 - val_recall: 0.9158 - learning_rate: 0.0010
Epoch 12/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - accuracy: 0.9230 - auc: 0.9756 - loss: 0.2119 - precision: 0.9611 - recall: 0.9124
Epoch 12: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 32ms/step - accuracy: 0.9230 - auc: 0.9756 - loss: 0.2119 - precision: 0.9611 - recall: 0.9123 - val_accuracy: 0.9317 - val_auc: 0.9828 - val_loss: 0.1979 - val_precision: 0.9762 - val_recall: 0.9118 - learning_rate: 0.0010
Epoch 13/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9214 - auc: 0.9758 - loss: 0.2103 - precision: 0.9616 - recall: 0.9090
Epoch 13: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - accuracy: 0.9214 - auc: 0.9758 - loss: 0.2103 - precision: 0.9616 - recall: 0.9090 - val_accuracy: 0.9366 - val_auc: 0.9828 - val_loss: 0.1963 - val_precision: 0.9764 - val_recall: 0.9197 - learning_rate: 0.0010
Epoch 14/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - accuracy: 0.9214 - auc: 0.9754 - loss: 0.2102 - precision: 0.9608 - recall: 0.9098
Epoch 14: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - accuracy: 0.9214 - auc: 0.9754 - loss: 0.2102 - precision: 0.9609 - recall: 0.9097 - val_accuracy: 0.9349 - val_auc: 0.9836 - val_loss: 0.1938 - val_precision: 0.9746 - val_recall: 0.9186 - learning_rate: 0.0010
Epoch 15/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - accuracy: 0.9182 - auc: 0.9754 - loss: 0.2094 - precision: 0.9558 - recall: 0.9097
Epoch 15: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Epoch 15: False Positive Rate = 0.0351
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.9182 - auc: 0.9754 - loss: 0.2094 - precision: 0.9559 - recall: 0.9096 - val_accuracy: 0.9352 - val_auc: 0.9834 - val_loss: 0.1945 - val_precision: 0.9770 - val_recall: 0.9169 - learning_rate: 0.0010
Epoch 16/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step - accuracy: 0.9221 - auc: 0.9774 - loss: 0.2060 - precision: 0.9624 - recall: 0.9095
Epoch 16: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - accuracy: 0.9221 - auc: 0.9774 - loss: 0.2060 - precision: 0.9624 - recall: 0.9095 - val_accuracy: 0.9335 - val_auc: 0.9830 - val_loss: 0.1949 - val_precision: 0.9763 - val_recall: 0.9146 - learning_rate: 0.0010
Epoch 17/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step - accuracy: 0.9211 - auc: 0.9766 - loss: 0.2088 - precision: 0.9598 - recall: 0.9105
Epoch 17: val_precision did not improve from 0.97753
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 32ms/step - accuracy: 0.9210 - auc: 0.9766 - loss: 0.2089 - precision: 0.9598 - recall: 0.9104 - val_accuracy: 0.9342 - val_auc: 0.9823 - val_loss: 0.1976 - val_precision: 0.9752 - val_recall: 0.9169 - learning_rate: 0.0010
Epoch 18/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 32ms/step - accuracy: 0.9238 - auc: 0.9758 - loss: 0.2099 - precision: 0.9608 - recall: 0.9140
Epoch 18: val_precision improved from 0.97753 to 0.97816, saving model to ../models/nn_malware_detector.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 4s 46ms/step - accuracy: 0.9238 - auc: 0.9758 - loss: 0.2099 - precision: 0.9608 - recall: 0.9139 - val_accuracy: 0.9363 - val_auc: 0.9826 - val_loss: 0.1976 - val_precision: 0.9782 - val_recall: 0.9175 - learning_rate: 0.0010
Epoch 19/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step - accuracy: 0.9193 - auc: 0.9741 - loss: 0.2140 - precision: 0.9577 - recall: 0.9096
Epoch 19: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.9194 - auc: 0.9741 - loss: 0.2140 - precision: 0.9577 - recall: 0.9096 - val_accuracy: 0.9331 - val_auc: 0.9834 - val_loss: 0.1975 - val_precision: 0.9780 - val_recall: 0.9124 - learning_rate: 0.0010
Epoch 20/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - accuracy: 0.9200 - auc: 0.9759 - loss: 0.2126 - precision: 0.9602 - recall: 0.9083
Epoch 20: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Epoch 20: False Positive Rate = 0.0360
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 32ms/step - accuracy: 0.9200 - auc: 0.9759 - loss: 0.2126 - precision: 0.9602 - recall: 0.9082 - val_accuracy: 0.9356 - val_auc: 0.9829 - val_loss: 0.1988 - val_precision: 0.9764 - val_recall: 0.9180 - learning_rate: 0.0010
Epoch 21/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9236 - auc: 0.9763 - loss: 0.2126 - precision: 0.9601 - recall: 0.9143
Epoch 21: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - accuracy: 0.9236 - auc: 0.9763 - loss: 0.2126 - precision: 0.9601 - recall: 0.9143 - val_accuracy: 0.9356 - val_auc: 0.9819 - val_loss: 0.2015 - val_precision: 0.9752 - val_recall: 0.9192 - learning_rate: 0.0010
Epoch 22/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step - accuracy: 0.9216 - auc: 0.9775 - loss: 0.2115 - precision: 0.9620 - recall: 0.9091
Epoch 22: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 35ms/step - accuracy: 0.9216 - auc: 0.9775 - loss: 0.2116 - precision: 0.9620 - recall: 0.9091 - val_accuracy: 0.9345 - val_auc: 0.9830 - val_loss: 0.2005 - val_precision: 0.9775 - val_recall: 0.9152 - learning_rate: 0.0010
Epoch 23/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - accuracy: 0.9216 - auc: 0.9760 - loss: 0.2146 - precision: 0.9616 - recall: 0.9094
Epoch 23: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - accuracy: 0.9216 - auc: 0.9760 - loss: 0.2146 - precision: 0.9616 - recall: 0.9094 - val_accuracy: 0.9338 - val_auc: 0.9824 - val_loss: 0.2033 - val_precision: 0.9775 - val_recall: 0.9141 - learning_rate: 0.0010
Epoch 24/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9200 - auc: 0.9752 - loss: 0.2173 - precision: 0.9606 - recall: 0.9077
Epoch 24: val_precision did not improve from 0.97816

Epoch 24: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - accuracy: 0.9200 - auc: 0.9752 - loss: 0.2173 - precision: 0.9606 - recall: 0.9076 - val_accuracy: 0.9345 - val_auc: 0.9821 - val_loss: 0.2044 - val_precision: 0.9775 - val_recall: 0.9152 - learning_rate: 0.0010
Epoch 25/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9210 - auc: 0.9764 - loss: 0.2169 - precision: 0.9581 - recall: 0.9120
Epoch 25: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Epoch 25: False Positive Rate = 0.0332
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - accuracy: 0.9210 - auc: 0.9764 - loss: 0.2169 - precision: 0.9582 - recall: 0.9120 - val_accuracy: 0.9342 - val_auc: 0.9817 - val_loss: 0.2058 - val_precision: 0.9781 - val_recall: 0.9141 - learning_rate: 5.0000e-04
Epoch 26/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - accuracy: 0.9222 - auc: 0.9774 - loss: 0.2144 - precision: 0.9636 - recall: 0.9084
Epoch 26: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 31ms/step - accuracy: 0.9222 - auc: 0.9774 - loss: 0.2144 - precision: 0.9636 - recall: 0.9083 - val_accuracy: 0.9342 - val_auc: 0.9827 - val_loss: 0.2037 - val_precision: 0.9775 - val_recall: 0.9146 - learning_rate: 5.0000e-04
Epoch 27/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9181 - auc: 0.9764 - loss: 0.2161 - precision: 0.9590 - recall: 0.9062
Epoch 27: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - accuracy: 0.9182 - auc: 0.9764 - loss: 0.2161 - precision: 0.9591 - recall: 0.9062 - val_accuracy: 0.9338 - val_auc: 0.9827 - val_loss: 0.2037 - val_precision: 0.9752 - val_recall: 0.9163 - learning_rate: 5.0000e-04
Epoch 28/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - accuracy: 0.9270 - auc: 0.9785 - loss: 0.2126 - precision: 0.9639 - recall: 0.9161
Epoch 28: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 32ms/step - accuracy: 0.9269 - auc: 0.9785 - loss: 0.2127 - precision: 0.9639 - recall: 0.9160 - val_accuracy: 0.9359 - val_auc: 0.9822 - val_loss: 0.2052 - val_precision: 0.9776 - val_recall: 0.9175 - learning_rate: 5.0000e-04
Epoch 29/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - accuracy: 0.9210 - auc: 0.9757 - loss: 0.2183 - precision: 0.9639 - recall: 0.9062
Epoch 29: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.9211 - auc: 0.9757 - loss: 0.2183 - precision: 0.9639 - recall: 0.9062 - val_accuracy: 0.9335 - val_auc: 0.9824 - val_loss: 0.2055 - val_precision: 0.9769 - val_recall: 0.9141 - learning_rate: 5.0000e-04
Epoch 30/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step - accuracy: 0.9217 - auc: 0.9767 - loss: 0.2174 - precision: 0.9629 - recall: 0.9083
Epoch 30: val_precision did not improve from 0.97816
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Epoch 30: False Positive Rate = 0.0342
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 34ms/step - accuracy: 0.9217 - auc: 0.9767 - loss: 0.2174 - precision: 0.9629 - recall: 0.9083 - val_accuracy: 0.9345 - val_auc: 0.9826 - val_loss: 0.2062 - val_precision: 0.9775 - val_recall: 0.9152 - learning_rate: 5.0000e-04
Epoch 31/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - accuracy: 0.9253 - auc: 0.9779 - loss: 0.2156 - precision: 0.9653 - recall: 0.9119
Epoch 31: val_precision improved from 0.97816 to 0.97862, saving model to ../models/nn_malware_detector.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 30ms/step - accuracy: 0.9253 - auc: 0.9778 - loss: 0.2157 - precision: 0.9653 - recall: 0.9119 - val_accuracy: 0.9331 - val_auc: 0.9823 - val_loss: 0.2066 - val_precision: 0.9786 - val_recall: 0.9118 - learning_rate: 5.0000e-04
Epoch 32/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - accuracy: 0.9240 - auc: 0.9787 - loss: 0.2142 - precision: 0.9667 - recall: 0.9083
Epoch 32: val_precision did not improve from 0.97862
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - accuracy: 0.9240 - auc: 0.9787 - loss: 0.2143 - precision: 0.9667 - recall: 0.9082 - val_accuracy: 0.9317 - val_auc: 0.9823 - val_loss: 0.2074 - val_precision: 0.9762 - val_recall: 0.9118 - learning_rate: 5.0000e-04
Epoch 33/100
88/89 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - accuracy: 0.9242 - auc: 0.9763 - loss: 0.2193 - precision: 0.9655 - recall: 0.9098
Epoch 33: val_precision did not improve from 0.97862
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 31ms/step - accuracy: 0.9241 - auc: 0.9763 - loss: 0.2194 - precision: 0.9655 - recall: 0.9096 - val_accuracy: 0.9289 - val_auc: 0.9822 - val_loss: 0.2096 - val_precision: 0.9773 - val_recall: 0.9061 - learning_rate: 5.0000e-04
Epoch 34/100
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - accuracy: 0.9251 - auc: 0.9758 - loss: 0.2219 - precision: 0.9654 - recall: 0.9114
Epoch 34: val_precision did not improve from 0.97862

Epoch 34: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
89/89 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - accuracy: 0.9251 - auc: 0.9758 - loss: 0.2219 - precision: 0.9654 - recall: 0.9114 - val_accuracy: 0.9349 - val_auc: 0.9821 - val_loss: 0.2111 - val_precision: 0.9775 - val_recall: 0.9158 - learning_rate: 5.0000e-04
Epoch 34: early stopping
Restoring model weights from the end of the best epoch: 14.

def tune_nn_hyperparameters(resume=True, n_trials=100):
    """Perform hyperparameter tuning for the neural network malware detection model using Optuna.

    Args:
        resume: Whether to resume from a previous study if available
        n_trials: Maximum number of trials to run

    Returns:
        tuple: Best configuration, tuned model, and Optuna study object
    """
    model_dir = "../models"

    logger.info(
        f"Starting hyperparameter tuning for neural network model with Optuna (max {n_trials} trials)"
    )

    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(f"{model_dir}/checkpoints", exist_ok=True)

    storage_name = f"sqlite:///{model_dir}/optuna_malware_detection.db"
    study_name = "malware_detection_nn"

    x_tune_train, x_tune_val, y_tune_train, y_tune_val = train_test_split(
        x_train_scaled,
        y_train,
        test_size=0.2,
        random_state=RANDOM_SEED,
        stratify=y_train,
    )

    def objective(trial):
        use_residual = trial.suggest_categorical("use_residual", [True, False])
        n_layers = trial.suggest_int("n_layers", 2, 4)

        initial_scale = trial.suggest_float("initial_scale", 0.8, 1.2)
        initial_tuned_neurons = int(x_tune_train.shape[1] * initial_scale)

        tuned_neuron_sizes = [initial_tuned_neurons]
        for tuned_layer_num in range(1, n_layers):
            reduction_factor = trial.suggest_float(
                f"layer_{tuned_layer_num}_reduction", 0.4, 0.6
            )
            tuned_neuron_sizes.append(
                int(tuned_neuron_sizes[tuned_layer_num - 1] * reduction_factor)
            )

        trial_base_dropout = trial.suggest_float("dropout_base", 0.2, 0.4)
        tuned_decay_dropout = trial.suggest_float("dropout_decay", 0.6, 0.9)
        dropouts = [
            trial_base_dropout * (tuned_decay_dropout**tuned_layer_num)
            for tuned_layer_num in range(n_layers)
        ]

        l1_reg = trial.suggest_float("l1_reg", 1e-7, 1e-5, log=True)
        l2_reg = trial.suggest_float("l2_reg", 1e-6, 1e-4, log=True)

        fp_weight_param = trial.suggest_float("fp_weight", 2.0, 4.0)
        focal_gamma = trial.suggest_float("focal_gamma", 1.0, 2.5)

        learning_rate = trial.suggest_float("learning_rate", 1e-4, 5e-3, log=True)
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

        checkpoint_filepath = f"{model_dir}/checkpoints/trial_{trial.number}.json"

        try:
            with open(checkpoint_filepath, "w") as checkpoint_file:
                json.dump(
                    {"trial_id": trial.number, "status": "started"}, checkpoint_file
                )

            def tuned_weighted_focal_loss(y_true, y_pred):
                y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)

                bce = -(
                    y_true * tf.math.log(y_pred)
                    + (1 - y_true) * tf.math.log(1 - y_pred)
                )

                pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
                focal_weight = tf.pow(1 - pt, focal_gamma)

                weights = y_true + fp_weight_param * (1.0 - y_true)

                weighted_loss = weights * focal_weight * bce

                return tf.reduce_mean(weighted_loss)

            inputs = tf.keras.layers.Input(shape=(x_tune_train.shape[1],))

            x = Dense(
                tuned_neuron_sizes[0],
                activation="relu",
                kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
            )(inputs)
            x = BatchNormalization()(x)
            x = Dropout(dropouts[0])(x)

            if use_residual:
                skip = x
                x = Dense(
                    tuned_neuron_sizes[0],
                    activation="relu",
                    kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
                )(x)
                x = BatchNormalization()(x)
                x = Dropout(dropouts[0] * 0.8)(x)
                x = tf.keras.layers.Add()([x, skip])

            for tuned_layer_num in range(1, n_layers):
                x = Dense(
                    tuned_neuron_sizes[tuned_layer_num],
                    activation="relu",
                    kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
                )(x)
                x = BatchNormalization()(x)
                x = Dropout(dropouts[tuned_layer_num])(x)

            outputs = Dense(1, activation="sigmoid")(x)

            model = tf.keras.Model(inputs, outputs)

            model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss=tuned_weighted_focal_loss,
                metrics=[
                    "accuracy",
                    tf.keras.metrics.Precision(name="precision"),
                    tf.keras.metrics.Recall(name="recall"),
                    tf.keras.metrics.AUC(name="auc"),
                ],
            )

            trial_callbacks = [
                EarlyStopping(
                    monitor="val_auc",
                    mode="max",
                    patience=10,
                    restore_best_weights=True,
                    verbose=0,
                ),
                ReduceLROnPlateau(
                    monitor="val_loss",
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6,
                    verbose=0,
                ),
                tf.keras.callbacks.ModelCheckpoint(
                    filepath=f"{model_dir}/checkpoints/trial_{trial.number}_best.keras",
                    monitor="val_auc",
                    mode="max",
                    save_best_only=True,
                    verbose=0,
                ),
            ]

            trial_history = model.fit(
                x_tune_train,
                y_tune_train,
                epochs=40,
                batch_size=batch_size,
                validation_data=(x_tune_val, y_tune_val),
                class_weight=class_weight_dict,
                callbacks=trial_callbacks,
                verbose=0,
            )

            y_val_prob = model.predict(x_tune_val, verbose=0).flatten()

            fpr_arr, tpr_arr, threshold_arr = roc_curve(y_tune_val, y_val_prob)

            target_fpr = 0.01
            fp_idx = np.argmin(np.abs(fpr_arr - target_fpr))
            fp_threshold = threshold_arr[fp_idx]
            achieved_tpr_at_fp = tpr_arr[fp_idx]

            target_tpr = 0.95
            tpr_idx = np.argmin(np.abs(tpr_arr - target_tpr))
            tpr_threshold = threshold_arr[tpr_idx]
            achieved_fpr_at_tpr = fpr_arr[tpr_idx]

            balanced_score = achieved_tpr_at_fp * 0.7 + (1 - achieved_fpr_at_tpr) * 0.3

            trial_results = {
                "trial_id": trial.number,
                "status": "completed",
                "score": float(balanced_score),
                "params": trial.params,
                "metrics": {
                    "tpr_at_1pct_fpr": float(achieved_tpr_at_fp),
                    "fpr_at_95pct_tpr": float(achieved_fpr_at_tpr),
                    "threshold_1pct_fpr": float(fp_threshold),
                    "threshold_95pct_tpr": float(tpr_threshold),
                    "final_val_auc": float(trial_history.history["val_auc"][-1]),
                    "final_val_loss": float(trial_history.history["val_loss"][-1]),
                },
            }

            with open(checkpoint_filepath, "w") as checkpoint_file:
                json.dump(trial_results, checkpoint_file, indent=2)

            trial.report(balanced_score, step=0)

            if trial.number % 10 == 0:
                logger.info(
                    f"Trial {trial.number}/{n_trials}: Score={balanced_score:.4f}, "
                    f"TPR@1%FPR={achieved_tpr_at_fp:.4f}, "
                    f"FPR@95%TPR={achieved_fpr_at_tpr:.4f}"
                )

            return balanced_score

        except Exception as e:
            with open(checkpoint_filepath, "w") as checkpoint_file:
                json.dump(
                    {
                        "trial_id": trial.number,
                        "status": "failed",
                        "error": str(e),
                    },
                    checkpoint_file,
                )
            logger.warning(f"Trial {trial.number} failed with error: {str(e)}")
            return 0.0

    db_path = storage_name.replace("sqlite:///", "")
    if resume and os.path.exists(db_path):
        study = optuna.load_study(study_name=study_name, storage=storage_name)
        logger.info(f"Resuming study with {len(study.trials)} previous trials")
    else:
        study = optuna.create_study(
            direction="maximize", study_name=study_name, storage=storage_name
        )

    completed_trials = len(study.trials)

    try:
        remaining_trials = n_trials - completed_trials
        if remaining_trials > 0:
            logger.info(f"Running {remaining_trials} additional trials")
            study.optimize(
                objective,
                n_trials=remaining_trials,
                catch=(ValueError, RuntimeError),
            )
        else:
            logger.info(
                f"All {n_trials} trials already completed, using existing results"
            )
    except Exception as optimization_error:
        logger.error(f"Optimization failed: {str(optimization_error)}")
        fallback_config = {
            "name": "Fallback-configuration",
            "hidden_layers_scale": (0.6, 0.3, 0.15),
            "dropout_rates": (0.3, 0.25, 0.2, 0.15),
            "learning_rate": 0.001,
            "fp_weight": 3.0,
            "use_residual": True,
            "focal_gamma": 1.5,
        }

        fallback_model = build_malware_detection_nn(
            input_dimension=x_train_scaled.shape[1],
            learning_rate=fallback_config["learning_rate"],
            hidden_layers_scale=fallback_config["hidden_layers_scale"],
            dropout_rates=fallback_config["dropout_rates"],
            fp_weight=fallback_config["fp_weight"],
        )

        return fallback_config, fallback_model, study

    if len(study.trials) > 0:
        best_params = study.best_params
        best_score = study.best_value
        best_trial = study.best_trial.number

        logger.info(f"Best balanced score: {best_score:.4f} (Trial {best_trial})")
        logger.info(f"Best parameters: {best_params}")

        initial_neurons = int(x_train_scaled.shape[1] * best_params["initial_scale"])
        neuron_sizes = [initial_neurons]

        for layer_num in range(1, best_params["n_layers"]):
            neuron_sizes.append(
                int(
                    neuron_sizes[layer_num - 1]
                    * best_params[f"layer_{layer_num}_reduction"]
                )
            )

        hidden_layers_scale = [size / x_train_scaled.shape[1] for size in neuron_sizes]

        opt_base_dropout = best_params["dropout_base"]
        decay_dropout = best_params["dropout_decay"]
        dropout_rates = [
            opt_base_dropout * (decay_dropout**layer_num)
            for layer_num in range(best_params["n_layers"])
        ]

        dropout_rates.append(dropout_rates[-1] * decay_dropout)

        optimal_config = {
            "name": "Optuna-optimized",
            "hidden_layers_scale": tuple(hidden_layers_scale),
            "dropout_rates": tuple(dropout_rates),
            "learning_rate": best_params["learning_rate"],
            "fp_weight": best_params["fp_weight"],
            "use_residual": best_params.get("use_residual", False),
            "focal_gamma": best_params.get("focal_gamma", 2.0),
            "l1_reg": best_params.get("l1_reg", 1e-6),
            "l2_reg": best_params.get("l2_reg", 1e-5),
        }
    else:
        logger.warning("No successful trials found, using fallback configuration")
        optimal_config = {
            "name": "Fallback-configuration",
            "hidden_layers_scale": (0.6, 0.3, 0.15),
            "dropout_rates": (0.3, 0.25, 0.2, 0.15),
            "learning_rate": 0.001,
            "fp_weight": 3.0,
            "use_residual": True,
            "focal_gamma": 1.5,
        }

    optimal_model = build_malware_detection_nn(
        input_dimension=x_train_scaled.shape[1],
        learning_rate=optimal_config["learning_rate"],
        hidden_layers_scale=optimal_config["hidden_layers_scale"],
        dropout_rates=optimal_config["dropout_rates"],
        fp_weight=optimal_config["fp_weight"],
    )

    optimal_model.save(f"{model_dir}/best_nn_model.keras")

    with open(f"{model_dir}/best_config.json", "w") as config_file:
        json_config = {
            k: (list(v) if isinstance(v, tuple) else v)
            for k, v in optimal_config.items()
        }
        json.dump(json_config, config_file, indent=2)

    return optimal_config, optimal_model, study


try:
    tuned_config, tuned_model, tuning_results = tune_nn_hyperparameters(n_trials=100)

    if tuning_results is not None:
        with open(f"../models/tuning_summary.json", "w") as summary_file:
            summary = {
                "best_trial": tuning_results.best_trial.number,
                "best_score": tuning_results.best_value,
                "n_trials": len(tuning_results.trials),
                "completion_time": datetime.now().isoformat(),
                "total_trials_requested": 100,
                "completed_trials": len(
                    [
                        trial
                        for trial in tuning_results.trials
                        if trial.state == optuna.trial.TrialState.COMPLETE
                    ]
                ),
                "failed_trials": len(
                    [
                        trial
                        for trial in tuning_results.trials
                        if trial.state == optuna.trial.TrialState.FAIL
                    ]
                ),
            }
            json.dump(summary, summary_file, indent=2)

except Exception as hyperparameter_error:
    logger.error(f"Hyperparameter tuning failed: {str(hyperparameter_error)}")
    default_config = {
        "name": "Default-configuration",
        "hidden_layers_scale": (0.5, 0.25),
        "dropout_rates": (0.2, 0.3, 0.2),
        "learning_rate": 0.001,
        "fp_weight": 2.5,
    }
    tuned_model = build_malware_detection_nn(
        input_dimension=x_train_scaled.shape[1],
        learning_rate=default_config["learning_rate"],
        hidden_layers_scale=default_config["hidden_layers_scale"],
        dropout_rates=default_config["dropout_rates"],
        fp_weight=default_config["fp_weight"],
    )

    fallback_model_dir = "../models"
    os.makedirs(fallback_model_dir, exist_ok=True)
    tuned_model.save(f"{fallback_model_dir}/fallback_nn_model.keras")

    with open(f"{fallback_model_dir}/fallback_config.json", "w") as fallback_file:
        fallback_json_config = {
            k: (list(v) if isinstance(v, tuple) else v)
            for k, v in default_config.items()
        }
        json.dump(fallback_json_config, fallback_file, indent=2)

    tuning_results = None

2025-05-18 17:38:52,815 - __main__ - INFO - Starting hyperparameter tuning for neural network model with Optuna (max 100 trials)
2025-05-18 17:38:53,862 - __main__ - INFO - Resuming study with 100 previous trials
2025-05-18 17:38:53,879 - __main__ - INFO - All 100 trials already completed, using existing results
2025-05-18 17:38:53,898 - __main__ - INFO - Best balanced score: 0.8874 (Trial 48)
2025-05-18 17:38:53,898 - __main__ - INFO - Best parameters: {'use_residual': True, 'n_layers': 3, 'initial_scale': 1.1507435007963025, 'layer_1_reduction': 0.556686860856263, 'layer_2_reduction': 0.5733684742158552, 'dropout_base': 0.21815664554191094, 'dropout_decay': 0.6485342382519644, 'l1_reg': 9.512848320543392e-07, 'l2_reg': 6.972612486743858e-05, 'fp_weight': 3.4757893684029226, 'focal_gamma': 1.624336264314367, 'learning_rate': 0.004104047450574272, 'batch_size': 128}

y_nn_prob = nn_model.predict(x_test_scaled).flatten()


def find_optimal_threshold(y_true, y_prob, target_fpr=0.05):
    """Find the optimal threshold that gives a false positive rate closest to the target while maximizing true positives.

    Args:
        y_true: Ground truth labels
        y_prob: Predicted probabilities
        target_fpr: Target false positive rate

    Returns:
        Optimal threshold value
    """
    fpr_values, tpr_values, all_thresholds = roc_curve(y_true, y_prob)

    scores = []
    min_recall = 0.97

    for threshold_idx in range(len(all_thresholds)):
        recall_penalty = (
            0
            if tpr_values[threshold_idx] >= min_recall
            else (min_recall - tpr_values[threshold_idx]) * 5
        )
        fpr_penalty = abs(fpr_values[threshold_idx] - target_fpr) * 10
        scores.append(tpr_values[threshold_idx] - fpr_penalty - recall_penalty)

    best_idx = np.argmax(scores).astype(np.int64)
    selected_threshold = all_thresholds[best_idx]
    achieved_fpr = fpr_values[best_idx]
    achieved_tpr = tpr_values[best_idx]

    logger.info(f"Target FPR: {target_fpr:.4f}")
    logger.info(
        f"Achieved FPR: {achieved_fpr:.4f} at threshold {selected_threshold:.4f}"
    )
    logger.info(f"Corresponding TPR: {achieved_tpr:.4f}")

    return selected_threshold


optimal_threshold = find_optimal_threshold(y_test, y_nn_prob, target_fpr=0.05)

y_nn_pred_threshold = np.array(y_nn_prob >= optimal_threshold, dtype=np.int32)

148/148 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
2025-05-18 17:38:54,643 - __main__ - INFO - Target FPR: 0.0500
2025-05-18 17:38:54,644 - __main__ - INFO - Achieved FPR: 0.0498 at threshold 0.4619
2025-05-18 17:38:54,644 - __main__ - INFO - Corresponding TPR: 0.9269

nn_results_threshold = evaluate_model_performance(
    y_test,
    y_nn_pred_threshold,
    y_nn_prob,
    "Neural Network (Optimized Threshold)",
)

y_nn_pred_default = np.array(y_nn_prob >= 0.5, dtype=np.int32)
nn_results_default = evaluate_model_performance(
    y_test, y_nn_pred_default, y_nn_prob, "Neural Network (Default Threshold)"
)

2025-05-18 17:38:54,744 - windows_malware_classifier.visualization.models_plots - INFO - Neural Network (Optimized Threshold) Performance Metrics:
2025-05-18 17:38:54,744 - windows_malware_classifier.visualization.models_plots - INFO - Accuracy: 0.9358
2025-05-18 17:38:54,744 - windows_malware_classifier.visualization.models_plots - INFO - Precision: 0.9679 (indicates false positive control)
2025-05-18 17:38:54,745 - windows_malware_classifier.visualization.models_plots - INFO - Recall: 0.9269
2025-05-18 17:38:54,745 - windows_malware_classifier.visualization.models_plots - INFO - F1 Score: 0.9470
2025-05-18 17:38:54,745 - windows_malware_classifier.visualization.models_plots - INFO - False Positive Rate: 0.0498
2025-05-18 17:38:54,745 - windows_malware_classifier.visualization.models_plots - INFO - True Positive Rate: 0.9269
2025-05-18 17:38:54,745 - windows_malware_classifier.visualization.models_plots - INFO - AUC: 0.9824
2025-05-18 17:38:54,762 - windows_malware_classifier.visualization.models_plots - INFO - Neural Network (Default Threshold) Performance Metrics:
2025-05-18 17:38:54,762 - windows_malware_classifier.visualization.models_plots - INFO - Accuracy: 0.9322
2025-05-18 17:38:54,762 - windows_malware_classifier.visualization.models_plots - INFO - Precision: 0.9749 (indicates false positive control)
2025-05-18 17:38:54,762 - windows_malware_classifier.visualization.models_plots - INFO - Recall: 0.9139
2025-05-18 17:38:54,762 - windows_malware_classifier.visualization.models_plots - INFO - F1 Score: 0.9434
2025-05-18 17:38:54,763 - windows_malware_classifier.visualization.models_plots - INFO - False Positive Rate: 0.0382
2025-05-18 17:38:54,763 - windows_malware_classifier.visualization.models_plots - INFO - True Positive Rate: 0.9139
2025-05-18 17:38:54,763 - windows_malware_classifier.visualization.models_plots - INFO - AUC: 0.9824

results_list = [
    dummy_results,
    rf_results,
    xgb_results,
    nn_results_default,
    nn_results_threshold,
]
results_df = pd.DataFrame(results_list)

display_cols = ["model", "accuracy", "precision", "recall", "f1", "fpr", "tpr"]
if "auc" in results_df.columns:
    display_cols.append("auc")

results_comparison = results_df[display_cols]
display(Markdown("### Model Performance Comparison:"))
display(results_comparison)

plt.figure(figsize=(10, 8))

fpr, tpr, _ = roc_curve(y_test, y_dummy_prob)
plt.plot(fpr, tpr, label=f"Dummy (AUC = {auc(fpr, tpr):.4f})")

fpr, tpr, _ = roc_curve(y_test, y_rf_prob)
plt.plot(fpr, tpr, label=f"Random Forest (AUC = {auc(fpr, tpr):.4f})")

fpr, tpr, _ = roc_curve(y_test, y_xgb_prob)
plt.plot(fpr, tpr, label=f"XGBoost (AUC = {auc(fpr, tpr):.4f})")

fpr, tpr, _ = roc_curve(y_test, y_nn_prob)
plt.plot(fpr, tpr, label=f"Neural Network (AUC = {auc(fpr, tpr):.4f})")

plt.plot([0, 1], [0, 1], "k--")

plt.axvline(x=0.05, color="r", linestyle="--", alpha=0.3, label="5% FPR Target")

nn_fpr, nn_tpr, _ = roc_curve(y_test, y_nn_prob)
idx = np.argmin(np.abs(nn_fpr - 0.05))
plt.scatter([nn_fpr[idx]], [nn_tpr[idx]], color="red", s=100, zorder=5)
plt.annotate(
    f"Optimized Threshold\nFPR={nn_fpr[idx]:.3f}, TPR={nn_tpr[idx]:.3f}",
    (nn_fpr[idx], nn_tpr[idx]),
    xytext=(0.1, 0.4),
    arrowprops=dict(arrowstyle="->", color="black"),
)

plt.title("ROC Curve Comparison", fontsize=15)
plt.xlabel("False Positive Rate", fontsize=12)
plt.ylabel("True Positive Rate", fontsize=12)
plt.xlim([0, 1])
plt.ylim([0, 1.05])
plt.grid(True, alpha=0.3)
plt.legend(loc="lower right", fontsize=12)

plt.tight_layout()
plt.show()

plt.figure(figsize=(10, 8))

ap_dummy = average_precision_score(y_test, y_dummy_prob)
ap_rf = average_precision_score(y_test, y_rf_prob)
ap_xgb = average_precision_score(y_test, y_xgb_prob)
ap_nn = average_precision_score(y_test, y_nn_prob)

precision, recall, _ = precision_recall_curve(y_test, y_dummy_prob)
plt.plot(recall, precision, label=f"Dummy (AP = {ap_dummy:.4f})")

precision, recall, _ = precision_recall_curve(y_test, y_rf_prob)
plt.plot(recall, precision, label=f"Random Forest (AP = {ap_rf:.4f})")

precision, recall, _ = precision_recall_curve(y_test, y_xgb_prob)
plt.plot(recall, precision, label=f"XGBoost (AP = {ap_xgb:.4f})")

precision, recall, _ = precision_recall_curve(y_test, y_nn_prob)
plt.plot(recall, precision, label=f"Neural Network (AP = {ap_nn:.4f})")

plt.axhline(
    y=sum(y_test) / len(y_test),
    color="k",
    linestyle="--",
    label=f"No Skill (AP = {sum(y_test) / len(y_test):.4f})",
)

nn_precision, nn_recall, pr_thresholds = precision_recall_curve(y_test, y_nn_prob)
reversed_thresholds = pr_thresholds[::-1]
idx = np.minimum(
    np.searchsorted(reversed_thresholds, optimal_threshold), len(nn_precision) - 2
)
plt.scatter([nn_recall[idx]], [nn_precision[idx]], color="red", s=100, zorder=5)
plt.annotate(
    f"Optimized Threshold Precision={nn_precision[idx]:.3f}, Recall={nn_recall[idx]:.3f}",
    (nn_recall[idx], nn_precision[idx]),
    xytext=(0.5, 0.5),
    arrowprops=dict(arrowstyle="->", color="black"),
)

plt.title("Precision-Recall Curve Comparison", fontsize=15)
plt.xlabel("Recall", fontsize=12)
plt.ylabel("Precision", fontsize=12)
plt.xlim([0, 1])
plt.ylim([0, 1.05])
plt.grid(True, alpha=0.3)
plt.legend(loc="lower left", fontsize=12)
plt.tight_layout()
plt.show()

def identify_text_features():
    """Identifies text-related features in the dataset.

    Returns:
        list: A list of column names that are related to text features.
    """
    text_features = [
        col
        for col in x_train.columns
        if any(
            x in col.lower()
            for x in [
                "string",
                "text",
                "url",
                "registry",
                "email",
                "path",
                "unicode",
                "base64",
                "hex",
            ]
        )
    ]

    print(f"Identified {len(text_features)} text-related features:")
    print(text_features[:10])
    if len(text_features) > 10:
        print(f"...and {len(text_features) - 10} more")

    return text_features


def prepare_datasets_without_text(text_features):
    """Creates versions of training and test sets without text features.

    Args:
        text_features: List of text-related feature names to remove

    Returns:
        tuple: (x_train_no_text, x_test_no_text) datasets without text features
    """
    x_train_no_text = x_train_scaled.copy()
    x_train_no_text = np.delete(
        x_train_no_text,
        [
            x_train.columns.get_loc(col)
            for col in text_features
            if col in x_train.columns
        ],
        axis=1,
    )

    x_test_no_text = x_test_scaled.copy()
    x_test_no_text = np.delete(
        x_test_no_text,
        [x_test.columns.get_loc(col) for col in text_features if col in x_test.columns],
        axis=1,
    )

    print(f"\nFeature dimensions after removing text features:")
    print(f"Original: {x_train_scaled.shape[1]} features")
    print(f"Without text: {x_train_no_text.shape[1]} features")
    print(f"Removed {x_train_scaled.shape[1] - x_train_no_text.shape[1]} text features")

    return x_train_no_text, x_test_no_text


def train_model_without_text(x_train_no_text):
    """Trains an XGBoost model without text features and evaluates performance.

    Args:
        x_train_no_text: Training data without text features

    Returns:
        dict: Performance metrics for the model without text features
    """
    xgb_no_text = xgb.XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        objective="binary:logistic",
        scale_pos_weight=class_weight_dict[1] / class_weight_dict[0],
        random_state=RANDOM_SEED,
        n_jobs=-1,
    )

    xgb_no_text.fit(x_train_no_text, y_train)

    print("Model without text features has been trained.")
    print("Performance would typically be evaluated here.")

    return {
        "model": "XGBoost (No Text Features)",
        "note": "Trained but not evaluated - placeholder for text feature analysis",
    }

class WarmupCosineDecayScheduler(tf.keras.callbacks.Callback):
    """Learning rate scheduler with warmup and cosine decay.

    Implements a learning rate schedule that starts with a warm-up period
    followed by cosine decay to a minimum learning rate.
    """

    def __init__(
        self,
        initial_lr=0.001,
        min_lr=1e-6,
        warmup_epochs=5,
        decay_epochs=95,
    ):
        """Initialize the learning rate scheduler with warmup and cosine decay.

        Args:
            initial_lr (float): Initial learning rate. Default is 0.001.
            min_lr (float): Minimum learning rate after decay. Default is 1e-6.
            warmup_epochs (int): Number of epochs for the warmup phase. Default is 5.
            decay_epochs (int): Number of epochs for the decay phase. Default is 95.
        """
        super(WarmupCosineDecayScheduler, self).__init__()
        self.initial_lr = initial_lr
        self.min_lr = min_lr
        self.warmup_epochs = warmup_epochs
        self.decay_epochs = decay_epochs

    def on_epoch_begin(self, epoch, _=None):
        """Update learning rate at the beginning of each epoch.

        Args:
            epoch (int): Current epoch number
            _ (dict, optional): Unused parameter for compatibility with Keras callback API.
        """
        if epoch < self.warmup_epochs:
            lr = self.initial_lr * ((epoch + 1) / self.warmup_epochs)
        else:
            progress = (epoch - self.warmup_epochs) / self.decay_epochs
            cosine_decay = 0.5 * (1 + np.cos(np.pi * min(1.0, progress)))
            lr = self.min_lr + (self.initial_lr - self.min_lr) * cosine_decay

        if hasattr(self.model.optimizer, "learning_rate"):
            self.model.optimizer.learning_rate.assign(lr)
        else:
            tf.keras.backend.set_value(self.model.optimizer.lr, lr)

        if (epoch + 1) % 5 == 0:
            logger.info(f"Current learning rate: {lr:.6f}")


def find_optimal_threshold_v2(y_true, y_pred_proba, target_fpr=0.01):
    """Find threshold that achieves target false positive rate.

    Args:
        y_true (array-like): Ground truth labels
        y_pred_proba (array-like): Predicted probabilities
        target_fpr (float): Target false positive rate. Default is 0.01.

    Returns:
        float: Optimal threshold value
    """
    threshold_values = np.linspace(0, 1, 1000)
    best_threshold = 0.5
    best_fpr_diff = float("inf")

    for threshold in threshold_values:
        y_pred = np.array(y_pred_proba >= threshold, dtype=np.int32)

        fp_count = np.sum((y_pred == 1) & (y_true == 0))
        tn_count = np.sum((y_pred == 0) & (y_true == 0))

        current_fpr = (
            fp_count / (fp_count + tn_count) if (fp_count + tn_count) > 0 else 0
        )

        fpr_diff = abs(current_fpr - target_fpr)
        if fpr_diff < best_fpr_diff:
            best_fpr_diff = fpr_diff
            best_threshold = threshold

    return best_threshold


def weighted_binary_crossentropy(y_true, y_pred):
    """Custom loss function with adaptive weights for precision-recall balance.

    Args:
        y_true (tensor): Ground truth labels
        y_pred (tensor): Predicted probabilities

    Returns:
        tensor: Weighted binary cross-entropy loss
    """
    y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)

    bce = -(y_true * tf.math.log(y_pred) + (1 - y_true) * tf.math.log(1 - y_pred))

    pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
    focal_weight = tf.pow(1 - pt, 1.5)

    weights = y_true + 3.0 * (1.0 - y_true)

    weighted_bce = weights * focal_weight * bce

    return tf.reduce_mean(weighted_bce)


def build_final_nn_model(
    x_train_data,
    y_train_data,
    x_val_data,
    y_val_data,
    class_weights_dict,
    input_dimension_size,
):
    """Builds and trains the final neural network model with optimized architecture.

    Args:
        x_train_data (array-like): Training features
        y_train_data (array-like): Training labels
        x_val_data (array-like): Validation features
        y_val_data (array-like): Validation labels
        class_weights_dict (dict): Class weights for imbalanced data
        input_dimension_size (int): Input dimension size

    Returns:
        tuple: (trained model, training history)
    """
    inputs = Input(shape=(input_dimension_size,))

    x = Dense(512, activation="relu", kernel_regularizer=l1_l2(l1=1e-6, l2=1e-5))(
        inputs
    )
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)

    skip = x
    x = Dense(512, activation="relu", kernel_regularizer=l1_l2(l1=1e-6, l2=1e-5))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.25)(x)
    x = Dense(512, activation="relu", kernel_regularizer=l1_l2(l1=1e-6, l2=1e-5))(x)
    x = BatchNormalization()(x)
    x = tf.keras.layers.Add()([x, skip])

    x = Dense(256, activation="relu", kernel_regularizer=l1_l2(l1=1e-6, l2=1e-5))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)

    x = Dense(128, activation="relu", kernel_regularizer=l1_l2(l1=1e-6, l2=1e-5))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.15)(x)

    x = Dense(64, activation="relu", kernel_regularizer=l1_l2(l1=1e-6, l2=1e-5))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.1)(x)

    outputs = Dense(1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=weighted_binary_crossentropy,
        metrics=[
            "accuracy",
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="auc"),
        ],
    )

    callbacks_list = [
        tf.keras.callbacks.EarlyStopping(
            monitor="val_auc",
            mode="max",
            patience=25,
            restore_best_weights=True,
            verbose=1,
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath="../models/nn_malware_final.keras",
            monitor="val_auc",
            mode="max",
            save_best_only=True,
            verbose=1,
        ),
        WarmupCosineDecayScheduler(
            initial_lr=0.001, min_lr=1e-6, warmup_epochs=5, decay_epochs=95
        ),
        FalsePositiveMonitor(validation_data=(x_val_data, y_val_data), print_freq=5),
    ]

    logger.info("Phase 1: Initial training with balanced objective")
    history1 = model.fit(
        x_train_data,
        y_train_data,
        epochs=75,
        batch_size=64,
        validation_data=(x_val_data, y_val_data),
        class_weight=class_weights_dict,
        callbacks=callbacks_list,
        verbose=1,
    )

    logger.info("Phase 2: Fine-tuning with emphasis on precision")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss=lambda y_true, y_pred: weighted_binary_crossentropy(y_true, y_pred) * 1.5,
        metrics=[
            "accuracy",
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="auc"),
        ],
    )

    fine_tuning_callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor="val_precision",
            mode="max",
            patience=15,
            restore_best_weights=True,
            verbose=1,
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath="../models/nn_malware_final_phase2.keras",
            monitor="val_precision",
            mode="max",
            save_best_only=True,
            verbose=1,
        ),
        FalsePositiveMonitor(validation_data=(x_val_data, y_val_data), print_freq=5),
    ]

    history2 = model.fit(
        x_train_data,
        y_train_data,
        epochs=25,
        batch_size=128,
        validation_data=(x_val_data, y_val_data),
        class_weight={
            0: class_weights_dict[0] * 1.5,
            1: class_weights_dict[1],
        },
        callbacks=fine_tuning_callbacks,
        verbose=1,
    )

    combined_history = {}
    for hist_key in history1.history:
        if hist_key in history2.history:
            combined_history[hist_key] = (
                history1.history[hist_key] + history2.history[hist_key]
            )
        else:
            combined_history[hist_key] = history1.history[hist_key]

    return model, combined_history


x_train_nn, x_val_nn, y_train_nn, y_val_nn = train_test_split(
    x_train_scaled,
    y_train,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=y_train,
)

tf.keras.backend.clear_session()

nn_input_dims = x_train_nn.shape[1]
logger.info(f"Starting model training with input dimension: {nn_input_dims}")

final_model, final_history = build_final_nn_model(
    x_train_nn,
    y_train_nn,
    x_val_nn,
    y_val_nn,
    class_weight_dict,
    nn_input_dims,
)

plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.plot(final_history["loss"], label="Train Loss")
plt.plot(final_history["val_loss"], label="Validation Loss")
plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.subplot(1, 3, 2)
plt.plot(final_history["precision"], label="Train Precision")
plt.plot(final_history["val_precision"], label="Validation Precision")
plt.title("Precision (False Positive Control)")
plt.xlabel("Epoch")
plt.ylabel("Precision")
plt.legend()

plt.subplot(1, 3, 3)
plt.plot(final_history["recall"], label="Train Recall")
plt.plot(final_history["val_recall"], label="Validation Recall")
plt.title("Recall")
plt.xlabel("Epoch")
plt.ylabel("Recall")
plt.legend()

plt.tight_layout()
plt.show()

logger.info("Making predictions with the final model...")
y_final_prob = final_model.predict(x_test_scaled).flatten()

final_optimal_threshold = find_optimal_threshold_v2(
    y_test, y_final_prob, target_fpr=0.01
)
logger.info(f"Optimal threshold for 1% FPR: {final_optimal_threshold:.4f}")

y_final_pred = np.array(y_final_prob >= final_optimal_threshold, dtype=np.int32)

2025-05-18 17:38:56,968 - __main__ - INFO - Starting model training with input dimension: 515
2025-05-18 17:38:57,293 - __main__ - INFO - Phase 1: Initial training with balanced objective
Epoch 1/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 47ms/step - accuracy: 0.8393 - auc: 0.9167 - loss: 0.3264 - precision: 0.9147 - recall: 0.8158
Epoch 1: val_auc improved from -inf to 0.97715, saving model to ../models/nn_malware_final.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 18s 63ms/step - accuracy: 0.8395 - auc: 0.9169 - loss: 0.3260 - precision: 0.9149 - recall: 0.8160 - val_accuracy: 0.9239 - val_auc: 0.9772 - val_loss: 0.1922 - val_precision: 0.9765 - val_recall: 0.8987
Epoch 2/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 44ms/step - accuracy: 0.9120 - auc: 0.9709 - loss: 0.1948 - precision: 0.9646 - recall: 0.8903
Epoch 2: val_auc improved from 0.97715 to 0.98136, saving model to ../models/nn_malware_final.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 9s 49ms/step - accuracy: 0.9120 - auc: 0.9709 - loss: 0.1948 - precision: 0.9646 - recall: 0.8903 - val_accuracy: 0.9324 - val_auc: 0.9814 - val_loss: 0.1665 - val_precision: 0.9804 - val_recall: 0.9089
Epoch 3/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step - accuracy: 0.9136 - auc: 0.9740 - loss: 0.1805 - precision: 0.9616 - recall: 0.8960
Epoch 3: val_auc did not improve from 0.98136
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 10s 55ms/step - accuracy: 0.9136 - auc: 0.9740 - loss: 0.1805 - precision: 0.9616 - recall: 0.8959 - val_accuracy: 0.9246 - val_auc: 0.9798 - val_loss: 0.1662 - val_precision: 0.9777 - val_recall: 0.8987
Epoch 4/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 48ms/step - accuracy: 0.9132 - auc: 0.9743 - loss: 0.1778 - precision: 0.9643 - recall: 0.8926
Epoch 4: val_auc did not improve from 0.98136
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 10s 54ms/step - accuracy: 0.9132 - auc: 0.9743 - loss: 0.1778 - precision: 0.9643 - recall: 0.8926 - val_accuracy: 0.9239 - val_auc: 0.9773 - val_loss: 0.1852 - val_precision: 0.9759 - val_recall: 0.8993
2025-05-18 17:39:44,102 - __main__ - INFO - Current learning rate: 0.001000
Epoch 5/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 40ms/step - accuracy: 0.9135 - auc: 0.9741 - loss: 0.1760 - precision: 0.9626 - recall: 0.8948
Epoch 5: val_auc did not improve from 0.98136
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
Epoch 5: False Positive Rate = 0.0295
178/178 ━━━━━━━━━━━━━━━━━━━━ 8s 47ms/step - accuracy: 0.9135 - auc: 0.9741 - loss: 0.1760 - precision: 0.9627 - recall: 0.8947 - val_accuracy: 0.9285 - val_auc: 0.9751 - val_loss: 0.2069 - val_precision: 0.9802 - val_recall: 0.9027
Epoch 6/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step - accuracy: 0.9047 - auc: 0.9720 - loss: 0.1814 - precision: 0.9595 - recall: 0.8831
Epoch 6: val_auc did not improve from 0.98136
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 10s 52ms/step - accuracy: 0.9047 - auc: 0.9720 - loss: 0.1814 - precision: 0.9596 - recall: 0.8831 - val_accuracy: 0.9285 - val_auc: 0.9775 - val_loss: 0.1889 - val_precision: 0.9814 - val_recall: 0.9015
Epoch 7/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step - accuracy: 0.9081 - auc: 0.9731 - loss: 0.1805 - precision: 0.9592 - recall: 0.8892
Epoch 7: val_auc improved from 0.98136 to 0.98166, saving model to ../models/nn_malware_final.keras
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 9s 51ms/step - accuracy: 0.9082 - auc: 0.9731 - loss: 0.1805 - precision: 0.9592 - recall: 0.8892 - val_accuracy: 0.9303 - val_auc: 0.9817 - val_loss: 0.1664 - val_precision: 0.9791 - val_recall: 0.9067
Epoch 8/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step - accuracy: 0.9142 - auc: 0.9724 - loss: 0.1823 - precision: 0.9639 - recall: 0.8946
Epoch 8: val_auc did not improve from 0.98166
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 9s 48ms/step - accuracy: 0.9142 - auc: 0.9724 - loss: 0.1823 - precision: 0.9639 - recall: 0.8946 - val_accuracy: 0.9271 - val_auc: 0.9775 - val_loss: 0.1775 - val_precision: 0.9796 - val_recall: 0.9010
Epoch 9/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 49ms/step - accuracy: 0.9146 - auc: 0.9739 - loss: 0.1794 - precision: 0.9660 - recall: 0.8932
Epoch 9: val_auc did not improve from 0.98166
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 10s 54ms/step - accuracy: 0.9146 - auc: 0.9739 - loss: 0.1795 - precision: 0.9660 - recall: 0.8932 - val_accuracy: 0.9282 - val_auc: 0.9800 - val_loss: 0.1709 - val_precision: 0.9820 - val_recall: 0.9004
2025-05-18 17:40:29,969 - __main__ - INFO - Current learning rate: 0.000996
Epoch 10/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step - accuracy: 0.9120 - auc: 0.9709 - loss: 0.1883 - precision: 0.9624 - recall: 0.8924
Epoch 10: val_auc did not improve from 0.98166
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Epoch 10: False Positive Rate = 0.0305
178/178 ━━━━━━━━━━━━━━━━━━━━ 9s 49ms/step - accuracy: 0.9120 - auc: 0.9709 - loss: 0.1883 - precision: 0.9624 - recall: 0.8924 - val_accuracy: 0.9271 - val_auc: 0.9804 - val_loss: 0.1676 - val_precision: 0.9796 - val_recall: 0.9010
Epoch 11/75
178/178 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step - accuracy: 0.9084 - auc: 0.9721 - loss: 0.1873 - precision: 0.9624 - recall: 0.8865
Epoch 11: val_auc did not improve from 0.98166
89/89 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
178/178 ━━━━━━━━━━━━━━━━━━━━ 8s 47ms/step - accuracy: 0.9084 - auc: 0.9721 - loss: 0.1873 - precision: 0.9624 - recall: 0.8865 - val_accuracy: 0.9275 - val_auc: 0.9798 - val_loss: 0.1719 - val_precision: 0.9820 - val_recall: 0.8993
Epoch 12/75
 90/178 ━━━━━━━━━━━━━━━━━━━━ 3s 40ms/step - accuracy: 0.9176 - auc: 0.9740 - loss: 0.1853 - precision: 0.9651 - recall: 0.8997

Exception ignored in: <function ResourceTracker.__del__ at 0x1051de200>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes

 96/178 ━━━━━━━━━━━━━━━━━━━━ 3s 42ms/step - accuracy: 0.9172 - auc: 0.9738 - loss: 0.1859 - precision: 0.9648 - recall: 0.8991

Exception ignored in: <function ResourceTracker.__del__ at 0x105896200>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes

106/178 ━━━━━━━━━━━━━━━━━━━━ 3s 44ms/step - accuracy: 0.9166 - auc: 0.9735 - loss: 0.1866 - precision: 0.9646 - recall: 0.8982

Exception ignored in: <function ResourceTracker.__del__ at 0x10372a200>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes

111/178 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - accuracy: 0.9163 - auc: 0.9734 - loss: 0.1869 - precision: 0.9645 - recall: 0.8977

Exception ignored in: <function ResourceTracker.__del__ at 0x106a6e200>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes

118/178 ━━━━━━━━━━━━━━━━━━━━ 2s 44ms/step - accuracy: 0.9159 - auc: 0.9733 - loss: 0.1873 - precision: 0.9644 - recall: 0.8972

Exception ignored in: <function ResourceTracker.__del__ at 0x106d3e200>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/opt/homebrew/Cellar/python@3.12/3.12.10/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes

final_results = evaluate_model_performance(
    y_test,
    y_final_pred,
    y_final_prob,
    "Final Neural Network (Optimized for False Positives)",
)

final_comparison = pd.DataFrame(
    [
        {
            "Model": "Dummy Classifier",
            "Accuracy": dummy_results["accuracy"],
            "Precision": dummy_results["precision"],
            "Recall": dummy_results["recall"],
            "F1": dummy_results["f1"],
            "FPR": dummy_results["fpr"],
            "AUC": dummy_results["auc"],
        },
        {
            "Model": "Random Forest",
            "Accuracy": rf_results["accuracy"],
            "Precision": rf_results["precision"],
            "Recall": rf_results["recall"],
            "F1": rf_results["f1"],
            "FPR": rf_results["fpr"],
            "AUC": rf_results["auc"],
        },
        {
            "Model": "XGBoost",
            "Accuracy": xgb_results["accuracy"],
            "Precision": xgb_results["precision"],
            "Recall": xgb_results["recall"],
            "F1": xgb_results["f1"],
            "FPR": xgb_results["fpr"],
            "AUC": xgb_results["auc"],
        },
        {
            "Model": "Neural Network (Default)",
            "Accuracy": nn_results_default["accuracy"],
            "Precision": nn_results_default["precision"],
            "Recall": nn_results_default["recall"],
            "F1": nn_results_default["f1"],
            "FPR": nn_results_default["fpr"],
            "AUC": nn_results_default["auc"],
        },
        {
            "Model": "Final NN (FP-Optimized)",
            "Accuracy": final_results["accuracy"],
            "Precision": final_results["precision"],
            "Recall": final_results["recall"],
            "F1": final_results["f1"],
            "FPR": final_results["fpr"],
            "AUC": final_results["auc"],
        },
    ]
)

display(Markdown("### Final Model Comparison:"))
display(final_comparison)

final_model.save("../models/nn_malware_final_complete.keras")
logger.info("Final model saved in Keras format")

2025-05-18 17:49:39,844 - windows_malware_classifier.visualization.models_plots - INFO - Final Neural Network (Optimized for False Positives) Performance Metrics:
2025-05-18 17:49:39,846 - windows_malware_classifier.visualization.models_plots - INFO - Accuracy: 0.8718
2025-05-18 17:49:39,848 - windows_malware_classifier.visualization.models_plots - INFO - Precision: 0.9924 (indicates false positive control)
2025-05-18 17:49:39,850 - windows_malware_classifier.visualization.models_plots - INFO - Recall: 0.7988
2025-05-18 17:49:39,852 - windows_malware_classifier.visualization.models_plots - INFO - F1 Score: 0.8851
2025-05-18 17:49:39,853 - windows_malware_classifier.visualization.models_plots - INFO - False Positive Rate: 0.0100
2025-05-18 17:49:39,854 - windows_malware_classifier.visualization.models_plots - INFO - True Positive Rate: 0.7988
2025-05-18 17:49:39,854 - windows_malware_classifier.visualization.models_plots - INFO - AUC: 0.9803

2025-05-18 17:49:40,115 - __main__ - INFO - Final model saved in Keras format

final_model.save("../models/final_malware_detector.keras")

model_summary = {
    "Model": "Neural Network (Optimized for False Positives)",
    "Input Features": x_train_scaled.shape[1],
    "Hidden Layers": 4,
    "Accuracy": final_results["accuracy"],
    "Precision": final_results["precision"],
    "Recall": final_results["recall"],
    "F1 Score": final_results["f1"],
    "AUC": final_results["auc"],
    "False Positive Rate": final_results["fpr"],
    "Classification Threshold": final_optimal_threshold,
    "Random Seed": RANDOM_SEED,
}

with open("../models/model_summary.txt", "w") as summary_file:
    for key, value in model_summary.items():
        summary_file.write(f"{key}: {value}\n")

logger.info("Model artifacts saved to ../models/ directory")

2025-05-18 17:50:22,545 - __main__ - INFO - Model artifacts saved to ../models/ directory

	Model	Accuracy	Precision	Recall	F1	FPR
0	Dummy Classifier	0.524081	0.615306	0.615096	0.615201	0.623477
1	Random Forest	0.965146	0.977862	0.965505	0.971645	0.035437
2	XGBoost	0.979299	0.984921	0.981557	0.983236	0.024363

	model	accuracy	precision	recall	f1	fpr	tpr	auc
0	Dummy Classifier (Random Baseline)	0.524081	0.615306	0.615096	0.615201	0.623477	0.615096	0.495809
1	Random Forest (Grid Search)	0.965146	0.977862	0.965505	0.971645	0.035437	0.965505	0.995438
2	XGBoost	0.979299	0.984921	0.981557	0.983236	0.024363	0.981557	0.997825
3	Neural Network (Default Threshold)	0.932193	0.974863	0.913934	0.943416	0.038206	0.913934	0.982353
4	Neural Network (Optimized Threshold)	0.935784	0.967903	0.926913	0.946964	0.049834	0.926913	0.982353

Technique	Description	Advantages	Challenges
Byte Histograms	Frequency distribution of byte values (0-255) in the binary	Simple to implement, captures basic statistical properties	Limited semantic understanding, weak against obfuscation
N-gram Analysis	Frequency of sequences of n consecutive bytes or opcodes	Captures sequence patterns and common signatures	High dimensionality, sensitive to minor byte changes
Entropy Analysis	Measure of randomness in different sections of the binary	Effective at detecting encryption, packing, and obfuscation	Provides limited information by itself, needs context
Image-based Representation	Conversion of binary data to grayscale images for visual pattern recognition	Leverages CNN power for pattern recognition, resistant to small changes	Requires significant preprocessing, computationally expensive
Opcode Sequences	Extraction and analysis of CPU instruction sequences	Reveals actual program behavior and malicious instructions	Requires disassembly which can be imprecise for obfuscated code
Control Flow Graphs	Structural representation of code execution paths	Captures program structure and behavioral patterns	Complex to generate, high computational overhead
Transformer-based Models	Tokenization of raw binary data as sequence input to transformer architectures	Captures long-range dependencies and context, handles variable-length inputs, state-of-the-art performance	Computationally expensive, requires large amounts of training data, black-box nature limits interpretability

	Model	Accuracy	Precision	Recall	F1	FPR	AUC
0	Dummy Classifier	0.524081	0.615306	0.615096	0.615201	0.623477	0.495809
1	Random Forest	0.965146	0.977862	0.965505	0.971645	0.035437	0.995438
2	XGBoost	0.979299	0.984921	0.981557	0.983236	0.024363	0.997825
3	Neural Network (Default)	0.932193	0.974863	0.913934	0.943416	0.038206	0.982353
4	Final NN (FP-Optimized)	0.871779	0.992363	0.798839	0.885147	0.009967	0.980350

03. Modeling and Evaluation¶

Success Metrics¶

Modeling Approach¶

Data Loading and Preparation¶

Baseline Models¶

Dummy Classifier (Random Baseline)¶

Tree-Based Models (Stronger Baseline)¶

Random Forest and XGBoost¶

Baseline Models Summary:¶

Neural Network Model Development¶

Neural Network Architecture¶

Training with False Positive Focus¶

Threshold Optimization for False Positive Reduction¶

Model Evaluation and Comparison¶

Model Performance Comparison:¶

Analysis of Text Features¶

Binary Data Representation Analysis¶

Binary Data Representation Techniques for Malware Detection¶

Final Model Comparison:¶

Conclusion and Recommendations¶