initial commit

2026-06-03 23:24:12 +02:00
commit accd42068d
152 changed files with 100216 additions and 0 deletions
@@ -0,0 +1,294 @@
+# =============================================================================
+# ANALYSE TRADEOFF : Impact de Jobs, Tracks, Site sur NbBranch et TimeRatio
+# Approche en deux étapes :
+#   Étape 1 : Modéliser les ÉCHECS (régression logistique)
+#   Étape 2 : Sur les cas non-échoués, modéliser NbBranch et TimeRatio
+# =============================================================================
+
+# --- Packages -----------------------------------------------------------------
+pkgs <- c("dplyr", "ggplot2", "tidyr", "lme4", "MASS", "nnet", "pROC",
+          "ggeffects", "patchwork", "broom")
+for (p in pkgs) if (!require(p, character.only = TRUE, quietly = TRUE))
+  install.packages(p)
+
+library(dplyr); library(ggplot2); library(tidyr); library(MASS)
+library(nnet); library(pROC); library(ggeffects); library(patchwork); library(broom)
+
+# --- 1. Lecture et préparation ------------------------------------------------
+data_raw <- read.csv2("export_tradeoff.csv", header = TRUE,
+                      stringsAsFactors = FALSE)
+
+# Conversion numérique
+data_raw <- data_raw %>%
+  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))
+
+# Variables factorielles (Site, Jobs, Tracks sont des niveaux discrets)
+data_raw <- data_raw %>%
+  mutate(
+    Site_f    = factor(Site),
+    Jobs_f    = factor(Jobs),
+    Tracks_f  = factor(Tracks)
+  )
+
+cat("=== APERÇU DES DONNÉES ===\n")
+cat("Dimensions :", nrow(data_raw), "lignes x", ncol(data_raw), "colonnes\n")
+cat("\nValeurs de Jobs   :", sort(unique(data_raw$Jobs)), "\n")
+cat("Valeurs de Tracks :", sort(unique(data_raw$Tracks)), "\n")
+cat("Valeurs de Site   :", sort(unique(data_raw$Site)), "\n")
+cat("Valeurs de Rames  :", sort(unique(data_raw$Rames)), "\n")
+
+# =============================================================================
+# ÉTAPE 0 : Vue d'ensemble des taux d'échec
+# =============================================================================
+data_raw <- data_raw %>%
+  mutate(Echec = ifelse(NbBranch == -1, 1L, 0L))
+
+cat("\n=== TAUX D'ÉCHEC GLOBAL ===\n")
+cat(sprintf("Échecs : %d / %d (%.1f%%)\n",
+            sum(data_raw$Echec), nrow(data_raw),
+            100 * mean(data_raw$Echec)))
+
+# Taux d'échec par facteur
+for (var in c("Jobs_f", "Tracks_f", "Site_f")) {
+  cat(sprintf("\n--- Taux d'échec par %s ---\n", var))
+  tbl <- data_raw %>%
+    group_by(.data[[var]]) %>%
+    summarise(N = n(), Echecs = sum(Echec),
+              Taux = round(100 * mean(Echec), 1), .groups = "drop")
+  print(tbl)
+}
+
+# =============================================================================
+# ÉTAPE 1 : MODÉLISATION DES ÉCHECS (Régression logistique)
+# =============================================================================
+cat("\n\n========== ÉTAPE 1 : MODÈLE LOGISTIQUE (Échec vs Succès) ==========\n")
+
+# Modèle avec Jobs, Tracks, Site comme facteurs
+mod_logit <- glm(Echec ~ Jobs_f + Tracks_f + Site_f,
+                 data = data_raw, family = binomial(link = "logit"))
+
+cat("\n--- Résumé du modèle logistique ---\n")
+print(summary(mod_logit))
+
+cat("\n--- Odds-Ratios (exp(coef)) avec IC 95% ---\n")
+or_table <- exp(cbind(OR = coef(mod_logit), confint(mod_logit)))
+print(round(or_table, 3))
+
+# Test de déviance (ANOVA du modèle)
+cat("\n--- Test de déviance (importance de chaque variable) ---\n")
+print(anova(mod_logit, test = "Chisq"))
+
+# AUC / ROC
+roc_obj <- roc(data_raw$Echec, fitted(mod_logit), quiet = TRUE)
+cat(sprintf("\nAUC = %.3f (pouvoir discriminant du modèle logistique)\n", auc(roc_obj)))
+
+# Modèle avec effets d'interaction Jobs × Tracks
+mod_logit_inter <- glm(Echec ~ Jobs_f * Tracks_f + Site_f,
+                       data = data_raw, family = binomial(link = "logit"))
+cat("\n--- Comparaison modèle sans vs avec interaction Jobs×Tracks ---\n")
+print(anova(mod_logit, mod_logit_inter, test = "Chisq"))
+
+# =============================================================================
+# VISUALISATION ÉTAPE 1
+# =============================================================================
+
+# Graphe 1 : Taux d'échec moyen par Jobs et Tracks
+df_heatmap <- data_raw %>%
+  group_by(Jobs_f, Tracks_f) %>%
+  summarise(TauxEchec = mean(Echec) * 100, N = n(), .groups = "drop")
+
+p1 <- ggplot(df_heatmap, aes(x = Jobs_f, y = Tracks_f, fill = TauxEchec)) +
+  geom_tile(color = "white", linewidth = 0.5) +
+  geom_text(aes(label = sprintf("%.0f%%\n(n=%d)", TauxEchec, N)), size = 3) +
+  scale_fill_gradient2(low = "steelblue", mid = "lightyellow", high = "firebrick",
+                       midpoint = 50, name = "% Échec") +
+  labs(title = "Taux d'échec (%) par Jobs et Tracks",
+       x = "Jobs", y = "Tracks") +
+  theme_minimal(base_size = 12)
+
+# Graphe 2 : Taux d'échec par Site
+df_site <- data_raw %>%
+  group_by(Site_f) %>%
+  summarise(TauxEchec = mean(Echec) * 100, N = n(),
+            se = sqrt(mean(Echec) * (1 - mean(Echec)) / n()), .groups = "drop")
+
+p2 <- ggplot(df_site, aes(x = Site_f, y = TauxEchec, fill = Site_f)) +
+  geom_col(width = 0.6) +
+  geom_errorbar(aes(ymin = TauxEchec - 1.96 * se * 100,
+                    ymax = TauxEchec + 1.96 * se * 100), width = 0.2) +
+  scale_fill_brewer(palette = "Set2", guide = "none") +
+  labs(title = "Taux d'échec par Site",
+       x = "Site", y = "% Échec") +
+  theme_minimal(base_size = 12)
+
+# Graphe 3 : Probabilités prédites par le modèle logistique
+pred_data <- expand.grid(
+  Jobs_f   = levels(data_raw$Jobs_f),
+  Tracks_f = levels(data_raw$Tracks_f),
+  Site_f   = levels(data_raw$Site_f)
+) %>%
+  mutate(prob_echec = predict(mod_logit, newdata = ., type = "response"))
+
+p3 <- ggplot(pred_data, aes(x = Jobs_f, y = prob_echec, color = Tracks_f, group = Tracks_f)) +
+  geom_line(linewidth = 1) +
+  geom_point(size = 2) +
+  facet_wrap(~ Site_f, labeller = label_both) +
+  scale_color_brewer(palette = "RdYlGn", name = "Tracks", direction = -1) +
+  labs(title = "Probabilité d'échec prédite (modèle logistique)",
+       x = "Jobs", y = "P(Échec)") +
+  theme_minimal(base_size = 11) +
+  theme(legend.position = "right")
+
+# =============================================================================
+# ÉTAPE 2a : Sous-ensemble des non-échecs
+# =============================================================================
+data_ok <- data_raw %>%
+  filter(NbBranch != -1, TimeRatio != -1) %>%
+  mutate(
+    Statut = case_when(
+      TimeRatio <= 1 ~ "Succès",
+      TimeRatio >  1 ~ "Semi-échec"
+    ),
+    Statut_f    = factor(Statut, levels = c("Succès", "Semi-échec")),
+    logTimeRatio = log(TimeRatio)
+  )
+
+cat(sprintf("\n\nSous-ensemble non-échoués : %d lignes\n", nrow(data_ok)))
+cat(sprintf("  Succès     (TimeRatio ≤ 1) : %d (%.1f%%)\n",
+            sum(data_ok$Statut == "Succès"),
+            100 * mean(data_ok$Statut == "Succès")))
+cat(sprintf("  Semi-échec (TimeRatio > 1)  : %d (%.1f%%)\n",
+            sum(data_ok$Statut == "Semi-échec"),
+            100 * mean(data_ok$Statut == "Semi-échec")))
+
+# =============================================================================
+# ÉTAPE 2b : MODÈLE POUR NbBranch (régression binomiale négative)
+# =============================================================================
+cat("\n\n========== ÉTAPE 2b : NbBranch (régression binomiale négative) ==========\n")
+
+mod_nb <- glm.nb(NbBranch ~ Jobs_f + Tracks_f + Site_f, data = data_ok)
+print(summary(mod_nb))
+
+cat("\n--- IRR (Incidence Rate Ratio = exp(coef)) avec IC 95% ---\n")
+irr_table <- exp(cbind(IRR = coef(mod_nb), confint(mod_nb)))
+print(round(irr_table, 3))
+
+cat("\n--- Test de déviance (importance de chaque variable) ---\n")
+print(anova(mod_nb))
+
+# =============================================================================
+# ÉTAPE 2c : MODÈLE POUR TimeRatio (régression log-linéaire)
+# =============================================================================
+cat("\n\n========== ÉTAPE 2c : TimeRatio (régression log-linéaire) ==========\n")
+
+mod_tr <- lm(logTimeRatio ~ Jobs_f + Tracks_f + Site_f, data = data_ok)
+print(summary(mod_tr))
+
+cat("\n--- ANOVA (importance de chaque variable) ---\n")
+print(anova(mod_tr))
+
+cat("\n--- Effets sur l'échelle originale (exp(coef)) ---\n")
+coef_table <- exp(cbind(Multiplicateur = coef(mod_tr),
+                        confint(mod_tr)))
+print(round(coef_table, 3))
+
+# =============================================================================
+# ÉTAPE 2d : DISTINCTION SUCCÈS vs SEMI-ÉCHEC (régression logistique)
+# =============================================================================
+cat("\n\n========== ÉTAPE 2d : Succès vs Semi-échec (logistique) ==========\n")
+
+mod_logit2 <- glm(Statut_f ~ Jobs_f + Tracks_f + Site_f,
+                  data = data_ok, family = binomial(link = "logit"))
+print(summary(mod_logit2))
+
+cat("\n--- Odds-Ratios ---\n")
+or2 <- exp(cbind(OR = coef(mod_logit2), confint(mod_logit2)))
+print(round(or2, 3))
+
+# =============================================================================
+# VISUALISATION ÉTAPE 2
+# =============================================================================
+
+# Graphe 4 : Distribution de TimeRatio par Jobs et Tracks (non-échecs)
+p4 <- ggplot(data_ok, aes(x = Jobs_f, y = TimeRatio, fill = Jobs_f)) +
+  geom_boxplot(outlier.size = 1, alpha = 0.7) +
+  geom_hline(yintercept = 1, linetype = "dashed", color = "red", linewidth = 0.8) +
+  facet_wrap(~ Tracks_f, labeller = label_both, nrow = 1) +
+  scale_fill_brewer(palette = "Blues", guide = "none") +
+  coord_cartesian(ylim = c(0, quantile(data_ok$TimeRatio, 0.95))) +
+  labs(title = "TimeRatio par Jobs et Tracks (cas non-échoués)\n[ligne rouge = seuil 1 : succès / semi-échec]",
+       x = "Jobs", y = "TimeRatio") +
+  theme_minimal(base_size = 11)
+
+# Graphe 5 : Distribution de NbBranch par Jobs et Tracks
+p5 <- ggplot(data_ok, aes(x = Jobs_f, y = NbBranch, fill = Jobs_f)) +
+  geom_boxplot(outlier.size = 1, alpha = 0.7) +
+  facet_wrap(~ Tracks_f, labeller = label_both, nrow = 1) +
+  scale_fill_brewer(palette = "Greens", guide = "none") +
+  labs(title = "NbBranch par Jobs et Tracks (cas non-échoués)",
+       x = "Jobs", y = "NbBranch") +
+  theme_minimal(base_size = 11)
+
+# Graphe 6 : % semi-échec par Jobs et Tracks
+df_semi <- data_ok %>%
+  group_by(Jobs_f, Tracks_f) %>%
+  summarise(PctSemi = 100 * mean(Statut == "Semi-échec"), N = n(), .groups = "drop")
+
+p6 <- ggplot(df_semi, aes(x = Jobs_f, y = Tracks_f, fill = PctSemi)) +
+  geom_tile(color = "white") +
+  geom_text(aes(label = sprintf("%.0f%%\n(n=%d)", PctSemi, N)), size = 3) +
+  scale_fill_gradient2(low = "steelblue", mid = "lightyellow", high = "orange",
+                       midpoint = 50, name = "% Semi-échec") +
+  labs(title = "% Semi-échec (TimeRatio > 1) parmi les non-échoués",
+       x = "Jobs", y = "Tracks") +
+  theme_minimal(base_size = 12)
+
+# =============================================================================
+# AFFICHAGE DES GRAPHES
+# =============================================================================
+if (interactive()) {
+  print(p1 + p2)
+  print(p3)
+  print(p4)
+  print(p5)
+  print(p6)
+}
+
+# Sauvegarde des graphes
+ggsave("plot1_taux_echec_heatmap.png",  p1, width = 8, height = 6, dpi = 150)
+ggsave("plot2_echec_par_site.png",      p2, width = 5, height = 4, dpi = 150)
+ggsave("plot3_proba_echec_predit.png",  p3, width = 10, height = 5, dpi = 150)
+ggsave("plot4_timeratio_boxplot.png",   p4, width = 12, height = 5, dpi = 150)
+ggsave("plot5_nbbranch_boxplot.png",    p5, width = 12, height = 5, dpi = 150)
+ggsave("plot6_semiechec_heatmap.png",   p6, width = 8,  height = 6, dpi = 150)
+
+# =============================================================================
+# SYNTHÈSE NUMÉRIQUE FINALE
+# =============================================================================
+cat("\n\n========== SYNTHÈSE FINALE ==========\n")
+
+cat("\n[ÉTAPE 1 - ÉCHEC]\n")
+cat("Variables significatives pour prédire l'échec (p < 0.05) :\n")
+coef_logit <- tidy(mod_logit) %>% filter(p.value < 0.05, term != "(Intercept)")
+print(coef_logit %>% select(term, estimate, p.value) %>%
+        mutate(OR = round(exp(estimate), 2), p = round(p.value, 4)))
+
+cat("\n[ÉTAPE 2b - NbBranch]\n")
+cat("Variables significatives pour NbBranch (p < 0.05) :\n")
+coef_nb <- tidy(mod_nb) %>% filter(p.value < 0.05, term != "(Intercept)")
+print(coef_nb %>% select(term, estimate, p.value) %>%
+        mutate(IRR = round(exp(estimate), 2), p = round(p.value, 4)))
+
+cat("\n[ÉTAPE 2c - TimeRatio log]\n")
+cat("Variables significatives pour log(TimeRatio) (p < 0.05) :\n")
+coef_tr <- tidy(mod_tr) %>% filter(p.value < 0.05, term != "(Intercept)")
+print(coef_tr %>% select(term, estimate, p.value) %>%
+        mutate(Multiplicateur = round(exp(estimate), 2), p = round(p.value, 4)))
+
+cat("\n[ÉTAPE 2d - Succès vs Semi-échec]\n")
+cat("Variables significatives pour Statut (p < 0.05) :\n")
+coef_l2 <- tidy(mod_logit2) %>% filter(p.value < 0.05, term != "(Intercept)")
+print(coef_l2 %>% select(term, estimate, p.value) %>%
+        mutate(OR = round(exp(estimate), 2), p = round(p.value, 4)))
+
+cat("\nScript terminé. Graphes sauvegardés dans le répertoire courant.\n")
@@ -0,0 +1,243 @@
+#include <functional>
+#include <ios>
+#include <nlohmann/json.hpp>
+
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <stdexcept>
+
+namespace fs = std::filesystem;
+using json   = nlohmann::json;
+
+// ─────────────────────────────────────────────────────────────
+//  Structure : nom d'instance décomposé depuis le nom de fichier
+//  Format : Site_SiteCr_Tracks_Rames_Jobs_ID
+// ─────────────────────────────────────────────────────────────
+
+struct InstanceKey {
+    int64_t site;
+    int64_t site_cr;
+    int64_t tracks;
+    int64_t rames;
+    int64_t jobs;
+    int64_t id;
+    std::string raw; // nom complet sans extension
+    bool operator == (const InstanceKey& key) const
+    {
+        return key.raw == raw;
+    }
+};
+
+struct HashInstanceKey
+{
+    size_t operator() (const InstanceKey& key) const{
+        return std::hash<std::string>()(key.raw);
+    }
+};
+
+InstanceKey parse_instance_name(const std::string& stem) {
+    InstanceKey k;
+    k.raw = stem;
+    std::vector<int64_t> parts;
+    std::string token;
+    for (char c : stem) {
+        if (c == '_') {
+            if (!token.empty()) { parts.push_back(std::stoll(token)); token.clear(); }
+        } else {
+            token += c;
+        }
+    }
+    if (!token.empty()) parts.push_back(std::stoll(token));
+
+    if (parts.size() != 6)
+        throw std::runtime_error("Nom d'instance invalide (attendu 6 entiers) : " + stem);
+
+    k.site    = parts[0];
+    k.site_cr = parts[1];
+    k.tracks  = parts[2];
+    k.rames   = parts[3];
+    k.jobs    = parts[4];
+    k.id      = parts[5];
+    return k;
+}
+
+struct InstanceTradeOff{
+    int64_t nbBranchBest;
+    double timeRatio;
+};
+
+// ─────────────────────────────────────────────────────────────
+//  Parse un fichier JSON et insère les résultats
+// ─────────────────────────────────────────────────────────────
+
+void process_file(const fs::path& path, std::unordered_map<InstanceKey, std::pair<unsigned int, double>, HashInstanceKey>& references, std::unordered_map<InstanceKey, std::optional<InstanceTradeOff>, HashInstanceKey>& trades) {
+    // -- Nom d'instance depuis le stem du fichier
+    InstanceKey key = parse_instance_name(path.stem().string());
+
+    // -- Lecture JSON
+    std::ifstream f(path);
+    if (!f) throw std::runtime_error("Impossible d'ouvrir : " + path.string());
+
+    json doc;
+    try { f >> doc; }
+    catch (const json::exception& e) {
+        throw std::runtime_error("JSON invalide dans " + path.string() + " : " + e.what());
+    }
+
+    if (!doc.is_object())
+        throw std::runtime_error("Le JSON doit être un objet à la racine : " + path.string());
+
+    // -- Chaque clé = un algo
+    for (auto& [algo_name, obj] : doc.items()) {
+        if (!obj.is_object()) continue;
+
+        if(algo_name == "rbs")
+        {     
+            int64_t     fct_obj   = obj.value("fctObjectif", int64_t{0});
+            double      time_f    = obj.value("timeFinal",   0.0);
+            references[key] = {
+                fct_obj,
+                time_f
+            };
+        }
+        else
+        {
+            if(trades.find(key) != trades.end() && trades[key] != std::nullopt)
+            {
+                int64_t     fct_obj   = obj.value("fctObjectif", int64_t{0});
+                int64_t     nbBranch   = obj.value("nbBranch", int64_t{0});
+
+                double      time_f    = obj.value("timeFinal",   0.0);
+                auto& tradePrec = trades[key];
+                auto& ref = references[key];
+                if(fct_obj <= ref.first && tradePrec->nbBranchBest > nbBranch)
+                {
+                    InstanceTradeOff tr = {
+                        nbBranch,
+                        time_f/ref.second
+                    };
+                    trades[key] = tr;
+                }
+            }
+            else
+            {
+                int64_t     fct_obj   = obj.value("fctObjectif", int64_t{0});
+                int64_t     nbBranch   = obj.value("nbBranch", int64_t{0});
+
+                double      time_f    = obj.value("timeFinal",   0.0);
+
+                auto& ref = references[key];
+                if(fct_obj <= ref.first)
+                {
+                    InstanceTradeOff tr = {
+                        nbBranch,
+                        time_f/ref.second
+                    };
+                    trades[key] = tr;
+                }
+                else
+                {
+                    trades[key] = std::nullopt;
+                }
+            }
+        }
+    }
+}
+
+// ─────────────────────────────────────────────────────────────
+//  Collecte récursive des .json dans un dossier
+// ─────────────────────────────────────────────────────────────
+
+std::vector<fs::path> collect_json_files(const fs::path& dir) {
+    std::vector<fs::path> files;
+    if (!fs::is_directory(dir)) {
+        std::cerr << "[WARN] Pas un dossier, ignoré : " << dir << "\n";
+        return files;
+    }
+    for (auto& entry : fs::recursive_directory_iterator(dir)) {
+        if (entry.is_regular_file() && entry.path().extension() == ".json")
+            files.push_back(entry.path());
+    }
+    return files;
+}
+
+void export_to_csv(std::unordered_map<InstanceKey, std::optional<InstanceTradeOff>, HashInstanceKey>& trades)
+{
+    std::ofstream oFile("export_tradeoff.csv", std::ios_base::out | std::ios_base::trunc);
+    oFile << "Site;" << "SiteCr;" << "Tracks;" << "Rames;" << "Jobs;" << "ID;" << "NbBranch;" << "TimeRatio;\n";
+    for(const auto& [ins, trade] : trades)
+    {
+        oFile << ins.site << ";" << ins.site_cr << ";" << ins.tracks << ";" << ins.rames << ";" << ins.jobs << ";" << ins.id << ";";
+        if(trade)
+        {
+            oFile << trade->nbBranchBest << ";" << trade->timeRatio << ";\n";
+        }
+        else
+        {
+            oFile << -1 << ";" << -1 << ";\n";
+        }
+    }
+    oFile.close();
+}
+
+
+// ─────────────────────────────────────────────────────────────
+//  Main
+// ─────────────────────────────────────────────────────────────
+
+int main(int argc, char* argv[]) {
+    if (argc < 2) {
+        std::cerr << "Usage: " << argv[0] << " <dossier1> [dossier2 ...]\n";
+        return 1;
+    }
+
+    std::vector<fs::path> dirs;
+    for (int i = 1; i < argc; ++i) dirs.emplace_back(argv[i]);
+
+    // -- Collecte des fichiers
+    unsigned int all_files = 0;
+    std::unordered_map<InstanceKey, std::optional<InstanceTradeOff>, HashInstanceKey> trades;
+    std::unordered_map<InstanceKey, std::pair<unsigned int, double>, HashInstanceKey> reference;
+    for (auto& d : dirs) {
+        auto files = collect_json_files(d);
+        all_files+= files.size();
+        try {
+            int ok = 0, err = 0;
+
+            for (auto& path : files) {
+                try {
+                    process_file(path, reference, trades);
+                    ++ok;
+                    if (ok % 100 == 0)
+                        std::cout << "  [" << ok << "/" << files.size() << "]\n";
+                } catch (const std::exception& e) {
+                    std::cerr << "[ERR] " << path.filename() << " : " << e.what() << "\n";
+                    ++err;
+                }
+            }
+
+            std::cout << "[DONE] " << ok << " ok, " << err << " erreur(s).\n";
+
+        } catch (const std::exception& e) {
+            std::cerr << "[ERR] " << e.what() << "\n";
+            return 1;
+        }
+    }
+
+    if (all_files == 0) {
+        std::cerr << "[INFO] Aucun fichier .json trouvé.\n";
+        return 0;
+    }
+    std::cout << "[INFO] " << all_files << " fichier(s) trouvé(s).\n";
+
+    std::cout << "[INFO] " << "Exporting to CSV tradeoffs" << "\n";
+
+    export_to_csv(trades);
+
+    return 0;
+}