KhiopsML
diff --git a/‎doc/samples/samples_sklearn.rst‎
Lines changed: 18 additions & 38 deletions b/‎doc/samples/samples_sklearn.rst‎
Lines changed: 18 additions & 38 deletions
diff --git a/‎khiops/core/helpers.py‎
Lines changed: 14 additions & 2 deletions b/‎khiops/core/helpers.py‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎khiops/samples/samples_sklearn.ipynb‎
Lines changed: 18 additions & 38 deletions b/‎khiops/samples/samples_sklearn.ipynb‎
Lines changed: 18 additions & 38 deletions
diff --git a/‎khiops/samples/samples_sklearn.py‎
Lines changed: 18 additions & 38 deletions b/‎khiops/samples/samples_sklearn.py‎
Lines changed: 18 additions & 38 deletions
@@ -168,9 +168,8 @@ Samples
 
     # Create the dataset spec and the target
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
         },
     }
@@ -224,18 +223,12 @@ Samples
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
-            "Users": (users_df, ["AccidentId", "VehicleId"]),
-            "Places": (places_df, "AccidentId"),
+            "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
+            "Places": (places_df, ["AccidentId"], True),
         },
-        "relations": [
-            ("Accidents", "Vehicles"),
-            ("Vehicles", "Users"),
-            ("Accidents", "Places", True),
-        ],
     }
 
     # Load the target variable "Gravity"
@@ -411,16 +404,14 @@ Samples
     # Create the dataset multitable specification for the train/test split
-    # We specify each table with a name and a tuple (dataframe, key_columns)
+    # Each table is a tuple (dataframe, key_columns); additional tables also get a name
     X_train = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (X_train_main, "AccidentId"),
+        "main_table": (X_train_main, ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (X_train_secondary, ["AccidentId", "VehicleId"]),
         },
     }
     X_test = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (X_test_main, "AccidentId"),
+        "main_table": (X_test_main, ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (X_test_secondary, ["AccidentId", "VehicleId"]),
         },
     }
@@ -557,9 +548,8 @@ Samples
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
         },
     }
@@ -596,18 +586,12 @@ Samples
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
-            "Users": (users_df, ["AccidentId", "VehicleId"]),
-            "Places": (places_df, "AccidentId"),
+            "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
+            "Places": (places_df, ["AccidentId"], True),
         },
-        "relations": [
-            ("Accidents", "Vehicles"),
-            ("Vehicles", "Users"),
-            ("Accidents", "Places", True),
-        ],
     }
 
     # Load the target variable "Gravity"
@@ -701,14 +685,10 @@ Samples
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
         },
-        "relations": [
-            ("Accidents", "Vehicles"),
-        ],
     }
 
     # Load the target variable "Gravity"
 
@@ -23,7 +23,10 @@
 
 
 def _build_multi_table_dictionary_domain(
-    dictionary_domain, root_dictionary_name, secondary_table_variable_name
+    dictionary_domain,
+    root_dictionary_name,
+    secondary_table_variable_name,
+    update_secondary_table_name=False,
 ):
     """Builds a multi-table dictionary domain from a dictionary with a key
     Parameters
@@ -34,6 +37,9 @@ def _build_multi_table_dictionary_domain(
         Name for the new root dictionary
     secondary_table_variable_name : str
         Name, in the root dictionary, for the "table" variable of the secondary table.
+    update_secondary_table_name : bool, default `False`
+        If ``True``, rename the secondary table dictionary to
+        ``secondary_table_variable_name``. Otherwise, keep its original name.
 
     Returns
     -------
@@ -103,11 +109,17 @@ def _build_multi_table_dictionary_domain(
     target_variable = Variable()
     target_variable.name = secondary_table_variable_name
     target_variable.type = "Table"
-    target_variable.object_type = root_source_dictionary.name
+    if update_secondary_table_name:
+        target_variable.object_type = secondary_table_variable_name
+    else:
+        target_variable.object_type = root_source_dictionary.name
     root_target_dictionary.add_variable(target_variable)
 
     # Build secondary target dictionary, by copying root source dictionary
     secondary_target_dictionary = root_source_dictionary.copy()
+    secondary_target_dictionary.root = False
+    if update_secondary_table_name:
+        secondary_target_dictionary.name = secondary_table_variable_name
 
     # Build target domain and add dictionaries to it
     target_domain = DictionaryDomain()
 
@@ -180,9 +180,8 @@
     "\n",
     "# Create the dataset spec and the target\n",
     "X = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
+    "    \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
+    "    \"additional_data_tables\": {\n",
     "        \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
     "    },\n",
     "}\n",
@@ -249,18 +248,12 @@
     "\n",
     "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
     "X = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
+    "    \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
+    "    \"additional_data_tables\": {\n",
     "        \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
-    "        \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
-    "        \"Places\": (places_df, \"AccidentId\"),\n",
+    "        \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
+    "        \"Places\": (places_df, [\"AccidentId\"], True),\n",
     "    },\n",
-    "    \"relations\": [\n",
-    "        (\"Accidents\", \"Vehicles\"),\n",
-    "        (\"Vehicles\", \"Users\"),\n",
-    "        (\"Accidents\", \"Places\", True),\n",
-    "    ],\n",
     "}\n",
     "\n",
     "# Load the target variable \"Gravity\"\n",
@@ -475,16 +468,14 @@
     "# Create the dataset multitable specification for the train/test split\n",
-    "# We specify each table with a name and a tuple (dataframe, key_columns)\n",
+    "# Each table is a tuple (dataframe, key_columns); additional tables also get a name\n",
     "X_train = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (X_train_main, \"AccidentId\"),\n",
+    "    \"main_table\": (X_train_main, [\"AccidentId\"]),\n",
+    "    \"additional_data_tables\": {\n",
     "        \"Vehicles\": (X_train_secondary, [\"AccidentId\", \"VehicleId\"]),\n",
     "    },\n",
     "}\n",
     "X_test = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (X_test_main, \"AccidentId\"),\n",
+    "    \"main_table\": (X_test_main, [\"AccidentId\"]),\n",
+    "    \"additional_data_tables\": {\n",
     "        \"Vehicles\": (X_test_secondary, [\"AccidentId\", \"VehicleId\"]),\n",
     "    },\n",
     "}\n",
@@ -660,9 +651,8 @@
     "\n",
     "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
     "X = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
+    "    \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
+    "    \"additional_data_tables\": {\n",
     "        \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
     "    },\n",
     "}\n",
@@ -712,18 +702,12 @@
     "\n",
     "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
     "X = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
+    "    \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
+    "    \"additional_data_tables\": {\n",
     "        \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
-    "        \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
-    "        \"Places\": (places_df, \"AccidentId\"),\n",
+    "        \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
+    "        \"Places\": (places_df, [\"AccidentId\"], True),\n",
     "    },\n",
-    "    \"relations\": [\n",
-    "        (\"Accidents\", \"Vehicles\"),\n",
-    "        (\"Vehicles\", \"Users\"),\n",
-    "        (\"Accidents\", \"Places\", True),\n",
-    "    ],\n",
     "}\n",
     "\n",
     "# Load the target variable \"Gravity\"\n",
@@ -843,14 +827,10 @@
     "\n",
     "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
     "X = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
+    "    \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
+    "    \"additional_data_tables\": {\n",
     "        \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
     "    },\n",
-    "    \"relations\": [\n",
-    "        (\"Accidents\", \"Vehicles\"),\n",
-    "    ],\n",
     "}\n",
     "\n",
     "# Load the target variable \"Gravity\"\n",
 
@@ -163,9 +163,8 @@ def khiops_classifier_multitable_star():
 
     # Create the dataset spec and the target
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
         },
     }
@@ -224,18 +223,12 @@ def khiops_classifier_multitable_snowflake():
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
-            "Users": (users_df, ["AccidentId", "VehicleId"]),
-            "Places": (places_df, "AccidentId"),
+            "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
+            "Places": (places_df, ["AccidentId"], True),
         },
-        "relations": [
-            ("Accidents", "Vehicles"),
-            ("Vehicles", "Users"),
-            ("Accidents", "Places", True),
-        ],
     }
 
     # Load the target variable "Gravity"
@@ -416,16 +409,14 @@ def khiops_classifier_with_hyperparameters():
     # Create the dataset multitable specification for the train/test split
-    # We specify each table with a name and a tuple (dataframe, key_columns)
+    # Each table is a tuple (dataframe, key_columns); additional tables also get a name
     X_train = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (X_train_main, "AccidentId"),
+        "main_table": (X_train_main, ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (X_train_secondary, ["AccidentId", "VehicleId"]),
         },
     }
     X_test = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (X_test_main, "AccidentId"),
+        "main_table": (X_test_main, ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (X_test_secondary, ["AccidentId", "VehicleId"]),
         },
     }
@@ -578,9 +569,8 @@ def khiops_encoder_multitable_star():
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
         },
     }
@@ -622,18 +612,12 @@ def khiops_encoder_multitable_snowflake():
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
-            "Users": (users_df, ["AccidentId", "VehicleId"]),
-            "Places": (places_df, "AccidentId"),
+            "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
+            "Places": (places_df, ["AccidentId"], True),
         },
-        "relations": [
-            ("Accidents", "Vehicles"),
-            ("Vehicles", "Users"),
-            ("Accidents", "Places", True),
-        ],
     }
 
     # Load the target variable "Gravity"
@@ -739,14 +723,10 @@ def khiops_encoder_with_hyperparameters():
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
+        "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
+        "additional_data_tables": {
             "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
         },
-        "relations": [
-            ("Accidents", "Vehicles"),
-        ],
     }
 
     # Load the target variable "Gravity"