|
180 | 180 | "\n", |
181 | 181 | "# Create the dataset spec and the target\n", |
182 | 182 | "X = {\n", |
183 | | - " \"main_table\": \"Accidents\",\n", |
184 | | - " \"tables\": {\n", |
185 | | - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", |
| 183 | + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", |
| 184 | + " \"additional_data_tables\": {\n", |
186 | 185 | " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", |
187 | 186 | " },\n", |
188 | 187 | "}\n", |
|
249 | 248 | "\n", |
250 | 249 | "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", |
251 | 250 | "X = {\n", |
252 | | - " \"main_table\": \"Accidents\",\n", |
253 | | - " \"tables\": {\n", |
254 | | - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", |
| 251 | + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", |
| 252 | + " \"additional_data_tables\": {\n", |
255 | 253 | " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", |
256 | | - " \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", |
257 | | - " \"Places\": (places_df, \"AccidentId\"),\n", |
| 254 | + " \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", |
| 255 | + " \"Places\": (places_df, [\"AccidentId\"], True),\n", |
258 | 256 | " },\n", |
259 | | - " \"relations\": [\n", |
260 | | - " (\"Accidents\", \"Vehicles\"),\n", |
261 | | - " (\"Vehicles\", \"Users\"),\n", |
262 | | - " (\"Accidents\", \"Places\", True),\n", |
263 | | - " ],\n", |
264 | 257 | "}\n", |
265 | 258 | "\n", |
266 | 259 | "# Load the target variable \"Gravity\"\n", |
|
475 | 468 | "# Create the multi-table dataset specification for the train/test split\n", |
476 | 469 | "# The main table is a tuple (dataframe, key_columns); each additional table is a name mapped to such a tuple\n", |
477 | 470 | "X_train = {\n", |
478 | | - " \"main_table\": \"Accidents\",\n", |
479 | | - " \"tables\": {\n", |
480 | | - " \"Accidents\": (X_train_main, \"AccidentId\"),\n", |
| 471 | + " \"main_table\": (X_train_main, [\"AccidentId\"]),\n", |
| 472 | + " \"additional_data_tables\": {\n", |
481 | 473 | " \"Vehicles\": (X_train_secondary, [\"AccidentId\", \"VehicleId\"]),\n", |
482 | 474 | " },\n", |
483 | 475 | "}\n", |
484 | 476 | "X_test = {\n", |
485 | | - " \"main_table\": \"Accidents\",\n", |
486 | | - " \"tables\": {\n", |
487 | | - " \"Accidents\": (X_test_main, \"AccidentId\"),\n", |
| 477 | + " \"main_table\": (X_test_main, [\"AccidentId\"]),\n", |
| 478 | + " \"additional_data_tables\": {\n", |
488 | 479 | " \"Vehicles\": (X_test_secondary, [\"AccidentId\", \"VehicleId\"]),\n", |
489 | 480 | " },\n", |
490 | 481 | "}\n", |
|
660 | 651 | "\n", |
661 | 652 | "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", |
662 | 653 | "X = {\n", |
663 | | - " \"main_table\": \"Accidents\",\n", |
664 | | - " \"tables\": {\n", |
665 | | - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", |
| 654 | + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", |
| 655 | + " \"additional_data_tables\": {\n", |
666 | 656 | " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", |
667 | 657 | " },\n", |
668 | 658 | "}\n", |
|
712 | 702 | "\n", |
713 | 703 | "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", |
714 | 704 | "X = {\n", |
715 | | - " \"main_table\": \"Accidents\",\n", |
716 | | - " \"tables\": {\n", |
717 | | - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", |
| 705 | + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", |
| 706 | + " \"additional_data_tables\": {\n", |
718 | 707 | " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", |
719 | | - " \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", |
720 | | - " \"Places\": (places_df, \"AccidentId\"),\n", |
| 708 | + " \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", |
| 709 | + " \"Places\": (places_df, [\"AccidentId\"], True),\n", |
721 | 710 | " },\n", |
722 | | - " \"relations\": [\n", |
723 | | - " (\"Accidents\", \"Vehicles\"),\n", |
724 | | - " (\"Vehicles\", \"Users\"),\n", |
725 | | - " (\"Accidents\", \"Places\", True),\n", |
726 | | - " ],\n", |
727 | 711 | "}\n", |
728 | 712 | "\n", |
729 | 713 | "# Load the target variable \"Gravity\"\n", |
|
843 | 827 | "\n", |
844 | 828 | "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", |
845 | 829 | "X = {\n", |
846 | | - " \"main_table\": \"Accidents\",\n", |
847 | | - " \"tables\": {\n", |
848 | | - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", |
| 830 | + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", |
| 831 | + " \"additional_data_tables\": {\n", |
849 | 832 | " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", |
850 | 833 | " },\n", |
851 | | - " \"relations\": [\n", |
852 | | - " (\"Accidents\", \"Vehicles\"),\n", |
853 | | - " ],\n", |
854 | 834 | "}\n", |
855 | 835 | "\n", |
856 | 836 | "# Load the target variable \"Gravity\"\n", |
|
0 commit comments