diff --git a/notebooks/dataset_exploration.ipynb b/notebooks/dataset_exploration.ipynb new file mode 100644 index 000000000..fd0fcd67e --- /dev/null +++ b/notebooks/dataset_exploration.ipynb @@ -0,0 +1,377 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e6422765", + "metadata": {}, + "source": [ + "# LIBERO Dataset Exploration\n", + "\n", + "This notebook explores the LIBERO spatial dataset structure stored in HDF5 format. Each file contains multiple robot manipulation demonstrations.\n", + "\n", + "## Dataset Components\n", + "\n", + "- **Actions**: 7D normalized vectors `[Δx, Δy, Δz, Δroll, Δpitch, Δyaw, gripper]` (range [-1, 1])\n", + "- **Observations**: \n", + " - RGB images: `agentview_rgb`, `eye_in_hand_rgb` (128×128×3)\n", + " - End-effector: `ee_pos` (3D), `ee_ori` (3D Euler), `ee_states` (6D concatenated)\n", + " - Robot: `joint_states` (7D), `gripper_states` (2D)\n", + "- **Episode Info**: `dones` (termination flags), `rewards` (sparse: 0/1)\n", + "- **States**: `robot_states` (9D: gripper + ee_pos + quaternion), `states` (92D environment state)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d2963cb5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "One demo data contains the following keys: ['actions', 'dones', 'obs', 'rewards', 'robot_states', 'states']\n" + ] + } + ], + "source": [ + "import h5py\n", + "from pathlib import Path\n", + "\n", + "# LIBERO dataset exploration\n", + "# The dataset is stored in HDF5 format\n", + "# Each HDF5 file contains multiple demonstrations of a specific task\n", + "\n", + "# Change this if you want a different suite / task\n", + "dataset_root = Path(\"../libero/datasets/libero_spatial\")\n", + "hdf5_file = dataset_root / \"pick_up_the_black_bowl_next_to_the_cookie_box_and_place_it_on_the_plate_demo.hdf5\"\n", + "\n", + "# Open the HDF5 file and extract demonstration data\n", + "with h5py.File(hdf5_file, \"r\") as f:\n", + " data = 
f[\"data\"]\n", + " demos = list(data.keys()) # Get list of all demonstration IDs in the file\n", + "\n", + " # Select a specific demonstration (using index 6)\n", + " demo0 = data[demos[6]]\n", + " \n", + " # Print available data keys to understand the structure\n", + " print(\"One demo data contains the following keys:\", list(demo0.keys()))\n", + " \n", + " # Extract all data from the demonstration into a dictionary\n", + " # [:] reads the actual data from the HDF5 dataset (not just a reference)\n", + " demo0_data = {\n", + " 'actions': demo0[\"actions\"][:], # Robot actions: [num_timesteps, action_dim] - typically 7D (position, orientation, gripper)\n", + " 'dones': demo0[\"dones\"][:], # Episode termination flags: [num_timesteps] - True when task is complete\n", + " 'rewards': demo0[\"rewards\"][:], # Reward signal: [num_timesteps] - sparse rewards for task completion\n", + " 'obs': {key: demo0[\"obs\"][key][:] for key in demo0[\"obs\"].keys()}, # Observations: dict of various sensor readings\n", + " 'robot_states': demo0[\"robot_states\"][:], # Full robot state: [num_timesteps, state_dim]\n", + " 'states': demo0[\"states\"][:] # Environment state: [num_timesteps, state_dim]\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a9d2e2b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Libero represents actions using an end-effector cartesian representation.\n", + "The seven dimensions represent the position and orientation of the end-effector:\n", + "Δx, Δy, Δz, Δroll, Δpitch, Δyaw, gripper\n", + "Action shape: (141, 7)\n", + "Actions are stored in a normalized way: -1.0 1.0\n" + ] + } + ], + "source": [ + "# Inspect the action data: its shape and value range\n", + "print('Libero represents actions using an end-effector cartesian representation.')\n", + "print('The seven dimensions represent the position and orientation of the end-effector:')\n", + "print('Δx, Δy, Δz, 
Δroll, Δpitch, Δyaw, gripper')\n", + "print('Action shape:', demo0_data['actions'].shape)\n", + "print('Actions are stored in a normalized way:', demo0_data['actions'].min(), demo0_data['actions'].max())" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f85994e3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In the done array, a one indicates the end of the demonstration\n", + "The shape of the done array is: (141,)\n", + "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]\n" + ] + } + ], + "source": [ + "# Check the shape of the done flags\n", + "# Each timestep has a boolean indicating if the episode terminated\n", + "print('In the done array, a one indicates the end of the demonstration')\n", + "print('The shape of the done array is:', demo0_data['dones'].shape)\n", + "print(demo0_data['dones'])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f236ac5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LIBERO typically uses sparse rewards 0 most of the time, 1 when task completes\n", + "The shape of the reward signal is: (141,)\n", + "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]\n" + ] + } + ], + "source": [ + "# Check the shape of the reward signal\n", + "print('LIBERO typically uses sparse rewards 0 most of the time, 1 when task completes')\n", + "print('The shape of the reward signal is:', 
demo0_data['rewards'].shape)\n", + "# View all reward values\n", + "# In sparse reward settings, most values are 0, with 1 at task completion\n", + "print(demo0_data['rewards'])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "5bcf9831", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The vector robot_states contains the full robot configuration as computed by the physics engine: \n", + "-- 2 variables for the left and right gripper joint states\n", + "-- 3 variables for the end-effector coordinates\n", + "-- 4 variables for the gripper base orientation as a quaternion\n", + "[ 0.0362455 -0.03621179 -0.21422191 -0.01943306 1.17593119 0.99883435\n", + " -0.03581595 -0.0300125 -0.01209963]\n" + ] + } + ], + "source": [ + "print('The vector robot_states contains the full robot configuration as computed by the physics engine: ')\n", + "print('-- 2 variables for the left and right gripper joint states')\n", + "print('-- 3 variables for the end-effector coordinates')\n", + "print('-- 4 variables for the gripper base orientation as a quaternion')\n", + "print(demo0_data['robot_states'][0,:])" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "6e5e1b8a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0.01097094 -0.17940503 -0.06104203 -2.45571358 0.01567369 2.21628218\n", + " 0.79897862]\n" + ] + } + ], + "source": [ + "print(demo0_data['obs']['joint_states'][0,:])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6259340e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The first 2 elements are the left and right gripper joint states, compare with the first 2 of the robot_states\n", + "[ 0.0362455 -0.03621179]\n", + "The next 3 are the end-effector position, compare with the values of ee_pos\n", + "[-0.21422191 -0.01943306 1.17593119]\n" + ] 
+ } + ], + "source": [ + "print('The first 2 elements are the left and right gripper joint states, compare with the first 2 of the robot_states')\n", + "print(demo0_data['obs']['gripper_states'][0,:])\n", + "\n", + "print('The next 3 are the end-effector position, compare with the values of ee_pos')\n", + "print(demo0_data['obs']['ee_pos'][0,:])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "8b8f2fd5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The variable states contains the full state of the environment, including the positions of the objects:\n", + "It is a vector with 92 entries: (141, 92)\n", + "-2.4574334135159837\n" + ] + } + ], + "source": [ + "print('The variable states contains the full state of the environment, including the positions of the objects:')\n", + "print('It is a vector with 92 entries:', demo0_data['states'].shape)\n", + "print(demo0_data['states'].min())" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "372a0d90", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The obs entry contains the following keys:\n", + "-- agentview_rgb, eye_in_hand_rgb: the views from 2 different cameras\n", + "-- ee_pos, ee_ori, ee_states: the end-effector position and orientation. In ee_states the position and orientation are concatenated\n", + "-- gripper_states: the joint states of the gripper\n", + "-- joint_states: the joint states of the robot arm\n", + "dict_keys(['agentview_rgb', 'ee_ori', 'ee_pos', 'ee_states', 'eye_in_hand_rgb', 'gripper_states', 'joint_states'])\n" + ] + } + ], + "source": [ + "print('The obs entry contains the following keys:')\n", + "print('-- agentview_rgb, eye_in_hand_rgb: the views from 2 different cameras')\n", + "print('-- ee_pos, ee_ori, ee_states: the end-effector position and orientation. 
In ee_states the position and orientation are concatenated')\n", + "print('-- gripper_states: the joint states of the gripper')\n", + "print('-- joint_states: the joint states of the robot arm')\n", + "print(demo0_data['obs'].keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "19113ac6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The agent view and the eye in hand view are 3 channel images with a resolution of 128x128\n", + "(141, 128, 128, 3)\n", + "(141, 128, 128, 3)\n" + ] + } + ], + "source": [ + "print('The agent view and the eye in hand view are 3 channel images with a resolution of 128x128')\n", + "print(demo0_data['obs']['agentview_rgb'].shape)\n", + "print(demo0_data['obs']['eye_in_hand_rgb'].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "7f2c0717", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The ee_states contains the position and orientation of the end-effector. The ee_ori is a 3D vector, so it is probably a euler angle representation\n", + "(141, 6)\n", + "(141, 3)\n", + "(141, 3)\n", + "The ee_states concatenates the ee_pos and ee_ori\n", + "[-0.21422191 -0.01943306 1.17593119 3.16233381 -0.11339417 -0.09502031]\n", + "[-0.21422191 -0.01943306 1.17593119]\n", + "[ 3.16233381 -0.11339417 -0.09502031]\n" + ] + } + ], + "source": [ + "print('The ee_states contains the position and orientation of the end-effector. 
The ee_ori is a 3D vector, so it is probably a euler angle representation')\n", + "print(demo0_data['obs']['ee_states'].shape)\n", + "print(demo0_data['obs']['ee_pos'].shape)\n", + "print(demo0_data['obs']['ee_ori'].shape)\n", + "\n", + "print('The ee_states concatenates the ee_pos and ee_ori')\n", + "print(demo0_data['obs']['ee_states'][0, :])\n", + "print(demo0_data['obs']['ee_pos'][0, :])\n", + "print(demo0_data['obs']['ee_ori'][0, :])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "5d345e46", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The gripper states contains the joint states of the gripper\n", + "(141, 2)\n", + "The joint states contains the joint states of the robot arm\n", + "(141, 7)\n" + ] + } + ], + "source": [ + "print('The gripper states contains the joint states of the gripper')\n", + "print(demo0_data['obs']['gripper_states'].shape)\n", + "\n", + "print('The joint states contains the joint states of the robot arm')\n", + "print(demo0_data['obs']['joint_states'].shape)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.20" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}