diff --git a/.gitattributes b/.gitattributes index c4ccd1f825..74858a9b91 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4,17 +4,13 @@ # Ensure Python files always use LF for line endings. *.py text eol=lf # Treat designated file types as binary and do not alter their contents or line endings. -*.png filter=lfs diff=lfs merge=lfs -text binary -*.jpg filter=lfs diff=lfs merge=lfs -text binary -*.jpeg filter=lfs diff=lfs merge=lfs -text binary +*.png binary +*.jpg binary +*.jpeg binary *.ico binary *.pdf binary -# Explicit LFS tracking for test files -/data/.lfs/*.tar.gz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text binary -*.mp4 filter=lfs diff=lfs merge=lfs -text binary -*.mov filter=lfs diff=lfs merge=lfs -text binary -*.gif filter=lfs diff=lfs merge=lfs -text binary -*.foxe filter=lfs diff=lfs merge=lfs -text binary -docs/capabilities/memory/assets/** filter=lfs diff=lfs merge=lfs -text -docs/capabilities/memory/assets/.gitattributes -filter -diff -merge text +*.onnx binary +*.mp4 binary +*.mov binary +*.gif binary +*.foxe binary diff --git a/.gitignore b/.gitignore index 1816510c08..dca3d2b345 100644 --- a/.gitignore +++ b/.gitignore @@ -30,9 +30,8 @@ __pycache__ .bash_history -# Ignore all test data directories but allow compressed files -/data/* -!/data/.lfs/ +# Ignore all test data (archives are stored in S3, not git) +/data/ # node env (used by devcontainers cli) node_modules @@ -43,9 +42,8 @@ package-lock.json dist/ build/ -# Ignore data directory but keep .lfs subdirectory -data/* -!data/.lfs/ +# Ignore data directory (archives are stored in S3, not git) +data/ FastSAM-x.pt yolo11n.pt diff --git a/bin/hooks/lfs_check b/bin/hooks/lfs_check index 3d493ec82d..922f8fcdbe 100755 --- a/bin/hooks/lfs_check +++ b/bin/hooks/lfs_check @@ -50,7 +50,7 @@ done if [ ${#new_data[@]} -gt 0 ]; then echo -e "${RED}✗${NC} New test data detected at /data:" echo -e " ${GREEN}${new_data[@]}${NC}" - echo -e "\nEither delete or run 
${GREEN}./bin/lfs_push${NC}" - echo -e "(lfs_push will compress the files into /data/.lfs/, upload to LFS, and add them to your commit)" + echo -e "\nEither delete or run ${GREEN}./bin/s3_push${NC}" + echo -e "(s3_push will compress the files into /data/.lfs/ and upload to S3)" exit 1 fi diff --git a/bin/s3_push b/bin/s3_push new file mode 100755 index 0000000000..1898f2b40a --- /dev/null +++ b/bin/s3_push @@ -0,0 +1,76 @@ +#!/bin/bash +# Compresses directories/files in data/* into data/.lfs/NAME.tar.gz +# Uploads to S3 (replaces bin/lfs_push) + +set -e + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +S3_BUCKET="${DIMOS_DATA_S3_BUCKET:-dimos-github-lfs}" +S3_PREFIX="${DIMOS_DATA_S3_PREFIX:-.lfs/}" +AWS_PROFILE="${AWS_PROFILE:-dimensional-aws}" + +ROOT=$(git rev-parse --show-toplevel) +cd "$ROOT" + +if [ ! -d "data/" ]; then + echo -e "${YELLOW}No data directory found, skipping.${NC}" + exit 0 +fi + +mkdir -p data/.lfs + +compressed_entries=() + +for entry_path in data/*; do + [ ! -e "$entry_path" ] && continue # an unmatched glob is left as the literal pattern + + entry_name=$(basename "$entry_path") + + [ "$entry_name" = ".lfs" ] && continue + + # Skip SQLite ephemeral sidecar files + case "$entry_name" in + *-wal|*-shm|*-journal) continue ;; + esac + + compressed_file="data/.lfs/${entry_name}.tar.gz" + + if [ -f "$compressed_file" ]; then + continue + fi + + echo -e " ${YELLOW}Compressing${NC} $entry_path -> $compressed_file" + + entry_size=$(du -sh "$entry_path" | cut -f1) + echo -e " Data size: ${YELLOW}$entry_size${NC}" + + if tar -czf "$compressed_file" \ + --exclude='*.tmp' \ + --exclude='*.temp' \ + --exclude='.DS_Store' \ + --exclude='Thumbs.db' \ + --checkpoint=1000 \ + --checkpoint-action=dot \ + -C "data/" \ + "$entry_name"; then
+ # tar is the if-condition: with set -e a separate $? test never saw a failure + compressed_size=$(du -sh "$compressed_file" | cut -f1) + echo -e " ${GREEN}✓${NC} Compressed $entry_name (${GREEN}$entry_size${NC} → ${GREEN}$compressed_size${NC})" + compressed_entries+=("$entry_name") + else + echo -e " ${RED}✗${NC} Failed to compress $entry_name" + rm -f -- "$compressed_file" # a partial archive would be skipped and synced next run + exit 1 + fi +done + +echo -e "\n${GREEN}Syncing data/.lfs/ to s3://${S3_BUCKET}/${S3_PREFIX}${NC}" +aws s3 sync data/.lfs/ "s3://${S3_BUCKET}/${S3_PREFIX}" \ + --profile "$AWS_PROFILE" \ + --exclude ".gitkeep" + +echo -e "${GREEN}✓${NC} Upload complete" diff --git a/data/.lfs/ab_lidar_frames.tar.gz b/data/.lfs/ab_lidar_frames.tar.gz deleted file mode 100644 index 38c61cd506..0000000000 --- a/data/.lfs/ab_lidar_frames.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab4efaf5d7d4303424868fecaf10083378007adf20244fd17ed934e37f2996da -size 116271 diff --git a/data/.lfs/apartment.tar.gz b/data/.lfs/apartment.tar.gz deleted file mode 100644 index c8e6cf0331..0000000000 --- a/data/.lfs/apartment.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d2c44f39573a80a65aeb6ccd3fcb1c8cb0741dbc7286132856409e88e150e77 -size 18141029 diff --git a/data/.lfs/assets.tar.gz b/data/.lfs/assets.tar.gz deleted file mode 100644 index b7a2fcbd1c..0000000000 --- a/data/.lfs/assets.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b14b01f5c907f117331213abfce9ef5d0c41d0524e14327b5cc706520fb2035 -size 2306191 diff --git a/data/.lfs/astar_corner_min_cost.png.tar.gz b/data/.lfs/astar_corner_min_cost.png.tar.gz deleted file mode 100644 index 35f3ffe0b6..0000000000 --- a/data/.lfs/astar_corner_min_cost.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42517c5f67a9f06949cb2015a345f9d6b43d22cafd50e1fefb9b5d24d8b72509 -size 5671 diff --git a/data/.lfs/astar_min_cost.png.tar.gz b/data/.lfs/astar_min_cost.png.tar.gz deleted file mode 100644 index
752a778295..0000000000 --- a/data/.lfs/astar_min_cost.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06b67aa0d18c291c3525e67ca3a2a9ab2530f6fe782a850872ba4c343353a20a -size 12018 diff --git a/data/.lfs/big_office.ply.tar.gz b/data/.lfs/big_office.ply.tar.gz deleted file mode 100644 index c8524a1862..0000000000 --- a/data/.lfs/big_office.ply.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7eabc682f75e1725a07df51bb009d3950190318d119d54d0ad8c6b7104f175e3 -size 2355227 diff --git a/data/.lfs/big_office_height_cost_occupancy.png.tar.gz b/data/.lfs/big_office_height_cost_occupancy.png.tar.gz deleted file mode 100644 index 75addaf103..0000000000 --- a/data/.lfs/big_office_height_cost_occupancy.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6d8e7d096f1108d45ebdad760c4655de1e1d50105ca59c5188e79cb1a7c0d4a9 -size 133051 diff --git a/data/.lfs/big_office_simple_occupancy.png.tar.gz b/data/.lfs/big_office_simple_occupancy.png.tar.gz deleted file mode 100644 index dd667640be..0000000000 --- a/data/.lfs/big_office_simple_occupancy.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dded2e28694de9ec84a91a686b27654b83c604f44f4d3e336d5cd481e88d3249 -size 28146 diff --git a/data/.lfs/cafe-smol.jpg.tar.gz b/data/.lfs/cafe-smol.jpg.tar.gz deleted file mode 100644 index a05beb4900..0000000000 --- a/data/.lfs/cafe-smol.jpg.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd0c1e5aa5e8ec856cb471c5ed256c2d3a5633ed9a1e052291680eb86bf89a5e -size 8298 diff --git a/data/.lfs/cafe.jpg.tar.gz b/data/.lfs/cafe.jpg.tar.gz deleted file mode 100644 index dbb2d970a1..0000000000 --- a/data/.lfs/cafe.jpg.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b8cf30439b41033ccb04b09b9fc8388d18fb544d55b85c155dbf85700b9e7603 -size 136165 
diff --git a/data/.lfs/chair-image.png.tar.gz b/data/.lfs/chair-image.png.tar.gz deleted file mode 100644 index 1a2aab4cf5..0000000000 --- a/data/.lfs/chair-image.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1f3478f472b5750f118cf7225c2028beeaae41f1b4b726c697ac8c9b004eccbf -size 48504 diff --git a/data/.lfs/command_center.html.tar.gz b/data/.lfs/command_center.html.tar.gz deleted file mode 100644 index 9f7bfe1979..0000000000 --- a/data/.lfs/command_center.html.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7663ac06572e3b9490859b400e9ddbf45ac3ef52a58fcdb8c2c41936dc9d43b5 -size 137675 diff --git a/data/.lfs/drone.tar.gz b/data/.lfs/drone.tar.gz deleted file mode 100644 index 2973c649cd..0000000000 --- a/data/.lfs/drone.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd73f988eee8fd7b99d6c0bf6a905c2f43a6145a4ef33e9eef64bee5f53e04dd -size 709946060 diff --git a/data/.lfs/expected_occupancy_scene.xml.tar.gz b/data/.lfs/expected_occupancy_scene.xml.tar.gz deleted file mode 100644 index efbe7ce49d..0000000000 --- a/data/.lfs/expected_occupancy_scene.xml.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e3eb91f3c7787882bf26a69df21bb1933d2f6cd71132ca5f0521e2808269bfa2 -size 6777 diff --git a/data/.lfs/g1_wholebody_replay.json.tar.gz b/data/.lfs/g1_wholebody_replay.json.tar.gz deleted file mode 100644 index 1ad3ea8da3..0000000000 --- a/data/.lfs/g1_wholebody_replay.json.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8bffca753401f78587ba743d19fc3493aac7ffdb9a42c3863299082e62da2788 -size 1147342 diff --git a/data/.lfs/g1_zed.tar.gz b/data/.lfs/g1_zed.tar.gz deleted file mode 100644 index 4029f48204..0000000000 --- a/data/.lfs/g1_zed.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:955094035b3ac1edbc257ca1d24fa131f79ac6f502c8b35cc50329025c421dbe -size 1029559759 diff --git a/data/.lfs/go2_bigoffice.db.tar.gz b/data/.lfs/go2_bigoffice.db.tar.gz deleted file mode 100644 index 540d7009ba..0000000000 --- a/data/.lfs/go2_bigoffice.db.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e66f5472e72f370446d8dcd802f70f3c3c07e4e083c5d6a394873877dec4c88d -size 196309743 diff --git a/data/.lfs/go2_china_office.db.tar.gz b/data/.lfs/go2_china_office.db.tar.gz deleted file mode 100644 index 772ef3627e..0000000000 --- a/data/.lfs/go2_china_office.db.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:834539871fd325b15f3079a3490b278c54e78d0d40bfa1342dbdc983f6a3ee02 -size 136080653 diff --git a/data/.lfs/go2_hongkong_office.db.tar.gz b/data/.lfs/go2_hongkong_office.db.tar.gz deleted file mode 100644 index 38696c1484..0000000000 --- a/data/.lfs/go2_hongkong_office.db.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9f472bc9b0b8ce86c6253013800cca86e149b0ab5868c79e5072c7e07f2e3a5 -size 770750369 diff --git a/data/.lfs/go2_sf_office.tar.gz b/data/.lfs/go2_sf_office.tar.gz deleted file mode 100644 index be294d49fa..0000000000 --- a/data/.lfs/go2_sf_office.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dab40e2310e8397bc20046dfd23475c805c973eaa94043eb87e23ba43e2774fb -size 25533811 diff --git a/data/.lfs/go2_short.db.tar.gz b/data/.lfs/go2_short.db.tar.gz deleted file mode 100644 index 092c1de2a3..0000000000 --- a/data/.lfs/go2_short.db.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c26ca64b9f63d98070a12b364ecea78a44f2040d82020cdd551c81760c8b4d2 -size 77023899 diff --git a/data/.lfs/gradient_simple.png.tar.gz b/data/.lfs/gradient_simple.png.tar.gz deleted file mode 100644 index 7232282ce4..0000000000 --- a/data/.lfs/gradient_simple.png.tar.gz 
+++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e418f2a6858c757cb72bd25772749a1664c97a407682d88ad2b51c4bbdcb8006 -size 11568 diff --git a/data/.lfs/gradient_voronoi.png.tar.gz b/data/.lfs/gradient_voronoi.png.tar.gz deleted file mode 100644 index 28e7f263c4..0000000000 --- a/data/.lfs/gradient_voronoi.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3867c0fb5b00f8cb5e0876e5120a70d61f7da121c0a3400010743cc858ee2d54 -size 20680 diff --git a/data/.lfs/inflation_simple.png.tar.gz b/data/.lfs/inflation_simple.png.tar.gz deleted file mode 100644 index ca6586800c..0000000000 --- a/data/.lfs/inflation_simple.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:658ed8cafc24ac7dc610b7e5ae484f23e1963872ffc2add0632ee61a7c20492d -size 3412 diff --git a/data/.lfs/lcm_msgs.tar.gz b/data/.lfs/lcm_msgs.tar.gz deleted file mode 100644 index 2b2f28c252..0000000000 --- a/data/.lfs/lcm_msgs.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:245395d0c3e200fcfcea8de5de217f645362b145b200c81abc3862e0afc1aa7e -size 327201 diff --git a/data/.lfs/make_navigation_map_mixed.png.tar.gz b/data/.lfs/make_navigation_map_mixed.png.tar.gz deleted file mode 100644 index 4fcaa8134a..0000000000 --- a/data/.lfs/make_navigation_map_mixed.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:36ea27a2434836eb309728f35033674736552daeb82f6e41fb7e3eb175d950da -size 13084 diff --git a/data/.lfs/make_navigation_map_simple.png.tar.gz b/data/.lfs/make_navigation_map_simple.png.tar.gz deleted file mode 100644 index f966b459e2..0000000000 --- a/data/.lfs/make_navigation_map_simple.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a0d211fa1bc517ef78e8dc548ebff09f58ad34c86d28eb3bd48a09a577ee5d1e -size 11767 diff --git a/data/.lfs/make_path_mask_full.png.tar.gz 
b/data/.lfs/make_path_mask_full.png.tar.gz deleted file mode 100644 index 0e9336aaea..0000000000 --- a/data/.lfs/make_path_mask_full.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b772d266dffa82ccf14f13c7d8cc2443210202836883c80f016a56d4cfe2b52a -size 11213 diff --git a/data/.lfs/make_path_mask_two_meters.png.tar.gz b/data/.lfs/make_path_mask_two_meters.png.tar.gz deleted file mode 100644 index 7fa9e767b8..0000000000 --- a/data/.lfs/make_path_mask_two_meters.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:da608d410f4a1afee0965abfac814bc05267bdde31b0d3a9622c39515ee4f813 -size 11395 diff --git a/data/.lfs/models_clip.tar.gz b/data/.lfs/models_clip.tar.gz deleted file mode 100644 index a4ab2b5f88..0000000000 --- a/data/.lfs/models_clip.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:102f11bb0aa952b3cebc4491c5ed3f2122e8c38c76002e22400da4f1e5ca90c5 -size 392327708 diff --git a/data/.lfs/models_contact_graspnet.tar.gz b/data/.lfs/models_contact_graspnet.tar.gz deleted file mode 100644 index 73dd44d033..0000000000 --- a/data/.lfs/models_contact_graspnet.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:431c4611a9e096fd8b0a83fecda39c5a575e72fa933f7bd29ff8cfad5bbb5f9d -size 52149165 diff --git a/data/.lfs/models_edgetam.tar.gz b/data/.lfs/models_edgetam.tar.gz deleted file mode 100644 index 64baa5d139..0000000000 --- a/data/.lfs/models_edgetam.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd452096f91415ce7ca90548a06a87354ccdb19a66925c0242413c80b08f5c57 -size 51988780 diff --git a/data/.lfs/models_fastsam.tar.gz b/data/.lfs/models_fastsam.tar.gz deleted file mode 100644 index 77278f4323..0000000000 --- a/data/.lfs/models_fastsam.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:682cb3816451bd73722cc430fdfce15bbe72a07e50ef2ea81ddaed61d1f22a25 -size 39971209 diff --git a/data/.lfs/models_graspgen.tar.gz b/data/.lfs/models_graspgen.tar.gz deleted file mode 100644 index 8321530922..0000000000 --- a/data/.lfs/models_graspgen.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:058ff764c043dccc516c1519a1e23207500c20a10c432c15eb5e30104477c0a4 -size 2117602984 diff --git a/data/.lfs/models_mobileclip.tar.gz b/data/.lfs/models_mobileclip.tar.gz deleted file mode 100644 index afe82c96e9..0000000000 --- a/data/.lfs/models_mobileclip.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:143747a320e959d9ee9fd239535d0451c378b1a2e165a242e981c4a3e4defb73 -size 1654541503 diff --git a/data/.lfs/models_torchreid.tar.gz b/data/.lfs/models_torchreid.tar.gz deleted file mode 100644 index 6446a049fb..0000000000 --- a/data/.lfs/models_torchreid.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2215070bd8e814ac9867410e3e6c49700f6c3ef7caf29b42d7832be090003743 -size 23873718 diff --git a/data/.lfs/models_yolo.tar.gz b/data/.lfs/models_yolo.tar.gz deleted file mode 100644 index 650d4617ca..0000000000 --- a/data/.lfs/models_yolo.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01796d5884cf29258820cf0e617bf834e9ffb63d8a4c7a54eea802e96fe6a818 -size 72476992 diff --git a/data/.lfs/models_yoloe.tar.gz b/data/.lfs/models_yoloe.tar.gz deleted file mode 100644 index a0870d71d2..0000000000 --- a/data/.lfs/models_yoloe.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a78e39477667b25c9454f846cd66dc044dd05981b2f7ebb0d331ef3626de9bc -size 184892540 diff --git a/data/.lfs/mujoco_sim.tar.gz b/data/.lfs/mujoco_sim.tar.gz deleted file mode 100644 index 57833fbbc6..0000000000 --- a/data/.lfs/mujoco_sim.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:d178439569ed81dfad05455419dc51da2c52021313b6d7b9259d9e30946db7c6 -size 60186340 diff --git a/data/.lfs/occupancy_general.png.tar.gz b/data/.lfs/occupancy_general.png.tar.gz deleted file mode 100644 index b509151e5a..0000000000 --- a/data/.lfs/occupancy_general.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b770d950cf7206a67ccdfd8660ee0ab818228faa9ebbf1a37cbf6ee9d1ac7539 -size 2970 diff --git a/data/.lfs/occupancy_simple.npy.tar.gz b/data/.lfs/occupancy_simple.npy.tar.gz deleted file mode 100644 index cf42cf3667..0000000000 --- a/data/.lfs/occupancy_simple.npy.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e1cf83464442fb284b6f7ba2752546fc4571a73f3490c24a58fb45987555a66c -size 1954 diff --git a/data/.lfs/occupancy_simple.png.tar.gz b/data/.lfs/occupancy_simple.png.tar.gz deleted file mode 100644 index 4962f13db1..0000000000 --- a/data/.lfs/occupancy_simple.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6c9dac221a594c87d0baa60b8c678c63a0c215325080b34ee60df5cc1e1c331d -size 3311 diff --git a/data/.lfs/office_building_1.tar.gz b/data/.lfs/office_building_1.tar.gz deleted file mode 100644 index 0dc013bd94..0000000000 --- a/data/.lfs/office_building_1.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70aac31ca76597b3eee1ddfcbe2ba71d432fd427176f66d8281d75da76641f49 -size 1061581652 diff --git a/data/.lfs/office_lidar.tar.gz b/data/.lfs/office_lidar.tar.gz deleted file mode 100644 index 849e9e3d49..0000000000 --- a/data/.lfs/office_lidar.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4958965334660c4765553afa38081f00a769c8adf81e599e63fabc866c490fd -size 28576272 diff --git a/data/.lfs/openarm_description.tar.gz b/data/.lfs/openarm_description.tar.gz deleted file mode 100644 index 
54aa76da41..0000000000 --- a/data/.lfs/openarm_description.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4da176b6c210b9796bb2ee1a29c15ee9a67578b9ae906eb89a6ec8a44b7f303a -size 70064687 diff --git a/data/.lfs/osm_map_test.tar.gz b/data/.lfs/osm_map_test.tar.gz deleted file mode 100644 index b29104ea17..0000000000 --- a/data/.lfs/osm_map_test.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25097f1bffebd2651f1f4ba93cb749998a064adfdc0cb004981b2317f649c990 -size 1062262 diff --git a/data/.lfs/overlay_occupied.png.tar.gz b/data/.lfs/overlay_occupied.png.tar.gz deleted file mode 100644 index 158a52c6bd..0000000000 --- a/data/.lfs/overlay_occupied.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b55bcf7a2a7a5cbdfdfe8c6a75c53ffe5707197d991d1e39e9aa9dc22503397 -size 3657 diff --git a/data/.lfs/person.tar.gz b/data/.lfs/person.tar.gz deleted file mode 100644 index 1f32d0db58..0000000000 --- a/data/.lfs/person.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:332c3196c6436e7d4c2b7e3314b4a4055865ef358b2e9cf3c8ddd7e173f39b93 -size 2535758 diff --git a/data/.lfs/piper.tar.gz b/data/.lfs/piper.tar.gz deleted file mode 100644 index ac5d1ab468..0000000000 --- a/data/.lfs/piper.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2de07119ba33de9b5c1a2fdc02fee8cabc7775829d5064eff00ea65b949ff5f -size 7475509 diff --git a/data/.lfs/piper_description.tar.gz b/data/.lfs/piper_description.tar.gz deleted file mode 100644 index 3ab8ab227b..0000000000 --- a/data/.lfs/piper_description.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4ce51d4ea15f29d80e69b0fff4a4d667f086e010329bb5c66980a881f1ee539 -size 3091511 diff --git a/data/.lfs/raw_odometry_rotate_walk.tar.gz b/data/.lfs/raw_odometry_rotate_walk.tar.gz deleted file mode 
100644 index ce8bb1d2b0..0000000000 --- a/data/.lfs/raw_odometry_rotate_walk.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:396345f0cd7a94bb9d85540d4bbce01b027618972f83e713e4550abf1d6ec445 -size 15685 diff --git a/data/.lfs/replay_g1.tar.gz b/data/.lfs/replay_g1.tar.gz deleted file mode 100644 index 67750bd0cf..0000000000 --- a/data/.lfs/replay_g1.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19ad1c53c4f8f9414c0921b94cd4c87e81bf0ad676881339f15ae2d8a8619311 -size 557410250 diff --git a/data/.lfs/replay_g1_run.tar.gz b/data/.lfs/replay_g1_run.tar.gz deleted file mode 100644 index 86368ec788..0000000000 --- a/data/.lfs/replay_g1_run.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:00cf21f65a15994895150f74044f5d00d7aa873d24f071d249ecbd09cb8f2b26 -size 559554274 diff --git a/data/.lfs/resample_path_simple.png.tar.gz b/data/.lfs/resample_path_simple.png.tar.gz deleted file mode 100644 index 1a8c1118d6..0000000000 --- a/data/.lfs/resample_path_simple.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b5c454ed6cc66cf4446ce4a246464aec27368da4902651b4ad9ed29b3ba56ec -size 118319 diff --git a/data/.lfs/resample_path_smooth.png.tar.gz b/data/.lfs/resample_path_smooth.png.tar.gz deleted file mode 100644 index 80af3d3805..0000000000 --- a/data/.lfs/resample_path_smooth.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6cc0dfd80bada94f2ab1bb577e2ec1734dad6894113f2fe77964bd80d886c3d3 -size 109699 diff --git a/data/.lfs/rgbd_frames.tar.gz b/data/.lfs/rgbd_frames.tar.gz deleted file mode 100644 index 8081c76961..0000000000 --- a/data/.lfs/rgbd_frames.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:381b9fd296a885f5211a668df16c68581d2aee458c8734c3256a7461f0decccd -size 948391033 diff --git 
a/data/.lfs/security_detection.png.tar.gz b/data/.lfs/security_detection.png.tar.gz deleted file mode 100644 index 30637471ff..0000000000 --- a/data/.lfs/security_detection.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7952034063d4216cb03b870ba8f20f51b59883767ee198880d58a5859151775c -size 42747 diff --git a/data/.lfs/security_no_detection.png.tar.gz b/data/.lfs/security_no_detection.png.tar.gz deleted file mode 100644 index 22acd21a2e..0000000000 --- a/data/.lfs/security_no_detection.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:680467d4219daf29f9211930221b533193591b5d2ca15ff4dbd79cd78203350e -size 14903 diff --git a/data/.lfs/smooth_occupied.png.tar.gz b/data/.lfs/smooth_occupied.png.tar.gz deleted file mode 100644 index 0e09e7d15a..0000000000 --- a/data/.lfs/smooth_occupied.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44c8988b8a7d954ee26a0a5f195b961c62bbdb251b540df6b4d67cd85a72e5ac -size 3511 diff --git a/data/.lfs/three_paths.npy.tar.gz b/data/.lfs/three_paths.npy.tar.gz deleted file mode 100644 index 744eb06305..0000000000 --- a/data/.lfs/three_paths.npy.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba849a6b648ccc9ed4987bbe985ee164dd9ad0324895076baa9f86196b2a0d5f -size 5180 diff --git a/data/.lfs/three_paths.ply.tar.gz b/data/.lfs/three_paths.ply.tar.gz deleted file mode 100644 index a5bfc6bac4..0000000000 --- a/data/.lfs/three_paths.ply.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:639093004355c1ba796c668cd43476dfcabff137ca0bb430ace07730cc512f0e -size 307187 diff --git a/data/.lfs/three_paths.png.tar.gz b/data/.lfs/three_paths.png.tar.gz deleted file mode 100644 index ade2bd3eb7..0000000000 --- a/data/.lfs/three_paths.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:2265ddd76bfb70e7ac44f2158dc0d16e0df264095b0f45a77f95eb85c529d935 -size 2559 diff --git a/data/.lfs/unitree_go2_bigoffice.tar.gz b/data/.lfs/unitree_go2_bigoffice.tar.gz deleted file mode 100644 index 6582702479..0000000000 --- a/data/.lfs/unitree_go2_bigoffice.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a009674153f7ee1f98219af69dc7a92d063f2581bfd9b0aa019762c9235895c -size 2312982327 diff --git a/data/.lfs/unitree_go2_bigoffice_map.pickle.tar.gz b/data/.lfs/unitree_go2_bigoffice_map.pickle.tar.gz deleted file mode 100644 index 89ecb54e87..0000000000 --- a/data/.lfs/unitree_go2_bigoffice_map.pickle.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68adb344ae040c3f94d61dd058beb39cc2811c4ae8328f678bc2ba761c504eb5 -size 2331189 diff --git a/data/.lfs/unitree_go2_lidar_corrected.tar.gz b/data/.lfs/unitree_go2_lidar_corrected.tar.gz deleted file mode 100644 index 013f6b3fe1..0000000000 --- a/data/.lfs/unitree_go2_lidar_corrected.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51a817f2b5664c9e2f2856293db242e030f0edce276e21da0edc2821d947aad2 -size 1212727745 diff --git a/data/.lfs/unitree_go2_office_walk2.tar.gz b/data/.lfs/unitree_go2_office_walk2.tar.gz deleted file mode 100644 index ea392c4b4c..0000000000 --- a/data/.lfs/unitree_go2_office_walk2.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d208cdf537ad01eed2068a4665e454ed30b30894bd9b35c14b4056712faeef5d -size 1693876005 diff --git a/data/.lfs/unitree_office_walk.tar.gz b/data/.lfs/unitree_office_walk.tar.gz deleted file mode 100644 index 419489dbb1..0000000000 --- a/data/.lfs/unitree_office_walk.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bee487130eb662bca73c7d84f14eaea091bd6d7c3f1bfd5173babf660947bdec -size 553620791 diff --git a/data/.lfs/unitree_raw_webrtc_replay.tar.gz 
b/data/.lfs/unitree_raw_webrtc_replay.tar.gz deleted file mode 100644 index d41ff5c48f..0000000000 --- a/data/.lfs/unitree_raw_webrtc_replay.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a02c622cfee712002afc097825ab5e963071471c3445a20a004ef3532cf59888 -size 756280504 diff --git a/data/.lfs/unity_sim_x86.tar.gz b/data/.lfs/unity_sim_x86.tar.gz deleted file mode 100644 index 15c06301fc..0000000000 --- a/data/.lfs/unity_sim_x86.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4ce5b93751657cc991c4242c227627ec3bbc0263085312e602eae264652d3ac -size 581676645 diff --git a/data/.lfs/video.tar.gz b/data/.lfs/video.tar.gz deleted file mode 100644 index 6c0e01a0bb..0000000000 --- a/data/.lfs/video.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:530d2132ef84df228af776bd2a2ef387a31858c63ea21c94fb49c7e579b366c0 -size 4322822 diff --git a/data/.lfs/visualize_occupancy_rainbow.png.tar.gz b/data/.lfs/visualize_occupancy_rainbow.png.tar.gz deleted file mode 100644 index 9bbd2e6ea1..0000000000 --- a/data/.lfs/visualize_occupancy_rainbow.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3dc1e3b6519f7d7ff25b16c3124ee447f02857eeb3eb20930cdab95464b1f0a3 -size 11582 diff --git a/data/.lfs/visualize_occupancy_turbo.png.tar.gz b/data/.lfs/visualize_occupancy_turbo.png.tar.gz deleted file mode 100644 index e2863cdae6..0000000000 --- a/data/.lfs/visualize_occupancy_turbo.png.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c21874bab6ec7cd9692d2b1e67498ddfff3c832ec992e9552fee17093759b270 -size 18593 diff --git a/data/.lfs/xarm6.tar.gz b/data/.lfs/xarm6.tar.gz deleted file mode 100644 index 16771358b8..0000000000 --- a/data/.lfs/xarm6.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:71c9990ab779d20b878ec4c6b0ee21b29bbb7963b59375acf9dd635b7241009b -size 1863215 diff --git a/data/.lfs/xarm7.tar.gz b/data/.lfs/xarm7.tar.gz deleted file mode 100644 index 597c883e80..0000000000 --- a/data/.lfs/xarm7.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c46a39bdf15e91138e00868b001df0a354f870b82f92b5039d571bc37af80e51 -size 1606716 diff --git a/data/.lfs/xarm_description.tar.gz b/data/.lfs/xarm_description.tar.gz deleted file mode 100644 index 4cccd9ab25..0000000000 --- a/data/.lfs/xarm_description.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6e25f1ede8e4022f5053a61717191a2c338ea5af5b81e26bd2c880343aff1316 -size 12709222 diff --git a/dimos/utils/data.py b/dimos/utils/data.py index 63e644909a..4866911887 100644 --- a/dimos/utils/data.py +++ b/dimos/utils/data.py @@ -16,16 +16,24 @@ import os from pathlib import Path import platform -import subprocess import sys import tarfile import tempfile +import boto3 +from botocore import UNSIGNED +from botocore.config import Config as BotoConfig +from botocore.exceptions import BotoCoreError, ClientError + from dimos.constants import DIMOS_PROJECT_ROOT from dimos.utils.logging_config import setup_logger logger = setup_logger() +S3_BUCKET = os.environ.get("DIMOS_DATA_S3_BUCKET", "dimos-github-lfs") +S3_PREFIX = os.environ.get("DIMOS_DATA_S3_PREFIX", ".lfs/") +S3_REGION = os.environ.get("DIMOS_DATA_S3_REGION", "us-east-2") + def _get_user_data_dir() -> Path: """Get platform-specific user data directory.""" @@ -57,7 +65,7 @@ def get_project_root() -> Path: if (DIMOS_PROJECT_ROOT / ".git").exists(): return DIMOS_PROJECT_ROOT - # Running as installed package - clone repo to data dir + # Running as installed package - use a local data directory try: data_dir = _get_user_data_dir() data_dir.mkdir(parents=True, exist_ok=True) @@ -72,36 +80,7 @@ def get_project_root() -> Path: data_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Using tmp
data directory at '{data_dir}'") - repo_dir = data_dir / "repo" - - # Clone if not already cloned - if not (repo_dir / ".git").exists(): - try: - env = os.environ.copy() - env["GIT_LFS_SKIP_SMUDGE"] = "1" - subprocess.run( - [ - "git", - "clone", - "--depth", - "1", - "--branch", - "main", - "https://github.com/dimensionalOS/dimos.git", - str(repo_dir), - ], - check=True, - capture_output=True, - text=True, - env=env, - ) - except subprocess.CalledProcessError as e: - raise RuntimeError( - f"Failed to clone dimos repository: {e.stderr}\n" - f"Make sure you can access https://github.com/dimensionalOS/dimos.git" - ) - - return repo_dir + return data_dir @cache @@ -116,61 +95,22 @@ def _get_lfs_dir() -> Path: return get_data_dir() / ".lfs" -def _check_git_lfs_available() -> bool: - missing = [] - - # Check if git is available - try: - subprocess.run(["git", "--version"], capture_output=True, check=True, text=True) - except (subprocess.CalledProcessError, FileNotFoundError): - missing.append("git") - - # Check if git-lfs is available - try: - subprocess.run(["git-lfs", "version"], capture_output=True, check=True, text=True) - except (subprocess.CalledProcessError, FileNotFoundError): - missing.append("git-lfs") - - if missing: - raise RuntimeError( - f"Missing required tools: {', '.join(missing)}.\n\n" - "Git LFS installation instructions: https://git-lfs.github.io/" - ) - - return True - - -def _is_lfs_pointer_file(file_path: Path) -> bool: - try: - # LFS pointer files are small (typically < 200 bytes) and start with specific text - if file_path.stat().st_size > 1024: # LFS pointers are much smaller - return False - - with open(file_path, encoding="utf-8") as f: - first_line = f.readline().strip() - return first_line.startswith("version https://git-lfs.github.com/spec/") - - except (UnicodeDecodeError, OSError): - return False - - -def _lfs_pull(file_path: Path, repo_root: Path) -> None: +@cache +def _get_s3_client(): # type: ignore[no-untyped-def] + """Get a boto3 S3
client: signed when the credential probe succeeds, unsigned otherwise.""" try: - relative_path = file_path.relative_to(repo_root) - - env = os.environ.copy() - env["GIT_LFS_FORCE_PROGRESS"] = "1" - - subprocess.run( - ["git", "lfs", "pull", "--include", str(relative_path)], - cwd=repo_root, - check=True, - env=env, + session = boto3.Session(region_name=S3_REGION) + client = session.client("s3") + # Quick check that credentials work + client.head_bucket(Bucket=S3_BUCKET) + return client + except (BotoCoreError, ClientError): + # Missing credentials raise BotoCoreError, rejected ones ClientError: use unsigned access (public bucket) + return boto3.client( + "s3", + region_name=S3_REGION, + config=BotoConfig(signature_version=UNSIGNED), ) - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Failed to pull LFS file {file_path}: {e}") - - return None def _decompress_archive(filename: str | Path) -> Path: @@ -181,46 +121,40 @@ def _decompress_archive(filename: str | Path) -> Path: return target_dir / filename_path.name.replace(".tar.gz", "") -def _pull_lfs_archive(filename: str | Path) -> Path: - # Check Git LFS availability first - _check_git_lfs_available() +def _pull_s3_archive(filename: str | Path) -> Path: + """Download an archive from S3 into the local .lfs cache dir.""" + lfs_dir = _get_lfs_dir() + lfs_dir.mkdir(parents=True, exist_ok=True) - # Find repository root - repo_root = get_project_root() + archive_name = str(filename) + ".tar.gz" + local_path = lfs_dir / archive_name + s3_key = S3_PREFIX + archive_name - # Construct path to test data file - file_path = _get_lfs_dir() / (str(filename) + ".tar.gz") + if local_path.exists() and local_path.stat().st_size > 1024: + # Already downloaded (and not an LFS pointer stub) + return local_path - # Check if file exists - if not file_path.exists(): + logger.info(f"Downloading s3://{S3_BUCKET}/{s3_key} -> {local_path}") + client = _get_s3_client() + try: + client.download_file(S3_BUCKET, s3_key, str(local_path)) + except ClientError as e: raise FileNotFoundError( - f"Test file '{filename}' not found at
{file_path}. " - f"Make sure the file is committed to Git LFS in the tests/data directory." + f"Data file '{archive_name}' not found in s3://{S3_BUCKET}/{S3_PREFIX}. " + f"Make sure it has been uploaded with bin/s3_push. ({e})" ) - # If it's an LFS pointer file, ensure LFS is set up and pull the file - if _is_lfs_pointer_file(file_path): - _lfs_pull(file_path, repo_root) - - # Verify the file was actually downloaded - if _is_lfs_pointer_file(file_path): - raise RuntimeError( - f"Failed to download LFS file '{filename}'. The file is still a pointer after attempting to pull." - ) - - return file_path + return local_path def get_data(name: str | Path) -> Path: """ - Get the path to a test data, downloading from LFS if needed. + Get the path to test data, downloading from S3 if needed. This function will: - 1. Check that Git LFS is available - 2. Locate the file in the tests/data directory - 3. Initialize Git LFS if needed - 4. Download the file from LFS if it's a pointer file - 5. Return the Path object to the actual file or dir + 1. Check if the data is already available locally + 2. If not, download the compressed archive from S3 + 3. Decompress and return the path Supports nested paths like "dataset/subdir/file.jpg" - will download and decompress "dataset" archive but return the full nested path. 
@@ -233,8 +167,7 @@ def get_data(name: str | Path) -> Path: Path: Path object to the test file or dir Raises: - RuntimeError: If Git LFS is not available or LFS operations fail - FileNotFoundError: If the test file doesn't exist + FileNotFoundError: If the data file doesn't exist in S3 Usage: # Simple file/dir @@ -256,7 +189,7 @@ def get_data(name: str | Path) -> Path: nested_path = Path(*path_parts[1:]) if len(path_parts) > 1 else None # download and decompress the archive root - archive_path = _decompress_archive(_pull_lfs_archive(archive_name)) + archive_path = _decompress_archive(_pull_s3_archive(archive_name)) # return full path including nested components if nested_path: @@ -266,12 +199,10 @@ def get_data(name: str | Path) -> Path: class LfsPath(type(Path())): # type: ignore[misc] """ - A Path subclass that lazily downloads LFS data when accessed. - - This is useful for both lazy loading and differentiating between LFS paths and regular paths. + A Path subclass that lazily downloads data from S3 when accessed. This class wraps pathlib.Path and ensures that get_data() is called - before any meaningful filesystem operation, making LFS data lazy-loaded. + before any meaningful filesystem operation, making data lazy-loaded. 
Usage: path = LfsPath("sample_data") @@ -295,7 +226,7 @@ def __new__(cls, filename: str | Path) -> "LfsPath": return instance def _ensure_downloaded(self) -> Path: - """Ensure the LFS data is downloaded and return the resolved path.""" + """Ensure the data is downloaded and return the resolved path.""" cache: Path | None = object.__getattribute__(self, "_lfs_resolved_cache") if cache is None: filename = object.__getattribute__(self, "_lfs_filename") @@ -324,7 +255,7 @@ def __str__(self) -> str: return str(self._ensure_downloaded()) def __fspath__(self) -> str: - """Return filesystem path, downloading from LFS if needed.""" + """Return filesystem path, downloading from S3 if needed.""" return str(self._ensure_downloaded()) def __truediv__(self, other: object) -> "LfsPath": diff --git a/dimos/utils/test_data.py b/dimos/utils/test_data.py index 373126ec26..477f1efe9d 100644 --- a/dimos/utils/test_data.py +++ b/dimos/utils/test_data.py @@ -15,7 +15,6 @@ import hashlib import os from pathlib import Path -import subprocess import pytest @@ -25,7 +24,6 @@ @pytest.mark.slow def test_pull_file() -> None: - repo_root = data.get_project_root() test_file_name = "cafe.jpg" test_file_compressed = data._get_lfs_dir() / (test_file_name + ".tar.gz") test_file_decompressed = data.get_data_dir() / test_file_name @@ -34,29 +32,14 @@ def test_pull_file() -> None: if test_file_decompressed.exists(): test_file_decompressed.unlink() - # delete lfs archive file if it exists + # delete cached archive if it exists if test_file_compressed.exists(): test_file_compressed.unlink() assert not test_file_compressed.exists() assert not test_file_decompressed.exists() - # pull the lfs file reference from git - env = os.environ.copy() - env["GIT_LFS_SKIP_SMUDGE"] = "1" - subprocess.run( - ["git", "checkout", "HEAD", "--", test_file_compressed], - cwd=repo_root, - env=env, - check=True, - capture_output=True, - ) - - # ensure we have a pointer file from git (small ASCII text file) - assert 
test_file_compressed.exists() - assert test_file_compressed.stat().st_size < 200 - - # trigger a data file pull + # trigger a data file pull from S3 assert data.get_data(test_file_name) == test_file_decompressed # validate data is received @@ -81,7 +64,6 @@ def test_pull_file() -> None: @pytest.mark.slow def test_pull_dir() -> None: - repo_root = data.get_project_root() test_dir_name = "ab_lidar_frames" test_dir_compressed = data._get_lfs_dir() / (test_dir_name + ".tar.gz") test_dir_decompressed = data.get_data_dir() / test_dir_name @@ -92,26 +74,14 @@ def test_pull_dir() -> None: item.unlink() test_dir_decompressed.rmdir() - # delete lfs archive file if it exists + # delete cached archive if it exists if test_dir_compressed.exists(): test_dir_compressed.unlink() - # pull the lfs file reference from git - env = os.environ.copy() - env["GIT_LFS_SKIP_SMUDGE"] = "1" - subprocess.run( - ["git", "checkout", "HEAD", "--", test_dir_compressed], - cwd=repo_root, - env=env, - check=True, - capture_output=True, - ) - - # ensure we have a pointer file from git (small ASCII text file) - assert test_dir_compressed.exists() - assert test_dir_compressed.stat().st_size < 200 + assert not test_dir_compressed.exists() + assert not test_dir_decompressed.exists() - # trigger a data file pull + # trigger a data file pull from S3 assert data.get_data(test_dir_name) == test_dir_decompressed assert test_dir_compressed.stat().st_size > 200 @@ -304,6 +274,34 @@ def test_lfs_path_division_operator() -> None: assert "three_paths.png" in str(result) +@pytest.mark.slow +def test_missing_file_raises() -> None: + """A file that doesn't exist in S3 should raise FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="not found in s3://"): + data.get_data("definitely_not_a_real_file_xyz_123") + + +@pytest.mark.slow +def test_get_data_idempotent() -> None: + """Calling get_data twice on the same file should not re-download.""" + filename = "three_paths.png" + data_dir = 
data.get_data_dir() + file_path = data_dir / filename + + # Ensure clean state, then prime the cache + if file_path.exists(): + file_path.unlink() + + first = data.get_data(filename) + assert first.exists() + first_mtime = first.stat().st_mtime + + # Second call should hit the local-file fast path (no re-download) + second = data.get_data(filename) + assert second == first + assert second.stat().st_mtime == first_mtime + + @pytest.mark.slow def test_lfs_path_multiple_instances() -> None: """Test that multiple LfsPath instances for same file work correctly.""" diff --git a/pyproject.toml b/pyproject.toml index e28c767ff0..34572e70be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,7 @@ dependencies = [ "sqlite-vec>=0.1.6", "lz4>=4.4.5", "rpyc>=6.0.0", + "boto3>=1.34", ] @@ -398,6 +399,9 @@ exclude = "^dimos/models/Detic(/|$)|^dimos/rxpy_backpressure(/|$)|.*/test_.|.*/c [[tool.mypy.overrides]] module = [ "annotation_protocol", + "boto3", + "botocore", + "botocore.*", "cyclonedds", "cyclonedds.*", "dimos_lcm.*", diff --git a/uv.lock b/uv.lock index b85a66ba14..1e6ab9f51f 100644 --- a/uv.lock +++ b/uv.lock @@ -439,6 +439,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, ] +[[package]] +name = "boto3" +version = "1.43.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/b0/90ba01763dd483bb040d0815dc0ba893421e3f5926672ceab9acbb73b23f/boto3-1.43.5.tar.gz", hash = "sha256:414be7868f25c3b6a0232301c8ab40347911b6b191926b61f00a63f89b97b2bc", size = 113150, upload-time = "2026-05-06T19:56:49.629Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/10/bb/347307758c2003783df1d9a9b07596928d05a6ca0e17790cea3b18105244/boto3-1.43.5-py3-none-any.whl", hash = "sha256:aa8a296c8db55d812767b282cfe4c7977f0b0eeaa709abdaeb368b9c738e901f", size = 140502, upload-time = "2026-05-06T19:56:46.626Z" }, +] + +[[package]] +name = "botocore" +version = "1.43.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/a2/1285a22bf157f9e97a8fd236daea95d9b14cc8425ae5f8a616badf948408/botocore-1.43.5.tar.gz", hash = "sha256:5c7207816ab5e48382adcb2a64db388fa4abe9ee1d23f72c82ae62c51a0bc84e", size = 15321290, upload-time = "2026-05-06T19:56:35.658Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/d2/99f1741b12e3cdba2e5370f6dafaab743a373c6f83592601ec75ff2cc47f/botocore-1.43.5-py3-none-any.whl", hash = "sha256:a1df6e0c6346735936f42e6b99f3b28f1e9397731c0bc2563c617df7965a0dc0", size = 15002116, upload-time = "2026-05-06T19:56:29.993Z" }, +] + [[package]] name = "brax" version = "0.14.1" @@ -1709,6 +1737,7 @@ version = "0.0.11" source = { editable = "." 
} dependencies = [ { name = "annotation-protocol" }, + { name = "boto3" }, { name = "colorlog" }, { name = "dimos-lcm" }, { name = "dimos-viewer" }, @@ -1985,6 +2014,7 @@ web = [ requires-dist = [ { name = "annotation-protocol", specifier = ">=1.4.0" }, { name = "anthropic", marker = "extra == 'agents'", specifier = ">=0.19.0" }, + { name = "boto3", specifier = ">=1.34" }, { name = "catkin-pkg", marker = "extra == 'misc'" }, { name = "cerebras-cloud-sdk", marker = "extra == 'misc'" }, { name = "colorlog", specifier = "==6.9.0" }, @@ -3716,6 +3746,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -8775,6 +8814,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/73/4de6579bac8e979fca0a77e54dec1f1e011a0d268165eb8a9bc0982a6564/ruff-0.14.3-py3-none-win_arm64.whl", hash = "sha256:26eb477ede6d399d898791d01961e16b86f02bc2486d0d1a7a9bb2379d055dc1", size = 12590017, upload-time = "2025-10-31T00:26:24.52Z" }, ] 
+[[package]] +name = "s3transfer" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9b/ec/7c692cde9125b77e84b307354d4fb705f98b8ccad59a036d5957ca75bfc3/s3transfer-0.17.0.tar.gz", hash = "sha256:9edeb6d1c3c2f89d6050348548834ad8289610d886e5bf7b7207728bd43ce33a", size = 155337, upload-time = "2026-04-29T22:07:36.33Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/72/c6c32d2b657fa3dad1de340254e14390b1e334ce38268b7ad51abda3c8c2/s3transfer-0.17.0-py3-none-any.whl", hash = "sha256:ce3801712acf4ad3e89fb9990df97b4972e93f4b3b0004d214be5bce12814c20", size = 86811, upload-time = "2026-04-29T22:07:34.966Z" }, +] + [[package]] name = "safetensors" version = "0.7.0"