#!/usr/bin/env python3
"""
CLI to run ETL: NetCDF trajectory file → Arrow blocks + metadata.

Usage:
  python -m scripts.run_etl path/to/trajectories.nc
  python -m scripts.run_etl path/to/file.nc --sim-id my_run
  python -m scripts.run_etl file.nc --out-dir /path/to/backend/data --write-json
"""
from __future__ import annotations

import argparse
from pathlib import Path


def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and run the ETL pipeline on one input file.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: If the input path does not exist, or if argparse rejects
            the arguments.
    """
    parser = argparse.ArgumentParser(description="ETL: NetCDF trajectories → Arrow blocks")
    parser.add_argument("nc_path", type=str, help="Path to input .nc file")
    parser.add_argument(
        "--out-dir",
        type=str,
        default=None,
        help="Output base directory (default: backend/data)",
    )
    parser.add_argument(
        "--sim-id",
        type=str,
        default="default",
        help="Simulation ID (default: default)",
    )
    parser.add_argument(
        "--write-json",
        action="store_true",
        help="Also write block_*.json (for backward compat)",
    )
    args = parser.parse_args(argv)

    # Validate the input before importing the pipeline, so a mistyped path is
    # reported immediately and does not depend on the ETL package importing.
    nc_path = Path(args.nc_path)
    if not nc_path.exists():
        raise SystemExit(f"File not found: {nc_path}")

    if args.out_dir is None:
        # Default: the "data" directory next to this script's parent directory.
        out_dir = Path(__file__).resolve().parent.parent / "data"
    else:
        out_dir = Path(args.out_dir)

    # Deferred import: only needed once the arguments are known to be usable.
    from etl.pipeline import run_etl

    print(f"ETL: {nc_path} → {out_dir / 'simulations' / args.sim_id}")
    meta = run_etl(nc_path, out_dir, sim_id=args.sim_id, write_json=args.write_json)
    # The returned metadata is indexed by these three keys for the summary line.
    print(f" num_particles={meta['num_particles']}, num_steps={meta['num_steps']}, num_blocks={meta['num_blocks']}")
    print("Done.")


if __name__ == "__main__":
    main()