58 lines
1.6 KiB
Python
58 lines
1.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CLI to run ETL: NetCDF trajectory file → Arrow blocks + metadata.

Usage:
    python -m scripts.run_etl path/to/trajectories.nc
    python -m scripts.run_etl path/to/file.nc --sim-id my_run
    python -m scripts.run_etl file.nc --out-dir /path/to/backend/data --write-json
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
|
|
def main(argv=None) -> None:
    """Run the NetCDF → Arrow ETL from the command line.

    Parses the CLI arguments, checks that the input file exists, resolves
    the output directory, runs ``run_etl`` and prints a short summary.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) reads the process
            command line, so existing ``main()`` callers are unaffected.

    Raises:
        SystemExit: With the message ``File not found: <path>`` when the
            input path does not exist; argparse also raises it for invalid
            arguments and ``--help``.
    """
    parser = argparse.ArgumentParser(description="ETL: NetCDF trajectories → Arrow blocks")
    parser.add_argument("nc_path", type=str, help="Path to input .nc file")
    parser.add_argument(
        "--out-dir",
        type=str,
        default=None,
        help="Output base directory (default: backend/data)",
    )
    parser.add_argument(
        "--sim-id",
        type=str,
        default="default",
        help="Simulation ID (default: default)",
    )
    parser.add_argument(
        "--write-json",
        action="store_true",
        help="Also write block_*.json (for backward compat)",
    )
    args = parser.parse_args(argv)

    # Fail fast: check the input path before importing the pipeline, so a
    # mistyped path is reported without first loading the ETL package.
    nc_path = Path(args.nc_path)
    if not nc_path.exists():
        raise SystemExit(f"File not found: {nc_path}")

    if args.out_dir is None:
        # Default output base: the "data" directory two levels up from this file.
        out_dir = Path(__file__).resolve().parent.parent / "data"
    else:
        out_dir = Path(args.out_dir)

    # Imported here rather than at module top, so argument parsing (including
    # --help) and the path check above run before the pipeline is loaded.
    from etl.pipeline import run_etl

    print(f"ETL: {nc_path} → {out_dir / 'simulations' / args.sim_id}")
    # The returned metadata is indexed by the three keys printed below.
    meta = run_etl(nc_path, out_dir, sim_id=args.sim_id, write_json=args.write_json)
    print(f" num_particles={meta['num_particles']}, num_steps={meta['num_steps']}, num_blocks={meta['num_blocks']}")
    print("Done.")
|
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly
# (e.g. ``python -m scripts.run_etl ...``), not when it is imported.
if __name__ == "__main__":
    main()
|