2026-05-04 09:44:56 +02:00

58 lines
1.6 KiB
Python

#!/usr/bin/env python3
"""
CLI to run the ETL: NetCDF trajectory file → Arrow blocks + metadata.

Usage:
    python -m scripts.run_etl path/to/trajectories.nc
    python -m scripts.run_etl path/to/file.nc --sim-id my_run
    python -m scripts.run_etl file.nc --out-dir /path/to/backend/data --write-json
"""
from __future__ import annotations
import argparse
from pathlib import Path
def main():
parser = argparse.ArgumentParser(description="ETL: NetCDF trajectories → Arrow blocks")
parser.add_argument("nc_path", type=str, help="Path to input .nc file")
parser.add_argument(
"--out-dir",
type=str,
default=None,
help="Output base directory (default: backend/data)",
)
parser.add_argument(
"--sim-id",
type=str,
default="default",
help="Simulation ID (default: default)",
)
parser.add_argument(
"--write-json",
action="store_true",
help="Also write block_*.json (for backward compat)",
)
args = parser.parse_args()
if args.out_dir is None:
out_dir = Path(__file__).resolve().parent.parent / "data"
else:
out_dir = Path(args.out_dir)
from etl.pipeline import run_etl
nc_path = Path(args.nc_path)
if not nc_path.exists():
raise SystemExit(f"File not found: {nc_path}")
print(f"ETL: {nc_path}{out_dir / 'simulations' / args.sim_id}")
meta = run_etl(nc_path, out_dir, sim_id=args.sim_id, write_json=args.write_json)
print(f" num_particles={meta['num_particles']}, num_steps={meta['num_steps']}, num_blocks={meta['num_blocks']}")
print("Done.")
# Run the CLI only when this file is executed as a script (directly or via
# ``python -m``); importing the module has no side effects.
if __name__ == "__main__":
    main()