dist_train.sh

#!/usr/bin/env bash

# Positional arguments: config file and number of GPUs per node.
CONFIG=$1
GPUS=$2
# Multi-node settings, overridable via environment variables.
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Put the repository root on PYTHONPATH so train.py can import the package,
# then launch one process per GPU. Any arguments beyond the first two
# (${@:3}) are forwarded to train.py.
PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes=$NNODES \
    --node_rank=$NODE_RANK \
    --master_addr=$MASTER_ADDR \
    --nproc_per_node=$GPUS \
    --master_port=$PORT \
    $(dirname "$0")/train.py \
    $CONFIG \
    --launcher pytorch ${@:3}
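
For reference, a hypothetical invocation, assuming the script sits in the same directory as train.py; the config path and host address below are placeholders, not names from this repository:

# Single node, 8 GPUs (placeholder config path); any further arguments
# are forwarded to train.py via ${@:3}.
./dist_train.sh configs/example_config.py 8

# Two nodes with 8 GPUs each: run once per node with the matching
# NODE_RANK, pointing MASTER_ADDR at the rank-0 node.
NNODES=2 NODE_RANK=0 MASTER_ADDR=10.0.0.1 ./dist_train.sh configs/example_config.py 8
NNODES=2 NODE_RANK=1 MASTER_ADDR=10.0.0.1 ./dist_train.sh configs/example_config.py 8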