electra_large_squad2_512.sh
#!/bin/bash
#
# Fine-tune google/electra-large-discriminator on SQuAD 2.0 with a 512-token
# sequence length, using the HuggingFace Transformers run_squad.py example.
#
# Usage: electra_large_squad2_512.sh [extra run_squad.py arguments...]
#   Any additional arguments are forwarded verbatim to run_squad.py.
#
# Requires: the transformers examples checkout and SQuAD 2.0 JSON files at the
# paths below (hardcoded to this machine — adjust EXAMPLES as needed).

set -euo pipefail

export OMP_NUM_THREADS=12

readonly EXAMPLES=/media/dn/work2/nlpu/transformers/examples/question-answering
readonly SQUAD=${EXAMPLES}/scripts/squad2.0
readonly MODEL_PATH=${EXAMPLES}/runs/electra_large_discriminator_squad2_512

# Effective train batch size = 8 per GPU * 16 accumulation steps = 128.
# BUGFIX: the original dropped the line continuation before $@, so extra CLI
# arguments were executed as a separate shell command instead of being passed
# to run_squad.py; "$@" is now quoted and part of the invocation.
python "${EXAMPLES}/run_squad.py" \
  --model_type electra \
  --model_name_or_path google/electra-large-discriminator \
  --do_train \
  --do_eval \
  --train_file "${SQUAD}/train-v2.0.json" \
  --predict_file "${SQUAD}/dev-v2.0.json" \
  --version_2_with_negative \
  --do_lower_case \
  --num_train_epochs 3 \
  --warmup_steps 306 \
  --weight_decay 0.01 \
  --learning_rate 3e-5 \
  --max_grad_norm 0.5 \
  --adam_epsilon 1e-6 \
  --max_seq_length 512 \
  --doc_stride 128 \
  --per_gpu_train_batch_size 8 \
  --gradient_accumulation_steps 16 \
  --per_gpu_eval_batch_size 128 \
  --fp16 \
  --fp16_opt_level O1 \
  --threads 12 \
  --logging_steps 50 \
  --save_steps 1000 \
  --overwrite_output_dir \
  --output_dir "${MODEL_PATH}" \
  "$@"
| 37 | |