ReT-2/scripts/rag/evqa/evqa_rag_qwen.sh at main · aimagelab/ReT-2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
#SBATCH --job-name=evqa_rag_qwen
#SBATCH --output=
#SBATCH --error=
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --gpus-per-node=4
#SBATCH --mem=128G
#SBATCH --cpus-per-task=8
#SBATCH --partition=
#SBATCH --account=
#SBATCH --time=04:00:00

# Runs src/rag/rag_qwen.py through srun + torchrun, starting one worker
# process per GPU requested from SLURM, and writes results to a JSONL file
# under ./rag/results/<dataset_name>/.
#
# Before submitting, fill in `image_root_query` below and make sure the
# query file and the ranking file exist relative to ~/ReT-2.

conda activate ret2
# All paths below are relative to the repository root, so abort if we
# cannot get there instead of running from an arbitrary directory.
cd ~/ReT-2 || { echo "error: cannot cd to ~/ReT-2" >&2; exit 1; }

export PYTHONPATH=.
export TRANSFORMERS_VERBOSITY=info
export TOKENIZERS_PARALLELISM=false
export OMP_NUM_THREADS=1

model_checkpoint="Qwen/Qwen2.5-VL-7B-Instruct"

dataset_name="evqa"

# https://huggingface.co/datasets/aimagelab/ReT-M2KR/blob/main/jsonl/rag/evqa_test.jsonl
dataset_path_query="evqa_test.jsonl"

# https://github.com/google-research/google-research/tree/master/encyclopedic_vqa#vqa-questions
# Placeholder: set this to the directory holding the query images.
image_root_query=

retriever="ReT2-M2KR-ColBERT-SigLIP2-ViT-L"
ranking_path="./rag/indices/${retriever}/${dataset_name}/ranking.jsonl"
top_k=3
generation_num_beams=5

dataloader_num_workers=2
# NOTE(review): batch_size is assigned but never passed to rag_qwen.py.
# Confirm whether the script accepts a batch-size flag before wiring it in.
batch_size=2
tokenizer_buffer_length=676

# Fail before any side effect if the placeholder above was left empty;
# otherwise the job would only fail after being scheduled.
if [[ -z "$image_root_query" ]]; then
  echo "error: image_root_query is empty; set it to the query image directory" >&2
  exit 1
fi

output_dir="./rag/results/${dataset_name}"
mkdir -p "$output_dir"
# The output file name records the retriever, top-k and beam count used.
output_path="${output_dir}/${retriever}__top_${top_k}__bs_${generation_num_beams}.jsonl"

# Arguments are collected in an array so that every value reaches
# rag_qwen.py as exactly one word, even if it is empty or contains spaces.
rag_args=(
  --query_path "$dataset_path_query"
  --image_root "$image_root_query"
  --output_path "$output_path"
  --model_checkpoint "$model_checkpoint"
  --generation_num_beams "$generation_num_beams"
  --dataloader_num_workers "$dataloader_num_workers"
  --dataset_name "$dataset_name"
  --ranking_path "$ranking_path"
  --tokenizer_buffer_length "$tokenizer_buffer_length"
  --top_k "$top_k"
)

srun -c "$SLURM_CPUS_PER_TASK" --mem "$SLURM_MEM_PER_NODE" \
  torchrun \
  --nproc-per-node="$SLURM_GPUS_PER_NODE" --standalone \
  src/rag/rag_qwen.py "${rag_args[@]}"