4040 VENV_DIR : ${{ github.workspace }}/.venv
4141 HF_HOME : " /shark-cache/data/huggingface"
4242 HF_TOKEN : ${{ secrets.HF_FLUX_TOKEN }}
43+ MODEL_TAG : " "
4344 steps :
# Check out the repository. The action is pinned to a full commit SHA, which the trailing comment labels as v4.2.2.
4445 - uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
4546
@@ -54,48 +55,250 @@ jobs:
5455 source ${VENV_DIR}/bin/activate
5556
# Run the environment setup script in nightly mode, then create the
# output_artifacts directory that the export steps write their logs into.
- name: Install pip deps
  run: |
    bash scripts/setenv.sh --nightly
    mkdir -p output_artifacts
5861
# Llama 8B FP16: export the IRPA, compile it, validate the compiled VMFB, and
# publish the IRPA to Azure only when it differs from the published copy.
- name: Export 8B-FP16 instruct model
  id: export_irpa_llama_8b_fp16
  # A failure in this step does not fail the job.
  continue-on-error: true
  env:
    # SAS token read by the `az storage blob` calls in the script below.
    # NOTE(review): mapped at step level because no other mapping of this
    # secret is visible in this part of the workflow; drop it if the job or
    # workflow `env:` already provides it.
    AZURE_WRITE_ACCESS_OSSCI: ${{ secrets.AZURE_WRITE_ACCESS_OSSCI }}
  run: |
    # -e: exit on any error.
    # pipefail: without it `cmd | tee log || exit 1` only sees tee's exit
    # status, so a failed compile or validation would go unnoticed.
    set -eo pipefail
    export MODEL_TAG="llama3_8b_fp16"
    echo "$MODEL_TAG"

    IRPA_FILE=/shark-dev/8b/instruct/weights/llama3.1_8b_instruct_fp16.irpa
    PREVIOUS_IRPA=instruct_8b_fp16_previous.irpa
    BLOB_BASE=ossci-models/llama_3_1/instruct_8b_fp16
    ARTIFACTS_DIR="$(pwd)/output_artifacts"

    # Export 8B-FP16 instruct model
    echo "=== Exporting 8B-FP16 instruct model ==="
    bash scripts/download_export_irpa.sh \
      --model Llama-3.1-8B-Instruct \
      --hf-token "${HF_TOKEN}" || { echo "Export failed"; exit 1; }

    # Run export and compile; a failure here aborts the step.
    echo "=== Running export and compile ==="
    bash scripts/export_and_compile.sh \
      --irpa "$IRPA_FILE" \
      --bs-prefill 4 --bs-decode 4 2>&1 \
      | tee "${ARTIFACTS_DIR}/${MODEL_TAG}_export_and_compilation.log" \
      || { echo "Compilation failed"; exit 1; }

    # Validate VMFB responses. Best effort: a failure is reported but does
    # not stop the step, so the IRPA check below still runs.
    echo "=== Validating VMFB Responses ==="
    bash scripts/validate_numerics.sh \
      --irpa "$IRPA_FILE" \
      --vmfb "${ARTIFACTS_DIR}/output.vmfb" \
      --config "${ARTIFACTS_DIR}/config_attn.json" \
      --tokenizer /shark-dev/8b/instruct/tokenizer.json \
      --tokenizer_config /shark-dev/8b/instruct/tokenizer_config.json \
      --steps 64 \
      --kv-cache-dtype float16 \
      | tee "${ARTIFACTS_DIR}/${MODEL_TAG}_run_llm_vmfb.log" \
      || { echo "Validation failed"; }

    # Check for IRPA changes
    echo "=== Checking for IRPA changes ==="
    echo "Downloading latest IRPA file from Azure"
    # Remove any copy left behind by an earlier run so that a failed download
    # cannot be mistaken for the currently published file.
    rm -f "$PREVIOUS_IRPA"
    az storage blob download \
      --account-name sharkpublic \
      --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
      --container-name ossci \
      --name "${BLOB_BASE}.irpa" \
      --file "$PREVIOUS_IRPA" \
      --no-progress || echo "No previous IRPA file found, will upload new file"

    UPLOAD_REQUIRED=false
    if [ -f "$PREVIOUS_IRPA" ]; then
      echo "Comparing IRPA files"
      # cmp -s: silent byte-wise comparison, exit status 0 when identical.
      if cmp -s "$IRPA_FILE" "$PREVIOUS_IRPA"; then
        echo "IRPA files are identical, skipping upload"
      else
        echo "IRPA files differ, upload required"
        UPLOAD_REQUIRED=true
      fi
    else
      echo "No previous IRPA file found, upload required"
      UPLOAD_REQUIRED=true
    fi

    # Upload IRPA file if required: once under a dated name, once as the
    # "latest" blob that the download above compares against.
    if [ "$UPLOAD_REQUIRED" = true ]; then
      echo "=== Uploading new IRPA for llama3-8b-fp16 ==="
      az storage blob upload \
        --account-name sharkpublic \
        --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
        --container-name ossci \
        --name "${BLOB_BASE}-${{ env.date }}.irpa" \
        --file "$IRPA_FILE"

      az storage blob upload \
        --account-name sharkpublic \
        --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
        --container-name ossci \
        --name "${BLOB_BASE}.irpa" \
        --file "$IRPA_FILE" \
        --overwrite
    fi

    echo "=== Completed llama-8b-fp16 workflow ==="
138+
# Llama 8B FP8: export the IRPA, compile it, validate the compiled VMFB, and
# publish the IRPA to Azure only when it differs from the published copy.
- name: Export 8B-FP8 instruct model
  id: export_irpa_llama_8b_fp8
  # A failure in this step does not fail the job.
  continue-on-error: true
  env:
    # SAS token read by the `az storage blob` calls in the script below.
    # NOTE(review): mapped at step level because no other mapping of this
    # secret is visible in this part of the workflow; drop it if the job or
    # workflow `env:` already provides it.
    AZURE_WRITE_ACCESS_OSSCI: ${{ secrets.AZURE_WRITE_ACCESS_OSSCI }}
  run: |
    # -e: exit on any error.
    # pipefail: without it `cmd | tee log || exit 1` only sees tee's exit
    # status, so a failed compile or validation would go unnoticed.
    set -eo pipefail
    # Exported like in the other export steps so child scripts see it too.
    export MODEL_TAG="llama3_8b_fp8"
    echo "$MODEL_TAG"

    # The FP8 IRPA is referenced relative to the working directory.
    IRPA_FILE=instruct_8b_fp8_e4m3fn.irpa
    PREVIOUS_IRPA=instruct_8b_fp8_e4m3fn_previous.irpa
    BLOB_BASE=ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn
    ARTIFACTS_DIR="$(pwd)/output_artifacts"

    # Export 8B-FP8 instruct model
    echo "=== Exporting 8B-FP8 instruct model ==="
    bash scripts/download_export_irpa.sh \
      --model Llama-3.1-8B-Instruct-FP8-KV \
      --hf-token "${HF_TOKEN}" || { echo "Export failed"; exit 1; }

    # Run export and compile; a failure here aborts the step.
    echo "=== Running export and compile ==="
    bash scripts/export_and_compile.sh \
      --irpa "$IRPA_FILE" \
      --dtype fp8 --bs-prefill 4 --bs-decode 4 2>&1 \
      | tee "${ARTIFACTS_DIR}/${MODEL_TAG}_export_and_compilation.log" \
      || { echo "Compilation failed"; exit 1; }

    # Validate VMFB responses. Best effort: a failure is reported but does
    # not stop the step, so the IRPA check below still runs.
    echo "=== Validating VMFB Responses ==="
    bash scripts/validate_numerics.sh \
      --irpa "$IRPA_FILE" \
      --vmfb "${ARTIFACTS_DIR}/output.vmfb" \
      --config "${ARTIFACTS_DIR}/config_attn.json" \
      --tokenizer /shark-dev/8b/instruct/tokenizer.json \
      --tokenizer_config /shark-dev/8b/instruct/tokenizer_config.json \
      --steps 64 \
      --kv-cache-dtype float8_e4m3fnuz \
      | tee "${ARTIFACTS_DIR}/${MODEL_TAG}_run_llm_vmfb.log" \
      || { echo "Validation failed "; }

    # Check for IRPA changes
    echo "=== Checking for IRPA changes ==="
    echo "Downloading latest IRPA file from Azure"
    # Remove any copy left behind by an earlier run so that a failed download
    # cannot be mistaken for the currently published file.
    rm -f "$PREVIOUS_IRPA"
    az storage blob download \
      --account-name sharkpublic \
      --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
      --container-name ossci \
      --name "${BLOB_BASE}.irpa" \
      --file "$PREVIOUS_IRPA" \
      --no-progress || echo "No previous IRPA file found, will upload new file"

    UPLOAD_REQUIRED=false
    if [ -f "$PREVIOUS_IRPA" ]; then
      echo "Comparing IRPA files"
      # cmp -s: silent byte-wise comparison, exit status 0 when identical.
      if cmp -s "$IRPA_FILE" "$PREVIOUS_IRPA"; then
        echo "IRPA files are identical, skipping upload"
      else
        echo "IRPA files differ, upload required"
        UPLOAD_REQUIRED=true
      fi
    else
      echo "No previous IRPA file found, upload required"
      UPLOAD_REQUIRED=true
    fi

    # Upload IRPA file if required: once under a dated name, once as the
    # "latest" blob that the download above compares against.
    if [ "$UPLOAD_REQUIRED" = true ]; then
      echo "=== Uploading new IRPA for llama3-8b-fp8 ==="
      az storage blob upload \
        --account-name sharkpublic \
        --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
        --container-name ossci \
        --name "${BLOB_BASE}-${{ env.date }}.irpa" \
        --file "$IRPA_FILE"

      az storage blob upload \
        --account-name sharkpublic \
        --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
        --container-name ossci \
        --name "${BLOB_BASE}.irpa" \
        --file "$IRPA_FILE" \
        --overwrite
    fi

    echo "=== Completed llama-8b-fp8 workflow ==="
215+
# Llama 70B FP16: export the IRPA, compile it, validate the compiled VMFB, and
# publish the IRPA to Azure only when it differs from the published copy.
- name: Export 70B-FP16 instruct model
  env:
    # SAS token read by the `az storage blob` calls in the script below.
    # NOTE(review): mapped at step level because no other mapping of this
    # secret is visible in this part of the workflow; drop it if the job or
    # workflow `env:` already provides it.
    AZURE_WRITE_ACCESS_OSSCI: ${{ secrets.AZURE_WRITE_ACCESS_OSSCI }}
  run: |
    # -e: exit on any error.
    # pipefail: without it `cmd | tee log || exit 1` only sees tee's exit
    # status, so a failed compile or validation would go unnoticed.
    set -eo pipefail
    export MODEL_TAG="llama3_70b_fp16"
    echo "$MODEL_TAG"

    IRPA_FILE=/shark-dev/70b/instruct/weights/llama3.1_70b_instruct_fp16.irpa
    PREVIOUS_IRPA=instruct_70b_fp16_previous.irpa
    BLOB_BASE=ossci-models/llama_3_1/70b/instruct_70b_fp16
    ARTIFACTS_DIR="$(pwd)/output_artifacts"

    # Export 70B-FP16 instruct model
    echo "=== Exporting 70B-FP16 instruct model ==="
    bash scripts/download_export_irpa.sh \
      --model Llama-3.1-70B-Instruct \
      --hf-token "${HF_TOKEN}" || { echo "Export failed"; exit 1; }

    # Run export and compile; a failure here aborts the step.
    echo "=== Running export and compile ==="
    bash scripts/export_and_compile.sh \
      --irpa "$IRPA_FILE" \
      --bs-prefill 4 --bs-decode 4 2>&1 \
      | tee "${ARTIFACTS_DIR}/${MODEL_TAG}_export_and_compilation.log" \
      || { echo "Compilation failed"; exit 1; }

    # Validate VMFB responses. Best effort: a failure is reported but does
    # not stop the step, so the IRPA check below still runs.
    echo "=== Validating VMFB Responses ==="
    bash scripts/validate_numerics.sh \
      --irpa "$IRPA_FILE" \
      --vmfb "${ARTIFACTS_DIR}/output.vmfb" \
      --config "${ARTIFACTS_DIR}/config_attn.json" \
      --tokenizer /shark-dev/70b/instruct/tokenizer.json \
      --tokenizer_config /shark-dev/70b/instruct/tokenizer_config.json \
      --steps 64 \
      --kv-cache-dtype float16 \
      | tee "${ARTIFACTS_DIR}/${MODEL_TAG}_run_llm_vmfb.log" \
      || { echo "Validation failed"; }

    # Check for IRPA changes
    echo "=== Checking for IRPA changes ==="
    echo "Downloading latest IRPA file from Azure"
    # Remove any copy left behind by an earlier run so that a failed download
    # cannot be mistaken for the currently published file.
    rm -f "$PREVIOUS_IRPA"
    az storage blob download \
      --account-name sharkpublic \
      --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
      --container-name ossci \
      --name "${BLOB_BASE}.irpa" \
      --file "$PREVIOUS_IRPA" \
      --no-progress || echo "No previous IRPA file found, will upload new file"

    UPLOAD_REQUIRED=false
    if [ -f "$PREVIOUS_IRPA" ]; then
      echo "Comparing IRPA files"
      # cmp -s: silent byte-wise comparison, exit status 0 when identical.
      if cmp -s "$IRPA_FILE" "$PREVIOUS_IRPA"; then
        echo "IRPA files are identical, skipping upload"
      else
        echo "IRPA files differ, upload required"
        UPLOAD_REQUIRED=true
      fi
    else
      echo "No previous IRPA file found, upload required"
      UPLOAD_REQUIRED=true
    fi

    # Upload IRPA file if required: once under a dated name, once as the
    # "latest" blob that the download above compares against.
    if [ "$UPLOAD_REQUIRED" = true ]; then
      echo "=== Uploading new IRPA for llama3-70b-fp16 ==="
      az storage blob upload \
        --account-name sharkpublic \
        --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
        --container-name ossci \
        --name "${BLOB_BASE}-${{ env.date }}.irpa" \
        --file "$IRPA_FILE"

      az storage blob upload \
        --account-name sharkpublic \
        --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
        --container-name ossci \
        --name "${BLOB_BASE}.irpa" \
        --file "$IRPA_FILE" \
        --overwrite
    fi

    echo "=== Completed llama-70b-fp16 workflow ==="
290+
# Mistral-Nemo-Instruct-2407-FP8: IRPA export only; this step runs just the
# download/export script for that model.
- name: Export Mistral-Nemo-Instruct-2407-FP8
  run: |
    model=Mistral-Nemo-Instruct-2407-FP8
    bash scripts/download_export_irpa.sh --model "$model" --hf-token ${HF_TOKEN}
# NOTE(review): every line of this step carries a `-` deletion marker, i.e. the
# change shown here removes the step. It was the only visible place that mapped
# secrets.AZURE_WRITE_ACCESS_OSSCI into the environment, and it uploaded the
# dated FP8 IRPA unconditionally (see its own TODO about uploading only on change).
79- - name : Upload All IRPA files to ossci
80- env :
81- AZURE_WRITE_ACCESS_OSSCI : ${{ secrets.AZURE_WRITE_ACCESS_OSSCI }}
82- run : |
83- # TODO: add check for upload only if file has changed.
84- echo "upload new IRPA"
85- az storage blob upload \
86- --account-name sharkpublic \
87- --sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
88- --container-name ossci \
89- --name ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn-${{ env.date }}.irpa \
90- --file instruct_8b_fp8_e4m3fn-${{ env.date }}.irpa \
91- --overwrite
92- # echo uploaded_new_ir=true >> "${GITHUB_OUTPUT}"
93- # echo new_ir_url="https://sharkpublic.blob.core.windows.net/ossci/ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn-${{ env.date }}.irpa" >> "${GITHUB_OUTPUT}"
94- # fi
95297
# Remove the downloaded model directories, the output_artifacts directory and
# the scratch IRPA files created by the export steps, then report on each
# model directory.
- name: Cleanup download Directory
  # Run even when an earlier step failed; otherwise a failing export step
  # would skip the cleanup and leave these files behind on the runner.
  if: always()
  run: |
    rm -rf \
      Llama-3.1-8B-Instruct \
      Llama-3.1-70B-Instruct \
      Llama-3.1-8B-Instruct-FP8-KV \
      output_artifacts
    rm -f \
      instruct_8b_fp8_e4m3fn.irpa \
      instruct_8b_fp8_e4m3fn_previous.irpa \
      instruct_70b_fp16_previous.irpa \
      instruct_8b_fp16_previous.irpa
    # Confirm each model directory is gone, in the same order as before.
    for model_dir in Llama-3.1-8B-Instruct Llama-3.1-8B-Instruct-FP8-KV Llama-3.1-70B-Instruct; do
      test ! -d "$model_dir" && echo "$model_dir downloaded artifacts removed"
    done
0 commit comments