Skip to content

Commit e61dacf

Browse files
committed
clean changes for all single model single step
1 parent dc8ab6a commit e61dacf

File tree

2 files changed

+233
-27
lines changed

2 files changed

+233
-27
lines changed

.github/workflows/ci_generate_irpa.yml

Lines changed: 228 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ jobs:
4040
VENV_DIR: ${{ github.workspace }}/.venv
4141
HF_HOME: "/shark-cache/data/huggingface"
4242
HF_TOKEN: ${{ secrets.HF_FLUX_TOKEN }}
43+
MODEL_TAG: ""
4344
steps:
4445
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
4546

@@ -54,48 +55,250 @@ jobs:
5455
source ${VENV_DIR}/bin/activate
5556
5657
- name: Install pip deps
57-
run: bash scripts/setenv.sh --nightly
58+
run: |
59+
bash scripts/setenv.sh --nightly
60+
mkdir -p output_artifacts
5861
62+
# Llama 8B FP16 irpa export, compile and validate
5963
- name: Export 8B-FP16 instruct model
64+
id: export_irpa_llama_8b_fp16
65+
continue-on-error: true
6066
run: |
67+
set -eo pipefail # Exit on any error; pipefail so a failure piped into `tee` is not masked by tee's exit status
68+
export MODEL_TAG="llama3_8b_fp16"
69+
echo $MODEL_TAG
70+
# Export 8B-FP16 instruct model
71+
echo "=== Exporting 8B-FP16 instruct model ==="
6172
bash scripts/download_export_irpa.sh \
62-
--model Llama-3.1-8B-Instruct \
63-
--hf-token ${HF_TOKEN}
73+
--model Llama-3.1-8B-Instruct \
74+
--hf-token ${HF_TOKEN} || { echo "Export failed"; exit 1; }
75+
76+
# Run export and compile
77+
echo "=== Running export and compile ==="
78+
bash scripts/export_and_compile.sh \
79+
--irpa /shark-dev/8b/instruct/weights/llama3.1_8b_instruct_fp16.irpa \
80+
--bs-prefill 4 --bs-decode 4 2>&1 | tee "$(pwd)/output_artifacts/${MODEL_TAG}_export_and_compilation.log" || { echo "Compilation failed"; exit 1; }
81+
82+
# Validate VMFB Responses
83+
echo "=== Validating VMFB Responses ==="
84+
bash scripts/validate_numerics.sh \
85+
--irpa /shark-dev/8b/instruct/weights/llama3.1_8b_instruct_fp16.irpa \
86+
--vmfb $(pwd)/output_artifacts/output.vmfb \
87+
--config $(pwd)/output_artifacts/config_attn.json \
88+
--tokenizer /shark-dev/8b/instruct/tokenizer.json \
89+
--tokenizer_config /shark-dev/8b/instruct/tokenizer_config.json \
90+
--steps 64 \
91+
--kv-cache-dtype float16 | tee "$(pwd)/output_artifacts/${MODEL_TAG}_run_llm_vmfb.log" || { echo "Validation failed"; }
92+
93+
# Check for IRPA changes
94+
echo "=== Checking for IRPA changes ==="
95+
echo "Downloading latest IRPA file from Azure"
96+
az storage blob download \
97+
--account-name sharkpublic \
98+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
99+
--container-name ossci \
100+
--name ossci-models/llama_3_1/instruct_8b_fp16.irpa \
101+
--file instruct_8b_fp16_previous.irpa \
102+
--no-progress || echo "No previous IRPA file found, will upload new file"
103+
104+
UPLOAD_REQUIRED=false
105+
if [ -f instruct_8b_fp16_previous.irpa ]; then
106+
echo "Comparing IRPA files"
107+
if ! diff -q /shark-dev/8b/instruct/weights/llama3.1_8b_instruct_fp16.irpa instruct_8b_fp16_previous.irpa > /dev/null 2>&1; then
108+
echo "IRPA files differ, upload required"
109+
UPLOAD_REQUIRED=true
110+
else
111+
echo "IRPA files are identical, skipping upload"
112+
fi
113+
else
114+
echo "No previous IRPA file found, upload required"
115+
UPLOAD_REQUIRED=true
116+
fi
117+
118+
# Upload IRPA file if required
119+
if [ "$UPLOAD_REQUIRED" = true ]; then
120+
echo "=== Uploading new IRPA for llama3-8b-fp16 ==="
121+
az storage blob upload \
122+
--account-name sharkpublic \
123+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
124+
--container-name ossci \
125+
--name ossci-models/llama_3_1/instruct_8b_fp16-${{ env.date }}.irpa \
126+
--file /shark-dev/8b/instruct/weights/llama3.1_8b_instruct_fp16.irpa
127+
128+
az storage blob upload \
129+
--account-name sharkpublic \
130+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
131+
--container-name ossci \
132+
--name ossci-models/llama_3_1/instruct_8b_fp16.irpa \
133+
--file /shark-dev/8b/instruct/weights/llama3.1_8b_instruct_fp16.irpa \
134+
--overwrite
135+
fi
136+
137+
echo "=== Completed llama-8b-fp16 workflow ==="
138+
139+
# Llama 8B FP8 irpa export, compile and validate
64140
- name: Export 8B-FP8 instruct model
141+
id: export_irpa_llama_8b_fp8
142+
continue-on-error: true
65143
run: |
66-
bash scripts/download_export_irpa.sh \
67-
--model Llama-3.1-8B-Instruct-FP8-KV \
68-
--hf-token ${HF_TOKEN}
144+
set -eo pipefail # Exit on any error; pipefail so a failure piped into `tee` is not masked by tee's exit status
145+
export MODEL_TAG="llama3_8b_fp8" # exported like the FP16/70B steps so child scripts see the same tag
146+
echo $MODEL_TAG
147+
# Export 8B-FP8 instruct model
148+
echo "=== Exporting 8B-FP8 instruct model ==="
149+
bash scripts/download_export_irpa.sh \
150+
--model Llama-3.1-8B-Instruct-FP8-KV \
151+
--hf-token ${HF_TOKEN} || { echo "Export failed"; exit 1; }
152+
153+
# Run export and compile
154+
echo "=== Running export and compile ==="
155+
bash scripts/export_and_compile.sh \
156+
--irpa instruct_8b_fp8_e4m3fn.irpa \
157+
--dtype fp8 --bs-prefill 4 --bs-decode 4 2>&1 | tee "$(pwd)/output_artifacts/${MODEL_TAG}_export_and_compilation.log" || { echo "Compilation failed"; exit 1; }
158+
159+
# Validate VMFB Responses
160+
echo "=== Validating VMFB Responses ==="
161+
bash scripts/validate_numerics.sh \
162+
--irpa instruct_8b_fp8_e4m3fn.irpa \
163+
--vmfb $(pwd)/output_artifacts/output.vmfb \
164+
--config $(pwd)/output_artifacts/config_attn.json \
165+
--tokenizer /shark-dev/8b/instruct/tokenizer.json \
166+
--tokenizer_config /shark-dev/8b/instruct/tokenizer_config.json \
167+
--steps 64 \
168+
--kv-cache-dtype float8_e4m3fnuz | tee "$(pwd)/output_artifacts/${MODEL_TAG}_run_llm_vmfb.log" || { echo "Validation failed "; }
169+
170+
# Check for IRPA changes
171+
echo "=== Checking for IRPA changes ==="
172+
echo "Downloading latest IRPA file from Azure"
173+
az storage blob download \
174+
--account-name sharkpublic \
175+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
176+
--container-name ossci \
177+
--name ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn.irpa \
178+
--file instruct_8b_fp8_e4m3fn_previous.irpa \
179+
--no-progress || echo "No previous IRPA file found, will upload new file"
180+
181+
UPLOAD_REQUIRED=false
182+
if [ -f instruct_8b_fp8_e4m3fn_previous.irpa ]; then
183+
echo "Comparing IRPA files"
184+
if ! diff -q instruct_8b_fp8_e4m3fn.irpa instruct_8b_fp8_e4m3fn_previous.irpa > /dev/null 2>&1; then
185+
echo "IRPA files differ, upload required"
186+
UPLOAD_REQUIRED=true
187+
else
188+
echo "IRPA files are identical, skipping upload"
189+
fi
190+
else
191+
echo "No previous IRPA file found, upload required"
192+
UPLOAD_REQUIRED=true
193+
fi
194+
195+
# Upload IRPA file if required
196+
if [ "$UPLOAD_REQUIRED" = true ]; then
197+
echo "=== Uploading new IRPA for llama3-8b-fp8 ==="
198+
az storage blob upload \
199+
--account-name sharkpublic \
200+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
201+
--container-name ossci \
202+
--name ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn-${{ env.date }}.irpa \
203+
--file instruct_8b_fp8_e4m3fn.irpa
204+
205+
az storage blob upload \
206+
--account-name sharkpublic \
207+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
208+
--container-name ossci \
209+
--name ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn.irpa \
210+
--file instruct_8b_fp8_e4m3fn.irpa \
211+
--overwrite
212+
fi
213+
214+
echo "=== Completed llama-8b-fp8 workflow ==="
215+
216+
# Llama 70B FP16 irpa export, compile and validate
69217
- name: Export 70B-FP16 instruct model
70218
run: |
219+
set -eo pipefail # Exit on any error; pipefail so a failure piped into `tee` is not masked by tee's exit status
220+
export MODEL_TAG="llama3_70b_fp16"
221+
echo $MODEL_TAG
222+
# Export 70B-FP16 instruct model
223+
echo "=== Exporting 70B-FP16 instruct model ==="
71224
bash scripts/download_export_irpa.sh \
72-
--model Llama-3.1-70B-Instruct \
73-
--hf-token ${HF_TOKEN}
225+
--model Llama-3.1-70B-Instruct \
226+
--hf-token ${HF_TOKEN} || { echo "Export failed"; exit 1; }
227+
228+
# Run export and compile
229+
echo "=== Running export and compile ==="
230+
bash scripts/export_and_compile.sh \
231+
--irpa /shark-dev/70b/instruct/weights/llama3.1_70b_instruct_fp16.irpa \
232+
--bs-prefill 4 --bs-decode 4 2>&1 | tee "$(pwd)/output_artifacts/${MODEL_TAG}_export_and_compilation.log" || { echo "Compilation failed"; exit 1; }
233+
234+
# Validate VMFB Responses
235+
echo "=== Validating VMFB Responses ==="
236+
bash scripts/validate_numerics.sh \
237+
--irpa /shark-dev/70b/instruct/weights/llama3.1_70b_instruct_fp16.irpa \
238+
--vmfb $(pwd)/output_artifacts/output.vmfb \
239+
--config $(pwd)/output_artifacts/config_attn.json \
240+
--tokenizer /shark-dev/70b/instruct/tokenizer.json \
241+
--tokenizer_config /shark-dev/70b/instruct/tokenizer_config.json \
242+
--steps 64 \
243+
--kv-cache-dtype float16 | tee "$(pwd)/output_artifacts/${MODEL_TAG}_run_llm_vmfb.log" || { echo "Validation failed"; }
244+
245+
# Check for IRPA changes
246+
echo "=== Checking for IRPA changes ==="
247+
echo "Downloading latest IRPA file from Azure"
248+
az storage blob download \
249+
--account-name sharkpublic \
250+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
251+
--container-name ossci \
252+
--name ossci-models/llama_3_1/70b/instruct_70b_fp16.irpa \
253+
--file instruct_70b_fp16_previous.irpa \
254+
--no-progress || echo "No previous IRPA file found, will upload new file"
255+
256+
UPLOAD_REQUIRED=false
257+
if [ -f instruct_70b_fp16_previous.irpa ]; then
258+
echo "Comparing IRPA files"
259+
if ! diff -q /shark-dev/70b/instruct/weights/llama3.1_70b_instruct_fp16.irpa instruct_70b_fp16_previous.irpa > /dev/null 2>&1; then
260+
echo "IRPA files differ, upload required"
261+
UPLOAD_REQUIRED=true
262+
else
263+
echo "IRPA files are identical, skipping upload"
264+
fi
265+
else
266+
echo "No previous IRPA file found, upload required"
267+
UPLOAD_REQUIRED=true
268+
fi
269+
270+
# Upload IRPA file if required
271+
if [ "$UPLOAD_REQUIRED" = true ]; then
272+
echo "=== Uploading new IRPA for llama3-70b-fp16 ==="
273+
az storage blob upload \
274+
--account-name sharkpublic \
275+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
276+
--container-name ossci \
277+
--name ossci-models/llama_3_1/70b/instruct_70b_fp16-${{ env.date }}.irpa \
278+
--file /shark-dev/70b/instruct/weights/llama3.1_70b_instruct_fp16.irpa
279+
280+
az storage blob upload \
281+
--account-name sharkpublic \
282+
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
283+
--container-name ossci \
284+
--name ossci-models/llama_3_1/70b/instruct_70b_fp16.irpa \
285+
--file /shark-dev/70b/instruct/weights/llama3.1_70b_instruct_fp16.irpa \
286+
--overwrite
287+
fi
288+
289+
echo "=== Completed llama-70b-fp16 workflow ==="
290+
291+
# Mistral-Nemo-Instruct-2407-FP8 irpa export
74292
- name: Export Mistral-Nemo-Instruct-2407-FP8
75293
run: |
76294
bash scripts/download_export_irpa.sh \
77295
--model Mistral-Nemo-Instruct-2407-FP8 \
78296
--hf-token ${HF_TOKEN}
79-
- name: Upload All IRPA files to ossci
80-
env:
81-
AZURE_WRITE_ACCESS_OSSCI: ${{ secrets.AZURE_WRITE_ACCESS_OSSCI }}
82-
run: |
83-
# TODO: add check for upload only if file has changed.
84-
echo "upload new IRPA"
85-
az storage blob upload \
86-
--account-name sharkpublic \
87-
--sas-token "$AZURE_WRITE_ACCESS_OSSCI" \
88-
--container-name ossci \
89-
--name ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn-${{ env.date }}.irpa \
90-
--file instruct_8b_fp8_e4m3fn-${{ env.date }}.irpa \
91-
--overwrite
92-
# echo uploaded_new_ir=true >> "${GITHUB_OUTPUT}"
93-
# echo new_ir_url="https://sharkpublic.blob.core.windows.net/ossci/ossci-models/llama_3_1/instruct_8b_fp8_e4m3fn-${{ env.date }}.irpa" >> "${GITHUB_OUTPUT}"
94-
# fi
95297
96298
- name: Cleanup download Directory
97299
run: |
98-
rm -rf Llama-3.1-8B-Instruct Llama-3.1-70B-Instruct Llama-3.1-8B-Instruct-FP8-KV
300+
rm -rf Llama-3.1-8B-Instruct Llama-3.1-70B-Instruct Llama-3.1-8B-Instruct-FP8-KV output_artifacts
301+
rm -f instruct_8b_fp8_e4m3fn.irpa instruct_8b_fp8_e4m3fn_previous.irpa instruct_70b_fp16_previous.irpa instruct_8b_fp16_previous.irpa
99302
test ! -d Llama-3.1-8B-Instruct && echo "Llama-3.1-8B-Instruct downloaded artifacts removed"
100303
test ! -d Llama-3.1-8B-Instruct-FP8-KV && echo "Llama-3.1-8B-Instruct-FP8-KV downloaded artifacts removed"
101304
test ! -d Llama-3.1-70B-Instruct && echo "Llama-3.1-70B-Instruct downloaded artifacts removed"

scripts/download_export_irpa.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ function download_model() {
1111

1212
mkdir $MODEL
1313
hf auth login --token $HF_TOKEN
14-
14+
1515
# Determine repository based on model name
1616
if [ -n "${HF_REPO}" ]; then
1717
REPO=$HF_REPO
@@ -25,7 +25,7 @@ function download_model() {
2525
# Default: assume model name contains the full repo path (e.g., "owner/model-name")
2626
REPO=$MODEL
2727
fi
28-
28+
2929
echo "Downloading from repository: $REPO"
3030
hf download $REPO --local-dir $MODEL
3131
}
@@ -74,6 +74,9 @@ download_model $MODEL $HF_TOKEN
7474

7575
if [[ $? = 0 ]]; then
7676
if [[ $MODEL = "Llama-3.1-8B-Instruct-FP8-KV" ]]; then
77+
if [ -f "$MODEL/merged.safetensors" ]; then
78+
rm "$MODEL/merged.safetensors"
79+
fi
7780
python scripts/merge_safetensors.py $MODEL
7881
sudo mv merged.safetensors $MODEL/merged.safetensors
7982
if [[ $? = 0 ]]; then

0 commit comments

Comments
 (0)