From 6165c293176a0d1b664d9d65eae336a39934346a Mon Sep 17 00:00:00 2001 From: Charlie Lye <5764343+charlielye@users.noreply.github.com> Date: Sun, 23 Mar 2025 17:46:20 +0000 Subject: [PATCH 1/7] dump dmesg at end of ci run --- ci3/bootstrap_ec2 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 14310375f969..28ad5a7fb0d1 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -142,7 +142,10 @@ container_script=$(cat <&1 | ci3/cache_log 'dmesg' + log_ci_run FAILED \$ci_log_id + ;; esac exit \$code EOF @@ -182,6 +185,7 @@ ssh ${ssh_args:-} -F $ci3/aws/build_instance_ssh_config ubuntu@$ip " -v \$HOME/.aws:/home/aztec-dev/.aws:ro \ -v /mnt/bb-crs:/home/aztec-dev/.bb-crs:ro \ -v /tmp:/tmp \ + -v /dev/kmsg:/dev/kmsg \ -e USE_TEST_CACHE=1 \ -e CI_REDIS='ci-redis.lzka0i.0001.use2.cache.amazonaws.com' \ -e SSH_CONNECTION=' ' \ From 399108db2d0cfd95e3477a6ece1edd2a2f39cd9b Mon Sep 17 00:00:00 2001 From: Charlie Lye <5764343+charlielye@users.noreply.github.com> Date: Sun, 23 Mar 2025 18:01:44 +0000 Subject: [PATCH 2/7] always dmesg. cleanup old aztec images. --- ci3/bootstrap_ec2 | 8 ++++---- release-image/bootstrap.sh | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 28ad5a7fb0d1..101513914f81 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -139,13 +139,13 @@ container_script=$(cat <&1 | ci3/cache_log 'dmesg' + case \$code in 155) ;; 0) log_ci_run PASSED \$ci_log_id ;; - *) - sudo dmesg 2>&1 | ci3/cache_log 'dmesg' - log_ci_run FAILED \$ci_log_id - ;; + *) log_ci_run FAILED \$ci_log_id ;; esac exit \$code EOF diff --git a/release-image/bootstrap.sh b/release-image/bootstrap.sh index 4a7caccffc55..d28d867e580d 100755 --- a/release-image/bootstrap.sh +++ b/release-image/bootstrap.sh @@ -5,6 +5,17 @@ cmd=${1:-} hash=$(cache_content_hash ^release-image/Dockerfile ^build-images/src/Dockerfile ^yarn-project/yarn.lock) +function build_image { + set -euo pipefail + cd .. + docker build -f release-image/Dockerfile -t aztecprotocol/aztec:$(git rev-parse HEAD) . + docker tag aztecprotocol/aztec:$(git rev-parse HEAD) aztecprotocol/aztec:latest + + # Remove all but the most recent image. + docker images aztecprotocol/aztec --format "{{.ID}}" | uniq | tail -n +2 | xargs -r docker rmi -f +} +export -f build_image + function build { echo_header "release-image build" @@ -16,8 +27,7 @@ function build { docker load < release-image-base fi - denoise "cd .. && docker build -f release-image/Dockerfile -t aztecprotocol/aztec:$(git rev-parse HEAD) ." - docker tag aztecprotocol/aztec:$(git rev-parse HEAD) aztecprotocol/aztec:latest + denoise "build_image" } case "$cmd" in From 37f9303fa01d35e3a1a661eaffe3173481d81dfd Mon Sep 17 00:00:00 2001 From: Charlie Lye <5764343+charlielye@users.noreply.github.com> Date: Sun, 23 Mar 2025 23:41:26 +0000 Subject: [PATCH 3/7] explore sysdig to capture listens --- bootstrap.sh | 1 + ci3/bootstrap_ec2 | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/bootstrap.sh b/bootstrap.sh index d138a134bd15..fff874647a6d 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -156,6 +156,7 @@ export -f start_txes function test { echo_header "test all" + export NOIR_HASH=$(./noir/bootstrap.sh hash) start_txes diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 101513914f81..8453ada2e29b 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -141,6 +141,7 @@ container_script=$(cat <&1 | ci3/cache_log 'dmesg' + sudo cat /tmp/netfile | ci3/cache_log 'netfile' case \$code in 155) ;; @@ -176,6 +177,11 @@ ssh ${ssh_args:-} -F $ci3/aws/build_instance_ssh_config ubuntu@$ip " sudo cp -r \$HOME/.bb-crs/* /mnt/bb-crs echo Done in \$SECONDS seconds. + echo Installing and starting sysdig... + sudo apt-get update + sudo apt-get install -y sysdig + sudo sysdig -p '%evt.time %proc.cmdline %evt.dir %evt.type %evt.args' 'evt.type=bind or evt.type=listen' > /tmp/netfile & + echo Starting devbox... docker run --privileged ${docker_args:-} \ --name aztec_build \ From 45554fb597f7e459b892d8d1470fc42fe4ee7547 Mon Sep 17 00:00:00 2001 From: Charlie Lye <5764343+charlielye@users.noreply.github.com> Date: Sun, 23 Mar 2025 23:51:00 +0000 Subject: [PATCH 4/7] kill on exit --- ci3/bootstrap_ec2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 8453ada2e29b..f798e4dc5642 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -181,6 +181,8 @@ ssh ${ssh_args:-} -F $ci3/aws/build_instance_ssh_config ubuntu@$ip " sudo apt-get update sudo apt-get install -y sysdig sudo sysdig -p '%evt.time %proc.cmdline %evt.dir %evt.type %evt.args' 'evt.type=bind or evt.type=listen' > /tmp/netfile & + netfile_pid=\$! + trap 'sudo kill \$netfile_pid' EXIT echo Starting devbox... docker run --privileged ${docker_args:-} \ From 1031ff81001b624f777607683efeb63feca9681e Mon Sep 17 00:00:00 2001 From: Charlie Lye <5764343+charlielye@users.noreply.github.com> Date: Mon, 24 Mar 2025 09:37:25 +0000 Subject: [PATCH 5/7] /dev/null installing sysdig. really ensure we only use nm cache on ci machines. --- ci3/bootstrap_ec2 | 9 +++++---- ci3/npm_install_deps | 4 +++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index f798e4dc5642..30c5b3aac42c 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -110,8 +110,9 @@ fi container_script=$(cat </dev/null + sudo apt-get install -y sysdig &>/dev/null sudo sysdig -p '%evt.time %proc.cmdline %evt.dir %evt.type %evt.args' 'evt.type=bind or evt.type=listen' > /tmp/netfile & netfile_pid=\$! trap 'sudo kill \$netfile_pid' EXIT diff --git a/ci3/npm_install_deps b/ci3/npm_install_deps index edcf15bbd2ce..fe99b4c36e7a 100755 --- a/ci3/npm_install_deps +++ b/ci3/npm_install_deps @@ -17,7 +17,9 @@ fi yarn_lock_path=$(realpath yarn.lock --relative-to=$REPO_PATH) package_json_path=$(realpath package.json --relative-to=$REPO_PATH) -if [ "$CI" -eq 1 ]; then +# We only use the node module cache in CI. +# The checking for ci-started makes sure we really *are* on a CI machine (it's created in bootstrap_ec2). +if [ "$CI" -eq 1 ] && [ -f $HOME/ci-started ]; then nm_hash=$(cache_content_hash "^$yarn_lock_path" "^$package_json_path") if ! cache_download node-modules-$nm_hash.zst; then denoise "retry 'yarn install --immutable'" From 9d44167025eccc2db7b5bea04f754dddcf8eaef7 Mon Sep 17 00:00:00 2001 From: Charlie Lye <5764343+charlielye@users.noreply.github.com> Date: Mon, 24 Mar 2025 10:59:29 +0000 Subject: [PATCH 6/7] capture logs --- ci3/bootstrap_ec2 | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 30cd993f301e..7dcc6718f949 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -98,13 +98,13 @@ if [ -t 1 ]; then ssh_args="-t" docker_args+=" -t" if [ "$NO_TERMINATE" -eq 0 ]; then - cmd+=" || exec zsh" + run_cmd="run || exec zsh" else - cmd+="; exec zsh" + run_cmd="run; exec zsh" fi else # LOG_ID can optionally be set externally to be e.g. the GA run id. - cmd="ci3/aws_handle_evict '$cmd' 2>&1 | ci3/add_timestamps | DUP=1 ci3/cache_log 'CI run' \$ci_log_id" + run_cmd="ci3/aws_handle_evict run 2>&1 | ci3/add_timestamps | DUP=1 ci3/cache_log 'CI run' \$ci_log_id" fi container_script=$(cat <&1 | ci3/cache_log 'dmesg' - sudo cat /tmp/netfile | ci3/cache_log 'netfile' + function run { + set +e + set -x + $cmd + local code=\${PIPESTATUS[0]} + sudo dmesg 2>&1 | ci3/cache_log 'dmesg' + sudo cat /tmp/netfile | ci3/cache_log 'netfile' + exit \$code + } + export -f run + + $run_cmd + code=\${PIPESTATUS[0]} case \$code in 155) ;; From 285c803bac7acd44eb58e1f0314b40ac339a4e0e Mon Sep 17 00:00:00 2001 From: Charlie Lye <5764343+charlielye@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:04:05 +0000 Subject: [PATCH 7/7] cpu and mem capture --- ci3/bootstrap_ec2 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 7dcc6718f949..e16f5319b79a 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -144,6 +144,8 @@ container_script=$(cat <&1 | ci3/cache_log 'dmesg' sudo cat /tmp/netfile | ci3/cache_log 'netfile' + sudo cat /tmp/cpufile | ci3/cache_log 'cpufile' + sudo cat /tmp/memfile | ci3/cache_log 'memfile' exit \$code } export -f run @@ -190,7 +192,13 @@ ssh ${ssh_args:-} -F $ci3/aws/build_instance_ssh_config ubuntu@$ip " sudo apt-get install -y sysdig &>/dev/null sudo sysdig -p '%evt.time %proc.cmdline %evt.dir %evt.type %evt.args' 'evt.type=bind or evt.type=listen' > /tmp/netfile & netfile_pid=\$! - trap 'sudo kill \$netfile_pid' EXIT + # Capture cpu load. + mpstat 2 &> /tmp/cpufile & + cpufile_pid=\$! + # Capture mem load. + vmstat -w -S M 2 &> /tmp/memfile & + memfile_pid=\$! + trap 'sudo kill \$netfile_pid \$cpufile_pid \$memfile_pid' EXIT echo Starting devbox... docker run --privileged ${docker_args:-} \