Merge branch 'main' into feat/feishu-doc-tool
This commit is contained in:
commit
5cac79cbbe
11
.github/workflows/ci-run.yml
vendored
11
.github/workflows/ci-run.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
name: Lint Gate (Format + Clippy + Strict Delta)
|
||||
needs: [changes]
|
||||
if: needs.changes.outputs.rust_changed == 'true'
|
||||
runs-on: [self-hosted, aws-india]
|
||||
runs-on: [self-hosted, aws-india, Linux]
|
||||
timeout-minutes: 40
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
name: Test
|
||||
needs: [changes]
|
||||
if: needs.changes.outputs.rust_changed == 'true'
|
||||
runs-on: [self-hosted, aws-india]
|
||||
runs-on: [self-hosted, aws-india, Linux]
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
name: Build (Smoke)
|
||||
needs: [changes]
|
||||
if: needs.changes.outputs.rust_changed == 'true'
|
||||
runs-on: [self-hosted, aws-india]
|
||||
runs-on: [self-hosted, aws-india, Linux]
|
||||
timeout-minutes: 35
|
||||
|
||||
steps:
|
||||
@@ -150,7 +150,10 @@ jobs:
|
||||
prefix-key: ci-run-build
|
||||
cache-targets: true
|
||||
- name: Build binary (smoke check)
|
||||
run: cargo build --profile release-fast --locked --verbose
|
||||
env:
|
||||
CARGO_BUILD_JOBS: 2
|
||||
CI_SMOKE_BUILD_ATTEMPTS: 3
|
||||
run: bash scripts/ci/smoke_build_retry.sh
|
||||
- name: Check binary size
|
||||
run: bash scripts/ci/check_binary_size.sh target/release-fast/zeroclaw
|
||||
|
||||
|
||||
@@ -83,6 +83,20 @@ Safety behavior:
|
||||
4. Drain runners, then apply cleanup.
|
||||
5. Re-run health report and confirm queue/availability recovery.
|
||||
|
||||
## 3.1) Build Smoke Exit `143` Triage
|
||||
|
||||
When `CI Run / Build (Smoke)` fails with `Process completed with exit code 143`:
|
||||
|
||||
1. Treat it as external termination (SIGTERM), not a compile error.
|
||||
2. Confirm the build step ended with `Terminated` and no Rust compiler diagnostic was emitted.
|
||||
3. Check current pool pressure (`runner_health_report.py`) before retrying.
|
||||
4. Re-run once after pressure drops; persistent `143` should be handled as runner-capacity remediation.
|
||||
|
||||
Important:
|
||||
|
||||
- `error: cannot install while Rust is installed` from rustup bootstrap can appear in setup logs on pre-provisioned runners.
|
||||
- That message is not itself a terminal failure when subsequent `rustup toolchain install` and `rustup default` succeed.
|
||||
|
||||
## 4) Queue Hygiene (Dry-Run First)
|
||||
|
||||
Dry-run example:
|
||||
|
||||
53
scripts/ci/smoke_build_retry.sh
Normal file
53
scripts/ci/smoke_build_retry.sh
Normal file
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
attempts="${CI_SMOKE_BUILD_ATTEMPTS:-3}"
|
||||
|
||||
if ! [[ "$attempts" =~ ^[0-9]+$ ]] || [ "$attempts" -lt 1 ]; then
|
||||
echo "::error::CI_SMOKE_BUILD_ATTEMPTS must be a positive integer (got: ${attempts})" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
IFS=',' read -r -a retryable_codes <<< "${CI_SMOKE_RETRY_CODES:-143,137}"
|
||||
|
||||
is_retryable_code() {
|
||||
local code="$1"
|
||||
local candidate=""
|
||||
for candidate in "${retryable_codes[@]}"; do
|
||||
candidate="${candidate//[[:space:]]/}"
|
||||
if [ "$candidate" = "$code" ]; then
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
build_cmd=(cargo build --package zeroclaw --bin zeroclaw --profile release-fast --locked)
|
||||
|
||||
attempt=1
|
||||
while [ "$attempt" -le "$attempts" ]; do
|
||||
echo "::group::Smoke build attempt ${attempt}/${attempts}"
|
||||
echo "Running: ${build_cmd[*]}"
|
||||
set +e
|
||||
"${build_cmd[@]}"
|
||||
code=$?
|
||||
set -e
|
||||
echo "::endgroup::"
|
||||
|
||||
if [ "$code" -eq 0 ]; then
|
||||
echo "Smoke build succeeded on attempt ${attempt}/${attempts}."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$attempt" -ge "$attempts" ] || ! is_retryable_code "$code"; then
|
||||
echo "::error::Smoke build failed with exit code ${code} on attempt ${attempt}/${attempts}."
|
||||
exit "$code"
|
||||
fi
|
||||
|
||||
echo "::warning::Smoke build exited with ${code} (transient runner interruption suspected). Retrying..."
|
||||
sleep 10
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
|
||||
echo "::error::Smoke build did not complete successfully."
|
||||
exit 1
|
||||
@@ -7,6 +7,7 @@ import contextlib
|
||||
import hashlib
|
||||
import http.server
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import socket
|
||||
import socketserver
|
||||
@@ -409,6 +410,79 @@ class CiScriptsBehaviorTest(unittest.TestCase):
|
||||
report = json.loads(out_json.read_text(encoding="utf-8"))
|
||||
self.assertEqual(report["classification"], "persistent_failure")
|
||||
|
||||
def test_smoke_build_retry_retries_transient_143_once(self) -> None:
|
||||
fake_bin = self.tmp / "fake-bin"
|
||||
fake_bin.mkdir(parents=True, exist_ok=True)
|
||||
counter = self.tmp / "cargo-counter.txt"
|
||||
|
||||
fake_cargo = fake_bin / "cargo"
|
||||
fake_cargo.write_text(
|
||||
textwrap.dedent(
|
||||
"""\
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
counter="${FAKE_CARGO_COUNTER:?}"
|
||||
attempts=0
|
||||
if [ -f "$counter" ]; then
|
||||
attempts="$(cat "$counter")"
|
||||
fi
|
||||
attempts="$((attempts + 1))"
|
||||
printf '%s' "$attempts" > "$counter"
|
||||
if [ "$attempts" -eq 1 ]; then
|
||||
exit 143
|
||||
fi
|
||||
exit 0
|
||||
"""
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
fake_cargo.chmod(0o755)
|
||||
|
||||
env = dict(os.environ)
|
||||
env["PATH"] = f"{fake_bin}:{env.get('PATH', '')}"
|
||||
env["FAKE_CARGO_COUNTER"] = str(counter)
|
||||
env["CI_SMOKE_BUILD_ATTEMPTS"] = "2"
|
||||
|
||||
proc = run_cmd(["bash", self._script("smoke_build_retry.sh")], env=env, cwd=ROOT)
|
||||
self.assertEqual(proc.returncode, 0, msg=proc.stderr)
|
||||
self.assertEqual(counter.read_text(encoding="utf-8"), "2")
|
||||
self.assertIn("Retrying", proc.stdout)
|
||||
|
||||
def test_smoke_build_retry_fails_immediately_on_non_retryable_code(self) -> None:
|
||||
fake_bin = self.tmp / "fake-bin"
|
||||
fake_bin.mkdir(parents=True, exist_ok=True)
|
||||
counter = self.tmp / "cargo-counter.txt"
|
||||
|
||||
fake_cargo = fake_bin / "cargo"
|
||||
fake_cargo.write_text(
|
||||
textwrap.dedent(
|
||||
"""\
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
counter="${FAKE_CARGO_COUNTER:?}"
|
||||
attempts=0
|
||||
if [ -f "$counter" ]; then
|
||||
attempts="$(cat "$counter")"
|
||||
fi
|
||||
attempts="$((attempts + 1))"
|
||||
printf '%s' "$attempts" > "$counter"
|
||||
exit 101
|
||||
"""
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
fake_cargo.chmod(0o755)
|
||||
|
||||
env = dict(os.environ)
|
||||
env["PATH"] = f"{fake_bin}:{env.get('PATH', '')}"
|
||||
env["FAKE_CARGO_COUNTER"] = str(counter)
|
||||
env["CI_SMOKE_BUILD_ATTEMPTS"] = "3"
|
||||
|
||||
proc = run_cmd(["bash", self._script("smoke_build_retry.sh")], env=env, cwd=ROOT)
|
||||
self.assertEqual(proc.returncode, 101)
|
||||
self.assertEqual(counter.read_text(encoding="utf-8"), "1")
|
||||
self.assertIn("failed with exit code 101", proc.stdout)
|
||||
|
||||
def test_deny_policy_guard_detects_invalid_entries(self) -> None:
|
||||
deny_path = self.tmp / "deny.toml"
|
||||
deny_path.write_text(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user