fix(mlx_metal_kernel_opt): stabilize run script and config

lanmogu98 · lanmogu98 · commit 5055a6821af1 · 2026-01-08T14:23:52.000+08:00
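- Fix bash `set -u` failure when launching runs (notably in background): stop expanding the possibly-empty STDBUF_PREFIX array and instead branch on stdbuf availability for both the foreground and nohup commands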

- Stop overwriting OPENAI_API_KEY with GEMINI_API_KEY in the run script

- Use non-preview Gemini model names

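- Set cascade_evaluation: false under evaluator (this example's evaluator does not implement evaluate_stage1) and fix the 2:1 GQA prompt examples to group query heads by 2 instead of 5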
diff --git a/examples/mlx_metal_kernel_opt/config.yaml b/examples/mlx_metal_kernel_opt/config.yaml
@@ -4,9 +4,9 @@ log_level: "INFO"
 
 # LLM configuration for Metal kernel optimization
 llm:
-  primary_model: "gemini-2.5-flash-preview-05-20"
+  primary_model: "gemini-2.5-flash"
   primary_model_weight: 0.6
-  secondary_model: "gemini-2.5-pro-preview-06-05"
+  secondary_model: "gemini-2.5-pro"
   secondary_model_weight: 0.4
   api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
   temperature: 0.6
@@ -78,7 +78,7 @@ prompt:
     // CURRENT: Basic kv_head_idx = head_idx / HEADS_PER_KV
     // OPTIMIZE: Leverage the specific 2:1 ratio pattern
     
-    // Example: Process 5 query heads together for each KV head
+    // Example: Process 2 query heads together for each KV head
     // Example: Optimize memory layout for the 16:8 pattern
     // Example: Reduce broadcast overhead through clever indexing
     ```
@@ -181,7 +181,7 @@ prompt:
     **Strategy 4: GQA Pattern Exploitation**
     ```metal
     // Optimize for the specific 2:1 query:KV ratio
-    // Process query heads in groups of 5
+    // Process query heads in groups of 2
     // Reduce KV head indexing overhead
     ```
     
@@ -226,6 +226,8 @@ database:
 evaluator:
   timeout: 900  # 15 minutes for Metal kernel compilation and testing
   parallel_evaluations: 1
+  # This example's evaluator does not implement evaluate_stage1.
+  cascade_evaluation: false
 
 # Evolution settings
 diff_based_evolution: true
diff --git a/examples/mlx_metal_kernel_opt/run_evolve_experiment.sh b/examples/mlx_metal_kernel_opt/run_evolve_experiment.sh
@@ -50,8 +50,6 @@ USAGE
 # Force unbuffered Python output for reliable logging
 export PYTHONUNBUFFERED=1
 
-export OPENAI_API_KEY=$GEMINI_API_KEY
-
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
 RUN_NAME=""
@@ -209,20 +207,20 @@ LOG_FILE="$RUN_DIR/run.log"
 # Truncate log file to ensure clean start (especially important for --resume)
 : > "$LOG_FILE"
 
-# Check if stdbuf is available for line-buffered output
-if command -v stdbuf &>/dev/null; then
-  # Use stdbuf to force line buffering on both stdout and stderr
-  STDBUF_PREFIX=(stdbuf -oL -eL)
-else
-  STDBUF_PREFIX=()
-fi
-
 if [[ "$FOREGROUND" -eq 1 ]]; then
   # Stream to console and persist logs with line buffering.
-  "${STDBUF_PREFIX[@]}" "${CMD[@]}" 2>&1 | tee "$LOG_FILE"
+  if command -v stdbuf &>/dev/null; then
+    stdbuf -oL -eL "${CMD[@]}" 2>&1 | tee "$LOG_FILE"
+  else
+    "${CMD[@]}" 2>&1 | tee "$LOG_FILE"
+  fi
 else
   # Run in background with line-buffered output for reliable log ordering.
-  nohup "${STDBUF_PREFIX[@]}" "${CMD[@]}" > "$LOG_FILE" 2>&1 &
+  if command -v stdbuf &>/dev/null; then
+    nohup stdbuf -oL -eL "${CMD[@]}" > "$LOG_FILE" 2>&1 &
+  else
+    nohup "${CMD[@]}" > "$LOG_FILE" 2>&1 &
+  fi
   echo "[run_evolve_experiment] Started PID: $!"
   echo "[run_evolve_experiment] Log: $LOG_FILE"
   echo "[run_evolve_experiment] Tail: tail -f \"$LOG_FILE\""