File tree Expand file tree Collapse file tree
examples/mlx_metal_kernel_opt Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -4,9 +4,9 @@ log_level: "INFO"
44
55# LLM configuration for Metal kernel optimization
66llm :
7- primary_model : " gemini-2.5-flash-preview-05-20 "
7+ primary_model : " gemini-2.5-flash"
88 primary_model_weight : 0.6
9- secondary_model : " gemini-2.5-pro-preview-06-05 "
9+ secondary_model : " gemini-2.5-pro"
1010 secondary_model_weight : 0.4
1111 api_base : " https://generativelanguage.googleapis.com/v1beta/openai/"
1212 temperature : 0.6
@@ -78,7 +78,7 @@ prompt:
7878 // CURRENT: Basic kv_head_idx = head_idx / HEADS_PER_KV
7979 // OPTIMIZE: Leverage the specific 2:1 ratio pattern
8080
81- // Example: Process 5 query heads together for each KV head
81+ // Example: Process 2 query heads together for each KV head
8282 // Example: Optimize memory layout for the 16:8 pattern
8383 // Example: Reduce broadcast overhead through clever indexing
8484 ```
@@ -181,7 +181,7 @@ prompt:
181181 **Strategy 4: GQA Pattern Exploitation**
182182 ```metal
183183 // Optimize for the specific 2:1 query:KV ratio
184- // Process query heads in groups of 5
184+ // Process query heads in groups of 2
185185 // Reduce KV head indexing overhead
186186 ```
187187
Original file line number Diff line number Diff line change 5050# Force unbuffered Python output for reliable logging
5151export PYTHONUNBUFFERED=1
5252
53- export OPENAI_API_KEY=$GEMINI_API_KEY
54-
5553SCRIPT_DIR=" $( cd " $( dirname " ${BASH_SOURCE[0]} " ) " && pwd) "
5654
5755RUN_NAME=" "
You can’t perform that action at this time.
0 commit comments