# File: github-azure-pipeline.yml
# Azure Pipeline: Sync Microsoft Learn Docs to Agent Skills
#
# This pipeline:
# 1. Checks out GitHub repo (contains products/, skills/)
# 2. Checks out ADO repo (contains src/, config/)
# 3. Runs docs2skills to crawl and classify docs
# 4. Pushes results to a new docs-sync-<timestamp> branch on the GitHub repo
#    and opens a pull request against the target branch
# CI trigger disabled.
trigger: none

# Queue-time parameters.
parameters:
  # When true, the sync step deletes and recreates products/ and skills/
  # before regenerating them.
  - name: fullSync
    displayName: 'Full Sync (use --full flag)'
    type: boolean
    default: false

  # GitHub branch the run is compared against and that the generated PR targets.
  - name: targetBranch
    displayName: 'Target GitHub Branch'
    type: string
    default: 'main'

  # Branch of the Learn.Docs2AgentSkills ADO repo to check out for the tool code.
  - name: docs2SkillsADOBranch
    displayName: 'Docs2Skills ADO Branch'
    type: string
    default: 'main'
# Weekly scheduled run on the main branch.
schedules:
  - cron: '0 2 * * 0'  # Every Sunday at 2:00 AM UTC
    displayName: Run every Sunday
    branches:
      include:
        - main
    # Run on schedule even when the branch has no new commits.
    always: true
variables:
  # Shared variable group. Presumably supplies azureOpenAIEndpoint,
  # azureOpenAIDeployment and azureSubscription, which the steps reference
  # but this file does not define - TODO confirm.
  - group: Docs2Skills-Defaults
  # Python version installed by the UsePythonVersion step.
  - name: pythonVersion
    value: '3.12'
# Single stage / single job: validate configuration, check out both repos,
# generate skills from docs, then publish the result as a GitHub pull request.
stages:
  - stage: __default
    jobs:
      - job: Job
        pool:
          vmImage: ubuntu-latest
        # The sync step runs four docs2skills passes; allow up to 4 hours.
        timeoutInMinutes: 240
        steps:
          # 0. Check required variables
          - task: Bash@3
            displayName: 'Check required variables'
            inputs:
              targetType: inline
              script: |
                set -e
                missing_vars=""
                if [ -z "$(azureOpenAIDeployment)" ]; then
                  echo "ERROR: azureOpenAIDeployment is not set or empty."
                  missing_vars="1"
                fi
                if [ -z "$(azureOpenAIEndpoint)" ]; then
                  echo "ERROR: azureOpenAIEndpoint is not set or empty."
                  missing_vars="1"
                fi
                # Fail only after every variable was checked, so the log lists
                # all missing ones in a single run.
                if [ -n "$missing_vars" ]; then
                  echo "ERROR: One or more required variables are missing; aborting."
                  exit 1
                fi

          # 1. Checkout GitHub repo (self - contains products/, skills/)
          # NOTE(review): `task: Checkout@1` with `inputs.repository` looks like
          # the expanded form of a `checkout:` step - confirm it resolves as-is.
          - task: Checkout@1
            displayName: 'Checkout GitHub repo'
            inputs:
              repository: self
              # Keeps the checkout credentials in git config; step 7 reads them
              # back to push the branch and authenticate gh.
              persistCredentials: true

          # 2. Checkout ADO repo (source code)
          - task: Checkout@1
            displayName: 'Checkout ADO repo (code)'
            inputs:
              repository: git://Engineering/Learn.Docs2AgentSkills@refs/heads/${{ parameters.docs2SkillsADOBranch }}

          # 3. Check if current branch is behind target branch
          - task: Bash@3
            displayName: 'Check behind target branch'
            inputs:
              targetType: inline
              script: |
                set -e
                # The GitHub checkout lives in a folder named after the repo.
                GITHUB_REPO=$(basename "$(Build.Repository.Uri)" .git)
                cd "$(Build.SourcesDirectory)/$GITHUB_REPO"
                TARGET_BRANCH="${{ parameters.targetBranch }}"
                echo "Fetching target branch: $TARGET_BRANCH"
                # Best effort: a missing target branch is handled just below.
                git fetch origin "$TARGET_BRANCH" || true
                if ! git show-ref --verify --quiet "refs/remotes/origin/$TARGET_BRANCH"; then
                  echo "Target branch origin/$TARGET_BRANCH does not exist yet. Skipping behind/ahead check."
                  exit 0
                fi
                # NOTE(review): these counts need enough history on both sides;
                # confirm the checkout is not shallow.
                BEHIND_COUNT=$(git rev-list --count "HEAD..origin/$TARGET_BRANCH")
                AHEAD_COUNT=$(git rev-list --count "origin/$TARGET_BRANCH..HEAD")
                echo "Branch status vs origin/$TARGET_BRANCH: ahead=$AHEAD_COUNT, behind=$BEHIND_COUNT"
                if [ "$BEHIND_COUNT" -gt 0 ]; then
                  echo "ERROR: Current branch is behind origin/$TARGET_BRANCH by $BEHIND_COUNT commit(s)."
                  echo "Please rebase/merge latest target branch before running sync."
                  exit 1
                fi

          # 4. Setup Python
          - task: UsePythonVersion@0
            displayName: 'Setup Python $(pythonVersion)'
            inputs:
              versionSpec: '$(pythonVersion)'

          # 5. Install dependencies
          - task: Bash@3
            displayName: 'Install dependencies'
            inputs:
              targetType: inline
              script: |
                set -e
                cd "$(Build.SourcesDirectory)/Learn.Docs2AgentSkills"
                pip install --upgrade pip
                pip install -r requirements.txt

          # 6. Run docs2skills with Azure identity
          - task: AzureCLI@2
            displayName: 'Run docs2skills'
            inputs:
              azureSubscription: $(azureSubscription)
              scriptType: bash
              scriptLocation: inlineScript
              inlineScript: |
                set -e
                # Define paths
                ADO_REPO="$(Build.SourcesDirectory)/Learn.Docs2AgentSkills"
                GITHUB_REPO="$(Build.SourcesDirectory)/$(basename $(Build.Repository.Uri) .git)"
                PRODUCTS_DIR="$GITHUB_REPO/products"
                SKILLS_DIR="$GITHUB_REPO/skills"
                cd "$ADO_REPO"
                echo "========================================"
                echo "Starting docs sync"
                echo "========================================"
                echo "ADO repo (code): $ADO_REPO"
                echo "GitHub repo (data): $GITHUB_REPO"
                echo "Products dir: $PRODUCTS_DIR"
                echo "Skills dir: $SKILLS_DIR"
                echo "Azure OpenAI: $(azureOpenAIEndpoint)"
                echo "Deployment: $(azureOpenAIDeployment)"
                echo "Full Sync: ${{ parameters.fullSync }}"
                echo "Target Branch: ${{ parameters.targetBranch }}"
                echo "========================================"
                # NOTE(review): FULL_FLAG is assigned here but never passed to
                # any docs2skills invocation below; the full sync currently
                # works only by wiping the output directories. Confirm whether
                # the CLI should also receive --full.
                FULL_FLAG=""
                if [ "${{ parameters.fullSync }}" = "True" ]; then
                  FULL_FLAG="--full"
                  echo "Running with --full flag"
                  # check before delete
                  if [ -n "$PRODUCTS_DIR" ] && [ -d "$PRODUCTS_DIR" ]; then
                    rm -rf "$PRODUCTS_DIR"
                    mkdir -p "$PRODUCTS_DIR"
                  fi
                  if [ -n "$SKILLS_DIR" ] && [ -d "$SKILLS_DIR" ]; then
                    rm -rf "$SKILLS_DIR"
                    mkdir -p "$SKILLS_DIR"
                  fi
                fi
                echo "========================================"
                echo "Processing Well-Architected Framework"
                echo "========================================"
                python -m src.docs2skills \
                  --config-type waf \
                  --products-dir "$PRODUCTS_DIR" \
                  --skills-dir "$SKILLS_DIR"
                echo "========================================"
                echo "Processing Cloud Adoption Framework"
                echo "========================================"
                python -m src.docs2skills \
                  --config-type caf \
                  --products-dir "$PRODUCTS_DIR" \
                  --skills-dir "$SKILLS_DIR"
                echo "========================================"
                echo "Processing Azure Architecture Center"
                echo "========================================"
                python -m src.docs2skills \
                  --config-type aac \
                  --products-dir "$PRODUCTS_DIR" \
                  --skills-dir "$SKILLS_DIR"
                echo "========================================"
                echo "Processing Azure Other Products"
                echo "========================================"
                # No --config-type on this pass.
                python -m src.docs2skills \
                  --products-dir "$PRODUCTS_DIR" \
                  --skills-dir "$SKILLS_DIR"
                echo "========================================"
                echo "Sync completed"
                echo "========================================"
            env:
              AZURE_OPENAI_ENDPOINT: $(azureOpenAIEndpoint)
              AZURE_OPENAI_CHAT_DEPLOYMENT: $(azureOpenAIDeployment)

          # # 6.5. [TEST ONLY] Make a dummy change to trigger PR
          # - task: Bash@3
          #   displayName: '[TEST] Make dummy change'
          #   inputs:
          #     targetType: inline
          #     script: |
          #       set -e
          #       GITHUB_REPO=$(basename $(Build.Repository.Uri) .git)
          #       cd $(Build.SourcesDirectory)/$GITHUB_REPO
          #       echo "test-$(date +%s)" >> products/test-dummy.txt

          # 7. Push to GitHub repo and create PR
          - task: Bash@3
            displayName: 'Push to GitHub and create PR'
            inputs:
              targetType: inline
              script: |
                set -e
                GITHUB_REPO=$(basename "$(Build.Repository.Uri)" .git)
                cd "$(Build.SourcesDirectory)/$GITHUB_REPO"
                git config user.email "docs2skills-pipeline@microsoft.com"
                git config user.name "Docs2Skills Pipeline"
                # Only changes under products/ and skills/ are considered.
                CHANGED_FILES=$(git status --porcelain products/ skills/ | wc -l)
                if [ "$CHANGED_FILES" -gt 0 ]; then
                  echo "Changed files: $CHANGED_FILES"
                  git status --short products/ skills/
                  git add products/ skills/
                  TIMESTAMP=$(date +"%Y-%m-%d %H:%M")
                  git commit -m "$TIMESTAMP - Generate skills from docs (code: ADO/${{ parameters.docs2SkillsADOBranch }}, target: GitHub/${{ parameters.targetBranch }})"
                  TARGET_BRANCH="${{ parameters.targetBranch }}"
                  TIMESTAMP_BRANCH=$(date +"%Y%m%d-%H%M%S")
                  PR_BRANCH="docs-sync-$TIMESTAMP_BRANCH"
                  # Push to PR branch
                  echo "Pushing changes to PR branch: $PR_BRANCH"
                  git push origin "HEAD:refs/heads/$PR_BRANCH"
                  # Create PR targeting the target branch
                  echo "Creating PR from $PR_BRANCH to $TARGET_BRANCH"
                  REPO_URL="$(Build.Repository.Uri)"
                  # Extract owner/repo from URL. The ".git" strip is anchored
                  # to the end with a literal dot so it cannot eat characters
                  # from the owner or repo name.
                  REPO_PATH=$(echo "$REPO_URL" | sed -e 's|.*github\.com/||' -e 's|\.git$||')
                  # Reuse the token already stored by persistCredentials: true
                  EXTRAHEADER_KEY="http.$(Build.Repository.Uri).extraheader"
                  # Header value is "<name>: basic <base64(user:token)>"; keep
                  # only the part after the last colon of the decoded pair.
                  export GH_TOKEN=$(git config --get "$EXTRAHEADER_KEY" | sed 's/.*: basic //' | base64 -d | sed 's/.*://')
                  [ -n "$GH_TOKEN" ] || { echo "ERROR: GH_TOKEN is empty"; exit 1; }
                  gh pr create \
                    --title "Sync docs to agent skills - $TIMESTAMP" \
                    --body "Automated docs sync" \
                    --base "$TARGET_BRANCH" \
                    --head "$PR_BRANCH" \
                    --repo "$REPO_PATH"
                  echo "PR created successfully"
                else
                  echo "No changes detected in products/ or skills/ directories"
                fi