|
| 1 | +# -------------------------------------------------------------------- |
| 2 | +# |
| 3 | +# Licensed to the Apache Software Foundation (ASF) under one or more |
| 4 | +# contributor license agreements. See the NOTICE file distributed |
| 5 | +# with this work for additional information regarding copyright |
| 6 | +# ownership. The ASF licenses this file to You under the Apache |
| 7 | +# License, Version 2.0 (the "License"); you may not use this file |
| 8 | +# except in compliance with the License. You may obtain a copy of the |
| 9 | +# License at |
| 10 | +# |
| 11 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | +# |
| 13 | +# Unless required by applicable law or agreed to in writing, software |
| 14 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 15 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| 16 | +# implied. See the License for the specific language governing |
| 17 | +# permissions and limitations under the License. |
| 18 | +# |
| 19 | +# -------------------------------------------------------------------- |
| 20 | +# Apache Cloudberry (Incubating) Compliance Workflow |
| 21 | +# |
| 22 | +# Comprehensive compliance checks for Apache Cloudberry: |
| 23 | +# 1. Apache RAT license header validation |
| 24 | +# 2. Copyright year verification (NOTICE and psql help.c) |
| 25 | +# 3. Binary file presence detection with approved allowlist |
| 26 | +# |
| 27 | +# Based on Apache Rat tool, run locally with: |
| 28 | +# `mvn clean verify -Drat.consoleOutput=true` |
| 29 | +# -------------------------------------------------------------------- |
| 30 | + |
# Display name of this workflow.
name: Apache Rat License Check

# Triggers: pushes to and pull requests against the two maintained branches,
# plus manual runs via workflow_dispatch.
on:
  push:
    branches:
      - main
      - cbdb-postgres-merge
  pull_request:
    branches:
      - main
      - cbdb-postgres-merge
    types:
      - opened
      - synchronize
      - reopened
      - edited
  workflow_dispatch:

# The checks only read the repository contents.
permissions:
  contents: read

# One active run per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Runs every compliance check in sequence. Each check records its outcome
  # (pass/fail) in GITHUB_ENV instead of exiting, so that the summary can
  # report all results; the final "Report Status" step turns any recorded
  # failure into a failing job.
  rat-check:
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Check out repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Set up Java and Maven
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '11'
          cache: maven

      # Runs the Rat plugin through Maven and records RAT_CHECK=pass|fail.
      # The full Maven output is kept in rat-output.log for the artifact
      # upload and the job summary.
      - name: Run Apache Rat check
        run: |
          echo "Running Apache Rat license check..."

          # tee always succeeds, so Maven's own exit status has to be read
          # from PIPESTATUS. errexit is suspended around the pipeline so a
          # failing build still reaches the bookkeeping below.
          set +e
          mvn clean verify -Drat.consoleOutput=true | tee rat-output.log
          mvn_status=${PIPESTATUS[0]}
          set -e

          # Fail on a non-zero Maven exit status as well as on the
          # "BUILD FAILURE" banner, so that a Maven run that dies without
          # printing the banner is not reported as a pass.
          if [ "$mvn_status" -ne 0 ] || grep -q "\[INFO\] BUILD FAILURE" rat-output.log; then
            echo "::error::Apache Rat check failed - build failure detected"
            echo "RAT_CHECK=fail" >> "$GITHUB_ENV"
          else
            echo "RAT_CHECK=pass" >> "$GITHUB_ENV"
            echo "Apache Rat check passed successfully"
          fi

      # Verifies that NOTICE and src/bin/psql/help.c carry the current year
      # in the ASF copyright line. Records CURRENT_YEAR, NOTICE_CHECK and
      # PSQL_HELP_CHECK for later steps.
      - name: Check copyright years are up-to-date
        run: |
          echo "Checking copyright years..."
          current_year=$(date -u +"%Y")
          echo "CURRENT_YEAR=$current_year" >> "$GITHUB_ENV"

          # Results are tracked in shell variables: values appended to
          # GITHUB_ENV only become visible in subsequent steps, so they
          # cannot be read back inside this script.
          notice_check=pass
          psql_help_check=pass
          expected_line="Copyright 2024-$current_year The Apache Software Foundation"

          # Check NOTICE file
          echo "Checking NOTICE file..."
          if grep -qF -- "$expected_line" NOTICE; then
            echo "PASS: NOTICE file contains the current year ($current_year)"
          else
            echo "::error::NOTICE file does not contain the current year ($current_year)"
            notice_check=fail
          fi

          # Check psql help.c file
          echo "Checking src/bin/psql/help.c..."
          if grep -qF -- "$expected_line" src/bin/psql/help.c; then
            echo "PASS: src/bin/psql/help.c contains the current year ($current_year)"
          else
            echo "::error::src/bin/psql/help.c does not contain the current year ($current_year)"
            psql_help_check=fail
          fi

          # Publish the outcome for the summary and status steps.
          echo "NOTICE_CHECK=$notice_check" >> "$GITHUB_ENV"
          echo "PSQL_HELP_CHECK=$psql_help_check" >> "$GITHUB_ENV"

          # Continue execution even if checks fail
          if [ "$notice_check" = "pass" ] && [ "$psql_help_check" = "pass" ]; then
            echo "All copyright year checks passed"
          else
            echo "Copyright year checks completed with errors"
          fi

      # Searches the tree for files with binary-looking extensions and
      # compares them against the approved allowlist. Writes:
      #   binary_results.txt   - one "ext:none" or "ext:path" record per line
      #   binary_allowlist.txt - one "Allowed: path" line per approved file
      # and records BINARY_EXTENSIONS and BINARY_CHECK for later steps.
      - name: Check for binary files
        run: |
          echo "Checking for binary files..."
          echo "Checking extensions: class, jar, tar, tgz, zip, exe, dll, so, gz, bz2"
          echo "----------------------------------------------------------------------"

          # Binary file allowlist, see README.apache.md
          ALLOWLIST=(
            "contrib/formatter_fixedwidth/data/fixedwidth_small_correct.tbl.gz"
            "gpMgmt/demo/gppkg/sample-sources.tar.gz"
            "src/bin/gpfdist/regress/data/exttab1/nation.tbl.gz"
            "src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk.tbl.gz"
            "src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk_2.tbl.gz"
            "src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.bz2"
            "src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.gz"
          )

          # Check for specific binary file extensions
          binary_extensions="class jar tar tgz zip exe dll so gz bz2"
          echo "BINARY_EXTENSIONS=${binary_extensions}" >> "$GITHUB_ENV"
          binaryfiles_found=false

          # Both report files are appended to below; start from a clean slate.
          rm -f binary_results.txt binary_allowlist.txt

          # Intentionally unquoted: the list is split on whitespace.
          for extension in ${binary_extensions}; do
            printf "Checking *.%-4s files..." "${extension}"
            found=$(find . -name "*.${extension}" -type f || true)

            if [ -z "$found" ]; then
              echo "NONE"
              echo "${extension}:none" >> binary_results.txt
              continue
            fi

            # Split the matches into approved files (logged to the allowlist
            # report) and rejected files (collected for the error report).
            rejected=()
            while IFS= read -r file; do
              is_allowed=false
              for allowlist_file in "${ALLOWLIST[@]}"; do
                if [ "$file" = "./$allowlist_file" ]; then
                  is_allowed=true
                  echo "Allowed: $file" >> binary_allowlist.txt
                  break
                fi
              done
              if [ "$is_allowed" = false ]; then
                rejected+=("$file")
              fi
            done <<< "$found"

            if [ "${#rejected[@]}" -eq 0 ]; then
              echo "NONE (all allowed)"
              echo "${extension}:none" >> binary_results.txt
              continue
            fi

            echo "FOUND"
            echo "::error::${extension} files should not exist"
            echo "For ASF compatibility: the source tree should not contain"
            echo "binary files as users have a hard time verifying their contents."
            echo "Found files:"
            printf '  %s\n' "${rejected[@]}"

            # One record per file: the summary step parses this file line by
            # line, so a multi-line value would be misread as extra records.
            for file in "${rejected[@]}"; do
              echo "${extension}:${file}" >> binary_results.txt
            done
            binaryfiles_found=true
          done

          echo "----------------------------------------------------------------------"
          if [ "$binaryfiles_found" = true ]; then
            echo "ERROR: Non-allowed binary files were found in the source tree"
            echo "BINARY_CHECK=fail" >> "$GITHUB_ENV"
          else
            echo "PASS: No non-allowed binary files found"
            echo "BINARY_CHECK=pass" >> "$GITHUB_ENV"
          fi

          # Show allowlist summary if any allowed files were found
          if [ -f binary_allowlist.txt ]; then
            echo ""
            echo "Allowed binary files (approved):"
            sed 's/^/  /' binary_allowlist.txt
          fi

      - name: Upload Rat check results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: rat-check-results
          path: rat-output.log
          retention-days: 7

      # Renders the recorded results as Markdown into the job summary.
      # Runs even when an earlier step failed.
      - name: Generate Job Summary
        if: always()
        run: |
          {
            echo "## Apache Cloudberry Compliance Audit Results"
            echo "- Run Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')"
            echo ""

            # Copyright Year Check Summary
            echo "### Copyright Year Checks"
            echo "**NOTICE file:**"
            if [ "$NOTICE_CHECK" = "pass" ]; then
              echo "PASS: Contains current year ($CURRENT_YEAR)"
            else
              echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
            fi
            echo ""
            echo "**psql help.c:**"
            if [ "$PSQL_HELP_CHECK" = "pass" ]; then
              echo "PASS: Contains current year ($CURRENT_YEAR)"
            else
              echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
            fi
            echo ""

            # Binary Files Check Summary
            echo "### Binary Files Check"
            echo "Checked extensions: \`${BINARY_EXTENSIONS}\`"
            echo ""
            echo "Results:"
            echo "\`\`\`"
            if [ -f binary_results.txt ]; then
              # Records are "ext:none" or "ext:path", one per line; print
              # the error heading once per extension and list its files
              # underneath.
              previous_ext=""
              while IFS=: read -r ext file; do
                if [ "$file" = "none" ]; then
                  echo "PASS: No .${ext} files found"
                else
                  if [ "$ext" != "$previous_ext" ]; then
                    echo "ERROR: Found .${ext} files:"
                    previous_ext="$ext"
                  fi
                  echo "  $file"
                fi
              done < binary_results.txt
            fi
            echo "\`\`\`"
            echo ""

            # Allowlist summary
            if [ -f binary_allowlist.txt ]; then
              echo "### Allowed Binary Files"
              echo "The following binary files are approved for testing purposes:"
              echo "You can see [README.apache.md](https://github.com/apache/cloudberry/blob/main/README.apache.md) for details."
              echo "\`\`\`"
              sed 's/Allowed: //' binary_allowlist.txt
              echo "\`\`\`"
              echo ""
            fi

            # Rat check summary
            if [[ -f rat-output.log ]]; then
              # First extract and display summary statistics (only once)
              if grep -q "Rat check: Summary over all files" rat-output.log; then
                echo "### License Header Check"
                summary_line=$(grep "Rat check: Summary over all files" rat-output.log)
                echo "\`\`\`"
                echo "$summary_line"
                echo "\`\`\`"
                echo ""
              fi

              # Then determine the result status
              if [ "$RAT_CHECK" = "fail" ]; then
                echo "#### Check Failed - License Compliance Issues Detected"
                echo ""

                # Extract and display files with unapproved licenses
                if grep -q "Files with unapproved licenses:" rat-output.log; then
                  echo "##### Files with Unapproved Licenses"
                  echo "\`\`\`"
                  # Get the line with "Files with unapproved licenses:" and all
                  # following lines until the dashed line; show at most 20.
                  sed -n '/Files with unapproved licenses:/,/\[INFO\] ------------------------------------------------------------------------/p' rat-output.log | \
                    grep -v "\[INFO\] ------------------------------------------------------------------------" | \
                    grep -v "^$" | \
                    head -20
                  echo "\`\`\`"
                  echo ""
                fi

                echo "**How to fix:**"
                echo ""
                echo "**For new original files you created:**"
                echo "- Add the standard Apache License header to each file"
                echo ""
                echo "**For third-party files with different licenses:**"
                echo "- Add the file to exclusion list in \`pom.xml\` under the rat-maven-plugin configuration"
                echo "- Ensure the license is compatible with Apache License 2.0"
                echo "- Avoid introducing components with incompatible licenses"
                echo ""
                echo "**Need help?**"
                echo "- Run \`mvn clean verify -Drat.consoleOutput=true\` locally for the full report"
                echo "- Email dev@cloudberry.apache.org if you have questions about license compatibility"

              elif [ "$RAT_CHECK" = "pass" ]; then
                echo "#### Check Passed - All Files Comply with Apache License Requirements"
              fi
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      # Turns the recorded per-check results into the job's final status.
      # A result variable that was never set is not treated as a failure here.
      - name: Report Status
        if: always()
        shell: bash {0}
        run: |
          # Check overall status of all checks
          overall_status=0

          # Check Apache RAT status
          if [ "$RAT_CHECK" = "fail" ]; then
            echo "ERROR: Apache Rat check failed"
            overall_status=1
          elif [ "$RAT_CHECK" = "pass" ]; then
            echo "Apache Rat check passed"
          fi

          # Check copyright year status
          if [ "$NOTICE_CHECK" = "fail" ]; then
            echo "ERROR: NOTICE file copyright year check failed"
            overall_status=1
          fi
          if [ "$PSQL_HELP_CHECK" = "fail" ]; then
            echo "ERROR: psql help.c copyright year check failed"
            overall_status=1
          fi

          # Check binary files status
          if [ "$BINARY_CHECK" = "fail" ]; then
            echo "ERROR: Binary files check failed"
            overall_status=1
          fi

          # Exit with appropriate status
          if [ "$overall_status" -eq 0 ]; then
            echo "SUCCESS: All checks passed"
            exit 0
          else
            echo "FAILURE: One or more checks failed"
            exit 1
          fi
0 commit comments