From fb852ff46f5a3983aa5b20ac3269ad1a3f4ad6f0 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 9 Oct 2024 00:02:51 +0800 Subject: [PATCH] Optimize path and link validity check Signed-off-by: ZePan110 --- .github/workflows/pr-path-detection.yml | 90 +++++++++++++------------ 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index 9d12c34..aa18c4b 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -22,25 +22,28 @@ jobs: run: | cd ${{github.workspace}} fail="FALSE" - url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin') - if [ -n "$url_lines" ]; then - for url_line in $url_lines; do - url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') - path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) - if [[ "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/EZa7vjON10ZCpMvE7U-SPMwBRXbVHqe1Ybsa-fmnXayNUA?e=f6FPsl" == "$url" || "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/ESMIcBseFTdIuqkoB7TZy6ABfwR9CkfV49TvTa1X_Jihkg?e=zMH7O7" == "$url" ]]; then - echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person." - else - response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, status code: $response, try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successful*****" - else - urls_line+=("$url_line") - echo "Status code: $response_retry, Link $url validation failed, will retry later." + # url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin') + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" + if [ -n "changed_files" ]; then + for changed_file in $changed_files; do + url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'linkedin') + if [ -n "$url_lines" ]; then + for url_line in $url_lines; do + url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, status code: $response, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successful*****" + else + urls_line+=("$url_line") + echo "Status code: $response_retry, Link $url validation failed, will retry later." + fi fi - fi + done fi done fi @@ -95,54 +98,53 @@ jobs: branch="https://github.com/opea-project/docs/blob/${{ github.event.pull_request.head.ref }}" fi link_head="https://github.com/opea-project/docs/blob/main" - IFS=$'\n' + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http'|grep -Ev 'mailto') if [ -n "$png_lines" ]; then for png_line in $png_lines; do refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) if [[ "${png_path:0:1}" == "/" ]]; then - check_path=${{github.workspace}}$png_path - elif [[ "${png_path:0:1}" == "#" ]]; then - check_path=${{github.workspace}}/$refer_path$png_path + check_path=$png_path + elif [[ "$png_path" == *#* ]]; then + relative_path=$(echo "$png_path" | cut -d '#' -f1) + if [ -n "$relative_path" ]; then + check_path=$(dirname "$refer_path")/$relative_path + png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}') + else + check_path=$refer_path + fi else - check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path + check_path=$(dirname "$refer_path")/$png_path fi - real_path=$(realpath $check_path) - if [ $? -ne 0 ]; then - echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist" - fail="TRUE" - else - url=$link_head$(echo "$real_path" | sed 's|.*/docs||') - response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, status code: $response try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successful*****" - else - echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}" - url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||') + if [ -e "$check_path" ]; then + real_path=$(realpath $check_path) + if [[ "$png_line" == *#* ]]; then + if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then + url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||')$png_path response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") if [ "$response" -ne 200 ]; then - echo "**********Validation failed, status code: $response_retry, try again**********" + echo "**********Validation failed, try again**********" response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successful*****" + echo "*****Retry successfully*****" else - echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path status code: $response_retry" - echo "$png_line" + echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev" fail="TRUE" fi else - echo "Check branch ${{ github.event.pull_request.head.ref }} successfully." + echo "Validation succeed $png_line" fi fi fi + else + echo "$check_path does not exist" + fail="TRUE" fi done fi - IFS=$OLDIFS + if [[ "$fail" == "TRUE" ]]; then exit 1 else