Automated PR Review (#3859)

* feat: add automated PR review workflow via Vertex AI Gemini * fix: use workflow name instead of job_workflow_ref for WIF mapping * fix: read review output from file to avoid JS syntax errors in github-script * chore: remove redundant GITHUB_OUTPUT logic in PR review workflow * chore: move configuration identifiers to GitHub Variables * chore: upgrade model to 3.1-pro-preview and sanitize comment title * fix: use full model path in vertex ai SDK for preview models * fix: revert to gemini-2.5-pro due to Vertex API model resolution errors
2026-04-11 14:26:05 +02:00
parent 1cb643b3c1
commit 6847fae28d
4 changed files with 214 additions and 0 deletions
--- a/.github/workflows/pr-review.yml
+++ b/.github/workflows/pr-review.yml
@@ -0,0 +1,97 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Posts an AI-generated review as a comment on pull requests that carry the
+# 'automated-review' label. The review is produced by tools/pr_review.py,
+# which sends the PR diff to Gemini on Vertex AI.
+name: "Automated PR Review"
+# yamllint disable-line rule:truthy
+on:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - labeled
+
+jobs:
+  pr-review:
+    # Run only if the PR has the 'automated-review' label. For 'labeled'
+    # events, additionally require that the label just added is the review
+    # label itself; otherwise adding any unrelated label to an already
+    # labeled PR would trigger a duplicate review.
+    if: >-
+      contains(github.event.pull_request.labels.*.name, 'automated-review')
+      && (github.event.action != 'labeled'
+      || github.event.label.name == 'automated-review')
+    permissions:
+      contents: read        # checkout
+      id-token: write       # OIDC token for Workload Identity Federation
+      pull-requests: write  # post the review comment
+    runs-on: ubuntu-latest
+    steps:
+      - id: checkout
+        name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Need full history for diffing
+
+      - id: gcp-auth
+        name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          # yamllint disable-line rule:line-length
+          workload_identity_provider: ${{ vars.REVIEWS_WIF_PROVIDER }}
+          service_account: ${{ vars.REVIEWS_WIF_SA }}
+          access_token_lifetime: 900s
+
+      - id: setup-python
+        name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - id: install-deps
+        name: Install Dependencies
+        run: |
+          pip install google-cloud-aiplatform
+
+      - id: generate-diff
+        name: Generate PR Diff
+        # Event fields are passed through env rather than interpolated into
+        # the script, so ref names are never evaluated as shell code.
+        env:
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+          BASE_SHA: ${{ github.event.pull_request.base.sha }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          # Fetch the target branch to ensure we have the base commit to compare against
+          git fetch origin "$BASE_REF":"$BASE_REF"
+
+          # Generate the diff between the PR base and the PR head
+          git diff "$BASE_SHA"..."$HEAD_SHA" > pr.diff
+
+      - id: run-review
+        name: Run Gemini PR Review
+        env:
+          VERTEX_PROJECT: ${{ vars.REVIEWS_VERTEX_PROJECT }}
+        run: |
+          # The script prints the review to stdout, which we capture into a file
+
+          python3 tools/pr_review.py \
+            --project "$VERTEX_PROJECT" \
+            --diff-file pr.diff > review_output.md
+
+      - id: pr-comment
+        name: Post comment to Pull Request
+        uses: actions/github-script@v7
+        with:
+          # The review is read from a file instead of being interpolated into
+          # the script, so its content can never break the JavaScript syntax.
+          script: |
+            const fs = require('fs');
+
+            // Stay safely below GitHub's 65536-character comment size limit.
+            const MAX_BODY_LENGTH = 65000;
+            const TRUNCATION_NOTICE =
+              '\n\n_Review truncated to fit the GitHub comment size limit._';
+
+            const reviewContent = fs.readFileSync('review_output.md', 'utf8');
+            if (!reviewContent.trim()) {
+              // Nothing to report; avoid posting a title-only comment.
+              core.warning('Review output is empty; skipping PR comment.');
+              return;
+            }
+
+            let output = `### Automated PR Review 🤖\n\n${reviewContent}`;
+            if (output.length > MAX_BODY_LENGTH) {
+              output =
+                output.slice(0, MAX_BODY_LENGTH - TRUNCATION_NOTICE.length) +
+                TRUNCATION_NOTICE;
+            }
+
+            // Await the request so the step only completes once the comment
+            // has been created, and so API errors fail the step.
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: output
+            });
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -224,3 +224,9 @@ Modify one existing README example (do not add a new one) to demonstrate context
    *   **Locals Separation:** Use module-level locals for values referenced directly by resources/outputs. Use block-level "private" locals prefixed with an underscore (`_`) for intermediate transformations.
    *   **Complex Transformations:** Move complex data transformations in `for` or `for_each` loops to `locals` to keep resource blocks clean.

+## File Modification Rules
+- **CRITICAL:** NEVER use shell redirection (`cat << EOF`, `echo "..." >`, `>>`, `tee`) to create, overwrite, or append to files.
+- For creating files, ALWAYS use the native `write_file` tool.
+- For targeted edits or appending to a single file, ALWAYS use the native `replace` tool. (To append, match the last few lines of the file and replace them with the same lines plus your new content).
+- **EXCEPTION (Pattern/Bulk Edits):** You MAY use shell commands (like `sed -i`, `perl -pi`, or `find ... | xargs sed -i`) ONLY for regex-based or pattern-based replacements, particularly across multiple files, where the exact-match `replace` tool is not feasible.
+
--- a/modules/project/identity-providers-defs.tf
+++ b/modules/project/identity-providers-defs.tf
@@ -28,6 +28,10 @@ locals {
        "attribute.repository_owner" = "assertion.repository_owner"
        "attribute.ref"              = "assertion.ref"
        "attribute.fast_sub"         = "\"repo:\" + assertion.repository + \":ref:\" + assertion.ref"
+        "attribute.workflow"         = "assertion.workflow"
+        "attribute.job_workflow_ref" = "assertion.job_workflow_ref"
+        "attribute.event_name"       = "assertion.event_name"
+        "attribute.pr_review_sub"    = "\"event:\" + assertion.event_name + \":workflow:\" + assertion.workflow"
      }
      issuer_uri = "https://token.actions.githubusercontent.com"
    }
--- a/tools/pr_review.py
+++ b/tools/pr_review.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import sys
+
+try:
+  import vertexai
+  from vertexai.generative_models import GenerativeModel
+except ImportError:
+  print("Error: google-cloud-aiplatform is not installed.", file=sys.stderr)
+  print("Please install it via: pip install google-cloud-aiplatform",
+        file=sys.stderr)
+  sys.exit(1)
+
+
+def main():
+  parser = argparse.ArgumentParser(
+      description="Run Gemini PR Review via Vertex AI")
+  parser.add_argument("--project", required=True, help="GCP Project ID")
+  parser.add_argument("--location", default="us-central1", help="GCP Region")
+  parser.add_argument("--model", default="gemini-2.5-pro",
+                      help="Gemini model name")
+  parser.add_argument("--diff-file", required=True,
+                      help="Path to the PR diff file")
+  args = parser.parse_args()
+
+  # Read local repository guidelines
+  repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+  gemini_md_path = os.path.join(repo_root, "GEMINI.md")
+  contributing_md_path = os.path.join(repo_root, "CONTRIBUTING.md")
+
+  guidelines = ""
+  if os.path.exists(gemini_md_path):
+    with open(gemini_md_path, "r") as f:
+      guidelines += f"\n--- GEMINI.md ---\n{f.read()}"
+  if os.path.exists(contributing_md_path):
+    with open(contributing_md_path, "r") as f:
+      guidelines += f"\n--- CONTRIBUTING.md ---\n{f.read()}"
+
+  # Read diff
+  try:
+    with open(args.diff_file, "r") as f:
+      diff_content = f.read()
+  except Exception as e:
+    print(f"Error reading diff file: {e}", file=sys.stderr)
+    sys.exit(1)
+
+  if not diff_content.strip():
+    print("No diff content found. Skipping review.")
+    return
+
+  # Initialize Vertex AI
+  try:
+    vertexai.init(project=args.project, location=args.location)
+  except Exception as e:
+    print(f"Error initializing Vertex AI: {e}", file=sys.stderr)
+    sys.exit(1)
+
+  # Construct the System Instruction
+  system_instruction = f"""You are an expert Google Cloud and Terraform code reviewer.
+Your task is to review a Pull Request diff for the Cloud Foundation Fabric repository.
+
+You MUST strictly enforce the repository's architecture, conventions, and style guidelines.
+Here are the repository guidelines you must follow:
+{guidelines}
+
+Review the provided git diff. Provide a concise, constructive review.
+- Highlight any violations of the guidelines (e.g., naming conventions, missing context support, incorrect IAM patterns, missing tests).
+- Suggest specific code improvements.
+- If the code looks good and follows all guidelines, state that clearly.
+- Format your output in Markdown so it can be posted directly as a GitHub PR comment.
+- CRITICAL: Keep your entire response concise. The GitHub PR comment size limit is 65536 characters. Your response MUST be well under this limit (e.g., maximum 50000 characters). Focus only on the most important feedback.
+"""
+
+  model = GenerativeModel(
+      model_name=args.model,
+      system_instruction=system_instruction,
+  )
+
+  prompt = f"Here is the PR diff to review:\n```diff\n{diff_content}\n```"
+
+  try:
+    # Using a low temperature for a more analytical/deterministic review
+    response = model.generate_content(prompt,
+                                      generation_config={"temperature": 0.2})
+    print(response.text)
+  except Exception as e:
+    print(f"Error calling Vertex AI: {e}", file=sys.stderr)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+  main()