diff --git a/docs/introduction/configuration.md b/docs/introduction/configuration.md index a0150bcd..981cc661 100644 --- a/docs/introduction/configuration.md +++ b/docs/introduction/configuration.md @@ -109,6 +109,10 @@ gitlab: scan: threads: 10 # gl scan --threads (can override common.threads) + + tf: + output_dir: ./terraform-states # gl tf --output-dir + threads: 4 # gl tf --threads (can override common.threads) ``` ### GitHub diff --git a/internal/cmd/gitlab/gitlab.go b/internal/cmd/gitlab/gitlab.go index ada24725..25bde6dd 100644 --- a/internal/cmd/gitlab/gitlab.go +++ b/internal/cmd/gitlab/gitlab.go @@ -8,6 +8,7 @@ import ( "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/scan" "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/schedule" securefiles "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/secureFiles" + "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/tf" "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/variables" "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/vuln" "github.com/spf13/cobra" @@ -47,6 +48,7 @@ For SOCKS5 proxy: glCmd.AddCommand(renovate.NewRenovateRootCmd()) glCmd.AddCommand(cicd.NewCiCdCmd()) glCmd.AddCommand(schedule.NewScheduleCmd()) + glCmd.AddCommand(tf.NewTFCmd()) glCmd.PersistentFlags().StringVarP(&gitlabUrl, "gitlab", "g", "", "GitLab instance URL") glCmd.PersistentFlags().StringVarP(&gitlabApiToken, "token", "t", "", "GitLab API Token") diff --git a/internal/cmd/gitlab/tf/tf.go b/internal/cmd/gitlab/tf/tf.go new file mode 100644 index 00000000..9aeb9fbd --- /dev/null +++ b/internal/cmd/gitlab/tf/tf.go @@ -0,0 +1,102 @@ +package tf + +import ( + "github.com/CompassSecurity/pipeleek/internal/cmd/flags" + "github.com/CompassSecurity/pipeleek/pkg/config" + tfpkg "github.com/CompassSecurity/pipeleek/pkg/gitlab/tf" + "github.com/rs/zerolog/log" + "github.com/spf13/cobra" +) + +type TFCommandOptions struct { + config.CommonScanOptions + OutputDir string +} + +var options = 
TFCommandOptions{CommonScanOptions: config.DefaultCommonScanOptions()} +var maxArtifactSize string + +func NewTFCmd() *cobra.Command { + tfCmd := &cobra.Command{ + Use: "tf", + Short: "Scan Terraform/OpenTofu state files for secrets", + Long: `Scan GitLab Terraform/OpenTofu state files for secrets + +This command iterates through all projects where you have maintainer access, +checks for Terraform state files stored in GitLab, downloads them locally, +and scans them for secrets using TruffleHog. + +GitLab stores Terraform state natively when using the Terraform HTTP backend. +Each project can have multiple named state files.`, + Example: `# Scan all Terraform states in projects with maintainer access +pipeleek gl tf --token glpat-xxxxxxxxxxx --gitlab https://gitlab.example.com + +# Save state files to custom directory +pipeleek gl tf --token glpat-xxxxxxxxxxx --gitlab https://gitlab.example.com --output-dir ./tf-states + +# Use more threads for faster scanning +pipeleek gl tf --token glpat-xxxxxxxxxxx --gitlab https://gitlab.example.com --threads 10 + +# Scan with high confidence filter only +pipeleek gl tf --token glpat-xxxxxxxxxxx --gitlab https://gitlab.example.com --confidence high`, + Run: tfRun, + } + + // Command-specific flags + tfCmd.Flags().StringVar(&options.OutputDir, "output-dir", "./terraform-states", "Directory to save downloaded state files") + + // Common scan flags (threads, verification, confidence, hit-timeout, etc.) 
+ flags.AddCommonScanFlags(tfCmd, &options.CommonScanOptions, &maxArtifactSize) + + return tfCmd +} + +func tfRun(cmd *cobra.Command, args []string) { + if err := config.AutoBindFlags(cmd, map[string]string{ + "gitlab": "gitlab.url", + "token": "gitlab.token", + "output-dir": "gitlab.tf.output_dir", + "threads": "common.threads", + "truffle-hog-verification": "common.trufflehog_verification", + "confidence": "common.confidence_filter", + "hit-timeout": "common.hit_timeout", + }); err != nil { + log.Fatal().Err(err).Msg("Failed to bind command flags to configuration keys") + } + + if err := config.RequireConfigKeys("gitlab.url", "gitlab.token"); err != nil { + log.Fatal().Err(err).Msg("required configuration missing") + } + + gitlabUrl := config.GetString("gitlab.url") + gitlabApiToken := config.GetString("gitlab.token") + options.OutputDir = config.GetString("gitlab.tf.output_dir") + options.MaxScanGoRoutines = config.GetInt("common.threads") + options.ConfidenceFilter = config.GetStringSlice("common.confidence_filter") + options.TruffleHogVerification = config.GetBool("common.trufflehog_verification") + // HitTimeout comes from flags via AddCommonScanFlags; keep as-is + + if err := config.ValidateURL(gitlabUrl, "GitLab URL"); err != nil { + log.Fatal().Err(err).Msg("Invalid GitLab URL") + } + if err := config.ValidateToken(gitlabApiToken, "GitLab API Token"); err != nil { + log.Fatal().Err(err).Msg("Invalid GitLab API Token") + } + if err := config.ValidateThreadCount(options.MaxScanGoRoutines); err != nil { + log.Fatal().Err(err).Msg("Invalid thread count") + } + + tfOptions := tfpkg.TFOptions{ + GitlabUrl: gitlabUrl, + GitlabApiToken: gitlabApiToken, + OutputDir: options.OutputDir, + Threads: options.MaxScanGoRoutines, + ConfidenceFilter: options.ConfidenceFilter, + TruffleHogVerification: options.TruffleHogVerification, + HitTimeout: options.HitTimeout, + } + + tfpkg.ScanTerraformStates(tfOptions) + + log.Info().Msg("Done, Bye Bye 🏳️‍🌈🔥") +} diff --git 
a/pipeleek.example.yaml b/pipeleek.example.yaml index 51cd3549..966d8c0e 100644 --- a/pipeleek.example.yaml +++ b/pipeleek.example.yaml @@ -98,6 +98,11 @@ gitlab: threads: 15 # Override common.threads for GitLab scans max_artifact_size: 52428800 # 50MB for GitLab artifacts + # tf - Discover and scan Terraform/OpenTofu state files + tf: + output_dir: ./terraform-states # Directory to save downloaded state files + threads: 4 # Override common.threads for Terraform state scans + #------------------------------------------------------------------------------ # GitHub Platform Configuration #------------------------------------------------------------------------------ diff --git a/pkg/gitlab/tf/tf.go b/pkg/gitlab/tf/tf.go new file mode 100644 index 00000000..f8f853ab --- /dev/null +++ b/pkg/gitlab/tf/tf.go @@ -0,0 +1,249 @@ +package tf + +import ( + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "sync" + "time" + + "github.com/CompassSecurity/pipeleek/pkg/gitlab/util" + "github.com/CompassSecurity/pipeleek/pkg/httpclient" + "github.com/CompassSecurity/pipeleek/pkg/logging" + "github.com/CompassSecurity/pipeleek/pkg/scanner" + "github.com/rs/zerolog/log" + gitlab "gitlab.com/gitlab-org/api/client-go" +) + +type TFOptions struct { + GitlabUrl string + GitlabApiToken string + OutputDir string + Threads int + ConfidenceFilter []string + TruffleHogVerification bool + HitTimeout time.Duration +} + +type terraformState struct { + Name string + ProjectID int + Project *gitlab.Project +} + +// ScanTerraformStates scans all Terraform/OpenTofu state files for secrets +func ScanTerraformStates(options TFOptions) { + log.Info().Msg("Starting Terraform state scan") + + // Initialize scanner + scanner.InitRules(options.ConfidenceFilter) + if !options.TruffleHogVerification { + log.Info().Msg("TruffleHog verification is disabled") + } + + // Create output directory + if err := os.MkdirAll(options.OutputDir, 0o755); err != nil { + log.Fatal().Err(err).Str("dir", 
options.OutputDir).Msg("Failed to create output directory") + } + + // Initialize GitLab client + git, err := util.GetGitlabClient(options.GitlabApiToken, options.GitlabUrl) + if err != nil { + log.Fatal().Stack().Err(err).Msg("Failed creating gitlab client") + } + + // Fetch all projects with maintainer access + states := fetchTerraformStates(git, options.GitlabUrl, options.GitlabApiToken) + log.Info().Int("total", len(states)).Msg("Found Terraform states") + + if len(states) == 0 { + log.Warn().Msg("No Terraform states found") + return + } + + // Download and scan states with concurrency + downloadAndScanStates(states, options) + + log.Info().Msg("Terraform state scan complete") +} + +// fetchTerraformStates iterates all projects and finds those with Terraform state +func fetchTerraformStates(git *gitlab.Client, gitlabUrl string, token string) []terraformState { + var states []terraformState + var mu sync.Mutex + + projectOpts := &gitlab.ListProjectsOptions{ + ListOptions: gitlab.ListOptions{PerPage: 100, Page: 1}, + MinAccessLevel: gitlab.Ptr(gitlab.MaintainerPermissions), + OrderBy: gitlab.Ptr("last_activity_at"), + } + + log.Info().Msg("Fetching projects with maintainer access") + + err := util.IterateProjects(git, projectOpts, func(project *gitlab.Project) error { + log.Debug().Str("project", project.PathWithNamespace).Int64("id", project.ID).Msg("Checking project for Terraform state") + + // Check for Terraform state using HTTP API + stateExists := checkTerraformState(gitlabUrl, token, int(project.ID)) + if stateExists { + mu.Lock() + states = append(states, terraformState{ + Name: "default", + ProjectID: int(project.ID), + Project: project, + }) + mu.Unlock() + + log.Info().Str("project", project.PathWithNamespace).Msg("Found Terraform state") + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Error iterating projects") + } + + return states +} + +// checkTerraformState checks if a project has a Terraform state +func 
checkTerraformState(gitlabUrl string, token string, projectID int) bool { + url := fmt.Sprintf("%s/api/v4/projects/%d/terraform/state/default", gitlabUrl, projectID) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return false + } + req.Header.Set("PRIVATE-TOKEN", token) + + client := httpclient.GetPipeleekHTTPClient("", nil, nil).StandardClient() + resp, err := client.Do(req) + if err != nil { + return false + } + defer resp.Body.Close() + + // 200 means state exists, 404 means no state + return resp.StatusCode == http.StatusOK +} + +// downloadAndScanStates downloads state files and scans them for secrets +func downloadAndScanStates(states []terraformState, options TFOptions) { + var wg sync.WaitGroup + semaphore := make(chan struct{}, options.Threads) + + for _, state := range states { + wg.Add(1) + go func(s terraformState) { + defer wg.Done() + semaphore <- struct{}{} + defer func() { <-semaphore }() + + downloadAndScan(s, options) + }(state) + } + + wg.Wait() +} + +// downloadAndScan downloads a single state file and scans it +func downloadAndScan(state terraformState, options TFOptions) { + // Download state file + url := fmt.Sprintf("%s/api/v4/projects/%d/terraform/state/%s", options.GitlabUrl, state.ProjectID, state.Name) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + log.Error().Err(err).Str("project", state.Project.PathWithNamespace).Str("state", state.Name).Msg("Failed to create request") + return + } + req.Header.Set("PRIVATE-TOKEN", options.GitlabApiToken) + + client := httpclient.GetPipeleekHTTPClient("", nil, nil).StandardClient() + resp, err := client.Do(req) + if err != nil { + log.Error().Err(err).Str("project", state.Project.PathWithNamespace).Str("state", state.Name).Msg("Failed to download Terraform state") + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + log.Error().Int("status", resp.StatusCode).Str("project", state.Project.PathWithNamespace).Str("state", 
state.Name).Msg("Failed to download Terraform state") + return + } + + // Read state data + stateData, err := io.ReadAll(resp.Body) + if err != nil { + log.Error().Err(err).Str("project", state.Project.PathWithNamespace).Str("state", state.Name).Msg("Failed to read state data") + return + } + + // Save to file + filename := fmt.Sprintf("%d_%s.tfstate", state.ProjectID, sanitizeFilename(state.Name)) + filePath := filepath.Join(options.OutputDir, filename) + + if err := os.WriteFile(filePath, stateData, 0o644); err != nil { + log.Error().Err(err).Str("file", filePath).Msg("Failed to write state file") + return + } + + log.Info().Str("project", state.Project.PathWithNamespace).Str("state", state.Name).Str("file", filePath).Msg("Downloaded Terraform state") + + // Scan the file for secrets + scanStateFile(stateData, filePath, state, options) +} + +// scanStateFile scans a Terraform state file for secrets +func scanStateFile(content []byte, filePath string, state terraformState, options TFOptions) { + log.Debug().Str("file", filePath).Msg("Scanning Terraform state for secrets") + + findings, err := scanner.DetectHits(content, options.Threads, options.TruffleHogVerification, options.HitTimeout) + if err != nil { + log.Debug().Err(err).Str("file", filePath).Msg("Failed detecting secrets") + return + } + + if len(findings) > 0 { + log.Warn().Int("findings", len(findings)).Str("project", state.Project.PathWithNamespace).Str("state", state.Name).Str("file", filePath).Msg("Secrets found in Terraform state") + + for _, finding := range findings { + logging.Hit(). + Str("type", "terraform-state"). + Str("project", state.Project.PathWithNamespace). + Str("url", state.Project.WebURL). + Str("state", state.Name). + Str("file", filePath). + Str("ruleName", finding.Pattern.Pattern.Name). + Str("confidence", finding.Pattern.Pattern.Confidence). + Str("value", finding.Text). 
// sanitizeFilename returns name with every character that is unsafe in a file
// name replaced by '_'.
//
// Replaced are the path separators and the characters Windows reserves
// (/ \ : * ? " < > |) as well as ASCII control characters (including NUL and
// DEL). All other characters, including non-ASCII ones, are kept, so the
// result has the same number of runes as the input. An empty name yields an
// empty string.
func sanitizeFilename(name string) string {
	runes := []rune(name)
	for i, r := range runes {
		switch r {
		case '/', '\\', ':', '*', '?', '"', '<', '>', '|':
			runes[i] = '_'
		default:
			// Control characters are rejected by several file systems and
			// NUL terminates paths in system calls.
			if r < 0x20 || r == 0x7f {
				runes[i] = '_'
			}
		}
	}

	return string(runes)
}
"/projects/1/") { + w.WriteHeader(http.StatusOK) + tfState := `{"version": 4, "terraform_version": "1.0.0", "serial": 0}` + w.Write([]byte(tfState)) + return + } + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "tf", + "--gitlab", server.URL, + "--token", "test-token", + "--output-dir", tmpDir, + "--threads", "2", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + assert.Contains(t, stdout+stderr, "Found Terraform states") + assert.Contains(t, stdout+stderr, "Downloaded Terraform state") + assert.Contains(t, stdout+stderr, "Terraform state scan complete") +} + +// TestTFNoState tests the tf command when no Terraform state is found +func TestTFNoState(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + tmpDir := t.TempDir() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.Contains(r.URL.Path, "/api/v4/projects") && + !strings.Contains(r.URL.Path, "/terraform/state") { + w.Header().Set("X-Page", "1") + w.Header().Set("X-Per-Page", "100") + w.Header().Set("X-Total", "0") + w.Header().Set("X-Total-Pages", "1") + w.WriteHeader(http.StatusOK) + w.Write([]byte(`[]`)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "tf", + "--gitlab", server.URL, + "--token", "test-token", + "--output-dir", tmpDir, + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + assert.Contains(t, stdout+stderr, "No Terraform states found") +} + +// TestTFInvalidURL tests the tf command 
with invalid GitLab URL +func TestTFInvalidURL(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + tmpDir := t.TempDir() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "tf", + "--gitlab", "not-a-valid-url", + "--token", "test-token", + "--output-dir", tmpDir, + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.NotNil(t, exitErr) + assert.Contains(t, stdout+stderr, "Invalid GitLab URL") +} + +// TestTFMissingToken tests the tf command without required token +func TestTFMissingToken(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + tmpDir := t.TempDir() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "tf", + "--gitlab", "https://gitlab.example.com", + "--output-dir", tmpDir, + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.NotNil(t, exitErr) + assert.Contains(t, stdout+stderr, "required configuration missing") +} + +// TestTFOutputDir tests that the tf command creates output directory if it doesn't exist +func TestTFOutputDir(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + tmpBase := t.TempDir() + outputDir := filepath.Join(tmpBase, "nested", "output", "dir") + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.Contains(r.URL.Path, "/api/v4/projects") && + !strings.Contains(r.URL.Path, "/terraform/state") { + w.Header().Set("X-Page", "1") + w.Header().Set("X-Per-Page", "100") + w.Header().Set("X-Total", "0") + w.Header().Set("X-Total-Pages", "1") + w.WriteHeader(http.StatusOK) + w.Write([]byte(`[]`)) + return + } + w.WriteHeader(http.StatusNotFound) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "tf", + "--gitlab", server.URL, + "--token", "test-token", + "--output-dir", outputDir, + }, nil, 
10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + _, err := os.Stat(outputDir) + require.NoError(t, err, "Output directory should be created") +}