diff --git a/internal/cmd/github/container/container.go b/internal/cmd/github/container/container.go new file mode 100644 index 00000000..e867b3f0 --- /dev/null +++ b/internal/cmd/github/container/container.go @@ -0,0 +1,109 @@ +package container + +import ( + "github.com/CompassSecurity/pipeleek/pkg/config" + pkgcontainer "github.com/CompassSecurity/pipeleek/pkg/github/container" + pkgscan "github.com/CompassSecurity/pipeleek/pkg/github/scan" + "github.com/rs/zerolog/log" + "github.com/spf13/cobra" +) + +var ( + owned bool + member bool + public bool + projectSearchQuery string + page int + repository string + organization string + orderBy string + dangerousPatterns string +) + +func NewContainerScanCmd() *cobra.Command { + containerCmd := &cobra.Command{ + Use: "container", + Short: "Artipacked auditing commands", + Long: "Commands to audit for artipacked misconfiguration in container builds: when Dockerfiles copy secrets during build and leave them in published images.", + } + + containerCmd.AddCommand(NewArtipackedCmd()) + + return containerCmd +} + +func NewArtipackedCmd() *cobra.Command { + artipackedCmd := &cobra.Command{ + Use: "artipacked [no options!]", + Short: "Audit for artipacked misconfiguration (secrets in container images)", + Long: "Scan GitHub repositories for artipacked misconfiguration: dangerous container build patterns that leak secrets like COPY . /path without .dockerignore", + Run: func(cmd *cobra.Command, args []string) { + if err := config.AutoBindFlags(cmd, map[string]string{ + "github": "github.url", + "token": "github.token", + "owned": "github.container.artipacked.owned", + "member": "github.container.artipacked.member", + "public": "github.container.artipacked.public", + "repo": "github.container.artipacked.repo", + "organization": "github.container.artipacked.organization", + "search": "github.container.artipacked.search", + "page": "github.container.artipacked.page", + "order-by": "github.container.artipacked.order_by", + "dangerous-patterns": "github.container.artipacked.dangerous_patterns", + }); err != nil { + log.Fatal().Err(err).Msg("Failed to bind command flags to configuration keys") + } + + githubUrl := config.GetString("github.url") + githubApiToken := config.GetString("github.token") + + if err := config.RequireConfigKeys("github.url", "github.token"); err != nil { + log.Fatal().Err(err).Msg("required configuration missing") + } + + owned = config.GetBool("github.container.artipacked.owned") + member = config.GetBool("github.container.artipacked.member") + public = config.GetBool("github.container.artipacked.public") + repository = config.GetString("github.container.artipacked.repo") + organization = config.GetString("github.container.artipacked.organization") + projectSearchQuery = config.GetString("github.container.artipacked.search") + page = config.GetInt("github.container.artipacked.page") + orderBy = config.GetString("github.container.artipacked.order_by") + + Scan(githubUrl, githubApiToken) + }, + } + + artipackedCmd.PersistentFlags().BoolVarP(&owned, "owned", "o", false, "Scan user owned repositories only") + artipackedCmd.PersistentFlags().BoolVarP(&member, "member", "m", false, "Scan repositories the user is member of") + artipackedCmd.PersistentFlags().BoolVar(&public, "public", false, "Scan public repositories only") + artipackedCmd.Flags().StringP("github", "g", "", "GitHub instance URL") + artipackedCmd.Flags().StringP("token", "t", "", "GitHub API token") + artipackedCmd.Flags().StringVarP(&repository, "repo", "r", "", "Repository 
to scan (if not set, all repositories will be scanned)") + artipackedCmd.Flags().StringVarP(&organization, "organization", "n", "", "Organization to scan") + artipackedCmd.Flags().StringVarP(&projectSearchQuery, "search", "s", "", "Query string for searching repositories") + artipackedCmd.Flags().IntVarP(&page, "page", "p", 1, "Page number to start fetching repositories from (default 1)") + artipackedCmd.Flags().StringVar(&orderBy, "order-by", "updated", "Order repositories by: stars, forks, updated") + + return artipackedCmd +} + +func Scan(githubUrl, githubApiToken string) { + client := pkgscan.SetupClient(githubApiToken, githubUrl) + + opts := pkgcontainer.ScanOptions{ + GitHubUrl: githubUrl, + GitHubApiToken: githubApiToken, + Owned: owned, + Member: member, + Public: public, + ProjectSearchQuery: projectSearchQuery, + Page: page, + Repository: repository, + Organization: organization, + OrderBy: orderBy, + DangerousPatterns: dangerousPatterns, + } + + pkgcontainer.RunScan(opts, client) +} diff --git a/internal/cmd/github/github.go b/internal/cmd/github/github.go index 6f041447..b2ab3799 100644 --- a/internal/cmd/github/github.go +++ b/internal/cmd/github/github.go @@ -1,6 +1,7 @@ package github import ( + "github.com/CompassSecurity/pipeleek/internal/cmd/github/container" "github.com/CompassSecurity/pipeleek/internal/cmd/github/renovate" "github.com/CompassSecurity/pipeleek/internal/cmd/github/scan" "github.com/spf13/cobra" @@ -15,6 +16,7 @@ func NewGitHubRootCmd() *cobra.Command { ghCmd.AddCommand(scan.NewScanCmd()) ghCmd.AddCommand(renovate.NewRenovateRootCmd()) + ghCmd.AddCommand(container.NewContainerScanCmd()) return ghCmd } diff --git a/internal/cmd/gitlab/container/container.go b/internal/cmd/gitlab/container/container.go new file mode 100644 index 00000000..6388406f --- /dev/null +++ b/internal/cmd/gitlab/container/container.go @@ -0,0 +1,101 @@ +package container + +import ( + "github.com/CompassSecurity/pipeleek/pkg/config" + pkgcontainer "github.com/CompassSecurity/pipeleek/pkg/gitlab/container" + "github.com/rs/zerolog/log" + "github.com/spf13/cobra" + gitlab "gitlab.com/gitlab-org/api/client-go" +) + +var ( + owned bool + member bool + projectSearchQuery string + page int + repository string + namespace string + orderBy string + dangerousPatterns string +) + +func NewContainerScanCmd() *cobra.Command { + containerCmd := &cobra.Command{ + Use: "container", + Short: "Artipacked auditing commands", + Long: "Commands to audit for artipacked misconfiguration in container builds: when Dockerfiles copy secrets during build and leave them in published images.", + } + + containerCmd.AddCommand(NewArtipackedCmd()) + + return containerCmd +} + +func NewArtipackedCmd() *cobra.Command { + artipackedCmd := &cobra.Command{ + Use: "artipacked [no options!]", + Short: "Audit for artipacked misconfiguration (secrets in container images)", + Long: "Scan GitLab projects for artipacked misconfiguration: dangerous container build patterns that leak secrets like COPY . 
/path without .dockerignore", + Run: func(cmd *cobra.Command, args []string) { + if err := config.AutoBindFlags(cmd, map[string]string{ + "gitlab": "gitlab.url", + "token": "gitlab.token", + "owned": "gitlab.container.artipacked.owned", + "member": "gitlab.container.artipacked.member", + "repo": "gitlab.container.artipacked.repo", + "namespace": "gitlab.container.artipacked.namespace", + "search": "gitlab.container.artipacked.search", + "page": "gitlab.container.artipacked.page", + "order-by": "gitlab.container.artipacked.order_by", + "dangerous-patterns": "gitlab.container.artipacked.dangerous_patterns", + }); err != nil { + log.Fatal().Err(err).Msg("Failed to bind command flags to configuration keys") + } + + gitlabUrl := config.GetString("gitlab.url") + gitlabApiToken := config.GetString("gitlab.token") + + if err := config.RequireConfigKeys("gitlab.url", "gitlab.token"); err != nil { + log.Fatal().Err(err).Msg("required configuration missing") + } + + owned = config.GetBool("gitlab.container.artipacked.owned") + member = config.GetBool("gitlab.container.artipacked.member") + repository = config.GetString("gitlab.container.artipacked.repo") + namespace = config.GetString("gitlab.container.artipacked.namespace") + projectSearchQuery = config.GetString("gitlab.container.artipacked.search") + page = config.GetInt("gitlab.container.artipacked.page") + orderBy = config.GetString("gitlab.container.artipacked.order_by") + + Scan(gitlabUrl, gitlabApiToken) + }, + } + + artipackedCmd.PersistentFlags().BoolVarP(&owned, "owned", "o", false, "Scan user owned projects only") + artipackedCmd.PersistentFlags().BoolVarP(&member, "member", "m", false, "Scan projects the user is member of") + artipackedCmd.Flags().StringVarP(&repository, "repo", "r", "", "Repository to scan (if not set, all projects will be scanned)") + artipackedCmd.Flags().StringVarP(&namespace, "namespace", "n", "", "Namespace to scan") + artipackedCmd.Flags().StringVarP(&projectSearchQuery, "search", "s", "", "Query string for searching projects") + artipackedCmd.Flags().IntVarP(&page, "page", "p", 1, "Page number to start fetching projects from (default 1, fetch all pages)") + artipackedCmd.Flags().StringVar(&orderBy, "order-by", "last_activity_at", "Order projects by: id, name, path, created_at, updated_at, star_count, last_activity_at, or similarity") + + return artipackedCmd +} + +func Scan(gitlabUrl, gitlabApiToken string) { + opts := pkgcontainer.ScanOptions{ + GitlabUrl: gitlabUrl, + GitlabApiToken: gitlabApiToken, + Owned: owned, + Member: member, + ProjectSearchQuery: projectSearchQuery, + Page: page, + Repository: repository, + Namespace: namespace, + OrderBy: orderBy, + DangerousPatterns: dangerousPatterns, + MinAccessLevel: int(gitlab.GuestPermissions), + } + + pkgcontainer.RunScan(opts) +} diff --git a/internal/cmd/gitlab/gitlab.go b/internal/cmd/gitlab/gitlab.go index ada24725..92f36328 100644 --- a/internal/cmd/gitlab/gitlab.go +++ b/internal/cmd/gitlab/gitlab.go @@ -2,6 +2,7 @@ package gitlab import ( "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/cicd" + "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/container" "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/enum" "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/renovate" "github.com/CompassSecurity/pipeleek/internal/cmd/gitlab/runners" @@ -45,6 +46,7 @@ For SOCKS5 proxy: glCmd.AddCommand(securefiles.NewSecureFilesCmd()) glCmd.AddCommand(enum.NewEnumCmd()) glCmd.AddCommand(renovate.NewRenovateRootCmd()) + 
glCmd.AddCommand(container.NewContainerScanCmd()) glCmd.AddCommand(cicd.NewCiCdCmd()) glCmd.AddCommand(schedule.NewScheduleCmd()) diff --git a/pkg/container/patterns.go b/pkg/container/patterns.go new file mode 100644 index 00000000..191a067a --- /dev/null +++ b/pkg/container/patterns.go @@ -0,0 +1,31 @@ +package container + +import ( + "regexp" +) + +// DefaultPatterns returns the default dangerous patterns to detect in Dockerfiles +func DefaultPatterns() []Pattern { + return []Pattern{ + { + Name: "copy_all_to_root", + Pattern: regexp.MustCompile(`(?i)^COPY\s+\./?(\s+/\s*)?$`), + Description: "Copies entire working directory to root - exposes all files including secrets", + }, + { + Name: "copy_all_anywhere", + Pattern: regexp.MustCompile(`(?i)^COPY\s+(\./?|\*|\.\/\*|\.\*)(\s+|$)`), + Description: "Copies entire working directory into container - may expose sensitive files", + }, + { + Name: "add_all_to_root", + Pattern: regexp.MustCompile(`(?i)^ADD\s+\./?(\s+/\s*)?$`), + Description: "Adds entire working directory to root - exposes all files including secrets", + }, + { + Name: "add_all_anywhere", + Pattern: regexp.MustCompile(`(?i)^ADD\s+(\./?|\*|\.\/\*|\.\*)(\s+|$)`), + Description: "Adds entire working directory into container - may expose sensitive files", + }, + } +} diff --git a/pkg/container/scanner.go b/pkg/container/scanner.go new file mode 100644 index 00000000..ce855c14 --- /dev/null +++ b/pkg/container/scanner.go @@ -0,0 +1,110 @@ +package container + +import ( + "regexp" + "strings" +) + +// IsMultistage checks if Dockerfile content uses multistage builds by counting FROM statements +func IsMultistage(content string) bool { + lines := strings.Split(content, "\n") + + fromCount := 0 + fromPattern := regexp.MustCompile(`(?i)^\s*FROM\s+`) + + for _, line := range lines { + trimmedLine := strings.TrimSpace(line) + // Skip empty lines and comments + if trimmedLine == "" || strings.HasPrefix(trimmedLine, "#") { + continue + } + + if fromPattern.MatchString(line) { + fromCount++ + if fromCount > 1 { + return true + } + } + } + + return false +} + +// PatternMatch represents a matched pattern with details +type PatternMatch struct { + PatternName string + MatchedLine string +} + +// ScanDockerfileForPatterns scans Dockerfile content and returns all pattern matches +func ScanDockerfileForPatterns(content string, patterns []Pattern) []PatternMatch { + var matches []PatternMatch + lines := strings.Split(content, "\n") + + // Check against all patterns + for _, pattern := range patterns { + // Search through lines to find a match + for _, line := range lines { + trimmedLine := strings.TrimSpace(line) + // Skip empty lines and comments + if trimmedLine == "" || strings.HasPrefix(trimmedLine, "#") { + continue + } + + if pattern.Pattern.MatchString(line) { + matches = append(matches, PatternMatch{ + PatternName: pattern.Name, + MatchedLine: strings.TrimSpace(line), + }) + break // Only match once per pattern + } + } + } + + return matches +} + +// ScanDockerfileContent checks a Dockerfile's content against patterns and returns matched lines +// Deprecated: Use ScanDockerfileForPatterns instead +func ScanDockerfileContent(content string, patterns []Pattern) []string { + var matches []string + lines := strings.Split(content, "\n") + + // Check against all patterns + for _, pattern := range patterns { + // Search through lines to find a match + for _, line := range lines { + trimmedLine := strings.TrimSpace(line) + // Skip empty lines and comments + if trimmedLine == "" || 
strings.HasPrefix(trimmedLine, "#") { + continue + } + + if pattern.Pattern.MatchString(line) { + matches = append(matches, strings.TrimSpace(line)) + break + } + } + } + + return matches +} + +// ScanDockerfileForPattern checks if a Dockerfile matches a specific pattern +func ScanDockerfileForPattern(content string, pattern Pattern) bool { + lines := strings.Split(content, "\n") + + for _, line := range lines { + trimmedLine := strings.TrimSpace(line) + // Skip empty lines and comments + if trimmedLine == "" || strings.HasPrefix(trimmedLine, "#") { + continue + } + + if pattern.Pattern.MatchString(line) { + return true + } + } + + return false +} diff --git a/pkg/container/types.go b/pkg/container/types.go new file mode 100644 index 00000000..6cdb6eca --- /dev/null +++ b/pkg/container/types.go @@ -0,0 +1,28 @@ +package container + +import "regexp" + +// Finding represents a dangerous pattern found in a Dockerfile/Containerfile +type Finding struct { + ProjectPath string + ProjectURL string + FilePath string + FileName string + MatchedPattern string + LineContent string + IsMultistage bool + RegistryMetadata *RegistryMetadata +} + +// RegistryMetadata contains information about the most recent container image in the registry +type RegistryMetadata struct { + TagName string + LastUpdate string +} + +// Pattern represents a dangerous pattern to detect +type Pattern struct { + Name string + Pattern *regexp.Regexp + Description string +} diff --git a/pkg/github/container/patterns.go b/pkg/github/container/patterns.go new file mode 100644 index 00000000..ab56fc6b --- /dev/null +++ b/pkg/github/container/patterns.go @@ -0,0 +1,10 @@ +package container + +import ( + sharedcontainer "github.com/CompassSecurity/pipeleek/pkg/container" +) + +// DefaultPatterns returns the default dangerous patterns by delegating to the shared package +func DefaultPatterns() []sharedcontainer.Pattern { + return sharedcontainer.DefaultPatterns() +} diff --git a/pkg/github/container/scanner.go b/pkg/github/container/scanner.go new file mode 100644 index 00000000..e85aaa18 --- /dev/null +++ b/pkg/github/container/scanner.go @@ -0,0 +1,344 @@ +package container + +import ( + "context" + "strings" + + sharedcontainer "github.com/CompassSecurity/pipeleek/pkg/container" + "github.com/google/go-github/v69/github" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +// RunScan performs the container scan with the given options +func RunScan(opts ScanOptions, client *github.Client) { + ctx := context.Background() + + patterns := sharedcontainer.DefaultPatterns() + log.Info().Int("pattern_count", len(patterns)).Msg("Loaded container scan patterns") + + if opts.Repository != "" { + scanSingleRepo(ctx, client, opts.Repository, patterns) + } else if opts.Organization != "" { + scanOrganization(ctx, client, opts.Organization, patterns, opts) + } else { + fetchRepositories(ctx, client, patterns, opts) + } + + log.Info().Msg("Container scan complete") +} + +func scanSingleRepo(ctx context.Context, client *github.Client, repoName string, patterns []sharedcontainer.Pattern) { + log.Info().Str("repository", repoName).Msg("Scanning specific repository for dangerous container patterns") + + parts := strings.Split(repoName, "/") + if len(parts) != 2 { + log.Fatal().Str("repository", repoName).Msg("Invalid repository format, expected owner/repo") + } + owner, repo := parts[0], parts[1] + + repository, _, err := client.Repositories.Get(ctx, owner, repo) + if err != nil { + log.Fatal().Stack().Err(err).Msg("Failed fetching 
repository") + } + + scanRepository(ctx, client, repository, patterns) +} + +func scanOrganization(ctx context.Context, client *github.Client, orgName string, patterns []sharedcontainer.Pattern, opts ScanOptions) { + log.Info().Str("organization", orgName).Msg("Scanning organization for dangerous container patterns") + + listOpts := &github.RepositoryListByOrgOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + Page: opts.Page, + }, + } + + repos, _, err := client.Repositories.ListByOrg(ctx, orgName, listOpts) + if err != nil { + log.Fatal().Stack().Err(err).Msg("Failed fetching organization repositories") + } + + for _, repo := range repos { + log.Debug().Str("url", repo.GetHTMLURL()).Msg("Check repository") + scanRepository(ctx, client, repo, patterns) + } +} + +func fetchRepositories(ctx context.Context, client *github.Client, patterns []sharedcontainer.Pattern, opts ScanOptions) { + log.Info().Msg("Fetching repositories") + + searchOpts := &github.SearchOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + Page: opts.Page, + }, + } + + var query string + if opts.ProjectSearchQuery != "" { + query = opts.ProjectSearchQuery + } else { + // Default query based on options + if opts.Owned { + query = "user:@me" + } else if opts.Member { + query = "user:@me" + } + } + + // Add public filter if requested + if opts.Public { + if query != "" { + query += " is:public" + } else { + query = "is:public" + } + } + + if query == "" { + log.Fatal().Msg("No search criteria specified. Use --owned, --member, --public, --org, --repo, or --search") + } + + result, _, err := client.Search.Repositories(ctx, query, searchOpts) + if err != nil { + log.Fatal().Stack().Err(err).Msg("Failed searching repositories") + } + + for _, repo := range result.Repositories { + log.Debug().Str("url", repo.GetHTMLURL()).Msg("Check repository") + scanRepository(ctx, client, repo, patterns) + } +} + +func scanRepository(ctx context.Context, client *github.Client, repo *github.Repository, patterns []sharedcontainer.Pattern) { + log.Debug().Str("repository", repo.GetFullName()).Msg("Scanning repository for Dockerfiles") + + owner := repo.GetOwner().GetLogin() + repoName := repo.GetName() + + // Find all Dockerfiles in the repository recursively + dockerfiles := findDockerfiles(ctx, client, owner, repoName) + + if len(dockerfiles) == 0 { + log.Trace().Str("repository", repo.GetFullName()).Msg("No Dockerfile or Containerfile found") + return + } + + log.Debug().Str("repository", repo.GetFullName()).Int("dockerfile_count", len(dockerfiles)).Msg("Found Dockerfiles") + + // Scan all found Dockerfiles + for _, dockerfile := range dockerfiles { + isMultistage := checkIsMultistage(dockerfile.Content) + scanDockerfile(ctx, client, repo, dockerfile.Content, dockerfile.Path, patterns, isMultistage) + } +} + +// DockerfileMatch represents a found Dockerfile +type DockerfileMatch struct { + Path string + Content *github.RepositoryContent +} + +// findDockerfiles recursively searches for all Dockerfile/Containerfile files in the repository +func findDockerfiles(ctx context.Context, client *github.Client, owner, repo string) []DockerfileMatch { + var dockerfiles []DockerfileMatch + const maxDockerfiles = 50 // Limit to prevent scanning huge repos + + dockerfileNames := []string{"Dockerfile", "Containerfile", "dockerfile", "containerfile"} + + // Use GitHub Search API to find files matching Dockerfile patterns + for _, name := range dockerfileNames { + if len(dockerfiles) >= maxDockerfiles { + break + } + + // Search for this 
filename in the repository + query := strings.Join([]string{ + "repo:" + owner + "/" + repo, + "filename:" + name, + }, " ") + + results, _, err := client.Search.Code(ctx, query, &github.SearchOptions{ + ListOptions: github.ListOptions{ + PerPage: 50, + Page: 1, + }, + }) + if err != nil { + log.Trace().Str("repository", owner+"/"+repo).Str("filename", name).Err(err).Msg("Error searching for Dockerfile") + continue + } + + if results.GetTotal() == 0 { + continue + } + + // Fetch each found file's content + for _, result := range results.CodeResults { + if len(dockerfiles) >= maxDockerfiles { + break + } + + path := result.GetPath() + fileContent, _, _, err := client.Repositories.GetContents(ctx, owner, repo, path, nil) + if err != nil { + log.Trace().Str("repository", owner+"/"+repo).Str("file", path).Err(err).Msg("Error fetching Dockerfile content") + continue + } + + if fileContent != nil { + dockerfiles = append(dockerfiles, DockerfileMatch{ + Path: path, + Content: fileContent, + }) + } + } + } + + return dockerfiles +} + +// checkIsMultistage checks if the Dockerfile uses multistage builds +func checkIsMultistage(fileContent *github.RepositoryContent) bool { + content, err := fileContent.GetContent() + if err != nil { + return false + } + + return sharedcontainer.IsMultistage(content) + +} + +func scanDockerfile(ctx context.Context, client *github.Client, repo *github.Repository, fileContent *github.RepositoryContent, fileName string, patterns []sharedcontainer.Pattern, isMultistage bool) { + log.Debug().Str("repository", repo.GetFullName()).Str("file", fileName).Msg("Scanning Dockerfile") + + content, err := fileContent.GetContent() + if err != nil { + log.Error().Str("repository", repo.GetFullName()).Str("file", fileName).Err(err).Msg("Failed to get file content") + return + } + + // Use shared scanner to find pattern matches + matches := sharedcontainer.ScanDockerfileForPatterns(content, patterns) + + for _, match := range matches { + finding := sharedcontainer.Finding{ + ProjectPath: repo.GetFullName(), + ProjectURL: repo.GetHTMLURL(), + FilePath: fileName, + FileName: fileName, + MatchedPattern: match.PatternName, + LineContent: match.MatchedLine, + IsMultistage: isMultistage, + } + + // Fetch registry metadata for the most recent container + finding.RegistryMetadata = fetchRegistryMetadata(ctx, client, repo) + + logFinding(finding) + } +} + +func logFinding(finding sharedcontainer.Finding) { + logEvent := log.WithLevel(zerolog.InfoLevel). + Str("url", finding.ProjectURL). + Str("file", finding.FilePath). + Str("content", finding.LineContent). + Bool("is_multistage", finding.IsMultistage) + + // Add registry metadata if available + if finding.RegistryMetadata != nil { + logEvent = logEvent. + Str("registry_tag", finding.RegistryMetadata.TagName). 
+ Str("registry_last_update", finding.RegistryMetadata.LastUpdate) + } + + logEvent.Msg("Identified") +} + +// fetchRegistryMetadata retrieves metadata about the most recent container image in the repository's registry +func fetchRegistryMetadata(ctx context.Context, client *github.Client, repo *github.Repository) *sharedcontainer.RegistryMetadata { + owner := repo.GetOwner().GetLogin() + repoName := repo.GetName() + + // List container packages for the repository + packages, _, err := client.Organizations.ListPackages(ctx, owner, &github.PackageListOptions{ + PackageType: github.Ptr("container"), + }) + if err != nil { + log.Trace().Str("repository", repo.GetFullName()).Err(err).Msg("Error accessing container registry") + return nil + } + + if len(packages) == 0 { + log.Trace().Str("repository", repo.GetFullName()).Msg("No container packages found in registry") + return nil + } + + // Find package matching the repository name + var targetPackage *github.Package + for _, pkg := range packages { + if strings.Contains(strings.ToLower(pkg.GetName()), strings.ToLower(repoName)) { + targetPackage = pkg + break + } + } + + if targetPackage == nil { + // If no exact match, use the first package + targetPackage = packages[0] + } + + // Get package versions (tags) + versions, _, err := client.Organizations.PackageGetAllVersions(ctx, owner, "container", targetPackage.GetName(), &github.PackageListOptions{ + State: github.Ptr("active"), + }) + if err != nil || len(versions) == 0 { + log.Trace().Str("repository", repo.GetFullName()).Msg("No package versions found") + return nil + } + + // Find the most recent version + var mostRecentVersion *github.PackageVersion + for _, ver := range versions { + if ver.GetCreatedAt().Time.After(mostRecentVersion.GetCreatedAt().Time) || mostRecentVersion == nil { + mostRecentVersion = ver + } + } + + if mostRecentVersion == nil { + return nil + } + + metadata := &sharedcontainer.RegistryMetadata{ + TagName: extractTag(mostRecentVersion), + } + + if !mostRecentVersion.GetCreatedAt().IsZero() { + metadata.LastUpdate = mostRecentVersion.GetCreatedAt().Format("2006-01-02T15:04:05Z07:00") + } + + log.Trace(). + Str("repository", repo.GetFullName()). + Str("tag_name", metadata.TagName). + Str("last_update", metadata.LastUpdate). + Msg("Tag details from API") + + log.Debug(). + Str("repository", repo.GetFullName()). + Str("package", targetPackage.GetName()). + Str("tag", metadata.TagName). 
+ Msg("Fetched registry metadata") + + return metadata +} + +func extractTag(version *github.PackageVersion) string { + if len(version.Metadata.Container.Tags) > 0 { + return version.Metadata.Container.Tags[0] + } + // Fallback to version name + return version.GetName() +} diff --git a/pkg/github/container/types.go b/pkg/github/container/types.go new file mode 100644 index 00000000..04db795e --- /dev/null +++ b/pkg/github/container/types.go @@ -0,0 +1,16 @@ +package container + +// ScanOptions contains all options for the container scan command +type ScanOptions struct { + GitHubUrl string + GitHubApiToken string + Owned bool + Member bool + Public bool + ProjectSearchQuery string + Page int + Repository string + Organization string + OrderBy string + DangerousPatterns string +} diff --git a/pkg/gitlab/container/patterns.go b/pkg/gitlab/container/patterns.go new file mode 100644 index 00000000..ab56fc6b --- /dev/null +++ b/pkg/gitlab/container/patterns.go @@ -0,0 +1,10 @@ +package container + +import ( + sharedcontainer "github.com/CompassSecurity/pipeleek/pkg/container" +) + +// DefaultPatterns returns the default dangerous patterns by delegating to the shared package +func DefaultPatterns() []sharedcontainer.Pattern { + return sharedcontainer.DefaultPatterns() +} diff --git a/pkg/gitlab/container/scanner.go b/pkg/gitlab/container/scanner.go new file mode 100644 index 00000000..083725de --- /dev/null +++ b/pkg/gitlab/container/scanner.go @@ -0,0 +1,352 @@ +package container + +import ( + "encoding/base64" + "strings" + "time" + + sharedcontainer "github.com/CompassSecurity/pipeleek/pkg/container" + "github.com/CompassSecurity/pipeleek/pkg/gitlab/util" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" + gitlab "gitlab.com/gitlab-org/api/client-go" +) + +// RunScan performs the container scan with the given options +func RunScan(opts ScanOptions) { + git, err := util.GetGitlabClient(opts.GitlabApiToken, opts.GitlabUrl) + if err != nil { + log.Fatal().Stack().Err(err).Msg("Failed creating gitlab client") + } + + validateOrderBy(opts.OrderBy) + + patterns := sharedcontainer.DefaultPatterns() + log.Info().Int("pattern_count", len(patterns)).Msg("Loaded container scan patterns") + + if opts.Repository != "" { + scanSingleProject(git, opts.Repository, patterns, opts) + } else if opts.Namespace != "" { + scanNamespace(git, opts.Namespace, patterns, opts) + } else { + fetchProjects(git, patterns, opts) + } + + log.Info().Msg("Container scan complete") +} + +func scanSingleProject(git *gitlab.Client, projectName string, patterns []sharedcontainer.Pattern, opts ScanOptions) { + log.Info().Str("repository", projectName).Msg("Scanning specific repository for dangerous container patterns") + project, resp, err := git.Projects.GetProject(projectName, &gitlab.GetProjectOptions{}) + if err != nil { + log.Fatal().Stack().Err(err).Msg("Failed fetching project by repository name") + } + if resp.StatusCode == 404 { + log.Fatal().Msg("Project not found") + } + scanProject(git, project, patterns) +} + +func scanNamespace(git *gitlab.Client, namespace string, patterns []sharedcontainer.Pattern, opts ScanOptions) { + log.Info().Str("namespace", namespace).Msg("Scanning specific namespace for dangerous container patterns") + group, _, err := git.Groups.GetGroup(namespace, &gitlab.GetGroupOptions{}) + if err != nil { + log.Fatal().Stack().Err(err).Msg("Failed fetching namespace") + } + + projectOpts := &gitlab.ListGroupProjectsOptions{ + ListOptions: gitlab.ListOptions{ + PerPage: 100, + Page: 
int64(opts.Page), + }, + OrderBy: gitlab.Ptr(opts.OrderBy), + Owned: gitlab.Ptr(opts.Owned), + Search: gitlab.Ptr(opts.ProjectSearchQuery), + WithShared: gitlab.Ptr(true), + IncludeSubGroups: gitlab.Ptr(true), + } + + err = util.IterateGroupProjects(git, group.ID, projectOpts, func(project *gitlab.Project) error { + log.Debug().Str("url", project.WebURL).Msg("Check project") + scanProject(git, project, patterns) + return nil + }) + if err != nil { + log.Error().Stack().Err(err).Msg("Failed iterating group projects") + return + } + + log.Info().Msg("Fetched all namespace projects") +} + +func fetchProjects(git *gitlab.Client, patterns []sharedcontainer.Pattern, opts ScanOptions) { + log.Info().Msg("Fetching projects") + + projectOpts := &gitlab.ListProjectsOptions{ + ListOptions: gitlab.ListOptions{ + PerPage: 100, + Page: int64(opts.Page), + }, + OrderBy: gitlab.Ptr(opts.OrderBy), + Owned: gitlab.Ptr(opts.Owned), + Membership: gitlab.Ptr(opts.Member), + Search: gitlab.Ptr(opts.ProjectSearchQuery), + } + + err := util.IterateProjects(git, projectOpts, func(project *gitlab.Project) error { + log.Debug().Str("url", project.WebURL).Msg("Check project") + scanProject(git, project, patterns) + return nil + }) + if err != nil { + log.Error().Stack().Err(err).Msg("Failed iterating projects") + return + } + + log.Info().Msg("Fetched all projects") +} + +func scanProject(git *gitlab.Client, project *gitlab.Project, patterns []sharedcontainer.Pattern) { + log.Debug().Str("project", project.PathWithNamespace).Msg("Scanning project for Dockerfiles") + + // Find all Dockerfiles in the project recursively + dockerfiles := findDockerfiles(git, project) + + if len(dockerfiles) == 0 { + log.Trace().Str("project", project.PathWithNamespace).Msg("No Dockerfile or Containerfile found") + return + } + + log.Debug().Str("project", project.PathWithNamespace).Int("dockerfile_count", len(dockerfiles)).Msg("Found Dockerfiles") + + // Scan all found Dockerfiles + for _, dockerfile := range dockerfiles { + isMultistage := checkIsMultistage(dockerfile) + scanDockerfile(git, project, dockerfile, dockerfile.FileName, patterns, isMultistage) + } +} + +// findDockerfiles recursively searches for Dockerfile/Containerfile files up to 2 levels deep +func findDockerfiles(git *gitlab.Client, project *gitlab.Project) []*gitlab.File { + const maxDockerfiles = 50 // Limit to prevent scanning huge repos + const maxDepth = 2 // Only search up to 2 levels deep (root and 1 subfolder level) + + dockerfileNames := map[string]bool{ + "Dockerfile": true, + "Containerfile": true, + "dockerfile": true, + "containerfile": true, + } + + startTime := time.Now() + + var dockerfiles []*gitlab.File + + // Use recursive tree API to fetch entire tree at once with depth limit + treeOpts := &gitlab.ListTreeOptions{ + Recursive: gitlab.Ptr(true), + ListOptions: gitlab.ListOptions{ + PerPage: 100, + Page: 1, + }, + } + + tree, resp, err := git.Repositories.ListTree(project.ID, treeOpts) + if err != nil { + log.Trace().Str("project", project.PathWithNamespace).Err(err).Msg("Error listing recursive tree") + return dockerfiles + } + + if resp == nil || len(tree) == 0 { + log.Trace().Str("project", project.PathWithNamespace).Msg("No files found in tree") + return dockerfiles + } + + // Filter nodes by depth and match Dockerfile names + for _, node := range tree { + if len(dockerfiles) >= maxDockerfiles { + break + } + + // Only process files (blobs) + if node.Type != "blob" { + continue + } + + // Check depth: count slashes in path + // Root level = 0 
slashes, first subdir = 1 slash, second subdir = 2 slashes + depth := strings.Count(node.Path, "/") + if depth > maxDepth-1 { + continue // Skip files deeper than maxDepth levels + } + + // Get just the filename from the path + parts := strings.Split(node.Path, "/") + fileName := parts[len(parts)-1] + + if dockerfileNames[fileName] { + // Fetch the file content + file, resp, err := git.RepositoryFiles.GetFile(project.ID, node.Path, &gitlab.GetFileOptions{Ref: gitlab.Ptr("HEAD")}) + if err != nil || resp.StatusCode != 200 { + log.Trace().Str("project", project.PathWithNamespace).Str("file", node.Path).Err(err).Msg("Error fetching Dockerfile") + continue + } + + // Store the path in FileName field + file.FileName = node.Path + dockerfiles = append(dockerfiles, file) + log.Trace().Str("project", project.PathWithNamespace).Str("file", node.Path).Msg("Found Dockerfile") + } + } + + elapsed := time.Since(startTime) + log.Debug().Str("project", project.PathWithNamespace).Int("found", len(dockerfiles)).Dur("elapsed_ms", elapsed).Msg("Dockerfile search complete") + return dockerfiles +} + +// checkIsMultistage checks if the Dockerfile uses multistage builds +func checkIsMultistage(file *gitlab.File) bool { + // Decode the file content + decodedContent, err := base64.StdEncoding.DecodeString(file.Content) + if err != nil { + return false + } + + return sharedcontainer.IsMultistage(string(decodedContent)) +} + +func scanDockerfile(git *gitlab.Client, project *gitlab.Project, file *gitlab.File, fileName string, patterns []sharedcontainer.Pattern, isMultistage bool) { + log.Debug().Str("project", project.PathWithNamespace).Str("file", fileName).Msg("Scanning Dockerfile") + + // The GitLab API returns file content as base64 encoded + decodedContent, err := base64.StdEncoding.DecodeString(file.Content) + if err != nil { + log.Error().Str("project", project.PathWithNamespace).Str("file", fileName).Err(err).Msg("Failed to decode file content") + return + } + + content := string(decodedContent) + + // Use shared scanner to find pattern matches + matches := sharedcontainer.ScanDockerfileForPatterns(content, patterns) + + for _, match := range matches { + finding := sharedcontainer.Finding{ + ProjectPath: project.PathWithNamespace, + ProjectURL: project.WebURL, + FilePath: fileName, + FileName: fileName, + MatchedPattern: match.PatternName, + LineContent: match.MatchedLine, + IsMultistage: isMultistage, + } + + // Fetch registry metadata for the most recent container + finding.RegistryMetadata = fetchRegistryMetadata(git, project) + + logFinding(finding) + } +} + +func logFinding(finding sharedcontainer.Finding) { + logEvent := log.WithLevel(zerolog.InfoLevel). + Str("url", finding.ProjectURL). + Str("file", finding.FilePath). + Str("content", finding.LineContent). + Bool("is_multistage", finding.IsMultistage) + + // Add registry metadata if available + if finding.RegistryMetadata != nil { + logEvent = logEvent. + Str("registry_tag", finding.RegistryMetadata.TagName). 
+ Str("registry_last_update", finding.RegistryMetadata.LastUpdate) + } + + logEvent.Msg("Identified") +} + +// fetchRegistryMetadata retrieves metadata about the most recent container image in the project's registry +func fetchRegistryMetadata(git *gitlab.Client, project *gitlab.Project) *sharedcontainer.RegistryMetadata { + startTime := time.Now() + + // List container repositories for the project + repos, resp, err := git.ContainerRegistry.ListProjectRegistryRepositories(project.ID, &gitlab.ListProjectRegistryRepositoriesOptions{ + ListOptions: gitlab.ListOptions{ + PerPage: 10, + Page: 1, + }, + }) + if err != nil { + log.Trace().Str("project", project.PathWithNamespace).Err(err).Msg("Error accessing container registry") + return nil + } + if resp != nil && resp.StatusCode != 200 { + log.Trace().Str("project", project.PathWithNamespace).Int("status", resp.StatusCode).Msg("Container registry not accessible") + return nil + } + + if len(repos) == 0 { + log.Trace().Str("project", project.PathWithNamespace).Msg("No container repositories found in registry") + return nil + } + + // Get the first repository (most recent activity) + repo := repos[0] + + // List tags for this repository (use list data directly, no per-tag detail calls) + tags, resp, err := git.ContainerRegistry.ListRegistryRepositoryTags(project.ID, repo.ID, &gitlab.ListRegistryRepositoryTagsOptions{ + ListOptions: gitlab.ListOptions{ + PerPage: 100, + Page: 1, + }, + }) + if err != nil || resp.StatusCode != 200 || len(tags) == 0 { + log.Trace().Str("project", project.PathWithNamespace).Str("repo", repo.Path).Msg("No tags found in registry repository") + return nil + } + + // Find the most recent tag using data from the list (no per-tag detail calls) + var mostRecentTag *gitlab.RegistryRepositoryTag + for _, t := range tags { + if t.CreatedAt != nil { + if mostRecentTag == nil || (mostRecentTag.CreatedAt != nil && t.CreatedAt.After(*mostRecentTag.CreatedAt)) { + mostRecentTag = t + } + } + } + + if mostRecentTag == nil { + log.Trace().Str("project", project.PathWithNamespace).Str("repo", repo.Path).Msg("No tags with timestamps found") + return nil + } + + metadata := &sharedcontainer.RegistryMetadata{ + TagName: mostRecentTag.Name, + } + + // Format the timestamp + if mostRecentTag.CreatedAt != nil { + metadata.LastUpdate = mostRecentTag.CreatedAt.Format("2006-01-02T15:04:05Z07:00") + } + + elapsed := time.Since(startTime) + log.Debug(). + Str("project", project.PathWithNamespace). + Str("repo", repo.Path). + Str("tag", mostRecentTag.Name). + Str("last_update", metadata.LastUpdate). + Dur("elapsed_ms", elapsed). 
+ Msg("Fetched registry metadata") + + return metadata +} + +func validateOrderBy(orderBy string) { + validValues := map[string]bool{ + "id": true, "name": true, "path": true, "created_at": true, + "updated_at": true, "star_count": true, "last_activity_at": true, "similarity": true, + } + if !validValues[orderBy] { + log.Fatal().Str("order_by", orderBy).Msg("Invalid order-by value") + } +} diff --git a/pkg/gitlab/container/types.go b/pkg/gitlab/container/types.go new file mode 100644 index 00000000..628b4a05 --- /dev/null +++ b/pkg/gitlab/container/types.go @@ -0,0 +1,16 @@ +package container + +// ScanOptions contains all options for the container scan command +type ScanOptions struct { + GitlabUrl string + GitlabApiToken string + Owned bool + Member bool + ProjectSearchQuery string + Page int + Repository string + Namespace string + OrderBy string + DangerousPatterns string + MinAccessLevel int +} diff --git a/tests/e2e/github/container/container_test.go b/tests/e2e/github/container/container_test.go new file mode 100644 index 00000000..c23fbe68 --- /dev/null +++ b/tests/e2e/github/container/container_test.go @@ -0,0 +1,476 @@ +//go:build e2e + +package container + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/CompassSecurity/pipeleek/tests/e2e/internal/testutil" + "github.com/stretchr/testify/assert" +) + +// TestContainerScanBasic tests basic container scan functionality with a mock GitHub server +func TestContainerScanBasic(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Search repositories endpoint + if strings.Contains(r.URL.Path, "/search/repositories") { + searchResultJSON := `{ +"total_count": 1, +"incomplete_results": false, +"items": [ +{ +"id": 1, +"name": "dangerous-app", +"full_name": "test-user/dangerous-app", +"html_url": "http://localhost/test-user/dangerous-app", +"owner": { +"login": "test-user" +} +} +] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(searchResultJSON)) + return + } + + // Search code endpoint (find Dockerfile) + if strings.Contains(r.URL.Path, "/search/code") { + codeResultJSON := `{ +"total_count": 1, +"incomplete_results": false, +"items": [ +{ +"name": "Dockerfile", +"path": "Dockerfile", +"sha": "abc123", +"url": "http://localhost/test-user/dangerous-app/contents/Dockerfile", +"repository": { +"id": 1, +"name": "dangerous-app", +"full_name": "test-user/dangerous-app" +} +} +] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(codeResultJSON)) + return + } + + // Get repository endpoint + if strings.Contains(r.URL.Path, "/repos/test-user/dangerous-app") && + !strings.Contains(r.URL.Path, "/contents") { + repoJSON := `{ +"id": 1, +"name": "dangerous-app", +"full_name": "test-user/dangerous-app", +"html_url": "http://localhost/test-user/dangerous-app", +"owner": { +"login": "test-user", +"type": "User" +} +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(repoJSON)) + return + } + + // Get file contents endpoint - return base64 encoded dangerous Dockerfile + if strings.Contains(r.URL.Path, "/repos/test-user/dangerous-app/contents/Dockerfile") { + fileJSON := `{ +"name": "Dockerfile", +"path": "Dockerfile", +"sha": "abc123", +"size": 150, +"type": "file", +"encoding": "base64", +"content": 
"RlJPTSB1YnVudHU6MjIuMDQKUlVOIGFwdC1nZXQgdXBkYXRlICYmIGFwdC1nZXQgaW5zdGFsbCAteSBjdXJsCkNPUFkgLiAvYXBwCldPUktESVIgL2FwcApSVU4gLi9pbnN0YWxsLnNoCkVOVFJZUE9JTlQgWyIuL3N0YXJ0LnNoIl0=" +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(fileJSON)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gh", "container", "artipacked", + "--github", server.URL, + "--token", "test-token", + "--public", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "Identified") + assert.Contains(t, output, "test-user/dangerous-app") +} + +// TestContainerScanOwned tests scanning only owned repositories +func TestContainerScanOwned(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Repository search endpoint + if strings.Contains(r.URL.Path, "/search/repositories") { + searchResultJSON := `{ +"total_count": 1, +"incomplete_results": false, +"items": [ +{ +"id": 1, +"name": "my-repo", +"full_name": "test-user/my-repo", +"html_url": "http://localhost/test-user/my-repo", +"owner": { +"login": "test-user" +} +} +] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(searchResultJSON)) + return + } + + // Code search endpoint + if strings.Contains(r.URL.Path, "/search/code") { + codeResultJSON := `{ +"total_count": 1, +"incomplete_results": false, +"items": [ +{ +"name": "Dockerfile", +"path": "Dockerfile" +} +] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(codeResultJSON)) + return + } + + // Get repository endpoint + if strings.Contains(r.URL.Path, "/repos/test-user/my-repo") && + !strings.Contains(r.URL.Path, "/contents") { + repoJSON := `{ +"id": 1, +"name": "my-repo", +"full_name": "test-user/my-repo", +"html_url": "http://localhost/test-user/my-repo", +"owner": { +"login": "test-user", +"type": "User" +} +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(repoJSON)) + return + } + + // File contents endpoint + if strings.Contains(r.URL.Path, "/repos/test-user/my-repo/contents/Dockerfile") { + fileJSON := `{ +"name": "Dockerfile", +"path": "Dockerfile", +"encoding": "base64", +"content": "RlJPTSB1YnVudHUKQ09QWSAuIC8KUlVOIGVjaG8gZG9uZQ==" +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(fileJSON)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gh", "container", "artipacked", + "--github", server.URL, + "--token", "test-token", + "--owned", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "Identified") +} + +// TestContainerScanOrganization tests scanning a specific organization +func TestContainerScanOrganization(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + // Organization repositories endpoint + if strings.Contains(r.URL.Path, "/orgs/my-org/repos") { + reposJSON := `[ +{ +"id": 1, +"name": "test-project", +"full_name": "my-org/test-project", +"html_url": "http://localhost/my-org/test-project", +"owner": { +"login": "my-org", +"type": "Organization" +} +} +]` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(reposJSON)) + return + } + + // Code search endpoint + if strings.Contains(r.URL.Path, "/search/code") { + codeResultJSON := `{ +"total_count": 1, +"incomplete_results": false, +"items": [ +{ +"name": "Dockerfile", +"path": "Dockerfile" +} +] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(codeResultJSON)) + return + } + + // File contents endpoint + if strings.Contains(r.URL.Path, "/repos/my-org/test-project/contents/Dockerfile") { + fileJSON := `{ +"name": "Dockerfile", +"path": "Dockerfile", +"encoding": "base64", +"content": "RlJPTSBhbHBpbmUKQ09QWSAuIC90ZXN0CkNNRCBbXCIvYmluL3NoXCJd" +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(fileJSON)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gh", "container", "artipacked", + "--github", server.URL, + "--token", "test-token", + "--organization", "my-org", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "Identified") + assert.Contains(t, output, "my-org/test-project") +} + +// TestContainerScanSingleRepo tests scanning a single repository +func TestContainerScanSingleRepo(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Single repository endpoint + if strings.Contains(r.URL.Path, "/repos/test-user/test-repo") && + !strings.Contains(r.URL.Path, "/contents") { + repoJSON := `{ +"id": 1, +"name": "test-repo", +"full_name": "test-user/test-repo", +"html_url": "http://localhost/test-user/test-repo", +"owner": { +"login": "test-user", +"type": "User" +} +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(repoJSON)) + return + } + + // Code search endpoint + if strings.Contains(r.URL.Path, "/search/code") { + codeResultJSON := `{ +"total_count": 1, +"incomplete_results": false, +"items": [ +{ +"name": "Dockerfile", +"path": "Dockerfile" +} +] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(codeResultJSON)) + return + } + + // File contents endpoint + if strings.Contains(r.URL.Path, "/repos/test-user/test-repo/contents/Dockerfile") { + fileJSON := `{ +"name": "Dockerfile", +"path": "Dockerfile", +"encoding": "base64", +"content": "RlJPTSB1YnVudHUKQUREIC4gL2FwcApSVU4gbWFrZSBidWlsZA==" +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(fileJSON)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gh", "container", "artipacked", + "--github", server.URL, + "--token", "test-token", + "--repo", 
"test-user/test-repo", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "Identified") + assert.Contains(t, output, "test-user/test-repo") +} + +// TestContainerScanNoDockerfile tests handling of repositories without Dockerfile +func TestContainerScanNoDockerfile(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Repository search endpoint + if strings.Contains(r.URL.Path, "/search/repositories") { + searchResultJSON := `{ +"total_count": 1, +"incomplete_results": false, +"items": [ +{ +"id": 1, +"name": "no-docker", +"full_name": "test-user/no-docker", +"html_url": "http://localhost/test-user/no-docker", +"owner": { +"login": "test-user" +} +} +] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(searchResultJSON)) + return + } + + // Code search endpoint - no Dockerfile found + if strings.Contains(r.URL.Path, "/search/code") { + codeResultJSON := `{ +"total_count": 0, +"incomplete_results": false, +"items": [] +}` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(codeResultJSON)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gh", "container", "artipacked", + "--github", server.URL, + "--token", "test-token", + "--public", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "Container scan complete") + // Should not find any dangerous patterns + assert.NotContains(t, output, "Identified") +} + +// TestContainerScanMissingToken tests when required token is missing +func TestContainerScanMissingToken(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gh", "container", "artipacked", + "--github", server.URL, + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.NotNil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "required configuration missing") +} diff --git a/tests/e2e/gitlab/container/container_test.go b/tests/e2e/gitlab/container/container_test.go new file mode 100644 index 00000000..0af09049 --- /dev/null +++ b/tests/e2e/gitlab/container/container_test.go @@ -0,0 +1,473 @@ +//go:build e2e + +package container + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/CompassSecurity/pipeleek/tests/e2e/internal/testutil" + "github.com/stretchr/testify/assert" +) + +// TestContainerScanBasic tests basic container scan functionality with a mock GitLab server +func TestContainerScanBasic(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Project listing endpoint + if strings.Contains(r.URL.Path, "/api/v4/projects") && + 
!strings.Contains(r.URL.Path, "/repository/files") && + !strings.Contains(r.URL.Path, "/repository/tree") { + projectsJSON := `[ +{ +"id": 1, +"path_with_namespace": "test-user/dangerous-app", +"web_url": "http://localhost/test-user/dangerous-app" +}, +{ +"id": 2, +"path_with_namespace": "test-user/safe-app", +"web_url": "http://localhost/test-user/safe-app" +} +]` + w.Header().Set("X-Page", "1") + w.Header().Set("X-Per-Page", "100") + w.Header().Set("X-Total", "2") + w.Header().Set("X-Total-Pages", "1") + w.WriteHeader(http.StatusOK) + w.Write([]byte(projectsJSON)) + return + } + + // Repository tree endpoint - returns list of files in repo + if strings.Contains(r.URL.Path, "/repository/tree") { + if strings.Contains(r.URL.Path, "/1/") { + // dangerous-app has Dockerfile at root + treeJSON := `[ +{"id":"abc123","name":"Dockerfile","type":"blob","path":"Dockerfile","mode":"100644"} +]` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(treeJSON)) + return + } + if strings.Contains(r.URL.Path, "/2/") { + // safe-app has Dockerfile at root + treeJSON := `[ +{"id":"def456","name":"Dockerfile","type":"blob","path":"Dockerfile","mode":"100644"} +]` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(treeJSON)) + return + } + } + + // Dockerfile fetch endpoint + if strings.Contains(r.URL.Path, "/repository/files") && strings.Contains(r.URL.Path, "Dockerfile") { + if strings.Contains(r.URL.Path, "/1/") { + // dangerous-app has dangerous Dockerfile + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + // Properly encode the response (must be base64) + w.Write([]byte(`{"file_name":"Dockerfile","file_path":"Dockerfile","size":150,"content":"RlJPTSB1YnVudHU6MjIuMDQKUlVOIGFwdC1nZXQgdXBkYXRlICYmIGFwdC1nZXQgaW5zdGFsbCAteSBjdXJsCkNPUFkgLiAvYXBwCldPUktESVIgL2FwcApSVU4gLi9pbnN0YWxsLnNoCkVOVFJZUE9JTlQgWyIuL3N0YXJ0LnNoIl0="}`)) + return + } + if strings.Contains(r.URL.Path, "/2/") { + // safe-app has safe Dockerfile + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"file_name":"Dockerfile","file_path":"Dockerfile","size":100,"content":"RlJPTSB1YnVudHU6MjIuMDQKUlVOIGFwdC1nZXQgdXBkYXRlICYmIGFwdC1nZXQgaW5zdGFsbCAteSBjdXJsCkNPUFkgcmVxdWlyZW1lbnRzLnR4dCAvYXBwLwpXT1JLRElSIC9hcHAKUlVOIHBpcCBpbnN0YWxsIC1yIHJlcXVpcmVtZW50cy50eHQKQ01EIFsicHl0aG9uIiwgImFwcC5weSJd"}`)) + return + } + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "container", "artipacked", + "--gitlab", server.URL, + "--token", "test-token", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "Identified") + assert.Contains(t, output, "test-user/dangerous-app") +} + +// TestContainerScanOwned tests scanning only owned projects +func TestContainerScanOwned(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.Contains(r.URL.Path, "/api/v4/projects") && + !strings.Contains(r.URL.Path, "/repository/files") && + !strings.Contains(r.URL.Path, "/repository/tree") { + // Check if owned=true is in query params + if !strings.Contains(r.URL.RawQuery, "owned=true") { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"message": 
"owned param required"}`)) + return + } + + projectsJSON := `[ +{ +"id": 1, +"path_with_namespace": "test-user/my-project", +"web_url": "http://localhost/test-user/my-project" +} +]` + w.Header().Set("X-Page", "1") + w.Header().Set("X-Per-Page", "100") + w.Header().Set("X-Total", "1") + w.Header().Set("X-Total-Pages", "1") + w.WriteHeader(http.StatusOK) + w.Write([]byte(projectsJSON)) + return + } + + // Repository tree endpoint + if strings.Contains(r.URL.Path, "/repository/tree") { + treeJSON := `[ +{"id":"abc123","name":"Dockerfile","type":"blob","path":"Dockerfile","mode":"100644"} +]` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(treeJSON)) + return + } + + if strings.Contains(r.URL.Path, "/repository/files") && strings.Contains(r.URL.Path, "Dockerfile") { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"file_name":"Dockerfile","file_path":"Dockerfile","size":100,"content":"RlJPTSB1YnVudHUKQ09QWSAuIC8KUlVOIGVjaG8gZG9uZQ=="}`)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "container", "artipacked", + "--gitlab", server.URL, + "--token", "test-token", + "--owned", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", stderr) + + assert.Nil(t, exitErr) + output := stdout + stderr + assert.Contains(t, output, "Identified") +} + +// TestContainerScanNamespace tests scanning a specific namespace +func TestContainerScanNamespace(t *testing.T) { + if testing.Short() { + t.Skip("Skipping e2e test in short mode") + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Group endpoint + if strings.Contains(r.URL.Path, "/api/v4/groups/my-group") && + !strings.Contains(r.URL.Path, "/projects") { + groupJSON := `{"id": 10, "name": "my-group", "path": "my-group"}` + w.WriteHeader(http.StatusOK) + w.Write([]byte(groupJSON)) + return + } + + // Group projects endpoint + if strings.Contains(r.URL.Path, "/api/v4/groups") && + strings.Contains(r.URL.Path, "/projects") { + projectsJSON := `[ +{ +"id": 1, +"path_with_namespace": "my-group/test-project", +"web_url": "http://localhost/my-group/test-project" +} +]` + w.Header().Set("X-Page", "1") + w.Header().Set("X-Per-Page", "100") + w.Header().Set("X-Total", "1") + w.Header().Set("X-Total-Pages", "1") + w.WriteHeader(http.StatusOK) + w.Write([]byte(projectsJSON)) + return + } + + // Repository tree endpoint + if strings.Contains(r.URL.Path, "/repository/tree") { + treeJSON := `[ +{"id":"abc123","name":"Dockerfile","type":"blob","path":"Dockerfile","mode":"100644"} +]` + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(treeJSON)) + return + } + + // Dockerfile endpoint + if strings.Contains(r.URL.Path, "/repository/files") && strings.Contains(r.URL.Path, "Dockerfile") { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"file_name":"Dockerfile","file_path":"Dockerfile","content":"RlJPTSBhbHBpbmUKQ09QWSAuIC90ZXN0CkNNRCBbXCIvYmluL3NoXCJd"}`)) + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"message": "404 Not Found"}`)) + })) + defer server.Close() + + stdout, stderr, exitErr := testutil.RunCLI(t, []string{ + "gl", "container", "artipacked", + "--gitlab", server.URL, + "--token", "test-token", + "--namespace", "my-group", + }, nil, 10*time.Second) + + t.Logf("STDOUT:\n%s", stdout) + t.Logf("STDERR:\n%s", 
+// TestContainerScanNamespace tests scanning a specific namespace
+func TestContainerScanNamespace(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping e2e test in short mode")
+	}
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Group endpoint
+		if strings.Contains(r.URL.Path, "/api/v4/groups/my-group") &&
+			!strings.Contains(r.URL.Path, "/projects") {
+			groupJSON := `{"id": 10, "name": "my-group", "path": "my-group"}`
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(groupJSON))
+			return
+		}
+
+		// Group projects endpoint
+		if strings.Contains(r.URL.Path, "/api/v4/groups") &&
+			strings.Contains(r.URL.Path, "/projects") {
+			projectsJSON := `[
+{
+"id": 1,
+"path_with_namespace": "my-group/test-project",
+"web_url": "http://localhost/my-group/test-project"
+}
+]`
+			w.Header().Set("X-Page", "1")
+			w.Header().Set("X-Per-Page", "100")
+			w.Header().Set("X-Total", "1")
+			w.Header().Set("X-Total-Pages", "1")
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(projectsJSON))
+			return
+		}
+
+		// Repository tree endpoint
+		if strings.Contains(r.URL.Path, "/repository/tree") {
+			treeJSON := `[
+{"id":"abc123","name":"Dockerfile","type":"blob","path":"Dockerfile","mode":"100644"}
+]`
+			w.Header().Set("Content-Type", "application/json")
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(treeJSON))
+			return
+		}
+
+		// Dockerfile endpoint (content decodes to a Dockerfile containing "COPY . /test")
+		if strings.Contains(r.URL.Path, "/repository/files") && strings.Contains(r.URL.Path, "Dockerfile") {
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(`{"file_name":"Dockerfile","file_path":"Dockerfile","content":"RlJPTSBhbHBpbmUKQ09QWSAuIC90ZXN0CkNNRCBbXCIvYmluL3NoXCJd"}`))
+			return
+		}
+
+		w.WriteHeader(http.StatusNotFound)
+		w.Write([]byte(`{"message": "404 Not Found"}`))
+	}))
+	defer server.Close()
+
+	stdout, stderr, exitErr := testutil.RunCLI(t, []string{
+		"gl", "container", "artipacked",
+		"--gitlab", server.URL,
+		"--token", "test-token",
+		"--namespace", "my-group",
+	}, nil, 10*time.Second)
+
+	t.Logf("STDOUT:\n%s", stdout)
+	t.Logf("STDERR:\n%s", stderr)
+
+	assert.Nil(t, exitErr)
+	output := stdout + stderr
+	assert.Contains(t, output, "Scanning specific namespace")
+	assert.Contains(t, output, "Identified")
+}
+
+// TestContainerScanSingleRepo tests scanning a single repository
+func TestContainerScanSingleRepo(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping e2e test in short mode")
+	}
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Single project endpoint - the GitLab API serves /projects/:id, where :id may be a
+		// numeric ID or a URL-encoded namespace/project path
+		if strings.Contains(r.URL.Path, "/api/v4/projects/") &&
+			!strings.Contains(r.URL.Path, "/repository/files") &&
+			!strings.Contains(r.URL.Path, "/repository/tree") {
+			// Return the project when the ID is requested
+			projectJSON := `{
+"id": 1,
+"path_with_namespace": "test-user/test-repo",
+"web_url": "http://localhost/test-user/test-repo"
+}`
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(projectJSON))
+			return
+		}
+
+		// Repository tree endpoint
+		if strings.Contains(r.URL.Path, "/repository/tree") {
+			treeJSON := `[
+{"id":"abc123","name":"Dockerfile","type":"blob","path":"Dockerfile","mode":"100644"}
+]`
+			w.Header().Set("Content-Type", "application/json")
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(treeJSON))
+			return
+		}
+
+		// Dockerfile endpoint (content decodes to a Dockerfile containing "ADD . /app")
+		if strings.Contains(r.URL.Path, "/repository/files") && strings.Contains(r.URL.Path, "Dockerfile") {
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(`{"file_name":"Dockerfile","file_path":"Dockerfile","content":"RlJPTSB1YnVudHUKQUREIC4gL2FwcApSVU4gbWFrZSBidWlsZA=="}`))
+			return
+		}
+
+		w.WriteHeader(http.StatusNotFound)
+		w.Write([]byte(`{"message": "404 Not Found"}`))
+	}))
+	defer server.Close()
+
+	stdout, stderr, exitErr := testutil.RunCLI(t, []string{
+		"gl", "container", "artipacked",
+		"--gitlab", server.URL,
+		"--token", "test-token",
+		"--repo", "test-user/test-repo",
+	}, nil, 10*time.Second)
+
+	t.Logf("STDOUT:\n%s", stdout)
+	t.Logf("STDERR:\n%s", stderr)
+
+	assert.Nil(t, exitErr)
+	output := stdout + stderr
+	assert.Contains(t, output, "Scanning specific repository")
+	assert.Contains(t, output, "Identified")
+}
+
+// TestContainerScanNoDockerfile tests handling of projects without a Dockerfile
+func TestContainerScanNoDockerfile(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping e2e test in short mode")
+	}
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if strings.Contains(r.URL.Path, "/api/v4/projects") &&
+			!strings.Contains(r.URL.Path, "/repository/files") {
+			projectsJSON := `[
+{
+"id": 1,
+"path_with_namespace": "test-user/no-docker",
+"web_url": "http://localhost/test-user/no-docker"
+}
+]`
+			w.Header().Set("X-Page", "1")
+			w.Header().Set("X-Per-Page", "100")
+			w.Header().Set("X-Total", "1")
+			w.Header().Set("X-Total-Pages", "1")
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(projectsJSON))
+			return
+		}
+
+		// No Dockerfile found
+		if strings.Contains(r.URL.Path, "/repository/files") && strings.Contains(r.URL.Path, "Dockerfile") {
+			w.WriteHeader(http.StatusNotFound)
+			w.Write([]byte(`{"message": "404 File Not Found"}`))
+			return
+		}
+
+		w.WriteHeader(http.StatusNotFound)
+		w.Write([]byte(`{"message": "404 Not Found"}`))
+	}))
+	defer server.Close()
+
+	stdout, stderr, exitErr := testutil.RunCLI(t, []string{
+		"gl", "container", "artipacked",
+		"--gitlab", server.URL,
+		"--token", "test-token",
+	}, nil, 10*time.Second)
+
+	t.Logf("STDOUT:\n%s", stdout)
+	t.Logf("STDERR:\n%s", stderr)
+
+	assert.Nil(t, exitErr)
+	output := stdout + stderr
+	assert.Contains(t, output, "Container scan complete")
+	// Should not find any dangerous patterns
+	assert.NotContains(t, output, "Identified")
+}
+
+// TestContainerScanInvalidURL tests behavior against an unreachable GitLab URL
+func TestContainerScanInvalidURL(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping e2e test in short mode")
+	}
+
+	stdout, stderr, exitErr := testutil.RunCLI(t, []string{
+		"gl", "container", "artipacked",
+		"--gitlab", "https://gitlab.example.com",
+		"--token", "test-token",
+	}, nil, 10*time.Second)
+
+	t.Logf("STDOUT:\n%s", stdout)
+	t.Logf("STDERR:\n%s", stderr)
+
+	// Should fail due to network error (unreachable host)
+	assert.NotNil(t, exitErr)
+}
+
+// TestContainerScanMissingToken tests that the command fails when the required token is missing
+func TestContainerScanMissingToken(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping e2e test in short mode")
+	}
+
+	stdout, stderr, exitErr := testutil.RunCLI(t, []string{
+		"gl", "container", "artipacked",
+		"--gitlab", "https://gitlab.example.com",
+	}, nil, 10*time.Second)
+
+	t.Logf("STDOUT:\n%s", stdout)
+	t.Logf("STDERR:\n%s", stderr)
+
+	assert.NotNil(t, exitErr)
+	output := stdout + stderr
+	assert.Contains(t, output, "required configuration missing")
+}
+
+// TestContainerScanWithSearch tests filtering projects by search query
+func TestContainerScanWithSearch(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping e2e test in short mode")
+	}
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if strings.Contains(r.URL.Path, "/api/v4/projects") &&
+			!strings.Contains(r.URL.Path, "/repository/files") &&
+			!strings.Contains(r.URL.Path, "/repository/tree") {
+			// Check for search parameter
+			if !strings.Contains(r.URL.RawQuery, "search=app") {
+				w.WriteHeader(http.StatusBadRequest)
+				return
+			}
+
+			projectsJSON := `[
+{
+"id": 1,
+"path_with_namespace": "test-user/my-app",
+"web_url": "http://localhost/test-user/my-app"
+}
+]`
+			w.Header().Set("X-Page", "1")
+			w.Header().Set("X-Per-Page", "100")
+			w.Header().Set("X-Total", "1")
+			w.Header().Set("X-Total-Pages", "1")
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(projectsJSON))
+			return
+		}
+
+		// Repository tree endpoint
+		if strings.Contains(r.URL.Path, "/repository/tree") {
+			treeJSON := `[
+{"id":"abc123","name":"Dockerfile","type":"blob","path":"Dockerfile","mode":"100644"}
+]`
+			w.Header().Set("Content-Type", "application/json")
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(treeJSON))
+			return
+		}
+
+		// Dockerfile endpoint (content decodes to a Dockerfile containing "COPY . /src")
+		if strings.Contains(r.URL.Path, "/repository/files") && strings.Contains(r.URL.Path, "Dockerfile") {
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(`{"file_name":"Dockerfile","file_path":"Dockerfile","content":"RlJPTSBub2RlCkNPUFkgLiAvc3JjClJVTiBucG0gaW5zdGFsbA=="}`))
+			return
+		}
+
+		w.WriteHeader(http.StatusNotFound)
+		w.Write([]byte(`{"message": "404 Not Found"}`))
+	}))
+	defer server.Close()
+
+	stdout, stderr, exitErr := testutil.RunCLI(t, []string{
+		"gl", "container", "artipacked",
+		"--gitlab", server.URL,
+		"--token", "test-token",
+		"--search", "app",
+	}, nil, 10*time.Second)
+
+	t.Logf("STDOUT:\n%s", stdout)
+	t.Logf("STDERR:\n%s", stderr)
+
+	assert.Nil(t, exitErr)
+	output := stdout + stderr
+	assert.Contains(t, output, "Identified")
+}