✨ Check for shell script's insecure download (#606)

* draft * commit 2 * debug code * draft * draft * rem debug code * fix return value * rename function * add license * typos * fixes * fix suffix * comments
2024-11-04 03:52:31 +03:00 · 2021-06-24 10:24:14 -07:00 · 2021-06-24 10:24:14 -07:00 · 4b1c574420
commit 4b1c574420
parent ece69b2256
6 changed files with 374 additions and 252 deletions
--- a/checks/frozen_deps.go
+++ b/checks/frozen_deps.go
@ -47,11 +47,25 @@ func FrozenDeps(c *checker.CheckRequest) checker.CheckResult {
 		isGitHubActionsWorkflowPinned,
 		isDockerfilePinned,
 		isDockerfileFreeOfInsecureDownloads,
+		isShellScriptFreeOfInsecureDownloads,
 	)(c)
 }

 // TODO(laurent): need to support GCB pinning.

+func isShellScriptFreeOfInsecureDownloads(c *checker.CheckRequest) checker.CheckResult {
+	return CheckFilesContent(CheckFrozenDeps, "*", false, c, validateShellScriptDownloads)
+}
+
+func validateShellScriptDownloads(pathfn string, content []byte,
+	logf func(s string, f ...interface{})) (bool, error) {
+	// Validate the file type.
+	if !isShellScriptFile(pathfn, content) {
+		return true, nil
+	}
+	return validateShellFile(pathfn, content, logf)
+}
+
 func isDockerfileFreeOfInsecureDownloads(c *checker.CheckRequest) checker.CheckResult {
 	return CheckFilesContent(CheckFrozenDeps, "*Dockerfile*", false, c, validateDockerfileDownloads)
 }
@ -64,8 +78,8 @@ func validateDockerfileDownloads(pathfn string, content []byte,
 		return false, fmt.Errorf("cannot read dockerfile content: %w", err)
 	}

-	ret := true
-	files := make(map[string]bool)
+	// nolinter:prealloc
+	var bytes []byte

 	// Walk the Dockerfile's AST.
 	for _, child := range res.AST.Children {
@ -84,26 +98,12 @@ func validateDockerfileDownloads(pathfn string, content []byte,
 			return false, ErrParsingDockerfile
 		}

-		// Validate the command.
+		// Build a file content.
 		cmd := strings.Join(valueList, " ")
-		r, err := validateShellCommand(cmd, pathfn, files, logf)
-		if err != nil {
-			return false, err
-		} else if !r {
-			ret = false
-		}
-
-		// Record the name of downloaded file, if any.
-		fn, b, err := recordFetchFileFromString(cmd)
-		if err != nil {
-			return false, err
-		} else if b {
-			for f := range fn {
-				files[f] = true
-			}
-		}
+		bytes = append(bytes, cmd...)
+		bytes = append(bytes, '\n')
 	}
-	return ret, nil
+	return validateShellFile(pathfn, bytes, logf)
 }

 func isDockerfilePinned(c *checker.CheckRequest) checker.CheckResult {
--- a/checks/frozen_deps_test.go
+++ b/checks/frozen_deps_test.go
@ -359,3 +359,86 @@ func TestDockerfileScriptDownload(t *testing.T) {
 		})
 	}
 }
+
+func TestShellScriptDownload(t *testing.T) {
+	t.Parallel()
+	//nolint
+	type args struct {
+		// Note: this seems to be defined in e2e/e2e_suite_test.go
+		Log      log
+		Filename string
+	}
+
+	type returnValue struct {
+		Error          error
+		Result         bool
+		NumberOfErrors int
+	}
+
+	//nolint
+	tests := []struct {
+		args args
+		want returnValue
+		name string
+	}{
+		{
+			name: "sh script",
+			args: args{
+				Filename: "testdata/script-sh",
+				Log:      log{},
+			},
+			want: returnValue{
+				Error:          nil,
+				Result:         false,
+				NumberOfErrors: 7,
+			},
+		},
+		{
+			name: "bash script",
+			args: args{
+				Filename: "testdata/script-bash",
+				Log:      log{},
+			},
+			want: returnValue{
+				Error:          nil,
+				Result:         false,
+				NumberOfErrors: 7,
+			},
+		},
+		{
+			name: "sh script 2",
+			args: args{
+				Filename: "testdata/script.sh",
+				Log:      log{},
+			},
+			want: returnValue{
+				Error:          nil,
+				Result:         false,
+				NumberOfErrors: 7,
+			},
+		},
+	}
+	//nolint
+	for _, tt := range tests {
+		tt := tt // Re-initializing variable so it is not changed while executing the closure below
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			var content []byte
+			var err error
+			content, err = ioutil.ReadFile(tt.args.Filename)
+			if err != nil {
+				panic(fmt.Errorf("cannot read file: %w", err))
+			}
+
+			r, err := validateShellScriptDownloads(tt.args.Filename, content, tt.args.Log.Logf)
+
+			if !errors.Is(err, tt.want.Error) ||
+				r != tt.want.Result ||
+				len(tt.args.Log.messages) != tt.want.NumberOfErrors {
+				t.Errorf("TestDockerfileScriptDownload:\"%v\": %v (%v,%v,%v) want (%v, %v, %v)\n%v",
+					tt.name, tt.args.Filename, r, err, len(tt.args.Log.messages), tt.want.Result, tt.want.Error, tt.want.NumberOfErrors,
+					strings.Join(tt.args.Log.messages, "\n"))
+			}
+		})
+	}
+}
--- a/checks/shell_download_validate.go
+++ b/checks/shell_download_validate.go
@ -15,6 +15,8 @@
 package checks

 import (
+	"bufio"
+	"bytes"
 	"errors"
 	"fmt"
 	"net/url"
@ -45,10 +47,22 @@ var downloadUtils = []string{
 	"curl", "wget", "gsutil",
 }

+var shellNames = []string{
+	"sh", "bash", "dash", "ksh",
+}
+
 func isBinaryName(expected, name string) bool {
 	return strings.EqualFold(path.Base(name), expected)
 }

+func isExecuteFile(cmd []string, fn string) bool {
+	if len(cmd) == 0 {
+		return false
+	}
+
+	return strings.EqualFold(filepath.Clean(cmd[0]), filepath.Clean(fn))
+}
+
 // see https://serverfault.com/questions/226386/wget-a-script-and-run-it/890417.
 func isDownloadUtility(cmd []string) bool {
 	if len(cmd) == 0 {
@ -216,14 +230,6 @@ func isInterpreterWithFile(cmd []string, fn string) bool {
 	return false
 }

-func isExecuteFile(cmd []string, fn string) bool {
-	if len(cmd) == 0 {
-		return false
-	}
-
-	return strings.EqualFold(filepath.Clean(cmd[0]), filepath.Clean(fn))
-}
-
 func extractCommand(cmd interface{}) ([]string, bool) {
 	c, ok := cmd.(*syntax.CallExpr)
 	if !ok {
@ -429,27 +435,6 @@ func isUnpinnedPakageManagerDownload(node syntax.Node, cmd, pathfn string,
 	return false
 }

-// Detect `fetch | exec`.
-func validateCommandIsNotFetchPipeExecute(cmd, pathfn string, logf func(s string, f ...interface{})) (bool, error) {
-	in := strings.NewReader(cmd)
-	f, err := syntax.NewParser().Parse(in, "")
-	if err != nil {
-		return false, ErrParsingShellCommand
-	}
-
-	cmdValidated := true
-	syntax.Walk(f, func(node syntax.Node) bool {
-		if isFetchPipeExecute(node, cmd, pathfn, logf) {
-			cmdValidated = false
-		}
-
-		// Continue walking the node graph.
-		return true
-	})
-
-	return cmdValidated, nil
-}
-
 func recordFetchFileFromNode(node syntax.Node) (pathfn string, ok bool, err error) {
 	ss, ok := node.(*syntax.Stmt)
 	if !ok {
@ -473,42 +458,7 @@ func recordFetchFileFromNode(node syntax.Node) (pathfn string, ok bool, err erro
 	return fn, true, nil
 }

-func validateCommandIsNotFetchToFileExecute(cmd, pathfn string, logf func(s string, f ...interface{})) (bool, error) {
-	in := strings.NewReader(cmd)
-	f, err := syntax.NewParser().Parse(in, "")
-	if err != nil {
-		return false, ErrParsingShellCommand
-	}
-
-	cmdValidated := true
-	files := make(map[string]bool)
-	syntax.Walk(f, func(node syntax.Node) bool {
-		if err != nil {
-			return false
-		}
-
-		// Record the file that is downloaded, if any.
-		fn, b, e := recordFetchFileFromNode(node)
-		if e != nil {
-			err = e
-			return false
-		} else if b {
-			files[fn] = true
-		}
-
-		// Check if we're calling a file we previously downloaded.
-		if isExecuteFiles(node, cmd, pathfn, files, logf) {
-			cmdValidated = false
-		}
-
-		// Continue walking the node graph.
-		return true
-	})
-
-	return cmdValidated, err
-}
-
-func isFetchStdinExecute(node syntax.Node, cmd, pathfn string,
+func isFetchProcSubsExecute(node syntax.Node, cmd, pathfn string,
 	logf func(s string, f ...interface{})) bool {
 	ce, ok := node.(*syntax.CallExpr)
 	if !ok {
@ -599,49 +549,6 @@ func extractInterpreterCommandFromArgs(args []*syntax.Word) (string, bool) {
 	return "", false
 }

-func validateCommandIsNotFetchToStdinExecute(cmd, pathfn string,
-	logf func(s string, f ...interface{})) (bool, error) {
-	in := strings.NewReader(cmd)
-	f, err := syntax.NewParser().Parse(in, "")
-	if err != nil {
-		return false, ErrParsingShellCommand
-	}
-
-	cmdValidated := true
-	syntax.Walk(f, func(node syntax.Node) bool {
-		if isFetchStdinExecute(node, cmd, pathfn, logf) {
-			cmdValidated = false
-		}
-
-		// Continue walking the node graph.
-		return true
-	})
-
-	return cmdValidated, nil
-}
-
-func validateCommandIsNotFileExecute(cmd, pathfn string, files map[string]bool,
-	logf func(s string, f ...interface{})) (bool, error) {
-	in := strings.NewReader(cmd)
-	f, err := syntax.NewParser().Parse(in, "")
-	if err != nil {
-		return false, ErrParsingShellCommand
-	}
-
-	cmdValidated := true
-	syntax.Walk(f, func(node syntax.Node) bool {
-		// Check if we're calling a file we previously downloaded.
-		if isExecuteFiles(node, cmd, pathfn, files, logf) {
-			cmdValidated = false
-		}
-
-		// Continue walking the node graph.
-		return true
-	})
-
-	return cmdValidated, nil
-}
-
 func extractInterpreterCommandFromNode(node syntax.Node) (string, bool) {
 	ce, ok := node.(*syntax.CallExpr)
 	if !ok {
@ -665,79 +572,73 @@ func extractInterpreterCommandFromNode(node syntax.Node) (string, bool) {
 	return cs, true
 }

-func extractInterpreterCommandFromString(cmd string) (c string, res bool, err error) {
-	in := strings.NewReader(cmd)
-	f, err := syntax.NewParser().Parse(in, "")
-	if err != nil {
-		return "", false, ErrParsingShellCommand
+func nodeToString(p *syntax.Printer, node syntax.Node) (string, error) {
+	// https://github.com/mvdan/sh/blob/24dd9930bc1cfc7be025f8b75b2e9e9f04524012/syntax/printer.go#L135.
+	var buf bytes.Buffer
+	err := p.Print(&buf, node)
+	// This is ugly, but the parser does not have a defined error type :/.
+	if err != nil && !strings.Contains(err.Error(), "unsupported node type") {
+		return "", fmt.Errorf("syntax.Printer.Print: %w", err)
 	}
-
-	cs := ""
-	ok := false
-	syntax.Walk(f, func(node syntax.Node) bool {
-		if ok {
-			return false
-		}
-		command, commandFound := extractInterpreterCommandFromNode(node)
-		// nolinter
-		if commandFound {
-			cs = command
-			ok = commandFound
-			return false
-		}
-
-		// Continue walking the node graph.
-		return true
-	})
-	return cs, ok, nil
+	return buf.String(), nil
 }

-func validateCommandIsNotUnpinnedPackageManagerDownload(cmd, pathfn string,
+func validateShellFileAndRecord(pathfn string, content []byte, files map[string]bool,
 	logf func(s string, f ...interface{})) (bool, error) {
-	in := strings.NewReader(cmd)
+	in := strings.NewReader(string(content))
 	f, err := syntax.NewParser().Parse(in, "")
 	if err != nil {
 		return false, ErrParsingShellCommand
 	}

-	cmdValidated := true
+	printer := syntax.NewPrinter()
+	validated := true
+
 	syntax.Walk(f, func(node syntax.Node) bool {
-		// Check if we're calling a file we previously downloaded.
-		if isUnpinnedPakageManagerDownload(node, cmd, pathfn, logf) {
-			cmdValidated = false
-		}
-
-		// Continue walking the node graph.
-		return true
-	})
-
-	return cmdValidated, nil
-}
-
-// The functions below are the only ones that should be called by other files.
-// There needs to be a call to extractInterpreterCommandFromString() prior
-// to calling other functions.
-
-func recordFetchFileFromString(cmd string) (fmap map[string]bool, ok bool, err error) {
-	// Check if the command is calling an interpreter with a string command.
-	c, ok, err := extractInterpreterCommandFromString(cmd)
-	if err != nil {
-		return nil, false, err
-	} else if ok {
-		cmd = c
-	}
-
-	in := strings.NewReader(cmd)
-	f, err := syntax.NewParser().Parse(in, "")
-	if err != nil {
-		return nil, false, ErrParsingShellCommand
-	}
-
-	files := make(map[string]bool)
-	syntax.Walk(f, func(node syntax.Node) bool {
-		if err != nil {
+		cmdStr, e := nodeToString(printer, node)
+		if e != nil {
+			err = e
 			return false
 		}
+
+		// sh -c "CMD".
+		c, ok := extractInterpreterCommandFromNode(node)
+		// nolinter
+		if ok {
+			ok, e := validateShellFileAndRecord(pathfn, []byte(c), files, logf)
+			validated = ok
+			if e != nil {
+				err = e
+				return true
+			}
+		}
+
+		// `curl | bash` (supports `sudo`).
+		if isFetchPipeExecute(node, cmdStr, pathfn, logf) {
+			validated = false
+		}
+
+		// Check if we're calling a file we previously downloaded.
+		// Includes `curl > /tmp/file [&&|;] [bash] /tmp/file`
+		if isExecuteFiles(node, cmdStr, pathfn, files, logf) {
+			validated = false
+		}
+
+		// `bash <(wget -qO- http://website.com/my-script.sh)`. (supports `sudo`).
+		if isFetchProcSubsExecute(node, cmdStr, pathfn, logf) {
+			validated = false
+		}
+
+		// Package manager's unpinned installs.
+		if isUnpinnedPakageManagerDownload(node, cmdStr, pathfn, logf) {
+			validated = false
+		}
+		// TODO(laurent): add check for cat file | bash
+		// TODO(laurent): detect downloads of zip/tar files containing scripts.
+		// TODO(laurent): detect command being an env variable
+		// TODO(laurent): detect unpinned git clone and package manager downloads (go get/install).
+
+		// Record the file that is downloaded, if any.
 		fn, b, e := recordFetchFileFromNode(node)
 		if e != nil {
 			err = e
@ -750,74 +651,55 @@ func recordFetchFileFromString(cmd string) (fmap map[string]bool, ok bool, err e
 		return true
 	})

-	return files, true, err
+	return validated, err
 }

-func validateShellCommand(cmd, pathfn string, downloadedFiles map[string]bool,
-	logf func(s string, f ...interface{})) (bool, error) {
-	ret := true
+// The functions below are the only ones that should be called by other files.
+// There needs to be a call to extractInterpreterCommandFromString() prior
+// to calling other functions.

-	// First, check if the command launches an interpreter with
-	// a string, such as `bash -c "CMD"`, `python -c "CMD"`, `su -c "CMD"`, etc.
-	c, ok, err := extractInterpreterCommandFromString(cmd)
-	if err != nil {
-		return false, err
-	} else if ok {
-		cmd = c
+func isShellScriptFile(pathfn string, content []byte) bool {
+	r := strings.NewReader(string(content))
+	scanner := bufio.NewScanner(r)
+
+	// Check file extension first.
+	for _, name := range shellNames {
+		// Look at the prefix.
+		if strings.HasSuffix(pathfn, "."+name) {
+			return true
+		}
 	}

-	r, err := validateCommandIsNotUnpinnedPackageManagerDownload(cmd, pathfn, logf)
-	if err != nil {
-		return false, err
-	} else if !r {
-		ret = false
+	// Look at file content.
+	for scanner.Scan() {
+		line := scanner.Text()
+
+		//  #!/bin/XXX, #!XXX, #!/usr/bin/env XXX, #!env XXX
+		if !strings.HasPrefix(line, "#!") {
+			continue
+		}
+
+		for _, name := range shellNames {
+			line = line[2:]
+			parts := strings.Split(line, " ")
+			// #!/bin/bash, #!bash -e
+			if len(parts) >= 1 && isBinaryName(name, parts[0]) {
+				return true
+			}
+
+			// #!/bin/env bash
+			if len(parts) >= 2 &&
+				isBinaryName("env", parts[0]) &&
+				isBinaryName(name, parts[1]) {
+				return true
+			}
+		}
 	}

-	// Validate it's not downloading and piping into a shell, like
-	// `curl | bash` (supports `sudo`).
-	r, err = validateCommandIsNotFetchPipeExecute(cmd, pathfn, logf)
-	if err != nil {
-		return false, err
-	} else if !r {
-		ret = false
-	}
-
-	// Validate it is not a download command followed by
-	// an execute: `curl > /tmp/file && /tmp/file`
-	//			   `curl > /tmp/file && bash /tmp/file`
-	//			   `curl > /tmp/file; bash /tmp/file`
-	//			   `curl > /tmp/file; /tmp/file`
-	// (supports `sudo`).
-	r, err = validateCommandIsNotFetchToFileExecute(cmd, pathfn, logf)
-	if err != nil {
-		return false, err
-	} else if !r {
-		ret = false
-	}
-
-	// Validate it's not shelling out by redirecting input to stdin, like
-	// `bash <(wget -qO- http://website.com/my-script.sh)`. (supports `sudo`).
-	r, err = validateCommandIsNotFetchToStdinExecute(cmd, pathfn, logf)
-	if err != nil {
-		return false, err
-	} else if !r {
-		ret = false
-	}
-
-	// TODO(laurent): add check for cat file | bash
-	// TODO(laurent): detect downloads of zip/tar files containing scripts.
-	// TODO(laurent): detect command being an env variable
-	// TODO(laurent): detect unpinned git clone and package manager downloads (go get/install).
-
-	// Check if a previously-downloaded file is executed via
-	// `bash <some-already-downloaded-file>` or directly `<some-already-downloaded-file>`
-	// (supports `sudo`).
-	r, err = validateCommandIsNotFileExecute(cmd, pathfn, downloadedFiles, logf)
-	if err != nil {
-		return false, err
-	} else if !r {
-		ret = false
-	}
-
-	return ret, nil
+	return false
+}
+
+func validateShellFile(pathfn string, content []byte, logf func(s string, f ...interface{})) (bool, error) {
+	files := make(map[string]bool)
+	return validateShellFileAndRecord(pathfn, content, files, logf)
 }
--- a/checks/testdata/script-bash
+++ b/checks/testdata/script-bash
@ -0,0 +1,52 @@
+#!/bin/bash
+# Copyright 2021 Security Scorecard Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Find input files
+MY_INPUT_FILE="${TEST_SRCDIR}/some/path/myinputfile.dat"
+readonly MY_INPUT_FILE
+MY_OUTPUT_FILE="${TEST_TMPDIR}/myoutput.txt"
+readonly MY_OUTPUT_FILE
+
+# Do something
+echo hello || die "Failed in bar()"
+
+# Check something
+check_eq "${A}" "${B}"
+
+echo "PASS"
+
+if [ $1 -gt 100 ]
+then
+    echo Hey that\'s a large number.
+    pwd
+    echo hi && curl -s blabla | bash
+fi
+
+curl bla > myfile
+./myfile
+
+sh -c "curl bla | sh"
+curl bla > file2
+bash -c "file2"
+
+sh -c "curl bla > file1"
+sh -c "./file1"
+
+bash <(wget -qO- http://website.com/my-script.sh)
+
+wget http://file-with-sudo -O /tmp/file3
+bash /tmp/file3
+
+date
--- a/checks/testdata/script-sh
+++ b/checks/testdata/script-sh
@ -0,0 +1,53 @@
+#!/bin/env bash -euo pipefail
+# Copyright 2021 Security Scorecard Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Find input files
+MY_INPUT_FILE="${TEST_SRCDIR}/some/path/myinputfile.dat"
+readonly MY_INPUT_FILE
+MY_OUTPUT_FILE="${TEST_TMPDIR}/myoutput.txt"
+readonly MY_OUTPUT_FILE
+
+# Do something
+echo hello || die "Failed in bar()"
+
+# Check something
+check_eq "${A}" "${B}"
+
+echo "PASS"
+
+if [ $1 -gt 100 ]
+then
+    echo Hey that\'s a large number.
+    pwd
+    echo hi && curl -s blabla | bash
+fi
+
+curl bla > myfile
+./myfile
+
+sh -c "curl bla | sh"
+
+curl bla > file2
+bash -c "file2"
+
+sh -c "curl bla > file1"
+sh -c "./file1"
+
+bash <(wget -qO- http://website.com/my-script.sh)
+
+wget http://file-with-sudo -O /tmp/file3
+bash /tmp/file3
+
+date
--- a/checks/testdata/script.sh
+++ b/checks/testdata/script.sh
@ -0,0 +1,52 @@
+#!/bin/env sh -e
+# Copyright 2021 Security Scorecard Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Find input files
+MY_INPUT_FILE="${TEST_SRCDIR}/some/path/myinputfile.dat"
+readonly MY_INPUT_FILE
+MY_OUTPUT_FILE="${TEST_TMPDIR}/myoutput.txt"
+readonly MY_OUTPUT_FILE
+
+# Do something
+echo hello || die "Failed in bar()"
+
+# Check something
+check_eq "${A}" "${B}"
+
+echo "PASS"
+
+if [ $1 -gt 100 ]
+then
+    echo Hey that\'s a large number.
+    pwd
+    echo hi && curl -s blabla | bash
+fi
+
+curl bla > myfile
+./myfile
+
+sh -c "curl bla | sh"
+curl bla > file2
+bash -c "file2"
+
+sh -c "curl bla > file1"
+sh -c "./file1"
+
+bash <(wget -qO- http://website.com/my-script.sh)
+
+wget http://file-with-sudo -O /tmp/file3
+bash /tmp/file3
+
+date