From 625a6aef62b0b02bd600ac88bbac68e8543e15d3 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Mon, 23 Sep 2019 08:58:11 +0200
Subject: is_ntfs_dotgit(): only verify the leading segment

commit 288a74bcd28229a00c3632f18cba92dbfdf73ee9 upstream.

The config setting `core.protectNTFS` is specifically designed to work
not only on Windows, but anywhere, to allow for repositories hosted on,
say, Linux servers to be protected against NTFS-specific attack vectors.

As a consequence, `is_ntfs_dotgit()` manually splits backslash-separated
paths (but does not do the same for paths separated by forward slashes),
under the assumption that the backslash might not be a valid directory
separator on the _current_ Operating System.

However, the two callers, `verify_path()` and `fsck_tree()`, are
supposed to feed only individual path segments to the `is_ntfs_dotgit()`
function.

This causes a lot of duplicate scanning (and very inefficient scanning,
too, as the inner loop of `is_ntfs_dotgit()` was optimized for
readability rather than for speed.

Let's simplify the design of `is_ntfs_dotgit()` by putting the burden of
splitting the paths by backslashes as directory separators on the
callers of said function.

Consequently, the `verify_path()` function, which already splits the
path by directory separators, now treats backslashes as directory
separators _explicitly_ when `core.protectNTFS` is turned on, even on
platforms where the backslash is _not_ a directory separator.

Note that we have to repeat some code in `verify_path()`: if the
backslash is not a directory separator on the current Operating System,
we want to allow file names like `\`, but we _do_ want to disallow paths
that are clearly intended to cause harm when the repository is cloned on
Windows.

The `fsck_tree()` function (the other caller of `is_ntfs_dotgit()`) now
needs to look for backslashes in tree entries' names specifically when
`core.protectNTFS` is turned on. While it would be tempting to
completely disallow backslashes in that case (much like `fsck` reports
names containing forward slashes as "full paths"), this would be
overzealous: when `core.protectNTFS` is turned on in a non-Windows
setup, backslashes are perfectly valid characters in file names while we
_still_ want to disallow tree entries that are clearly designed to
exploit NTFS-specific behavior.

This simplification will make subsequent changes easier to implement,
such as turning `core.protectNTFS` on by default (not only on Windows)
or protecting against attack vectors involving NTFS Alternate Data
Streams.

Incidentally, this change allows for catching malicious repositories
that contain tree entries of the form `dir\.gitmodules` already on the
server side rather than only on the client side (and previously only on
Windows): in contrast to `is_ntfs_dotgit()`, the
`is_ntfs_dotgitmodules()` function already expects the caller to split
the paths by directory separators.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fsck.c       | 18 +++++++++++++++++-
 path.c       |  5 +----
 read-cache.c |  8 ++++++++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/fsck.c b/fsck.c
index def446a3e3..135fe8c4bc 100644
--- a/fsck.c
+++ b/fsck.c
@@ -590,7 +590,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 
 	while (desc.size) {
 		unsigned mode;
-		const char *name;
+		const char *name, *backslash;
 		const struct object_id *oid;
 
 		oid = tree_entry_extract(&desc, &name, &mode);
@@ -612,6 +612,22 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 						 ".gitmodules is a symbolic link");
 		}
 
+		if ((backslash = strchr(name, '\\'))) {
+			while (backslash) {
+				backslash++;
+				has_dotgit |= is_ntfs_dotgit(backslash);
+				if (is_ntfs_dotgitmodules(backslash)) {
+					if (!S_ISLNK(mode))
+						oidhash_insert(&gitmodules_found, oid);
+					else
+						retval += report(options, &item->object,
+								 FSCK_MSG_GITMODULES_SYMLINK,
+								 ".gitmodules is a symbolic link");
+				}
+				backslash = strchr(backslash, '\\');
+			}
+		}
+
 		if (update_tree_entry_gently(&desc)) {
 			retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
 			break;
diff --git a/path.c b/path.c
index 0444208dfb..5f23d26668 100644
--- a/path.c
+++ b/path.c
@@ -1273,10 +1273,7 @@ int is_ntfs_dotgit(const char *name)
 			if (only_spaces_and_periods(name, len, 5) &&
 					!strncasecmp(name, "git~1", 5))
 				return 1;
-			if (name[len] != '\\')
-				return 0;
-			name += len + 1;
-			len = -1;
+			return 0;
 		}
 }
 
diff --git a/read-cache.c b/read-cache.c
index 6dca825751..f0f49dbbb7 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -882,7 +882,15 @@ int verify_path(const char *path, unsigned mode)
 			if ((c == '.' && !verify_dotfile(path, mode)) ||
 			    is_dir_sep(c) || c == '\0')
 				return 0;
+		} else if (c == '\\' && protect_ntfs) {
+			if (is_ntfs_dotgit(path))
+				return 0;
+			if (S_ISLNK(mode)) {
+				if (is_ntfs_dotgitmodules(path))
+					return 0;
+			}
 		}
+
 		c = *path++;
 	}
 }
-- 
2.24.0.393.g34dc348eaf

