Package: git / 1:2.11.0-3+deb9u4

fsck-detect-gitmodules-files.diff Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
From 653ee6e49b36d757b40282c1c17a84a0730cb921 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 17:20:08 -0400
Subject: fsck: detect gitmodules files

commit 159e7b080bfa5d34559467cacaa79df89a01afc0 upstream.

In preparation for performing fsck checks on .gitmodules
files, this commit plumbs in the actual detection of the
files. Note that unlike most other fsck checks, this cannot
be a property of a single object: we must know that the
object is found at a ".gitmodules" path at the root tree of
a commit.

Since the fsck code only sees one object at a time, we have
to mark the related objects to fit the puzzle together. When
we see a commit we mark its tree as a root tree, and when
we see a root tree with a .gitmodules file, we mark the
corresponding blob to be checked.

In an ideal world, we'd check the objects in topological
order: commits followed by trees followed by blobs. In that
case we can avoid ever loading an object twice, since all
markings would be complete by the time we get to the marked
objects. And indeed, if we are checking a single packfile,
this is the order in which Git will generally write the
objects. But we can't count on that:

  1. git-fsck may show us the objects in arbitrary order
     (loose objects are fed in sha1 order, but we may also
     have multiple packs, and we process each pack fully in
     sequence).

  2. The type ordering is just what git-pack-objects happens
     to write now. The pack format does not require a
     specific order, and it's possible that future versions
     of Git (or a custom version trying to fool official
     Git's fsck checks!) may order it differently.

  3. We may not even be fscking all of the relevant objects
     at once. Consider pushing with transfer.fsckObjects,
     where one push adds a blob at path "foo", and then a
     second push adds the same blob at path ".gitmodules".
     The blob is not part of the second push at all, but we
     need to mark and check it.

So in the general case, we need to make up to three passes
over the objects: once to make sure we've seen all commits,
then once to cover any trees we might have missed, and then
a final pass to cover any .gitmodules blobs we found in the
second pass.

We can simplify things a bit by loosening the requirement
that we find .gitmodules only at root trees. Technically
a file like "subdir/.gitmodules" is not parsed by Git, but
it's not unreasonable for us to declare that Git is aware of
all ".gitmodules" files and make them eligible for checking.
That lets us drop the root-tree requirement, which
eliminates one pass entirely. And it makes our worst case
much better: instead of potentially queueing every root tree
to be re-examined, the worst case is that we queue each
unique .gitmodules blob for a second look.

This patch just adds the boilerplate to find .gitmodules
files. The actual content checks will come in a subsequent
commit.

[jn: backported to 2.11.y:
 - passing oid->hash instead of oid to lookup_blob
 - using "struct hashmap" directly since "struct oidset" isn't
   available]

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fsck.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fsck.h |  7 +++++
 2 files changed, 97 insertions(+)

diff --git a/fsck.c b/fsck.c
index 56546206ae..7a5fa85adc 100644
--- a/fsck.c
+++ b/fsck.c
@@ -10,6 +10,65 @@
 #include "utf8.h"
 #include "sha1-array.h"
 #include "decorate.h"
+#include "hashmap.h"
+
+/*
+ * Minimal set of object ids built directly on "struct hashmap", since
+ * "struct oidset" is not available here.
+ */
+struct oidhash_entry {
+	struct hashmap_entry ent;
+	struct object_id oid;
+};
+
+/*
+ * Comparison callback for the hashmap: compares the object id in entry
+ * "va" against "vkey" when a lookup key was passed, and against the id
+ * stored in entry "vb" otherwise. The result is that of oidcmp().
+ */
+static int oidhash_hashcmp(const void *va, const void *vb,
+			   const void *vkey)
+{
+	const struct oidhash_entry *a = va, *b = vb;
+	const struct object_id *key = vkey;
+	return oidcmp(&a->oid, key ? key : &b->oid);
+}
+
+/* Object ids found at a .gitmodules path in fsck_tree(); see fsck_finish(). */
+static struct hashmap gitmodules_found;
+/* Object ids that fsck_finish() skips over. */
+static struct hashmap gitmodules_done;
+
+/*
+ * Add "oid" to the set "h", initializing the map on first use. An id that
+ * is already present is not added again: hashmap_add() stores duplicate
+ * keys as separate entries, and the same .gitmodules blob can be reached
+ * from many trees.
+ */
+static void oidhash_insert(struct hashmap *h, const struct object_id *oid)
+{
+	struct oidhash_entry *e;
+	unsigned int hash = sha1hash(oid->hash);
+
+	if (!h->tablesize)
+		hashmap_init(h, oidhash_hashcmp, 0);
+	else if (hashmap_get_from_hash(h, hash, oid))
+		return;
+	e = xmalloc(sizeof(*e));
+	hashmap_entry_init(&e->ent, hash);
+	oidcpy(&e->oid, oid);
+	hashmap_add(h, e);
+}
+
+/*
+ * Return 1 if "oid" is in the set "h" and 0 otherwise; a set that was
+ * never inserted into counts as empty.
+ */
+static int oidhash_contains(struct hashmap *h, const struct object_id *oid)
+{
+	return h->tablesize &&
+		!!hashmap_get_from_hash(h, sha1hash(oid->hash), oid);
+}
 
 #define FSCK_FATAL -1
 #define FSCK_INFO -2
@@ -44,6 +79,7 @@
 	FUNC(MISSING_TAG_ENTRY, ERROR) \
 	FUNC(MISSING_TAG_OBJECT, ERROR) \
 	FUNC(MISSING_TREE, ERROR) \
+	FUNC(MISSING_TREE_OBJECT, ERROR) \
 	FUNC(MISSING_TYPE, ERROR) \
 	FUNC(MISSING_TYPE_ENTRY, ERROR) \
 	FUNC(MULTIPLE_AUTHORS, ERROR) \
@@ -51,6 +87,8 @@
 	FUNC(TREE_NOT_SORTED, ERROR) \
 	FUNC(UNKNOWN_TYPE, ERROR) \
 	FUNC(ZERO_PADDED_DATE, ERROR) \
+	FUNC(GITMODULES_MISSING, ERROR) \
+	FUNC(GITMODULES_BLOB, ERROR) \
 	/* warnings */ \
 	FUNC(BAD_FILEMODE, WARN) \
 	FUNC(EMPTY_NAME, WARN) \
@@ -558,6 +596,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 		has_dotdot |= !strcmp(name, "..");
 		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
 		has_zero_pad |= *(char *)desc.buffer == '0';
+
+		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name))
+			oidhash_insert(&gitmodules_found, oid);
+
 		if (update_tree_entry_gently(&desc)) {
 			retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
 			break;
@@ -930,3 +972,65 @@ int fsck_error_function(struct fsck_options *o,
 	error("object %s: %s", describe_object(o, obj), message);
 	return 1;
 }
+
+/*
+ * Run the checks that could only be queued while objects were examined
+ * one at a time: each object id in gitmodules_found that is not also in
+ * gitmodules_done is looked up, read, and handed to fsck_blob(). Both
+ * sets are freed before returning.
+ *
+ * Returns the bitwise OR of the individual results, i.e. non-zero if any
+ * report() or fsck_blob() call returned non-zero.
+ */
+int fsck_finish(struct fsck_options *options)
+{
+	int ret = 0;
+	struct hashmap_iter iter;
+	const struct oidhash_entry *e;
+
+	hashmap_iter_init(&gitmodules_found, &iter);
+	while ((e = hashmap_iter_next(&iter))) {
+		const struct object_id *oid = &e->oid;
+		struct blob *blob;
+		enum object_type type;
+		unsigned long size;
+		char *buf;
+
+		if (oidhash_contains(&gitmodules_done, oid))
+			continue;
+
+		blob = lookup_blob(oid->hash);
+		if (!blob) {
+			/*
+			 * No blob exists to report against, so do not
+			 * form "&blob->object" from the NULL pointer;
+			 * use a generic object handle for this id.
+			 */
+			struct object *obj = lookup_unknown_object(oid->hash);
+			ret |= report(options, obj,
+				      FSCK_MSG_GITMODULES_BLOB,
+				      "non-blob found at .gitmodules");
+			continue;
+		}
+
+		buf = read_sha1_file(oid->hash, &type, &size);
+		if (!buf) {
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_MISSING,
+				      "unable to read .gitmodules blob");
+			continue;
+		}
+
+		if (type == OBJ_BLOB)
+			ret |= fsck_blob(blob, buf, size, options);
+		else
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_BLOB,
+				      "non-blob found at .gitmodules");
+		free(buf);
+	}
+
+	hashmap_free(&gitmodules_found, 1);
+	hashmap_free(&gitmodules_done, 1);
+	return ret;
+}
diff --git a/fsck.h b/fsck.h
index 1891c1863b..f6649ea23b 100644
--- a/fsck.h
+++ b/fsck.h
@@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
 int fsck_object(struct object *obj, void *data, unsigned long size,
 	struct fsck_options *options);
 
+/*
+ * Some fsck checks are context-dependent, and may end up queued; run this
+ * after completing all fsck_object() calls in order to resolve any remaining
+ * checks. Returns non-zero if any of those checks returned non-zero.
+ */
+int fsck_finish(struct fsck_options *options);
+
 #endif
-- 
2.17.0.921.gf22659ad46