Package: git / 1:2.11.0-3+deb9u4

fsck-actually-fsck-blob-data.diff Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
From f9eb3aecac9a4a8f1ec86ce131b7085e833f313a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 2 May 2018 15:44:51 -0400
Subject: fsck: actually fsck blob data

commit 7ac4f3a007e2567f9d2492806186aa063f9a08d6 upstream.

Because fscking a blob has always been a noop, we didn't
bother passing around the blob data. In preparation for
content-level checks, let's fix up a few things:

  1. The fsck_object() function just returns success for any
     blob. Let's add a noop fsck_blob(), which we can fill in
     with actual logic later.

  2. The fsck_loose() function in builtin/fsck.c
     just threw away blob content after loading it. Let's
     hold onto it until after we've called fsck_object().

     The easiest way to do this is to just drop the
     parse_loose_object() helper entirely. Incidentally,
     this also fixes a memory leak: if we successfully
     loaded the object data but did not parse it, we would
     have left the function without freeing it.

  3. When fsck_loose() loads the object data, it
     does so with a custom read_loose_object() helper. This
     function streams any blobs, regardless of size, under
     the assumption that we're only checking the sha1.

     Instead, let's actually load blobs smaller than
     big_file_threshold, as the normal object-reading
     code-paths would do. This lets us fsck small files, and
     a NULL return is an indication that the blob was so big
     that it needed to be streamed, and we can pass that
     information along to fsck_blob().

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 builtin/fsck.c | 51 +++++++++++++++++++++++++-------------------------
 fsck.c         |  8 +++++++-
 sha1_file.c    |  2 +-
 3 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4b91ee95e6..95053ec02f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -318,7 +318,7 @@ static void check_connectivity(void)
 	}
 }
 
-static int fsck_obj(struct object *obj)
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
 {
 	if (obj->flags & SEEN)
 		return 0;
@@ -330,7 +330,7 @@ static int fsck_obj(struct object *obj)
 
 	if (fsck_walk(obj, NULL, &fsck_obj_options))
 		objerror(obj, "broken links");
-	if (fsck_object(obj, NULL, 0, &fsck_obj_options))
+	if (fsck_object(obj, buffer, size, &fsck_obj_options))
 		return -1;
 
 	if (obj->type == OBJ_TREE) {
@@ -376,7 +376,7 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
 		return error("%s: object corrupt or missing", sha1_to_hex(sha1));
 	}
 	obj->flags = HAS_OBJ;
-	return fsck_obj(obj);
+	return fsck_obj(obj, buffer, size);
 }
 
 static int default_refs;
@@ -476,43 +476,42 @@ static void get_default_heads(void)
 	}
 }
 
-static struct object *parse_loose_object(const unsigned char *sha1,
-					 const char *path)
+static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
 {
 	struct object *obj;
-	void *contents;
 	enum object_type type;
 	unsigned long size;
+	void *contents;
 	int eaten;
 
-	if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
-		return NULL;
+	if (read_loose_object(path, sha1, &type, &size, &contents) < 0) {
+		errors_found |= ERROR_OBJECT;
+		error("%s: object corrupt or missing: %s",
+		      sha1_to_hex(sha1), path);
+		return 0; /* keep checking other objects */
+	}
 
 	if (!contents && type != OBJ_BLOB)
 		die("BUG: read_loose_object streamed a non-blob");
 
 	obj = parse_object_buffer(sha1, type, size, contents, &eaten);
 
+	if (!obj) {
+		errors_found |= ERROR_OBJECT;
+		error("%s: object could not be parsed: %s",
+		      sha1_to_hex(sha1), path);
+		if (!eaten)
+			free(contents);
+		return 0; /* keep checking other objects */
+	}
+
+	obj->flags = HAS_OBJ;
+	if (fsck_obj(obj, contents, size))
+		errors_found |= ERROR_OBJECT;
+
 	if (!eaten)
 		free(contents);
-	return obj;
-}
-
-static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
-{
-	struct object *obj = parse_loose_object(sha1, path);
-
-	if (!obj) {
-		errors_found |= ERROR_OBJECT;
-		error("%s: object corrupt or missing: %s",
-		      sha1_to_hex(sha1), path);
-		return 0; /* keep checking other objects */
-	}
-
-	obj->flags = HAS_OBJ;
-	if (fsck_obj(obj))
-		errors_found |= ERROR_OBJECT;
-	return 0;
+	return 0; /* keep checking other objects, even if we saw an error */
 }
 
 static int fsck_cruft(const char *basename, const char *path, void *data)
diff --git a/fsck.c b/fsck.c
index 1336ead9eb..56546206ae 100644
--- a/fsck.c
+++ b/fsck.c
@@ -893,6 +893,12 @@ static int fsck_tag(struct tag *tag, const char *data,
 	return fsck_tag_buffer(tag, data, size, options);
 }
 
+static int fsck_blob(struct blob *blob, const char *buf,
+		     unsigned long size, struct fsck_options *options)
+{
+	return 0;
+}
+
 int fsck_object(struct object *obj, void *data, unsigned long size,
 	struct fsck_options *options)
 {
@@ -900,7 +906,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
 		return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
 
 	if (obj->type == OBJ_BLOB)
-		return 0;
+		return fsck_blob((struct blob *)obj, data, size, options);
 	if (obj->type == OBJ_TREE)
 		return fsck_tree((struct tree *) obj, options);
 	if (obj->type == OBJ_COMMIT)
diff --git a/sha1_file.c b/sha1_file.c
index 0a609a5772..65db803392 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3876,7 +3876,7 @@ int read_loose_object(const char *path,
 		goto out;
 	}
 
-	if (*type == OBJ_BLOB) {
+	if (*type == OBJ_BLOB && *size > big_file_threshold) {
 		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
 			goto out;
 	} else {
-- 
2.17.0.921.gf22659ad46