File: crawler.c

package info (click to toggle)
rdup 0.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 784 kB
  • ctags: 108
  • sloc: sh: 3,225; ansic: 1,624; perl: 565; makefile: 65
file content (308 lines) | stat: -rw-r--r-- 7,672 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
/*
 * Copyright (c) 2005 - 2007 Miek Gieben
 * See LICENSE for the license
 *
 * Directory crawler
 */
#include "rdup.h"

/* Options and shared state defined outside this file. */

/* when set, dir_crawl() skips sub-directories whose st_dev differs from
 * that of the directory being crawled */
extern gboolean opt_onefilesystem;
/* when set, a file named NOBACKUP makes dir_crawl() drop the entries of
 * the directory it was found in */
extern gboolean opt_nobackup;
/* when set, uid/gid are obtained through read_attr_uid()/read_attr_gid()
 * instead of being taken directly from lstat() */
extern gboolean opt_attr;
/* a directory whose ctime is later than this is treated as new by
 * dir_crawl() */
extern time_t opt_timestamp;
/* values > 0 enable additional messages */
extern gint opt_verbose;
/* handed to gfunc_regexp(); paths for which that returns true are skipped */
extern GSList *regex_list;

/* xattr.c */
/* Both are called here with the path and the uid/gid reported by lstat();
 * presumably that second argument is the fallback value - confirm in
 * xattr.c */
uid_t read_attr_uid(char *path, uid_t u);
gid_t read_attr_gid(char *path, gid_t g);

/**
 * Return a newly allocated copy of the entry f.
 *
 * The name string is duplicated as well, so the copy does not share any
 * memory with f. Release the result with entry_free().
 */
static struct entry *
entry_dup(struct entry *f)
{
	struct entry *copy = g_malloc(sizeof *copy);

	/* the name is the only member that needs its own allocation */
	copy->f_name      = g_strdup(f->f_name);
	copy->f_name_size = f->f_name_size;

	/* everything else is copied by value */
	copy->f_dev   = f->f_dev;
	copy->f_ino   = f->f_ino;
	copy->f_mode  = f->f_mode;
	copy->f_uid   = f->f_uid;
	copy->f_gid   = f->f_gid;
	copy->f_size  = f->f_size;
	copy->f_ctime = f->f_ctime;
	copy->f_lnk   = f->f_lnk;

	return copy;
}

/**
 * Release an entry together with the name string it owns.
 * f must not be NULL.
 */
static void
entry_free(struct entry *f)
{
	char *name = f->f_name;

	g_free(name);	/* the name first ... */
	g_free(f);	/* ... then the entry itself */
}

/**
 * Prepend the path leading up to the backup directory to the tree.
 *
 * Every prefix of path that ends just before a DIR_SEP (and path itself)
 * is lstat()ed and inserted into t as a separate entry; for "/a/b" that
 * is "/a" followed by "/a/b".
 *
 * t    - tree receiving the entries (copies made with entry_dup())
 * path - path to split; it is not modified, a private copy is used
 *
 * Returns TRUE on success. Returns FALSE as soon as one of the
 * components cannot be lstat()ed; a message is emitted and components
 * inserted before the failure stay in the tree.
 */
gboolean
dir_prepend(GTree *t, char *path)
{
	char *c;
	char *p;
	char *path2;
	size_t len;
	struct stat s;
	struct entry e;

	path2 = g_strdup(path);
	len   = strlen(path);

	/* add closing / so that the last component is terminated like all
	 * the others; the len check keeps us from looking at path2[-1] when
	 * path is empty */
	if (len == 0 || path2[len - 1] != DIR_SEP) {
		path2 = g_realloc(path2, len + 2);
		path2[len] = DIR_SEP;
		path2[len + 1] = '\0';
	}

	/* start at path2 + 1 to step over a leading separator, then carry
	 * on right behind each separator that has been handled */
	for (p = path2 + 1; (c = strchr(p, DIR_SEP)); p = c + 1) {
		/* temporarily cut the string after the current component */
		*c = '\0';
		if (lstat(path2, &s) != 0) {
			msg(_("Could not stat path `%s\': %s"), path2, strerror(errno));
			g_free(path2);
			return FALSE;
		}
		e.f_name      = path2;
		e.f_name_size = strlen(path2);
		if (opt_attr) {
			e.f_uid = read_attr_uid(e.f_name, s.st_uid);
			e.f_gid = read_attr_gid(e.f_name, s.st_gid);
		} else {
			e.f_uid       = s.st_uid;
			e.f_gid       = s.st_gid;
		}
		e.f_ctime     = s.st_ctime;
		e.f_mode      = s.st_mode;
		e.f_size      = s.st_size;
		e.f_dev       = s.st_dev;
		e.f_ino       = s.st_ino;
		/* entry_dup() copies f_lnk too, so it must hold a defined value */
		e.f_lnk       = 0;
		g_tree_insert(t, (gpointer) entry_dup(&e), VALUE);
		/* restore the separator */
		*c = DIR_SEP;
	}
	g_free(path2);
	return TRUE;
}

/**
 * Crawl the directory path and record what is found in the tree t.
 *
 * Regular files and symlinks are inserted while the directory is read.
 * Sub-directories are collected on a stack first; once the directory has
 * been read completely each of them is inserted and crawled recursively.
 * Anything else is skipped. Entries matching regex_list and names
 * containing a newline are skipped as well.
 *
 * t        - tree receiving the entries (copies made with entry_dup())
 * linkhash - passed to hardlink() for files with more than one link
 * path     - directory to read; if it cannot be opened as a directory
 *            but can be opened as a file, the function returns silently
 * new_dir  - if true then the directory is new, so all files directly
 *            in it should be included. We do this by giving them the
 *            value 0 (NULL_DUMP) as ctime. For each sub-directory the
 *            flag is determined anew from its own ctime and
 *            opt_timestamp.
 */
void
dir_crawl(GTree *t, GHashTable *linkhash, char *path, gboolean new_dir)
{
	DIR 		*dir;
	FILE 		*f;
	struct dirent 	*dent;
	struct entry    *directory;
	struct entry   **dirstack;
	char 		*curpath;
	gchar		*lnk;
	gchar		*hardlink_name;
	gchar		*symlink_name;
	struct stat   	s;
	struct entry	pop;
	struct remove_path rp;
	dev_t 		current_dev;
	size_t 		curpath_len;
	int		dfd;

	/* dir stack: number of entries in use and number of D_STACKSIZE
	 * sized chunks allocated */
	gint32 d = 0;
	gint32 dstack_cnt  = 1;

	if(!(dir = opendir(path))) {
		/* files are also allowed, check for this, if it isn't give the error */
		if ((f = fopen(path, "r"))) {
			fclose(f);
			return;
		}
		msg(_("Cannot enter directory `%s\': %s"), path, strerror(errno));
		return;
	}

	/* get device */
#ifdef HAVE_DIRFD
	dfd = dirfd(dir);
#else
	dfd = rdup_dirfd(dir);
#endif
	if (fstat(dfd, &s) != 0) {
		msg(_("Cannot determine holding device of the directory `%s\': %s"), path, 
				strerror(errno));
		closedir(dir);
		return;
	}
	current_dev = s.st_dev;

	/* only allocated once we know there is a directory to read */
	dirstack = g_malloc(dstack_cnt * D_STACKSIZE * sizeof(struct entry *));

	while((dent = readdir(dir))) {
		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;

		/* do not produce a double separator directly under the root */
		if (strcmp(path, DIR_SEP_STR) == 0)
			curpath = g_strdup_printf("%c%s", DIR_SEP, dent->d_name);
		else
			curpath = g_strdup_printf("%s%c%s", path, DIR_SEP, dent->d_name);
		curpath_len = strlen(curpath);

		if (lstat(curpath, &s) != 0) {
			msg(_("Could not stat path `%s\': %s"), curpath, strerror(errno));
			g_free(curpath);
			continue;
		}

		if (strchr(curpath, '\n')) {
			msg(_("Newline (\\n) found in path `%s\', skipping"), curpath);
			g_free(curpath);
			continue;
		}

		if (S_ISREG(s.st_mode) || S_ISLNK(s.st_mode)) {
			pop.f_name      = curpath;
			pop.f_name_size = curpath_len;
			if (opt_attr) {
				pop.f_uid       = read_attr_uid(pop.f_name, s.st_uid);
				pop.f_gid       = read_attr_gid(pop.f_name, s.st_gid);
			} else {
				pop.f_uid       = s.st_uid;
				pop.f_gid       = s.st_gid;
			}
			if (new_dir)
				pop.f_ctime     = NULL_DUMP;
			else
				pop.f_ctime     = s.st_ctime;
			pop.f_mode      = s.st_mode;
			pop.f_size      = s.st_size;
			pop.f_dev       = s.st_dev;
			pop.f_ino       = s.st_ino;
			pop.f_lnk	= 0;

			/* Exclude list */
			if (gfunc_regexp(regex_list, curpath)) {
				g_free(curpath);
				continue;
			}

			if (opt_nobackup && !strcmp(dent->d_name, NOBACKUP)) {
				/* return after seeing .nobackup */
				if (opt_verbose > 0) {
					msg(_("%s found in '%s\'"), NOBACKUP, path);
				}
				/* remove all files found in this path */
				rp.tree = t;
				rp.len  = strlen(path);
				rp.path = path;
				g_tree_foreach(t, gfunc_remove_path, (gpointer)&rp);
				/* add .nobackup back in */
				g_tree_insert(t, (gpointer) entry_dup(&pop), VALUE);
				/* the tree holds its own copy; release the path and
				 * the sub-directories that will not be visited */
				g_free(curpath);
				while (d > 0)
					entry_free(dirstack[--d]);
				g_free(dirstack);
				closedir(dir);
				return;
			}

			/* names built below are only needed until entry_dup()
			 * has copied them, remember them so they can be freed */
			hardlink_name = NULL;
			symlink_name  = NULL;

			/* hardlinks */
			if (s.st_nlink > 1) {
				if (( lnk = hardlink(linkhash, &pop))) {
					/* we got a match back */
					pop.f_size = strlen(pop.f_name);  /* old name length */
					hardlink_name = g_strdup_printf("%s -> %s", pop.f_name, lnk);
					pop.f_lnk = 1;
					pop.f_name = hardlink_name;
					pop.f_name_size = strlen(pop.f_name);
				}
			}
			/* symlinks; also put the -> name in f_name */
			/* NOTE(review): if the hardlink branch above matched,
			 * pop.f_name is already "name -> target" here and the
			 * readlink() is done on that string - confirm whether
			 * this combination is meant to be handled */
			if (S_ISLNK(s.st_mode)) {
				char buf[BUFSIZE + 1]; 
				ssize_t i;
				if ((i = readlink(pop.f_name, buf, BUFSIZE)) == -1) {
					msg(_("Error reading link `%s\': %s"), pop.f_name, strerror(errno));
				} else {
					/* readlink() does not terminate the string */
					buf[i] = '\0';
					pop.f_size = strlen(pop.f_name); /* old name length */
					symlink_name = g_strdup_printf("%s -> %s", pop.f_name, buf);
					pop.f_name = symlink_name;
					pop.f_name_size = strlen(pop.f_name);
				}
			}

			g_tree_insert(t, (gpointer) entry_dup(&pop), VALUE);
			g_free(symlink_name);
			g_free(hardlink_name);
			g_free(curpath);
			continue;
		} else if(S_ISDIR(s.st_mode)) {
			/* one filesystem */
			if (opt_onefilesystem && s.st_dev != current_dev) {
				msg(_("Walking into different filesystem"));
				g_free(curpath);
				continue;
			}
			/* Exclude list */
			if (gfunc_regexp(regex_list, curpath)) {
				g_free(curpath);
				continue;
			}

			directory = g_malloc(sizeof(struct entry));
			/* the stack entry takes over curpath, it is released by
			 * entry_free() */
			directory->f_name       = curpath;
			directory->f_name_size  = curpath_len;
			if (opt_attr) {
				directory->f_uid = 
					read_attr_uid(directory->f_name, s.st_uid);
				directory->f_gid = 
					read_attr_gid(directory->f_name, s.st_gid);
			} else {
				directory->f_uid = s.st_uid;
				directory->f_gid = s.st_gid;
			}
			directory->f_ctime      = s.st_ctime;
			directory->f_mode       = s.st_mode;
			directory->f_size       = s.st_size;
			directory->f_dev        = s.st_dev;
			directory->f_ino        = s.st_ino;
			directory->f_lnk        = 0;

			/* new_dir is deliberately left alone here: it describes
			 * the directory being read and is still needed for the
			 * files that follow. Whether this sub-directory is new
			 * is decided from its f_ctime when it is crawled. */

			/* grow the stack by another chunk when it is full */
			if (d == dstack_cnt * D_STACKSIZE) {
				dstack_cnt++;
				dirstack = g_realloc(dirstack, 
						dstack_cnt * D_STACKSIZE * 
						sizeof(struct entry *));
			}
			dirstack[d++] = directory;
			continue;
		} else {
			if (opt_verbose > 0) {
				msg(_("Neither file nor directory `%s\'"), curpath);
			}
			g_free(curpath);
		}
	}
	closedir(dir);

	while (d > 0) {
		directory = dirstack[--d]; 
		g_tree_insert(t, (gpointer) entry_dup(directory), VALUE);
		/* recurse */
		/* potentially expensive operation. Better would be to when we hit
		 * .nobackup to go up the tree and delete some nodes.... or not */
		dir_crawl(t, linkhash, directory->f_name,
				directory->f_ctime > opt_timestamp);
		entry_free(directory);
	}
	g_free(dirstack);
	return;
}