File: stats.c

package info (click to toggle)
dupd 1.7.3-1
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 12,688 kB
  • sloc: ansic: 8,381; sh: 879; makefile: 121; perl: 58
file content (270 lines) | stat: -rw-r--r-- 8,352 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/*
  Copyright 2012-2018 Jyri J. Virkki <jyri@virkki.com>

  This file is part of dupd.

  dupd is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  dupd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with dupd.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <assert.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "main.h"
#include "stats.h"
#include "utils.h"

// Held by inc_stats_read_buffers_allocated() and
// dec_stats_read_buffers_allocated() while they update
// stats_read_buffers_allocated.
pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
// Held by the increase_*() helpers and update_open_files() in this file
// while they update their respective counters.
pthread_mutex_t counters_lock = PTHREAD_MUTEX_INITIALIZER;

// Storage for the global statistics variables.
//
// Most of these are only defined here; they are not read or written by
// any function in this file. Comments below are limited to what this
// file itself shows.

// Arrays dimensioned by ROUNDS and/or MAX_HASHER_THREADS (both defined
// outside this file). The initializers list two values per dimension.
// NOTE(review): the -1 initial values presumably mean "not yet set";
// confirm against the code which updates them.
int stats_sets_processed[ROUNDS] = { 0,0 };
int stats_sets_dup_done[ROUNDS] = { 0,0 };
int stats_sets_dup_not[ROUNDS] = { 0,0 };
int stats_sets_full_read[ROUNDS] = { 0,0 };
int stats_sets_part_read[ROUNDS] = { 0,0 };
long stats_round_start[ROUNDS] = { -1,-1 };
int stats_round_duration[ROUNDS] = { -1,-1 };
// Printed by report_stats() and written out by save_stats().
int stats_duplicate_groups = 0;
int stats_reader_loops[ROUNDS] = { 0,0 };
int stats_hasher_loops[ROUNDS][MAX_HASHER_THREADS] = { {0,0}, {0,0} };
int stats_hasher_queue_len[MAX_HASHER_THREADS] = { 0,0 };

long stats_process_start = -1;
long stats_process_duration = -1;

uint64_t stats_total_bytes = 0;
uint64_t stats_total_bytes_read = 0;
uint64_t stats_total_bytes_hashed = 0;
uint64_t stats_comparison_bytes_read = 0;
uint32_t stats_max_pathlist = 0;
uint64_t stats_max_pathlist_size = 0;
uint32_t stats_path_list_entries = 0;
int stats_most_dups = 0;
// report_stats() prints the "Run 'dupd report'" hint only when this is > 0
// (and write_db is set).
int stats_duplicate_files = 0;

int stats_full_hash_first = 0;
int stats_full_hash_second = 0;
int stats_partial_hash_second = 0;
int stats_one_block_hash_first = 0;


int stats_size_list_done = 0;
int stats_three_file_compare = 0;
int stats_two_file_compare = 0;
int stats_uniques_saved = 0;
long stats_size_list_avg = 0;

int stats_files_ignored = 0;
int stats_files_error = 0;
long stats_time_scan = -1;
long stats_time_process = 0;
long stats_time_total = 0;
// report_stats() subtracts this from get_current_time_millis() to obtain
// the elapsed time it prints.
long stats_main_start = 0;
int path_buffer_realloc = 0;
int stats_hashlist_path_realloc = 0;
int stats_hash_list_len_inc = 0;
int scan_list_usage_max = 0;
int scan_list_resizes = 0;
// Adjusted under stats_lock by inc_stats_read_buffers_allocated() and
// dec_stats_read_buffers_allocated().
uint64_t stats_read_buffers_allocated = 0;
int stats_flusher_active = 0;
// Both fiemap counters are written to the stats file by save_stats().
uint32_t stats_fiemap_total_blocks = 0;
uint32_t stats_fiemap_zero_blocks = 0;

// Incremented under counters_lock by increase_sets_first_read().
uint32_t count_sets_first_read = 0;
uint32_t count_files_completed = 0;
// Incremented under counters_lock by increase_sets_first_read_completed().
uint32_t stats_sets_first_read_completed = 0;



// Keep from here after revamp
//
// All of the s_* counters below are read by report_stats() when it prints
// the run summary. s_files_completed_dups and s_files_completed_unique are
// updated under counters_lock by increase_dup_counter() and
// increase_unique_counter(); the others are not modified in this file.
uint32_t s_stats_size_list_count = 0;   // Total size sets processed

uint32_t s_total_files_seen = 0;        // All file entries seen during scan
uint32_t s_files_skip_error = 0;        // Files skipped due to error
uint32_t s_files_skip_notfile = 0;      // Files skipped, not a file
uint32_t s_files_skip_badsep = 0;       // Files skipped, separator conflict
uint32_t s_files_cant_read = 0;         // Files skipped, can't read
uint32_t s_files_hl_skip = 0;           // Files skipped, hardlink-is-unique
uint32_t s_files_too_small = 0;         // Files skipped, too small
uint32_t s_files_in_sizetree = 0;       // Files added to size tree
uint32_t s_files_processed = 0;         // Files entered to path list
uint32_t s_files_completed_dups = 0;    // Files processed, found to be dups
uint32_t s_files_completed_unique = 0;  // Files processed, found to be unique

// Adjusted (by a signed delta) under counters_lock by update_open_files().
int current_open_files = 0;


/** ***************************************************************************
 * Public function, see header file.
 *
 * Prints the end-of-run summary to stdout:
 *
 *  - Inside LOG_BASE: the number of duplicate files and groups plus the
 *    elapsed time since stats_main_start, and a hint to run 'dupd report'
 *    when write_db is set and stats_duplicate_files is positive.
 *  - Inside LOG_MORE: a breakdown of the file counters gathered during
 *    the run.
 *
 * Finally cross-checks the derived files_accepted value against the size
 * tree counter and terminates the process with exit(1) on a mismatch.
 *
 */
void report_stats()
{
  LOG_BASE {
    printf("\n");
    char timebuf[20];
    time_string(timebuf, 20, get_current_time_millis() - stats_main_start);
    // s_files_completed_dups is a uint32_t, so it must be printed with
    // PRIu32; stats_duplicate_groups is a plain int and keeps %d.
    printf("Total duplicates: %" PRIu32 " files in %d groups in %s\n",
           s_files_completed_dups, stats_duplicate_groups, timebuf);
    if (write_db && stats_duplicate_files > 0) {
      printf("Run 'dupd report' to list duplicates.\n");
    }
  }

  // Files seen during the scan minus every category of skipped file.
  uint32_t files_accepted = s_total_files_seen - s_files_too_small -
    s_files_skip_notfile - s_files_skip_error - s_files_skip_badsep -
    s_files_hl_skip;

  // Files in the size tree which never entered a path list.
  uint32_t unique_files = s_files_in_sizetree - s_files_processed;

  LOG_MORE {
    printf("\n");
    printf("Total files seen: %" PRIu32 "\n", s_total_files_seen);
    printf(" (too small: %" PRIu32 ", not file: %"
           PRIu32 ", errors: %" PRIu32 ", skip: %" PRIu32 ", hl_skip: %"
           PRIu32 ")\n",
           s_files_too_small, s_files_skip_notfile,
           s_files_skip_error, s_files_skip_badsep, s_files_hl_skip);

    printf("Files queued for processing: %" PRIu32 " in %" PRIu32 " sets\n",
           files_accepted, s_stats_size_list_count);


    printf(" (files with unique size: %" PRIu32 ")\n", unique_files);
    printf("Total files to process: %" PRIu32 "\n", s_files_processed);
    printf(" Duplicate files: %" PRIu32 "\n", s_files_completed_dups);
    printf(" Unique files: %" PRIu32 "\n", s_files_completed_unique);
    printf(" Unable to read: %" PRIu32 "\n", s_files_cant_read);
    if (hardlink_is_unique) {
      printf(" Skipped hardlinks: %" PRIu32 "\n", s_files_hl_skip);
    }
  }

  // Sanity check on the bookkeeping: abort the process if the two
  // independently maintained totals disagree.
  if (files_accepted != s_files_in_sizetree - s_files_hl_skip) {
    printf("error: mismatch files_accepted: %" PRIu32
           " != files in sizetree: %" PRIu32 "\n",
           files_accepted, s_files_in_sizetree - s_files_hl_skip);
    exit(1);
  }
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Appends a block of "name value" lines, followed by an empty line, to
 * the file named by stats_file.
 *
 * If the file cannot be opened (or closing it reports an error) a message
 * is written to stderr and the function returns; the process is not
 * terminated.
 *
 */
void save_stats()
{
  FILE * fp = fopen(stats_file, "a");
  if (fp == NULL) {
    // Without this check every fprintf() below would be handed a NULL
    // stream.
    fprintf(stderr, "error: unable to open stats file %s\n", stats_file);
    return;
  }

  // TODO needs cleaning up
  fprintf(fp, "using_fiemap %d\n", using_fiemap);
  fprintf(fp, "fiemap_total_blocks %" PRIu32 "\n", stats_fiemap_total_blocks);
  fprintf(fp, "fiemap_zero_blocks %" PRIu32 "\n", stats_fiemap_zero_blocks);
  fprintf(fp, "duplicate_files %" PRIu32 "\n", s_files_completed_dups);
  // stats_duplicate_groups is a plain int, so %d is the matching conversion.
  fprintf(fp, "duplicate_groups %d\n", stats_duplicate_groups);

  fprintf(fp, "\n");

  // fclose() flushes buffered output, so a failure here means the data
  // may not have reached the file.
  if (fclose(fp) != 0) {
    fprintf(stderr, "error: unable to write stats file %s\n", stats_file);
  }
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Adds 'bytes' to stats_read_buffers_allocated while holding stats_lock.
 *
 */
void inc_stats_read_buffers_allocated(int bytes)
{
  d_mutex_lock(&stats_lock, "increasing buffers");
  {
    stats_read_buffers_allocated = stats_read_buffers_allocated + bytes;
  }
  d_mutex_unlock(&stats_lock);
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Subtracts 'bytes' from stats_read_buffers_allocated while holding
 * stats_lock.
 *
 */
void dec_stats_read_buffers_allocated(int bytes)
{
  d_mutex_lock(&stats_lock, "decreasing buffers");
  {
    stats_read_buffers_allocated = stats_read_buffers_allocated - bytes;
  }
  d_mutex_unlock(&stats_lock);
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Adds 'n' to s_files_completed_unique while holding counters_lock.
 *
 */
void increase_unique_counter(int n)
{
  d_mutex_lock(&counters_lock, "counters");
  {
    s_files_completed_unique = s_files_completed_unique + n;
  }
  d_mutex_unlock(&counters_lock);
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Adds 'n' to s_files_completed_dups while holding counters_lock.
 *
 */
void increase_dup_counter(int n)
{
  d_mutex_lock(&counters_lock, "counters");
  {
    s_files_completed_dups = s_files_completed_dups + n;
  }
  d_mutex_unlock(&counters_lock);
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Increments count_sets_first_read by one while holding counters_lock.
 *
 */
void increase_sets_first_read()
{
  d_mutex_lock(&counters_lock, "counters");
  {
    count_sets_first_read += 1;
  }
  d_mutex_unlock(&counters_lock);
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Increments stats_sets_first_read_completed by one while holding
 * counters_lock.
 *
 */
void increase_sets_first_read_completed()
{
  d_mutex_lock(&counters_lock, "counters");
  {
    stats_sets_first_read_completed += 1;
  }
  d_mutex_unlock(&counters_lock);
}


/** ***************************************************************************
 * Public function, see header file.
 *
 * Adds 'n' to current_open_files while holding counters_lock.
 *
 */
void update_open_files(int n)
{
  d_mutex_lock(&counters_lock, "counters");
  {
    current_open_files = current_open_files + n;
  }
  d_mutex_unlock(&counters_lock);
}