File dupdump.c changed (mode: 100644) (index 665ce38..692f9be) |
... |
... |
int main(int argc, char *argv[]) |
97 |
97 |
int c; |
int c; |
98 |
98 |
FILE *out; |
FILE *out; |
99 |
99 |
struct timespec start, end; |
struct timespec start, end; |
100 |
|
time_t diff; |
|
101 |
100 |
|
|
102 |
101 |
while ((c = getopt_long(argc, argv, "zi:o:vdh", options, &options_index)) != -1) { |
while ((c = getopt_long(argc, argv, "zi:o:vdh", options, &options_index)) != -1) { |
103 |
102 |
switch (c) { |
switch (c) { |
|
... |
... |
int main(int argc, char *argv[]) |
129 |
128 |
if (optind >= argc) { |
if (optind >= argc) { |
130 |
129 |
usage(); |
usage(); |
131 |
130 |
fprintf(stderr, "Error: no dirs to scan specified!\n"); |
fprintf(stderr, "Error: no dirs to scan specified!\n"); |
|
131 |
|
fclose(out); |
132 |
132 |
return 1; |
return 1; |
133 |
133 |
} |
} |
134 |
134 |
|
|
|
... |
... |
int main(int argc, char *argv[]) |
181 |
181 |
clock_gettime(CLOCK_MONOTONIC, &end); |
clock_gettime(CLOCK_MONOTONIC, &end); |
182 |
182 |
|
|
183 |
183 |
if (verbose) { |
if (verbose) { |
|
184 |
|
time_t diff; |
|
185 |
|
|
184 |
186 |
dump_stats(); |
dump_stats(); |
185 |
187 |
|
|
186 |
188 |
diff = end.tv_sec - start.tv_sec; |
diff = end.tv_sec - start.tv_sec; |
File store.c changed (mode: 100644) (index 6861842..a87737e) |
... |
... |
static int sha1(const char *file, const size_t len, unsigned char *out) |
94 |
94 |
int fd; |
int fd; |
95 |
95 |
unsigned char buf[65536]; |
unsigned char buf[65536]; |
96 |
96 |
SHA_CTX c; |
SHA_CTX c; |
97 |
|
ssize_t n; |
|
98 |
97 |
size_t bytes; |
size_t bytes; |
99 |
98 |
|
|
100 |
99 |
fd = open(file, O_RDONLY); |
fd = open(file, O_RDONLY); |
|
... |
... |
static int sha1(const char *file, const size_t len, unsigned char *out) |
104 |
103 |
SHA1_Init(&c); |
SHA1_Init(&c); |
105 |
104 |
bytes = 0; |
bytes = 0; |
106 |
105 |
while (bytes < len) { |
while (bytes < len) { |
|
106 |
|
ssize_t n; |
|
107 |
|
|
107 |
108 |
n = read(fd, buf, sizeof(buf)); |
n = read(fd, buf, sizeof(buf)); |
108 |
109 |
if (n == -1) { |
if (n == -1) { |
109 |
110 |
close(fd); |
close(fd); |
|
... |
... |
static void file_mark_up_no_dup_possible(struct file_node *f) |
595 |
596 |
*/ |
*/ |
596 |
597 |
static int compare_file_range(struct file_node *a, struct file_node *b) |
static int compare_file_range(struct file_node *a, struct file_node *b) |
597 |
598 |
{ |
{ |
598 |
|
int err; |
|
599 |
|
struct file_node *q, *p, *dups, *p_last; |
|
|
599 |
|
struct file_node *q, *p, *dups; |
600 |
600 |
|
|
601 |
601 |
if (debug) { |
if (debug) { |
602 |
602 |
fprintf(stderr, "compare_file_range:\n"); |
fprintf(stderr, "compare_file_range:\n"); |
|
... |
... |
static int compare_file_range(struct file_node *a, struct file_node *b) |
616 |
616 |
|
|
617 |
617 |
p = a; |
p = a; |
618 |
618 |
while (p != b->hash_next) { |
while (p != b->hash_next) { |
|
619 |
|
struct file_node *p_last; |
|
620 |
|
|
619 |
621 |
q = p->hash_next; |
q = p->hash_next; |
620 |
622 |
if (q == NULL) |
if (q == NULL) |
621 |
623 |
break; |
break; |
622 |
624 |
|
|
623 |
625 |
p_last = p->duplicates; |
p_last = p->duplicates; |
624 |
626 |
while (q != b->hash_next) { |
while (q != b->hash_next) { |
|
627 |
|
int err; |
|
628 |
|
|
625 |
629 |
/* We do not want to break ->duplicates */ |
/* We do not want to break ->duplicates */ |
626 |
630 |
if (q->skip_compare == 1) { |
if (q->skip_compare == 1) { |
627 |
631 |
q = q->hash_next; |
q = q->hash_next; |
|
... |
... |
static int dir_files_hash(unsigned char *hash, unsigned char *fn, |
783 |
787 |
struct file_node **u; |
struct file_node **u; |
784 |
788 |
unsigned long i, mem; |
unsigned long i, mem; |
785 |
789 |
SHA_CTX c, fnh; |
SHA_CTX c, fnh; |
786 |
|
char *base_name; |
|
787 |
790 |
|
|
788 |
791 |
if (d->files == NULL) { |
if (d->files == NULL) { |
789 |
792 |
memset(hash, 0, SHA_DIGEST_LENGTH); |
memset(hash, 0, SHA_DIGEST_LENGTH); |
|
... |
... |
static int dir_files_hash(unsigned char *hash, unsigned char *fn, |
817 |
820 |
|
|
818 |
821 |
i = 0; |
i = 0; |
819 |
822 |
while (i < d->no_of_files) { |
while (i < d->no_of_files) { |
|
823 |
|
char *base_name; |
|
824 |
|
|
820 |
825 |
SHA1_Update(&c, u[i]->sha1_full, SHA_DIGEST_LENGTH); |
SHA1_Update(&c, u[i]->sha1_full, SHA_DIGEST_LENGTH); |
821 |
826 |
|
|
822 |
827 |
base_name = basename(u[i]->name); |
base_name = basename(u[i]->name); |
|
... |
... |
static long long dir_build_hash(struct dir_node *d) |
941 |
946 |
unsigned char file_names_sha1[SHA_DIGEST_LENGTH]; |
unsigned char file_names_sha1[SHA_DIGEST_LENGTH]; |
942 |
947 |
int err; |
int err; |
943 |
948 |
long long no_of_possible_dirs = 0; |
long long no_of_possible_dirs = 0; |
944 |
|
long long ret; |
|
945 |
|
char *base_name; |
|
946 |
949 |
|
|
947 |
950 |
if (debug) |
if (debug) |
948 |
951 |
fprintf(stderr, "DEBUG: %s [%s] no_dup_possible=%u\n", |
fprintf(stderr, "DEBUG: %s [%s] no_dup_possible=%u\n", |
|
... |
... |
static long long dir_build_hash(struct dir_node *d) |
966 |
969 |
/* Compute hashes */ |
/* Compute hashes */ |
967 |
970 |
subdir = d->subdirs; |
subdir = d->subdirs; |
968 |
971 |
while (subdir) { |
while (subdir) { |
|
972 |
|
long long ret; |
|
973 |
|
|
969 |
974 |
ret = dir_build_hash(subdir); |
ret = dir_build_hash(subdir); |
970 |
975 |
if (ret == -1) |
if (ret == -1) |
971 |
976 |
return -1; |
return -1; |
|
... |
... |
static long long dir_build_hash(struct dir_node *d) |
988 |
993 |
/* At the same time, we build hash of file names */ |
/* At the same time, we build hash of file names */ |
989 |
994 |
subdir = d->subdirs; |
subdir = d->subdirs; |
990 |
995 |
while (subdir) { |
while (subdir) { |
|
996 |
|
char *base_name; |
|
997 |
|
|
991 |
998 |
SHA1_Update(&c, subdir->sha1, SHA_DIGEST_LENGTH); |
SHA1_Update(&c, subdir->sha1, SHA_DIGEST_LENGTH); |
992 |
999 |
|
|
993 |
1000 |
base_name = basename(subdir->name); |
base_name = basename(subdir->name); |
|
... |
... |
static unsigned long long dir_find_dups_populate_list(struct dir_node **u, |
1055 |
1062 |
int dir_find_dups(void) |
int dir_find_dups(void) |
1056 |
1063 |
{ |
{ |
1057 |
1064 |
unsigned long long mem, i, j, first, last; |
unsigned long long mem, i, j, first, last; |
1058 |
|
struct dir_node *d; |
|
1059 |
|
long long err; |
|
1060 |
1065 |
char dump[SHA_DIGEST_LENGTH * 2 + 1]; |
char dump[SHA_DIGEST_LENGTH * 2 + 1]; |
1061 |
1066 |
|
|
1062 |
1067 |
dir_chain_len = 0; |
dir_chain_len = 0; |
|
... |
... |
int dir_find_dups(void) |
1065 |
1070 |
fprintf(stderr, "[*] dir_find_dups...\n"); |
fprintf(stderr, "[*] dir_find_dups...\n"); |
1066 |
1071 |
|
|
1067 |
1072 |
for (i = 0; i < dir_info_count; i++) { |
for (i = 0; i < dir_info_count; i++) { |
|
1073 |
|
long long err; |
|
1074 |
|
|
1068 |
1075 |
err = dir_build_hash(dir_info[i]); |
err = dir_build_hash(dir_info[i]); |
1069 |
1076 |
if (err == -1) |
if (err == -1) |
1070 |
1077 |
return -1; |
return -1; |
|
... |
... |
int dir_find_dups(void) |
1085 |
1092 |
|
|
1086 |
1093 |
j = 0; |
j = 0; |
1087 |
1094 |
for (i = 0; i < dir_info_count; i++) { |
for (i = 0; i < dir_info_count; i++) { |
|
1095 |
|
struct dir_node *d; |
|
1096 |
|
|
1088 |
1097 |
d = dir_info[i]; |
d = dir_info[i]; |
1089 |
1098 |
j = dir_find_dups_populate_list(dir_chain, j, d); |
j = dir_find_dups_populate_list(dir_chain, j, d); |
1090 |
1099 |
|
|
|
... |
... |
__cold static void file_dump_duplicates(struct file_node *f, |
1343 |
1352 |
__cold void dump_duplicates(const unsigned long long min_size, const unsigned int zero) |
__cold void dump_duplicates(const unsigned long long min_size, const unsigned int zero) |
1344 |
1353 |
{ |
{ |
1345 |
1354 |
unsigned int i; |
unsigned int i; |
1346 |
|
struct dir_node *d; |
|
1347 |
1355 |
struct file_node *f; |
struct file_node *f; |
1348 |
1356 |
unsigned int hash; |
unsigned int hash; |
1349 |
1357 |
|
|
1350 |
1358 |
if (debug) |
if (debug) |
1351 |
1359 |
fprintf(stderr, "[*] Dump duplicated dirs...\n"); |
fprintf(stderr, "[*] Dump duplicated dirs...\n"); |
1352 |
1360 |
for (i = 0; i < dir_chain_len; i++) { |
for (i = 0; i < dir_chain_len; i++) { |
|
1361 |
|
struct dir_node *d; |
|
1362 |
|
|
1353 |
1363 |
if (debug) |
if (debug) |
1354 |
1364 |
fprintf(stderr, "\tdump_duplicates[%u]...\n", i); |
fprintf(stderr, "\tdump_duplicates[%u]...\n", i); |
1355 |
1365 |
d = dir_chain[i]; |
d = dir_chain[i]; |