File dupdump.c changed (mode: 100644) (index f10b7b7..b293669) |
9 |
9 |
#include <stdlib.h> |
#include <stdlib.h> |
10 |
10 |
#include <string.h> |
#include <string.h> |
11 |
11 |
#include <errno.h> |
#include <errno.h> |
|
12 |
|
#include <getopt.h> |
12 |
13 |
|
|
13 |
14 |
#include "store.h" |
#include "store.h" |
14 |
15 |
|
|
15 |
16 |
static off_t min_size = 0; |
static off_t min_size = 0; |
16 |
|
static int verbose = 10; |
|
|
17 |
|
static int verbose = 0; |
|
18 |
|
static int debug = 0; |
|
19 |
|
|
|
20 |
|
static struct option options[] = |
|
21 |
|
{ |
|
22 |
|
{"min-size", required_argument, NULL, 'i'}, |
|
23 |
|
{"verbose", no_argument, NULL, 'v'}, |
|
24 |
|
{"debug", no_argument, NULL, 'd'}, |
|
25 |
|
{NULL, 0, NULL, 0} |
|
26 |
|
}; |
|
27 |
|
|
|
28 |
|
static void usage(void) |
|
29 |
|
{ |
|
30 |
|
fprintf(stderr, "Usage [options] <dir1> [<dir2>] ...\n" |
|
31 |
|
" --min-size -i Ignore files under this size (default 1)\n" |
|
32 |
|
" --verbose -v Be more verbose\n" |
|
33 |
|
" --debug -d Print debug information\n" |
|
34 |
|
); |
|
35 |
|
} |
17 |
36 |
|
|
18 |
37 |
static int callback(const char *fpath, const struct stat *s, int tflag, |
static int callback(const char *fpath, const struct stat *s, int tflag, |
19 |
38 |
struct FTW *ftwbuf) |
struct FTW *ftwbuf) |
|
... |
... |
static int callback(const char *fpath, const struct stat *s, int tflag, |
32 |
51 |
if ((!S_ISREG(s->st_mode)) && (!S_ISDIR(s->st_mode))) |
if ((!S_ISREG(s->st_mode)) && (!S_ISDIR(s->st_mode))) |
33 |
52 |
return 0; |
return 0; |
34 |
53 |
|
|
35 |
|
/* Ignore wat was already seen */ |
|
|
54 |
|
/* Ignore what was already seen */ |
36 |
55 |
if (dev_ino_seen(tflag, s->st_dev, s->st_ino) == 1) { |
if (dev_ino_seen(tflag, s->st_dev, s->st_ino) == 1) { |
37 |
56 |
if (verbose >= 3) |
if (verbose >= 3) |
38 |
57 |
fprintf(stderr, "\tINFO: Object skiped because" |
fprintf(stderr, "\tINFO: Object skiped because" |
|
... |
... |
static int callback(const char *fpath, const struct stat *s, int tflag, |
62 |
81 |
|
|
63 |
82 |
int main(int argc, char *argv[]) |
int main(int argc, char *argv[]) |
64 |
83 |
{ |
{ |
65 |
|
int flags = 0, i; |
|
|
84 |
|
int flags = 0; |
66 |
85 |
int err; |
int err; |
67 |
|
|
|
68 |
|
if (argc < 2) { |
|
69 |
|
fprintf(stderr, "Usage: dumpdump dir1 [dir2] [dir3]\n"); |
|
70 |
|
return 1; |
|
|
86 |
|
int options_index = 0; |
|
87 |
|
char c; |
|
88 |
|
|
|
89 |
|
while ((c = getopt_long(argc, argv, "i:vdh", options, &options_index)) != -1) { |
|
90 |
|
switch (c) { |
|
91 |
|
case 'i': min_size = strtoul(optarg, NULL, 10); break; |
|
92 |
|
case 'v': verbose = 1; break; |
|
93 |
|
case 'd': debug = 1; break; |
|
94 |
|
default: |
|
95 |
|
usage(); |
|
96 |
|
return 1; |
|
97 |
|
} |
71 |
98 |
} |
} |
72 |
99 |
|
|
73 |
100 |
flags |= FTW_PHYS; /* Do not follow symlinks */ |
flags |= FTW_PHYS; /* Do not follow symlinks */ |
74 |
101 |
flags |= FTW_ACTIONRETVAL; /* To skip hierarchies */ |
flags |= FTW_ACTIONRETVAL; /* To skip hierarchies */ |
75 |
102 |
|
|
76 |
|
i = 1; |
|
77 |
|
while (argv[i]) { |
|
78 |
|
fprintf(stderr, "Processing dir %s...\n", argv[i]); |
|
79 |
|
err = nftw(argv[i], callback, 100, flags); |
|
|
103 |
|
if (optind >= argc) { |
|
104 |
|
usage(); |
|
105 |
|
fprintf(stderr, "No dirs to scan specified!\n"); |
|
106 |
|
return 1; |
|
107 |
|
} |
|
108 |
|
|
|
109 |
|
set_debug(debug); |
|
110 |
|
|
|
111 |
|
if (verbose) |
|
112 |
|
fprintf(stderr, "Scanning for duplicates, min-size %lld\n", |
|
113 |
|
min_size); |
|
114 |
|
|
|
115 |
|
while (optind < argc) { |
|
116 |
|
if (verbose) |
|
117 |
|
fprintf(stderr, "Processing dir %s...\n", argv[optind]); |
|
118 |
|
|
|
119 |
|
err = nftw(argv[optind], callback, 100, flags); |
80 |
120 |
if (err == -1) { |
if (err == -1) { |
81 |
121 |
fprintf(stderr, "Cannot search dir [%s] [%d] (%s)\n", |
fprintf(stderr, "Cannot search dir [%s] [%d] (%s)\n", |
82 |
|
argv[i], err, strerror(errno)); |
|
|
122 |
|
argv[optind], err, strerror(errno)); |
83 |
123 |
return 1; |
return 1; |
84 |
124 |
} |
} |
85 |
125 |
|
|
86 |
|
i++; |
|
|
126 |
|
optind++; |
87 |
127 |
} |
} |
88 |
128 |
|
|
89 |
|
if (verbose >= 2) |
|
|
129 |
|
if (debug) |
90 |
130 |
dump_files(); |
dump_files(); |
91 |
131 |
|
|
92 |
|
/* Check for file duplicates */ |
|
|
132 |
|
if (verbose) |
|
133 |
|
fprintf(stderr, "Find duplicate files...\n"); |
93 |
134 |
err = file_find_dups(); |
err = file_find_dups(); |
94 |
135 |
if (err != 0) { |
if (err != 0) { |
95 |
136 |
fprintf(stderr, "Error comparing files!\n"); |
fprintf(stderr, "Error comparing files!\n"); |
96 |
137 |
return 1; |
return 1; |
97 |
138 |
} |
} |
98 |
139 |
|
|
99 |
|
/* Check for dir duplicates */ |
|
|
140 |
|
if (verbose) |
|
141 |
|
fprintf(stderr, "Find duplicate dirs...\n"); |
100 |
142 |
err = dir_find_dups(); |
err = dir_find_dups(); |
101 |
143 |
if (err != 0) { |
if (err != 0) { |
102 |
144 |
fprintf(stderr, "Error comparing dirs!\n"); |
fprintf(stderr, "Error comparing dirs!\n"); |
103 |
145 |
return 1; |
return 1; |
104 |
146 |
} |
} |
105 |
147 |
|
|
106 |
|
dump_dirs(); |
|
|
148 |
|
if (debug) |
|
149 |
|
dump_dirs(); |
107 |
150 |
|
|
108 |
|
fprintf(stderr, "\nDUMP DUPLICATES...\n\n"); |
|
109 |
151 |
dump_duplicates(min_size); |
dump_duplicates(min_size); |
110 |
152 |
|
|
111 |
|
dump_stats(); |
|
|
153 |
|
if (verbose) |
|
154 |
|
dump_stats(); |
112 |
155 |
|
|
113 |
156 |
return 0; |
return 0; |
114 |
157 |
} |
} |
File store.c changed (mode: 100644) (index 0ca71a0..5d5b290) |
... |
... |
static unsigned int dir_info_count; |
37 |
37 |
static struct dir_node *dir_current[MAX_DEPTH]; |
static struct dir_node *dir_current[MAX_DEPTH]; |
38 |
38 |
static unsigned char sha1_zero[SHA_DIGEST_LENGTH]; |
static unsigned char sha1_zero[SHA_DIGEST_LENGTH]; |
39 |
39 |
static struct dev_ino *dev_ino_hash[DEV_INO_HASH_SIZE]; |
static struct dev_ino *dev_ino_hash[DEV_INO_HASH_SIZE]; |
|
40 |
|
static int debug = 0; |
|
41 |
|
|
|
42 |
|
/* ############### Misc functions ############### */ |
|
43 |
|
void set_debug(const unsigned int level) |
|
44 |
|
{ |
|
45 |
|
debug = level; |
|
46 |
|
} |
40 |
47 |
|
|
41 |
48 |
/* ############### Memory functions ############### */ |
/* ############### Memory functions ############### */ |
42 |
49 |
static void *xmalloc(size_t size) |
static void *xmalloc(size_t size) |
|
... |
... |
int compare_sha1(const unsigned char *a, const unsigned char *b) |
137 |
144 |
|
|
138 |
145 |
sha1_dump(sha1_a, a, 0); |
sha1_dump(sha1_a, a, 0); |
139 |
146 |
sha1_dump(sha1_b, a, 0); |
sha1_dump(sha1_b, a, 0); |
140 |
|
fprintf(stderr, "\t\tComparing [%s] with [%s]\n", sha1_a, sha1_b); |
|
141 |
147 |
return memcmp(a, b, SHA_DIGEST_LENGTH); |
return memcmp(a, b, SHA_DIGEST_LENGTH); |
142 |
148 |
} |
} |
143 |
149 |
|
|
|
... |
... |
static void dir_mark_do_not_dump(struct dir_node *d) |
484 |
490 |
struct file_node *file; |
struct file_node *file; |
485 |
491 |
struct dir_node *subdir; |
struct dir_node *subdir; |
486 |
492 |
|
|
487 |
|
fprintf(stderr, "DEBUG: dir_mark_do_not_dump(%s)\n", d->name); |
|
|
493 |
|
if (debug) |
|
494 |
|
fprintf(stderr, "DEBUG: dir_mark_do_not_dump(%s)\n", d->name); |
488 |
495 |
if ((d == NULL) || (d->do_not_dump == 1)) |
if ((d == NULL) || (d->do_not_dump == 1)) |
489 |
496 |
return; |
return; |
490 |
497 |
|
|
|
... |
... |
static void dir_mark_do_not_dump(struct dir_node *d) |
498 |
505 |
|
|
499 |
506 |
file = d->files; |
file = d->files; |
500 |
507 |
while (file) { |
while (file) { |
501 |
|
fprintf(stderr, "\tSet do_not_dump=1 on [%s]\n", file->name); |
|
|
508 |
|
if (debug) |
|
509 |
|
fprintf(stderr, "\tSet do_not_dump=1 on [%s]\n", file->name); |
502 |
510 |
file->do_not_dump = 1; |
file->do_not_dump = 1; |
503 |
511 |
file = file->next; |
file = file->next; |
504 |
512 |
} |
} |
|
... |
... |
static void file_mark_no_dup_possible(struct file_node *f) |
539 |
547 |
dir_mark_no_dup_possible(f->parent); |
dir_mark_no_dup_possible(f->parent); |
540 |
548 |
} |
} |
541 |
549 |
|
|
542 |
|
/* |
|
543 |
|
* Mark a file to not be dumped |
|
544 |
|
*/ |
|
545 |
|
static void file_mark_do_not_dump(struct file_node *f) |
|
546 |
|
{ |
|
547 |
|
if ((f == NULL) || (f->do_not_dump == 1)) |
|
548 |
|
return; |
|
549 |
|
|
|
550 |
|
f->do_not_dump = 1; |
|
551 |
|
} |
|
552 |
|
|
|
553 |
550 |
/* |
/* |
554 |
551 |
* Compare the same size files using hashes |
* Compare the same size files using hashes |
555 |
552 |
* @a - start of the chain of files with same size |
* @a - start of the chain of files with same size |
|
... |
... |
static int compare_file_range(struct file_node *a, struct file_node *b) |
562 |
559 |
int err; |
int err; |
563 |
560 |
struct file_node *q, *p, *dups, *p_last; |
struct file_node *q, *p, *dups, *p_last; |
564 |
561 |
|
|
565 |
|
fprintf(stderr, "compare_file_range:"); |
|
566 |
|
q = a; |
|
567 |
|
while (q != b->hash_next) { |
|
568 |
|
fprintf(stderr, " %s", q->name); |
|
569 |
|
q = q->hash_next; |
|
|
562 |
|
if (debug) { |
|
563 |
|
fprintf(stderr, "compare_file_range:"); |
|
564 |
|
q = a; |
|
565 |
|
while (q != b->hash_next) { |
|
566 |
|
fprintf(stderr, " %s", q->name); |
|
567 |
|
q = q->hash_next; |
|
568 |
|
} |
|
569 |
|
fprintf(stderr, ".\n"); |
570 |
570 |
} |
} |
571 |
|
fprintf(stderr, ".\n"); |
|
572 |
571 |
|
|
573 |
572 |
/* Mark all as unique */ |
/* Mark all as unique */ |
574 |
573 |
q = a; |
q = a; |
|
... |
... |
static int compare_file_range(struct file_node *a, struct file_node *b) |
592 |
591 |
} |
} |
593 |
592 |
|
|
594 |
593 |
err = compare_files(p, q); |
err = compare_files(p, q); |
595 |
|
fprintf(stderr, "COMPARING [%s] with [%s] = %d\n", p->name, q->name, err); |
|
|
594 |
|
if (debug) |
|
595 |
|
fprintf(stderr, "COMPARING [%s] with [%s] = %d\n", p->name, q->name, err); |
596 |
596 |
if (err == -1) |
if (err == -1) |
597 |
597 |
return -1; |
return -1; |
598 |
598 |
|
|
|
... |
... |
static int compare_file_range(struct file_node *a, struct file_node *b) |
607 |
607 |
p_last->duplicates = q; |
p_last->duplicates = q; |
608 |
608 |
p_last = q; |
p_last = q; |
609 |
609 |
|
|
610 |
|
fprintf(stderr, "\tp[%s]->duplicates: ", p->name); |
|
611 |
|
dups = p->duplicates; |
|
612 |
|
while (dups) { |
|
613 |
|
fprintf(stderr, " %s", dups->name); |
|
614 |
|
dups = dups->duplicates; |
|
|
610 |
|
if (debug) { |
|
611 |
|
fprintf(stderr, "\tp[%s]->duplicates: ", p->name); |
|
612 |
|
dups = p->duplicates; |
|
613 |
|
while (dups) { |
|
614 |
|
fprintf(stderr, " %s", dups->name); |
|
615 |
|
dups = dups->duplicates; |
|
616 |
|
} |
|
617 |
|
fprintf(stderr, "\n"); |
615 |
618 |
} |
} |
616 |
|
fprintf(stderr, "\n"); |
|
617 |
619 |
|
|
618 |
620 |
p->unique = 0; |
p->unique = 0; |
619 |
621 |
q->unique = 0; |
q->unique = 0; |
|
... |
... |
int file_find_dups(void) |
648 |
650 |
unsigned int hash; |
unsigned int hash; |
649 |
651 |
unsigned long long size; |
unsigned long long size; |
650 |
652 |
|
|
651 |
|
fprintf(stderr, "file_find_dups START...\n"); |
|
652 |
653 |
for (hash = 0; hash < HASH_SIZE; hash++) { |
for (hash = 0; hash < HASH_SIZE; hash++) { |
653 |
654 |
if (file_info[hash] == NULL) |
if (file_info[hash] == NULL) |
654 |
655 |
continue; |
continue; |
655 |
656 |
|
|
656 |
|
fprintf(stderr, "file_find_dups[%u]...\n", hash); |
|
|
657 |
|
if (debug) |
|
658 |
|
fprintf(stderr, "file_find_dups[%u]...\n", hash); |
657 |
659 |
|
|
658 |
660 |
/* We need at least 2 nodes */ |
/* We need at least 2 nodes */ |
659 |
661 |
if (file_info[hash]->hash_next == NULL) { |
if (file_info[hash]->hash_next == NULL) { |
|
... |
... |
int file_find_dups(void) |
685 |
687 |
first = last->hash_next; |
first = last->hash_next; |
686 |
688 |
} |
} |
687 |
689 |
|
|
688 |
|
fprintf(stderr, "Dump chain %u: ", hash); |
|
689 |
|
q = file_info[hash]; |
|
690 |
|
while (q) { |
|
691 |
|
fprintf(stderr, "%s(", q->name); |
|
692 |
|
dups = q->duplicates; |
|
693 |
|
while(dups) { |
|
694 |
|
fprintf(stderr, " %s", dups->name); |
|
695 |
|
dups = dups->duplicates; |
|
|
690 |
|
if (debug) { |
|
691 |
|
fprintf(stderr, "Dump chain %u: ", hash); |
|
692 |
|
q = file_info[hash]; |
|
693 |
|
while (q) { |
|
694 |
|
fprintf(stderr, "%s(", q->name); |
|
695 |
|
dups = q->duplicates; |
|
696 |
|
while(dups) { |
|
697 |
|
fprintf(stderr, " %s", dups->name); |
|
698 |
|
dups = dups->duplicates; |
|
699 |
|
} |
|
700 |
|
fprintf(stderr, ") -> "); |
|
701 |
|
q = q->hash_next; |
696 |
702 |
} |
} |
697 |
|
fprintf(stderr, ") -> "); |
|
698 |
|
q = q->hash_next; |
|
|
703 |
|
fprintf(stderr, "\n"); |
699 |
704 |
} |
} |
700 |
|
fprintf(stderr, "\n"); |
|
701 |
705 |
} |
} |
702 |
706 |
|
|
703 |
|
fprintf(stderr, "file_find_dups ENDS...\n"); |
|
704 |
707 |
return 0; |
return 0; |
705 |
708 |
} |
} |
706 |
709 |
|
|
|
... |
... |
static int dir_files_hash(unsigned char *hash, struct dir_node *d) |
738 |
741 |
struct file_node **u; |
struct file_node **u; |
739 |
742 |
unsigned int i, mem; |
unsigned int i, mem; |
740 |
743 |
SHA_CTX c; |
SHA_CTX c; |
741 |
|
char dump[SHA_DIGEST_LENGTH * 2 + 1]; |
|
742 |
744 |
|
|
743 |
745 |
if (d->files == NULL) { |
if (d->files == NULL) { |
744 |
746 |
memset(hash, 0, SHA_DIGEST_LENGTH); |
memset(hash, 0, SHA_DIGEST_LENGTH); |
|
... |
... |
static int dir_files_hash(unsigned char *hash, struct dir_node *d) |
760 |
762 |
|
|
761 |
763 |
qsort(u, d->no_of_files, sizeof(struct file_node *), file_compare_hashes); |
qsort(u, d->no_of_files, sizeof(struct file_node *), file_compare_hashes); |
762 |
764 |
|
|
763 |
|
/* |
|
764 |
|
fprintf(stderr, "DEBUG: dump after qsort [%s]\n", d->name); |
|
765 |
|
for (i = 0; i < d->no_of_files; i++) { |
|
766 |
|
sha1_dump(dump, u[i]->sha1_full, 0); |
|
767 |
|
fprintf(stderr, "DEBUG: %s\t%u\t%s\n", dump, u[i]->parent->level, u[i]->name); |
|
768 |
|
} |
|
769 |
|
*/ |
|
770 |
|
|
|
771 |
765 |
SHA1_Init(&c); |
SHA1_Init(&c); |
772 |
766 |
|
|
773 |
767 |
i = 0; |
i = 0; |
|
... |
... |
static long long dir_build_hash(struct dir_node *d) |
795 |
789 |
long long no_of_possible_dirs = 0; |
long long no_of_possible_dirs = 0; |
796 |
790 |
long long ret; |
long long ret; |
797 |
791 |
|
|
798 |
|
fprintf(stderr, "DEBUG: %s [%s] no_dup_possible=%u\n", |
|
799 |
|
__FUNCTION__, d->name, d->no_dup_possible); |
|
|
792 |
|
if (debug) |
|
793 |
|
fprintf(stderr, "DEBUG: %s [%s] no_dup_possible=%u\n", |
|
794 |
|
__FUNCTION__, d->name, d->no_dup_possible); |
800 |
795 |
|
|
801 |
796 |
/* We check current dir first. */ |
/* We check current dir first. */ |
802 |
797 |
if (d->no_dup_possible == 0) |
if (d->no_dup_possible == 0) |
|
... |
... |
static long long dir_find_dups_populate_list(struct dir_node **u, |
877 |
872 |
struct dir_node *subdir; |
struct dir_node *subdir; |
878 |
873 |
long long new_pos; |
long long new_pos; |
879 |
874 |
|
|
880 |
|
/* |
|
881 |
|
fprintf(stderr, "\tDEBUG: ENTER %s [%s] pos=%lld\n", |
|
882 |
|
__FUNCTION__, d->name, pos); |
|
883 |
|
*/ |
|
884 |
|
|
|
885 |
875 |
new_pos = pos; |
new_pos = pos; |
886 |
876 |
|
|
887 |
877 |
/* We check current dir first. */ |
/* We check current dir first. */ |
|
... |
... |
static long long dir_find_dups_populate_list(struct dir_node **u, |
896 |
886 |
subdir = subdir->next_sibling; |
subdir = subdir->next_sibling; |
897 |
887 |
} |
} |
898 |
888 |
|
|
899 |
|
/* |
|
900 |
|
fprintf(stderr, "\tDEBUG: EXIT %s [%s] new_pos=%lld\n", |
|
901 |
|
__FUNCTION__, d->name, new_pos); |
|
902 |
|
*/ |
|
903 |
|
|
|
904 |
889 |
return new_pos; |
return new_pos; |
905 |
890 |
} |
} |
906 |
891 |
|
|
|
... |
... |
int dir_find_dups(void) |
920 |
905 |
struct dir_node **u; |
struct dir_node **u; |
921 |
906 |
char dump[SHA_DIGEST_LENGTH * 2 + 1]; |
char dump[SHA_DIGEST_LENGTH * 2 + 1]; |
922 |
907 |
|
|
923 |
|
fprintf(stderr, "DEBUG: %s...\n", __FUNCTION__); |
|
924 |
908 |
for (i = 0; i < dir_info_count; i++) { |
for (i = 0; i < dir_info_count; i++) { |
925 |
|
fprintf(stderr, "\tDEBUG: [%llu] build hash for [%s]...\n", i, dir_info[i]->name); |
|
926 |
909 |
err = dir_build_hash(dir_info[i]); |
err = dir_build_hash(dir_info[i]); |
927 |
910 |
if (err == -1) |
if (err == -1) |
928 |
911 |
return -1; |
return -1; |
929 |
912 |
|
|
930 |
913 |
no_of_possible_dirs += err; |
no_of_possible_dirs += err; |
931 |
914 |
} |
} |
932 |
|
fprintf(stderr, "\tDEBUG: no_of_possible_dirs = %lld\n", no_of_possible_dirs); |
|
933 |
915 |
|
|
934 |
916 |
/* Allocate an array that we will pass to qsort */ |
/* Allocate an array that we will pass to qsort */ |
935 |
917 |
mem = no_of_possible_dirs * sizeof(struct dir_node *); |
mem = no_of_possible_dirs * sizeof(struct dir_node *); |
|
... |
... |
int dir_find_dups(void) |
943 |
925 |
j = 0; |
j = 0; |
944 |
926 |
for (i = 0; i < dir_info_count; i++) { |
for (i = 0; i < dir_info_count; i++) { |
945 |
927 |
d = dir_info[i]; |
d = dir_info[i]; |
946 |
|
fprintf(stderr, "dir_find_dups[i=%llu, j=%lld] [%s]\n", i, j, d->name); |
|
|
928 |
|
if (debug) |
|
929 |
|
fprintf(stderr, "dir_find_dups[i=%llu, j=%lld] [%s]\n", i, j, d->name); |
947 |
930 |
|
|
948 |
931 |
j += dir_find_dups_populate_list(u, j, d); |
j += dir_find_dups_populate_list(u, j, d); |
949 |
932 |
|
|
|
... |
... |
int dir_find_dups(void) |
952 |
935 |
break; |
break; |
953 |
936 |
} |
} |
954 |
937 |
|
|
955 |
|
fprintf(stderr, "dir u (j=%lld): ", j); |
|
956 |
|
for (i = 0; i < no_of_possible_dirs; i++) |
|
957 |
|
fprintf(stderr, "[%lld]=%s ", i, u[i]->name); |
|
958 |
|
fprintf(stderr, "\n"); |
|
|
938 |
|
if (debug) { |
|
939 |
|
fprintf(stderr, "dir u (j=%lld): ", j); |
|
940 |
|
for (i = 0; i < no_of_possible_dirs; i++) |
|
941 |
|
fprintf(stderr, "[%lld]=%s ", i, u[i]->name); |
|
942 |
|
fprintf(stderr, "\n"); |
|
943 |
|
} |
959 |
944 |
|
|
960 |
945 |
/* Order by hash */ |
/* Order by hash */ |
961 |
946 |
qsort(u, no_of_possible_dirs, sizeof(struct dir_node *), dir_compare_hashes); |
qsort(u, no_of_possible_dirs, sizeof(struct dir_node *), dir_compare_hashes); |
962 |
947 |
|
|
963 |
|
fprintf(stderr, "DEBUG: dump after dir qsort [%s]\n", d->name); |
|
964 |
|
for (i = 0; i < no_of_possible_dirs; i++) { |
|
965 |
|
sha1_dump(dump, u[i]->sha1, 0); |
|
966 |
|
fprintf(stderr, "DEBUG: %s\t%u\t%s\n", dump, u[i]->level, u[i]->name); |
|
|
948 |
|
if (debug) { |
|
949 |
|
fprintf(stderr, "DEBUG: dump after dir qsort [%s]\n", d->name); |
|
950 |
|
for (i = 0; i < no_of_possible_dirs; i++) { |
|
951 |
|
sha1_dump(dump, u[i]->sha1, 0); |
|
952 |
|
fprintf(stderr, "DEBUG: %s\t%u\t%s\n", dump, u[i]->level, u[i]->name); |
|
953 |
|
} |
967 |
954 |
} |
} |
968 |
955 |
|
|
969 |
956 |
first = 0; |
first = 0; |
|
... |
... |
void dir_dump_duplicates(struct dir_node *d) |
1028 |
1015 |
} |
} |
1029 |
1016 |
|
|
1030 |
1017 |
dir_mark_left(d); |
dir_mark_left(d); |
1031 |
|
fprintf(stderr, "dir_dump_duplicates: set do_not_dump on left [%s]\n", d->name); |
|
|
1018 |
|
if (debug) |
|
1019 |
|
fprintf(stderr, "dir_dump_duplicates: set do_not_dump on left [%s]\n", d->name); |
1032 |
1020 |
dir_mark_do_not_dump(d); |
dir_mark_do_not_dump(d); |
1033 |
1021 |
|
|
1034 |
|
fprintf(stderr, "dir_dump_duplicates: set do_not_dump on right [%s]\n", p->name); |
|
|
1022 |
|
if (debug) |
|
1023 |
|
fprintf(stderr, "dir_dump_duplicates: set do_not_dump on right [%s]\n", p->name); |
1035 |
1024 |
dir_mark_do_not_dump(p); |
dir_mark_do_not_dump(p); |
1036 |
1025 |
|
|
1037 |
|
fprintf(stderr, "DIR\t%s\t%s\n", |
|
1038 |
|
d->name, p->name); |
|
1039 |
1026 |
printf("DIR\t%s\t%s\n", |
printf("DIR\t%s\t%s\n", |
1040 |
1027 |
d->name, p->name); |
d->name, p->name); |
1041 |
1028 |
p = p->hash_next; |
p = p->hash_next; |
|
... |
... |
void file_dump_duplicates(struct file_node *f, |
1050 |
1037 |
{ |
{ |
1051 |
1038 |
struct file_node *p, *first_left; |
struct file_node *p, *first_left; |
1052 |
1039 |
|
|
1053 |
|
fprintf(stderr, "\tfile_dump_duplicates [%s]\n", f->name); |
|
1054 |
|
file_dump_node(f, 1); |
|
|
1040 |
|
if (debug) |
|
1041 |
|
file_dump_node(f, 1); |
1055 |
1042 |
|
|
1056 |
1043 |
if (f->duplicates == NULL) { |
if (f->duplicates == NULL) { |
1057 |
|
fprintf(stderr, "\tignore duplicate file because ->duplicates is NULL\n"); |
|
|
1044 |
|
if (debug) |
|
1045 |
|
fprintf(stderr, "\tignore duplicate file because ->duplicates is NULL\n"); |
1058 |
1046 |
return; |
return; |
1059 |
1047 |
} |
} |
1060 |
1048 |
|
|
1061 |
1049 |
if (f->no_dup_possible == 1) { |
if (f->no_dup_possible == 1) { |
1062 |
|
fprintf(stderr, "\tignore duplicate file because no_dup_possible=1\n"); |
|
|
1050 |
|
if (debug) |
|
1051 |
|
fprintf(stderr, "\tignore duplicate file because no_dup_possible=1\n"); |
1063 |
1052 |
return; |
return; |
1064 |
1053 |
} |
} |
1065 |
1054 |
|
|
1066 |
1055 |
if (f->do_not_dump == 1) { |
if (f->do_not_dump == 1) { |
1067 |
|
fprintf(stderr, "\tignore duplicate file because do_not_dump=1\n"); |
|
|
1056 |
|
if (debug) |
|
1057 |
|
fprintf(stderr, "\tignore duplicate file because do_not_dump=1\n"); |
1068 |
1058 |
return; |
return; |
1069 |
1059 |
} |
} |
1070 |
1060 |
|
|
1071 |
1061 |
if (f->size < min_size) { |
if (f->size < min_size) { |
1072 |
|
fprintf(stderr, "\tignore duplicate file because size < min\n"); |
|
|
1062 |
|
if (debug) |
|
1063 |
|
fprintf(stderr, "\tignore duplicate file because size < min\n"); |
1073 |
1064 |
return; |
return; |
1074 |
1065 |
} |
} |
1075 |
1066 |
|
|
|
... |
... |
void file_dump_duplicates(struct file_node *f, |
1085 |
1076 |
p = p->duplicates; |
p = p->duplicates; |
1086 |
1077 |
} |
} |
1087 |
1078 |
} |
} |
1088 |
|
fprintf(stderr, "first_left = [%s]\n", first_left->name); |
|
|
1079 |
|
if (debug) |
|
1080 |
|
fprintf(stderr, "first_left = [%s]\n", first_left->name); |
1089 |
1081 |
|
|
1090 |
1082 |
/* now, dump */ |
/* now, dump */ |
1091 |
1083 |
p = f; |
p = f; |
|
... |
... |
void file_dump_duplicates(struct file_node *f, |
1095 |
1087 |
* it for dirs. |
* it for dirs. |
1096 |
1088 |
*/ |
*/ |
1097 |
1089 |
if (p->do_not_dump == 1) { |
if (p->do_not_dump == 1) { |
1098 |
|
fprintf(stderr, "\t\tignore duplicate file in chain because do_not_dump=1 [%s]\n", p->name); |
|
|
1090 |
|
if (debug) |
|
1091 |
|
fprintf(stderr, "\t\tignore duplicate file in chain because do_not_dump=1 [%s]\n", p->name); |
1099 |
1092 |
p = p->duplicates; |
p = p->duplicates; |
1100 |
1093 |
continue; |
continue; |
1101 |
1094 |
} |
} |
|
... |
... |
void file_dump_duplicates(struct file_node *f, |
1105 |
1098 |
continue; |
continue; |
1106 |
1099 |
} |
} |
1107 |
1100 |
|
|
1108 |
|
fprintf(stderr, "Because we will dump [%s] as a left, set do_not_dump=1\n", first_left->name); |
|
|
1101 |
|
if (debug) |
|
1102 |
|
fprintf(stderr, "Because we will dump [%s] as a left, set do_not_dump=1\n", first_left->name); |
1109 |
1103 |
first_left->left = 1; |
first_left->left = 1; |
1110 |
1104 |
first_left->do_not_dump = 1; |
first_left->do_not_dump = 1; |
1111 |
1105 |
|
|
1112 |
1106 |
/* Prevent this file to appear again in the dump */ |
/* Prevent this file to appear again in the dump */ |
1113 |
|
fprintf(stderr, "Because [%s] is a right, set do_not_dump=1\n", p->name); |
|
|
1107 |
|
if (debug) |
|
1108 |
|
fprintf(stderr, "Because [%s] is a right, set do_not_dump=1\n", p->name); |
1114 |
1109 |
p->do_not_dump = 1; |
p->do_not_dump = 1; |
1115 |
1110 |
|
|
1116 |
|
fprintf(stderr, "\t\t\t%s = %s\n", |
|
1117 |
|
first_left->name, p->name); |
|
1118 |
1111 |
printf("FILE\t%s\t%s\n", |
printf("FILE\t%s\t%s\n", |
1119 |
1112 |
first_left->name, p->name); |
first_left->name, p->name); |
1120 |
1113 |
p = p->duplicates; |
p = p->duplicates; |
|
... |
... |
void dump_duplicates(const unsigned long long min_size) |
1132 |
1125 |
struct file_node *f; |
struct file_node *f; |
1133 |
1126 |
unsigned int hash; |
unsigned int hash; |
1134 |
1127 |
|
|
1135 |
|
fprintf(stderr, "Dump duplicates (bigger than %llu)...\n", min_size); |
|
1136 |
|
|
|
1137 |
1128 |
for (i = 0; i < dir_info_count; i++) { |
for (i = 0; i < dir_info_count; i++) { |
1138 |
|
fprintf(stderr, "\tdump_duplicates[%u]...\n", i); |
|
|
1129 |
|
if (debug) |
|
1130 |
|
fprintf(stderr, "\tdump_duplicates[%u]...\n", i); |
1139 |
1131 |
d = dir_info[i]; |
d = dir_info[i]; |
1140 |
1132 |
dir_dump_duplicates(d); |
dir_dump_duplicates(d); |
1141 |
1133 |
|
|
|
... |
... |
void dump_duplicates(const unsigned long long min_size) |
1147 |
1139 |
} |
} |
1148 |
1140 |
|
|
1149 |
1141 |
/* Now, we dump remaining files */ |
/* Now, we dump remaining files */ |
1150 |
|
fprintf(stderr, "DEBUG: Dump duplicated files...\n"); |
|
|
1142 |
|
if (debug) |
|
1143 |
|
fprintf(stderr, "DEBUG: Dump duplicated files...\n"); |
1151 |
1144 |
for (hash = 0; hash < HASH_SIZE; hash++) { |
for (hash = 0; hash < HASH_SIZE; hash++) { |
1152 |
1145 |
if (file_info[hash] == NULL) |
if (file_info[hash] == NULL) |
1153 |
1146 |
continue; |
continue; |
1154 |
1147 |
|
|
1155 |
|
fprintf(stderr, "Dump duplicates in hash %u\n", hash); |
|
|
1148 |
|
if (debug) |
|
1149 |
|
fprintf(stderr, "Dump duplicates in hash %u\n", hash); |
1156 |
1150 |
|
|
1157 |
1151 |
f = file_info[hash]; |
f = file_info[hash]; |
1158 |
1152 |
while (f) { |
while (f) { |