/dupdump.c (c59d9bbf4076703d2ffc82502f91595393199bce) (3981 bytes) (mode 100644) (type blob)

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE

#include <ftw.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
#include <time.h>
#include <sys/time.h>

#include "store.h"

/*
 * Settings filled in from the command line by main().
 * Objects with static storage duration start out as zero / NULL.
 */
static unsigned long long min_size;	/* -i value, handed to dump_duplicates() */
static unsigned short verbose;		/* bumped once per -v */
static unsigned short debug;		/* bumped once per -d, handed to set_debug() */
static unsigned int zero;		/* set to 1 by -z, handed to dump_duplicates() */
static char *out_file;			/* -o value; NULL selects stdout */

/*
 * Long options understood by getopt_long(); every entry reports the
 * letter of its short option through .val. The all-zero entry ends the table.
 */
static struct option options[] =
{
	{ .name = "zero",     .has_arg = no_argument,       .flag = NULL, .val = 'z' },
	{ .name = "min-size", .has_arg = required_argument, .flag = NULL, .val = 'i' },
	{ .name = "out",      .has_arg = required_argument, .flag = NULL, .val = 'o' },
	{ .name = "verbose",  .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = "debug",    .has_arg = no_argument,       .flag = NULL, .val = 'd' },
	{ .name = NULL,       .has_arg = 0,                 .flag = NULL, .val = 0 }
};

/*
 * Write the command line summary to stderr.
 */
static void usage(void)
{
	static const char help[] =
		"Usage: dupdump [options] <dir1> [<dir2>] ...\n"
		"	--zero		-z	Use \\0 to separate columns\n"
		"	--min-size	-i	Ignore files under this size (default 1)\n"
		"	--out		-o	Where to store results (default stdout)\n"
		"	--verbose	-v	Be more verbose (can be specified multiple times)\n"
		"	--debug		-d	Print debug information (can be specified multiple times)\n";

	/* The text holds no conversion specifications, so it is emitted verbatim */
	fputs(help, stderr);
}

/*
 * Callback for nftw function: hands every directory to dir_add() and every
 * regular file to file_add().
 *
 * fpath  - path of the current entry
 * s      - stat data of the entry (not valid when tflag is FTW_NS)
 * tflag  - nftw type flag of the entry
 * ftwbuf - base offset and depth level of the entry
 *
 * Returns FTW_CONTINUE to go on with the walk, or FTW_STOP when dir_add()
 * or file_add() reports an error.
 */
static int callback(const char *fpath, const struct stat *s, int tflag,
	struct FTW *ftwbuf)
{
	int err;

	/* stat failed for this entry: *s carries no valid data, do not read it */
	if (tflag == FTW_NS) {
		if (verbose)
			fprintf(stderr, "Warning: cannot stat [%s], skipping!\n",
				fpath);
		return FTW_CONTINUE;
	}

	/*
	 * The casts make every argument match its conversion specifier,
	 * whatever the widths of off_t, dev_t and ino_t are on this platform.
	 */
	if (verbose >= 2)
		fprintf(stderr, "%08x %s size=%lld base=%d level=%d dev=%llu ino=%llu\n",
			(unsigned int) tflag, fpath,
			(long long) s->st_size,
			ftwbuf->base, ftwbuf->level,
			(unsigned long long) s->st_dev,
			(unsigned long long) s->st_ino);

	/* Add dir */
	if (tflag == FTW_D) {
		err = dir_add(fpath, s, ftwbuf->level);
		if (err) {
			fprintf(stderr, "ERROR: Probably out of memory!\n");
			return FTW_STOP;
		}

		return FTW_CONTINUE;
	}

	/*
	 * Allow only normal files from here on. Besides symlinks, devices
	 * and the like, this also keeps a directory that could not be read
	 * (FTW_DNR) from being handed to file_add() as if it were a file.
	 */
	if (!S_ISREG(s->st_mode)) {
		if (verbose && tflag == FTW_DNR)
			fprintf(stderr, "Warning: cannot read dir [%s], skipping!\n",
				fpath);
		return FTW_CONTINUE;
	}

	err = file_add(fpath, s, ftwbuf->level);
	if (err) {
		fprintf(stderr, "ERROR: Cannot add file!\n");
		return FTW_STOP;
	}

	return FTW_CONTINUE;
}

/*
 * Entry point: parses the command line, walks every directory named on it
 * with nftw(), then has the store find and dump the duplicates.
 *
 * Returns 0 on success and 1 on any error (bad usage included).
 */
int main(int argc, char *argv[])
{
	int flags = 0;
	int err;
	int ret = 1;
	int options_index = 0;
	int c;
	FILE *out;
	struct timeval start, end;

	while ((c = getopt_long(argc, argv, "zi:o:vdh", options, &options_index)) != -1) {
		switch (c) {
		case 'z': zero = 1; break;
		case 'i': {
			char *tail;

			/*
			 * strtoull() stops quietly at junk and wraps negative
			 * input around, so the value has to be checked here.
			 */
			errno = 0;
			min_size = strtoull(optarg, &tail, 10);
			if (tail == optarg || *tail != '\0' || errno == ERANGE
			    || strchr(optarg, '-') != NULL) {
				fprintf(stderr, "Error: invalid min-size [%s]!\n",
					optarg);
				return 1;
			}
			break;
		}
		case 'o': out_file = optarg; break;
		case 'v': verbose++; break;
		case 'd': debug++; break;
		default:
			/* -h ends up here as well: it has no case of its own */
			usage();
			return 1;
		}
	}

	/*
	 * Check for dirs before opening the results file: opening it with
	 * "w" truncates it, which a bad command line should not cause.
	 */
	if (optind >= argc) {
		usage();
		fprintf(stderr, "Error: no dirs to scan specified!\n");
		return 1;
	}

	if (out_file == NULL) {
		out = stdout;
	} else {
		out = fopen(out_file, "w");
		if (out == NULL) {
			fprintf(stderr, "Cannot open results file (%s)!\n",
				strerror(errno));
			return 1;
		}
	}

	flags |= FTW_PHYS; /* Do not follow symlinks */
	flags |= FTW_ACTIONRETVAL; /* To skip hierarchies */

	set_debug(debug);
	set_out(out);

	if (verbose)
		fprintf(stderr, "Scanning for duplicates, min-size %llu\n",
			min_size);

	gettimeofday(&start, NULL);
	while (optind < argc) {
		if (verbose)
			fprintf(stderr, "Processing dir %s...\n", argv[optind]);

		err = nftw(argv[optind], callback, 100, flags);
		if (err == -1) {
			fprintf(stderr, "Error: cannot search dir [%s] [%d] (%s)\n",
				argv[optind], err, strerror(errno));
			goto done;
		}
		if (err != 0) {
			/*
			 * The callback stopped the walk (FTW_STOP) and has
			 * already said why; the data gathered is incomplete.
			 */
			fprintf(stderr, "Error: scan of dir [%s] aborted\n",
				argv[optind]);
			goto done;
		}

		optind++;
	}

	if (debug > 2)
		dump_files();

	if (verbose)
		fprintf(stderr, "Finding duplicate files...\n");
	err = file_find_dups();
	if (err != 0) {
		fprintf(stderr, "Error comparing files!\n");
		goto done;
	}

	if (verbose)
		fprintf(stderr, "Finding duplicate dirs...\n");
	err = dir_find_dups();
	if (err != 0) {
		fprintf(stderr, "Error in finding dups procedure!\n");
		goto done;
	}

	if (debug > 2)
		dump_dirs();

	dump_duplicates(min_size, zero);

	gettimeofday(&end, NULL);

	if (verbose) {
		time_t diff;

		dump_stats();

		/* Only whole seconds are reported; the cast matches %lu */
		diff = end.tv_sec - start.tv_sec;
		fprintf(stderr, "[*] Time: %luh%lum%lus.\n",
			(unsigned long) (diff / 3600),
			(unsigned long) ((diff % 3600) / 60),
			(unsigned long) (diff % 60));
	}

	dev_ino_seen_clean();

	ret = 0;

done:
	/*
	 * Flush/close the results stream and check the outcome: a failure
	 * here means the results did not make it out completely.
	 */
	if (out == stdout)
		err = fflush(stdout);
	else
		err = fclose(out);
	if (err != 0) {
		fprintf(stderr, "Cannot write results file (%s)!\n",
			strerror(errno));
		ret = 1;
	}

	return ret;
}


Mode Type Size Ref File
100644 blob 20 85940595c7c3a70ebc0bd5da9b35bc6b6a16a71a .exclude
100644 blob 105 9e50f3bfb5cc392fa65019aef80cab5093162bd2 .gitignore
100644 blob 35147 94a9ed024d3859793618152ea559a168bbcbb5e2 LICENSE
100644 blob 635 5ec5fadb5ab8ec7839ca5f11414aa2a855cffa03 Makefile.in
100644 blob 2627 9f4bbb9647e9fea4e861fa9a04bf32a716a2da05 README
100644 blob 2216 4699616f54bc9be1acd4b252ddd76b75e9eeb48a TODO
100755 blob 31 382d4ea2c0c98b1b25ea01f1e194cfc4990ac527 configure
100755 blob 15674 c93b35dad5dedf498b90aafcbf409a4844b1bc8c duilder
100644 blob 807 741ea33bf42f98943be21be26fc7e1b6b38d8378 duilder.conf
100644 blob 2040 22eee88f6126c7effa781bcb8fde0c58ca487731 dupdump.1
100644 blob 3981 c59d9bbf4076703d2ffc82502f91595393199bce dupdump.c
100644 blob 805 a992c9f287eb58cd910aca63c6e009526ec2595f dupdump.spec.in
100755 blob 205 677395e91b18c8272dc795ace0d17ec5610e2d70 process.sh
100644 blob 30737 8f737a70836f0180a635351bfd342d2d0efbfe89 store.c
100644 blob 1916 113ca447b857e1890ad0db35a95a06849330b8db store.h
040000 tree - 2f1796ebce0f596969d86738ee6b635521296929 tests
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/catalinux/dupdump

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/catalinux/dupdump

Clone this repository using git:
git clone git://git.rocketgit.com/user/catalinux/dupdump

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main