PATCH: du faster link checking, h/k fixes

Ulf Lilleengen lulf at kerneled.org
Fri Jul 2 16:42:41 PDT 2004


This patch has it origin in http://www.freebsd.org/cgi/getmsg.cgi?fetch=960969+0+/usr/local/www/db/text/2004/cvs-all/20040516.cvs-all from the pending merges list, no. 2200.

The patch includes:
- Faster link checking
- Better ANSI code
- -h/-k interaction fixes

Diff follows

- Ulf Lilleengen
--- du.c	2004-07-03 01:32:23.000000000 +0000
+++ du_new.c	2004-07-03 01:29:41.000000000 +0000
@@ -72,8 +72,8 @@
 #define	TERA_SI_SZ (TERA_SZ(1000ULL))
 #define	PETA_SI_SZ (PETA_SZ(1000ULL))
 
-#define	HASHSIZE	256		/* power of 2 only */
-#define HASHMASK	(HASHSIZE - 1)
+/*#define	HASHSIZE	256	*/	/* power of 2 only */
+/*#define HASHMASK	(HASHSIZE - 1)*/
 
 unsigned long long vals_si [] = {1, KILO_SI_SZ, MEGA_SI_SZ, GIGA_SI_SZ, TERA_SI_SZ, PETA_SI_SZ};
 unsigned long long vals_base2[] = {1, KILO_2_SZ, MEGA_2_SZ, GIGA_2_SZ, TERA_2_SZ, PETA_2_SZ};
@@ -89,7 +89,7 @@
 	SLIST_ENTRY(ignentry)	next;
 };
 
-int		linkchk(FTSENT *);
+static int		linkchk(FTSENT *);
 static void	usage(void);
 void		prthumanval(double);
 unit_t		unit_adjust(double *);
@@ -108,6 +108,7 @@
 	int		depth;
 	int		Hflag, Lflag, Pflag, aflag, sflag, dflag, cflag, hflag, ch, notused, rval;
 	char 		**save;
+	static char		dot[] = ".";
 
 	Hflag = Lflag = Pflag = aflag = sflag = dflag = cflag = hflag = 0;
 	
@@ -158,8 +159,8 @@
 				valp = vals_base2;
 				break;
 			case 'k':
-				if (!hflag)
-					putenv("BLOCKSIZE=1024");
+				hflag = 0;
+				putenv("BLOCKSIZE=1024");
 				break;
 			case 'r':		 /* Compatibility. */
 				break;
@@ -216,7 +217,7 @@
 
 	if (!*argv) {
 		argv = save;
-		argv[0] = ".";
+		argv[0] = dot;
 		argv[1] = NULL;
 	}
 
@@ -274,7 +275,7 @@
 						(void) printf("\t%s\n", p->fts_path);
 					} else {
 						(void) printf("%qd\t%s\n",
-							howmany(p->fts_statp->st_blocks, blocksize),
+							(long long)howmany(p->fts_statp->st_blocks, blocksize),
 							p->fts_path);
 					}
 				}
@@ -300,47 +301,136 @@
 	exit(rval);
 }
 
-
-typedef struct _ID {
-	dev_t	dev;
-	ino_t	inode;
-} ID;
-
-
-int
+static int
 linkchk(FTSENT *p)
 {
-	static ID **files;
-	static int *maxfiles, *nfiles;
-	static ID *filesph[HASHSIZE];
-	static int maxfilesh[HASHSIZE], nfilesh[HASHSIZE];
-	ID *fp, *start;
-	ino_t ino;
-	dev_t dev;
-	int index;
-
-	ino = p->fts_statp->st_ino;
-	index = ino & HASHMASK;
-	files = &filesph[index];
-	maxfiles = &maxfilesh[index];
-	nfiles = &nfilesh[index];
+	struct links_entry {
+		struct links_entry *next;
+		struct links_entry *previous;
+		int		links;
+		dev_t		dev;
+		ino_t		ino;
+	};
+
+	static const size_t links_hash_initial_size = 8192;
+	static struct links_entry **buckets;
+	static struct links_entry *free_list;
+	static size_t number_buckets;
+	static unsigned long number_entries;
+	static char stop_allocating;
+	struct links_entry *le, **new_buckets;
+	struct stat *st;
+	size_t i, new_size;
+	int count, hash;
+
+	st = p->fts_statp;
+
+	/* If necessary, initialize the hash table. */
+	if (buckets == NULL) {
+		number_buckets = links_hash_initial_size;
+		buckets = malloc(number_buckets * sizeof(buckets[0]));
+		if (buckets == NULL)
+			errx(1, "No memory for hardlink detection");
+		for (i = 0; i < number_buckets; i++)
+			buckets[i] = NULL;
+	}
+
+	/* If the hash table is getting too full, enlarge it. */
+	if (number_entries > number_buckets * 10 && !stop_allocating) {
+		new_size = number_buckets * 2;
+		new_buckets = malloc(new_size * sizeof(struct links_entry *));
+		count = 0;
+
+		/* Try releasing the free list to see if that helps. */
+		if (new_buckets == NULL && free_list != NULL) {
+			while (free_list != NULL) {
+				le = free_list;
+				free_list = le->next;
+				free(le);
+			}
+			new_buckets = malloc(new_size * sizeof(new_buckets[0]));
+		}
+
+		if (new_buckets == NULL) {
+			stop_allocating = 1;
+			warnx("No more memory for tracking hard links");
+		} else {
+			memset(new_buckets, 0, new_size * sizeof(struct links_entry *));
+			for (i = 0; i < number_buckets; i++) {
+				while (buckets[i] != NULL) {
+					/* Remove entry from old bucket. */
+					le = buckets[i];
+					buckets[i] = le->next;
 	
-	dev = p->fts_statp->st_dev;
-	if ((start = *files) != NULL) {
-		for (fp = start + *nfiles - 1; fp >= start; --fp)
-			if (ino == fp->inode && dev == fp->dev)
-				return (1);
+					/* Add entry to new bucket. */
+					hash = (le->dev ^ le->ino) % new_size;
+	
+					if (new_buckets[hash] != NULL)
+						new_buckets[hash]->previous = le;
+					le->next = new_buckets[hash];
+					le->previous = NULL;
+					new_buckets[hash] = le;
+				}
+			}
+			free(buckets);
+			buckets = new_buckets;
+			number_buckets = new_size;
+		}
+	}
+
+	/* Try to locate this entry in the hash table. */
+	hash = ( st->st_dev ^ st->st_ino ) % number_buckets;
+	for (le = buckets[hash]; le != NULL; le = le->next) {
+		if (le->dev == st->st_dev && le->ino == st->st_ino) {
+			/*
+			 * Save memory by releasing an entry when we've seen
+			 * all of it's links.
+			 */
+			if (--le->links <= 0) {
+				if (le->previous != NULL)
+					le->previous->next = le->next;
+				if (le->next != NULL)
+					le->next->previous = le->previous;
+				if (buckets[hash] == le)
+					buckets[hash] = le->next;
+				number_entries--;
+				/* Recycle this node through the free list */
+				if (stop_allocating) {
+					free(le);
+				} else {
+					le->next = free_list;
+					free_list = le;
+				}
+			}
+			return (1);
+		}
 	}
 
-	if (*nfiles == *maxfiles) {
-		*maxfiles = (*maxfiles + 128) + (*maxfiles / 2);
-		*files = realloc((char *)*files, sizeof(ID) * *maxfiles);
-		if (*files == NULL)
-			errx(1, "can't allocate memory");
+	if (stop_allocating)
+		return (0);
+
+	/* Add this entry to the links cache. */
+	if (free_list != NULL) {
+		/* Pull a node from the free list if we can. */
+		le = free_list;
+		free_list = le->next;
+	} else
+		/* Malloc one if we have to. */
+		le = malloc(sizeof(struct links_entry));
+	if (le == NULL) {
+		stop_allocating = 1;
+		warnx("No more memory for tracking hard links");
+		return (0);
 	}
-	(*files)[*nfiles].inode = ino;
-	(*files)[*nfiles].dev = dev;
-	++(*nfiles);
+	le->dev = st->st_dev;
+	le->ino = st->st_ino;
+	le->links = st->st_nlink - 1;
+	number_entries++;
+	le->next = buckets[hash];
+	le->previous = NULL;
+	if (buckets[hash] != NULL)
+		buckets[hash]->previous = le;
+	buckets[hash] = le;
 	return (0);
 }
 




More information about the Submit mailing list