PATCH: du faster link checking, h/k fixes
Ulf Lilleengen
lulf at kerneled.org
Fri Jul 2 16:42:41 PDT 2004
This patch has it origin in http://www.freebsd.org/cgi/getmsg.cgi?fetch=960969+0+/usr/local/www/db/text/2004/cvs-all/20040516.cvs-all from the pending merges list, no. 2200.
The patch includes:
- Faster link checking
- Better ANSI code
- -h/-k interaction fixes
Diff follows
- Ulf Lilleengen
--- du.c 2004-07-03 01:32:23.000000000 +0000
+++ du_new.c 2004-07-03 01:29:41.000000000 +0000
@@ -72,8 +72,8 @@
#define TERA_SI_SZ (TERA_SZ(1000ULL))
#define PETA_SI_SZ (PETA_SZ(1000ULL))
-#define HASHSIZE 256 /* power of 2 only */
-#define HASHMASK (HASHSIZE - 1)
+/*#define HASHSIZE 256 */ /* power of 2 only */
+/*#define HASHMASK (HASHSIZE - 1)*/
unsigned long long vals_si [] = {1, KILO_SI_SZ, MEGA_SI_SZ, GIGA_SI_SZ, TERA_SI_SZ, PETA_SI_SZ};
unsigned long long vals_base2[] = {1, KILO_2_SZ, MEGA_2_SZ, GIGA_2_SZ, TERA_2_SZ, PETA_2_SZ};
@@ -89,7 +89,7 @@
SLIST_ENTRY(ignentry) next;
};
-int linkchk(FTSENT *);
+static int linkchk(FTSENT *);
static void usage(void);
void prthumanval(double);
unit_t unit_adjust(double *);
@@ -108,6 +108,7 @@
int depth;
int Hflag, Lflag, Pflag, aflag, sflag, dflag, cflag, hflag, ch, notused, rval;
char **save;
+ static char dot[] = ".";
Hflag = Lflag = Pflag = aflag = sflag = dflag = cflag = hflag = 0;
@@ -158,8 +159,8 @@
valp = vals_base2;
break;
case 'k':
- if (!hflag)
- putenv("BLOCKSIZE=1024");
+ hflag = 0;
+ putenv("BLOCKSIZE=1024");
break;
case 'r': /* Compatibility. */
break;
@@ -216,7 +217,7 @@
if (!*argv) {
argv = save;
- argv[0] = ".";
+ argv[0] = dot;
argv[1] = NULL;
}
@@ -274,7 +275,7 @@
(void) printf("\t%s\n", p->fts_path);
} else {
(void) printf("%qd\t%s\n",
- howmany(p->fts_statp->st_blocks, blocksize),
+ (long long)howmany(p->fts_statp->st_blocks, blocksize),
p->fts_path);
}
}
@@ -300,47 +301,136 @@
exit(rval);
}
-
-typedef struct _ID {
- dev_t dev;
- ino_t inode;
-} ID;
-
-
-int
+static int
linkchk(FTSENT *p)
{
- static ID **files;
- static int *maxfiles, *nfiles;
- static ID *filesph[HASHSIZE];
- static int maxfilesh[HASHSIZE], nfilesh[HASHSIZE];
- ID *fp, *start;
- ino_t ino;
- dev_t dev;
- int index;
-
- ino = p->fts_statp->st_ino;
- index = ino & HASHMASK;
- files = &filesph[index];
- maxfiles = &maxfilesh[index];
- nfiles = &nfilesh[index];
+ struct links_entry {
+ struct links_entry *next;
+ struct links_entry *previous;
+ int links;
+ dev_t dev;
+ ino_t ino;
+ };
+
+ static const size_t links_hash_initial_size = 8192;
+ static struct links_entry **buckets;
+ static struct links_entry *free_list;
+ static size_t number_buckets;
+ static unsigned long number_entries;
+ static char stop_allocating;
+ struct links_entry *le, **new_buckets;
+ struct stat *st;
+ size_t i, new_size;
+ int count, hash;
+
+ st = p->fts_statp;
+
+ /* If necessary, initialize the hash table. */
+ if (buckets == NULL) {
+ number_buckets = links_hash_initial_size;
+ buckets = malloc(number_buckets * sizeof(buckets[0]));
+ if (buckets == NULL)
+ errx(1, "No memory for hardlink detection");
+ for (i = 0; i < number_buckets; i++)
+ buckets[i] = NULL;
+ }
+
+ /* If the hash table is getting too full, enlarge it. */
+ if (number_entries > number_buckets * 10 && !stop_allocating) {
+ new_size = number_buckets * 2;
+ new_buckets = malloc(new_size * sizeof(struct links_entry *));
+ count = 0;
+
+ /* Try releasing the free list to see if that helps. */
+ if (new_buckets == NULL && free_list != NULL) {
+ while (free_list != NULL) {
+ le = free_list;
+ free_list = le->next;
+ free(le);
+ }
+ new_buckets = malloc(new_size * sizeof(new_buckets[0]));
+ }
+
+ if (new_buckets == NULL) {
+ stop_allocating = 1;
+ warnx("No more memory for tracking hard links");
+ } else {
+ memset(new_buckets, 0, new_size * sizeof(struct links_entry *));
+ for (i = 0; i < number_buckets; i++) {
+ while (buckets[i] != NULL) {
+ /* Remove entry from old bucket. */
+ le = buckets[i];
+ buckets[i] = le->next;
- dev = p->fts_statp->st_dev;
- if ((start = *files) != NULL) {
- for (fp = start + *nfiles - 1; fp >= start; --fp)
- if (ino == fp->inode && dev == fp->dev)
- return (1);
+ /* Add entry to new bucket. */
+ hash = (le->dev ^ le->ino) % new_size;
+
+ if (new_buckets[hash] != NULL)
+ new_buckets[hash]->previous = le;
+ le->next = new_buckets[hash];
+ le->previous = NULL;
+ new_buckets[hash] = le;
+ }
+ }
+ free(buckets);
+ buckets = new_buckets;
+ number_buckets = new_size;
+ }
+ }
+
+ /* Try to locate this entry in the hash table. */
+ hash = ( st->st_dev ^ st->st_ino ) % number_buckets;
+ for (le = buckets[hash]; le != NULL; le = le->next) {
+ if (le->dev == st->st_dev && le->ino == st->st_ino) {
+ /*
+ * Save memory by releasing an entry when we've seen
+ * all of it's links.
+ */
+ if (--le->links <= 0) {
+ if (le->previous != NULL)
+ le->previous->next = le->next;
+ if (le->next != NULL)
+ le->next->previous = le->previous;
+ if (buckets[hash] == le)
+ buckets[hash] = le->next;
+ number_entries--;
+ /* Recycle this node through the free list */
+ if (stop_allocating) {
+ free(le);
+ } else {
+ le->next = free_list;
+ free_list = le;
+ }
+ }
+ return (1);
+ }
}
- if (*nfiles == *maxfiles) {
- *maxfiles = (*maxfiles + 128) + (*maxfiles / 2);
- *files = realloc((char *)*files, sizeof(ID) * *maxfiles);
- if (*files == NULL)
- errx(1, "can't allocate memory");
+ if (stop_allocating)
+ return (0);
+
+ /* Add this entry to the links cache. */
+ if (free_list != NULL) {
+ /* Pull a node from the free list if we can. */
+ le = free_list;
+ free_list = le->next;
+ } else
+ /* Malloc one if we have to. */
+ le = malloc(sizeof(struct links_entry));
+ if (le == NULL) {
+ stop_allocating = 1;
+ warnx("No more memory for tracking hard links");
+ return (0);
}
- (*files)[*nfiles].inode = ino;
- (*files)[*nfiles].dev = dev;
- ++(*nfiles);
+ le->dev = st->st_dev;
+ le->ino = st->st_ino;
+ le->links = st->st_nlink - 1;
+ number_entries++;
+ le->next = buckets[hash];
+ le->previous = NULL;
+ if (buckets[hash] != NULL)
+ buckets[hash]->previous = le;
+ buckets[hash] = le;
return (0);
}
More information about the Submit
mailing list