du: Dedup hardlinks - sbase - suckless unix tools
 (HTM) git clone git://git.suckless.org/sbase
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit e8fe04c543eab13e892beda05cb9bccb9e4f441e
 (DIR) parent 39a4c55378294437627421571a51b64bd5e09623
 (HTM) Author: remph <lhr@disroot.org>
       Date:   Mon,  3 Mar 2025 19:52:37 +0100
       
       du: Dedup hardlinks
       
       Conform to POSIX, which says `Files with multiple links shall be counted
       and written for only one entry,' in the 2008[1] and 2013[2] editions, and
       uses more words to say the same thing in the 2017[3] and 2024[4] editions.
       This patch also keeps inodes between operands and dedups symlinks if
       applicable, which are implementation-defined in 2017 and required in 2024.
       See also the `RATIONALE' section in the 2024 edition.
       
       [1] https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html
       [2] https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html
       [3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html
       [4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html
       
       Diffstat:
         M du.c                                |      53 ++++++++++++++++++++++++++++++-
       
       1 file changed, 52 insertions(+), 1 deletion(-)
       ---
 (DIR) diff --git a/du.c b/du.c
       @@ -5,6 +5,7 @@
        #include <errno.h>
        #include <fcntl.h>
        #include <limits.h>
       +#include <search.h>
        #include <stdint.h>
        #include <stdlib.h>
        #include <stdio.h>
       @@ -20,6 +21,11 @@ static int aflag = 0;
        static int sflag = 0;
        static int hflag = 0;
        
       +struct file {
       +        dev_t devno;
       +        ino_t inode;
       +};
       +
        static void
        printpath(off_t n, const char *path)
        {
       @@ -35,16 +41,61 @@ nblks(blkcnt_t blocks)
                return (512 * blocks + blksize - 1) / blksize;
        }
        
       +static int
       +cmp(const void *p1, const void *p2)
       +{
       +        const struct file *f1 = p1, *f2 = p2;
       +
       +        if (f1->devno > f2->devno)
       +                return -1;
       +        if (f1->devno < f2->devno)
       +                return 1;
       +
       +        /* f1->devno == f2->devno */
       +        if (f1->inode < f2->inode)
       +                return -1;
       +        if (f1->inode > f2->inode)
       +                return 1;
       +
       +        return 0;
       +}
       +
       +static int
       +duplicated(dev_t dev, ino_t ino)
       +{
       +        static void *tree;
       +        struct file **fpp, *fp, file = {dev, ino};
       +
       +        if ((fpp = tsearch(&file, &tree, cmp)) == NULL)
       +                eprintf("%s:", argv0);
       +
       +        if (*fpp != &file)
       +                return 1;
       +
       +        /* new file added */
       +        fp = emalloc(sizeof(*fp));
       +        *fp = file;
       +        *fpp = fp;
       +
       +        return 0;
       +}
       +
        static void
        du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *r)
        {
                off_t *total = data, subtotal;
        
                subtotal = nblks(st->st_blocks);
       -        if (S_ISDIR(st->st_mode))
       +        if (S_ISDIR(st->st_mode)) {
                        recurse(dirfd, path, &subtotal, r);
       +        } else if (r->follow != 'P' || st->st_nlink > 1) {
       +                if (duplicated(st->st_dev, st->st_ino))
       +                        goto print;
       +        }
       +
                *total += subtotal;
        
       +print:
                if (!r->depth)
                        printpath(*total, r->path);
                else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || aflag))