roll.sh: rolling sum / percentage - randomcrap - random crap programs of varying quality
 (HTM) git clone git://git.codemadness.org/randomcrap
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 760ab4feb548764ecf6a66a6e07d4875f2cfed95
 (DIR) parent 9b2a72e5008bf382175263892765d4cfbedf54a5
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Thu, 29 May 2025 15:42:11 +0200
       
       roll.sh: rolling sum / percentage
       
       Diffstat:
         A roll.sh                             |     125 +++++++++++++++++++++++++++++++
       
       1 file changed, 125 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/roll.sh b/roll.sh
       @@ -0,0 +1,125 @@
       +#!/bin/sh
       +# rolling sum and percentage.
       +# input should be sorted descending by value.
       +
       +# TODO: add and skip header column?
       +# TODO: rolling sum per group vs group total.
       +# TODO: simplify
       +
       +# usage(argv0): print the usage and an example to stderr, then exit with status 2.
       +usage() {
       +        printf "Usage:   %s [-k columns] [-v value]\n" "$1" >&2
       +        printf "Example: %s -k 3,4 -v 2 < input.tsv > output.tsv\n" "$1" >&2
       +        exit 2
       +}
       +
       +kflag=
       +kval='""'
       +vflag=
       +vval='2'
       +while getopts k:v: name
       +do
       +        case $name in
       +        k)
       +                kflag=1
       +                kval="$OPTARG"
       +                ;; # key columns
       +        v)  
       +                vflag=1
       +                vval="$OPTARG"
       +                ;; # value column
       +        
       +        ?)
       +                usage "$0"
       +                ;;
       +        esac
       +done
       +
       +# create awk expression to evaluate and to make it configurable.
       +kexpr="$(printf '%s' "$kval" | sed -e 's@,@        @g' -e 's@\([0-9]\+\)@\$\1@g')"
       +vexpr="$(printf '%s' "$vval" | sed -e 's@\([0-9]\+\)@\$\1@g')"
       +
       +# rolling sum, but ignoring values <= 0
       +LC_ALL=C awk '
       +BEGIN {
       +        FS = OFS = "\t";
       +        ORS = "";
       +        i = 0;
       +        kk = 0;
       +}
       +{
       +        line[i] = $0;
       +        i++;
       +        
       +        # config key column(s), if non-empty then it is grouped.
       +        k = '"$kexpr"';
       +        
       +        # value field.
       +        value = '"$vexpr"' + 0.0; # config value column.
       +        if (value <= 0.0)
       +                value = 0.0;
       +        
       +        values[i] = value;
       +        # total per group
       +        if (k != "") {
       +                gtotal[k] += (value+0.0);
       +                
       +                keys[i] = k;
       +                if (!ukeycount[k]) {
       +                        ukeycount[k] = 1;
       +                        uniqkeys[kk] = k;
       +                        kk++;
       +                }
       +        }
       +                
       +        # total
       +        total += (value+0.0);
       +}
       +END {
       +        sum = 0.0;
       +        for (j = 0; j < i; j++) {
       +                l = line[j];
       +
       +                value = values[j];
       +                sum += value;
       +                
       +                print l;
       +                print OFS;
       +                printf("%.5f", sum);
       +                print OFS;
       +                if (total > 0)
       +                        printf("%.5f", ((sum / total)*100.0));
       +                
       +                k = keys[j];
       +                if (k != "") {
       +                        print "\t";
       +                        gs = gsum[k] + 0.0;
       +                        printf("%.5f", gs);
       +                        print OFS;
       +                        gt = gtotal[k] + 0.0;
       +                        # percentage of the whole group vs total.
       +                        if (gt > 0)
       +                                printf("%.5f", ((gt / total) * 100.0));
       +                        gsum[k] += value;
       +                        print OFS;
       +                        # percentage of the group total.
       +                        if (gt > 0)
       +                                printf("%.5f", ((value / gt) * 100.0));
       +                        
       +                }
       +                print "\n";
       +        }
       +        
       +        # summary / totals (per group).
       +        #for (j = 0; j < kk; j++) {
       +        #        k = uniqkeys[j];
       +        #        print k;
       +        #        print OFS;
       +        #        gt = gtotal[k] + 0.0;
       +        #        printf("%.5f", gt);
       +        #        print OFS;
       +        #        if (total > 0)
       +        #                printf("%.5f", ((gt / total) * 100.0));
       +        #        print "\n";
       +        #}
       +}'