roll.sh: rolling sum / percentage - randomcrap - random crap programs of varying quality
(HTM) git clone git://git.codemadness.org/randomcrap
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 760ab4feb548764ecf6a66a6e07d4875f2cfed95
(DIR) parent 9b2a72e5008bf382175263892765d4cfbedf54a5
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 29 May 2025 15:42:11 +0200
roll.sh: rolling sum / percentage
Diffstat:
A roll.sh | 125 +++++++++++++++++++++++++++++++
1 file changed, 125 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/roll.sh b/roll.sh
@@ -0,0 +1,125 @@
+#!/bin/sh
+# rolling sum and percentage.
+# input should be sorted descending by value.
+
+# TODO: add and skip header column?
+# TODO: rolling sum per group vs group total.
+# TODO: simplify
+
+# usage(argv0)
+# Print a usage summary and an example invocation to stderr, then exit
+# with status 2. $1 is the program name shown in both lines.
+usage() {
+    printf "Usage: %s [-k columns] [-v value]\n" "$1" >&2
+    # the redirections are part of the example text and come after the
+    # options; only the program name is substituted.
+    printf "Example: %s -k 3,4 -v 2 < input.tsv > output.tsv\n" "$1" >&2
+    exit 2
+}
+
+kflag=
+kval='""'
+vflag=
+vval='2'
+while getopts k:v: name
+do
+ case $name in
+ k)
+ kflag=1
+ kval="$OPTARG"
+ ;; # key columns
+ v)
+ vflag=1
+ vval="$OPTARG"
+ ;; # value column
+
+ ?)
+ usage "$0"
+ ;;
+ esac
+done
+
+# build the awk expressions for the key and the value from the options.
+# every number N becomes the awk field reference $N and a comma becomes a
+# space, so 3,4 turns into $3 $4 which awk concatenates into one key.
+# [0-9][0-9]* is used because \+ is a GNU extension in basic regular
+# expressions and not every sed supports it.
+kexpr="$(printf '%s' "$kval" | sed -e 's@,@ @g' -e 's@\([0-9][0-9]*\)@\$\1@g')"
+vexpr="$(printf '%s' "$vval" | sed -e 's@\([0-9][0-9]*\)@\$\1@g')"
+
+# rolling sum and percentages. values that are zero or negative count as 0.
+# NOTE: kexpr and vexpr are pasted into the awk program text below, so the
+# -k and -v option values have to come from a trusted caller.
+LC_ALL=C awk '
+BEGIN {
+    FS = OFS = "\t";
+    ORS = "";   # every separator and newline is printed explicitly in END.
+    n = 0;      # number of input lines stored so far.
+    kk = 0;     # number of distinct group keys seen so far.
+}
+{
+    # config key column(s), if non-empty then it is grouped.
+    k = '"$kexpr"';
+
+    # value field.
+    value = '"$vexpr"' + 0.0; # config value column.
+    if (value <= 0.0)
+        value = 0.0;
+
+    # line, value and key of one record share the same index n, so the
+    # END block reads back the value that belongs to the line it prints.
+    line[n] = $0;
+    values[n] = value;
+
+    # total per group
+    if (k != "") {
+        gtotal[k] += value;
+        keys[n] = k;
+        # remember each key once, in order of first appearance.
+        if (!ukeycount[k]) {
+            ukeycount[k] = 1;
+            uniqkeys[kk] = k;
+            kk++;
+        }
+    }
+
+    # total
+    total += value;
+    n++;
+}
+END {
+    sum = 0.0;
+    for (j = 0; j < n; j++) {
+        value = values[j];
+        sum += value;
+
+        # original line, rolling sum, rolling sum as percentage of total.
+        print line[j];
+        print OFS;
+        printf("%.5f", sum);
+        print OFS;
+        if (total > 0)
+            printf("%.5f", ((sum / total) * 100.0));
+
+        k = keys[j];
+        if (k != "") {
+            # rolling sum within the group, including the current line,
+            # to match how the overall rolling sum is reported.
+            gsum[k] += value;
+            print OFS;
+            printf("%.5f", gsum[k]);
+            print OFS;
+            gt = gtotal[k] + 0.0;
+            # percentage of the whole group vs total.
+            if (gt > 0)
+                printf("%.5f", ((gt / total) * 100.0));
+            print OFS;
+            # percentage of the group total.
+            if (gt > 0)
+                printf("%.5f", ((value / gt) * 100.0));
+        }
+        print "\n";
+    }
+
+    # summary / totals (per group).
+    #for (j = 0; j < kk; j++) {
+    #    k = uniqkeys[j];
+    #    print k;
+    #    print OFS;
+    #    gt = gtotal[k] + 0.0;
+    #    printf("%.5f", gt);
+    #    print OFS;
+    #    if (total > 0)
+    #        printf("%.5f", ((gt / total) * 100.0));
+    #    print "\n";
+    #}
+}'