textract_memeuse.awk - bitreich-memestats - statistical analysis of tags in Freenode/#bitreich-en
 (HTM) git clone git://src.adamsgaard.dk/bitreich-memestats
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
       textract_memeuse.awk (1490B)
       ---
            1 #!/usr/bin/awk -f
            2 # get timestamp, user, and tag from irssi log in default format
            3 
            # Parse an irssi "--- Day changed <weekday> <month> <day> <year>" line.
            # Nothing is returned; the result is published through the globals
            # `day` (two digits), `month` (two digits) and `year` (four digits),
            # which the output rule reads for every subsequent log entry.
            function extract_date(s) {
                    # Drop the marker and the three-letter weekday, leaving
                    # "<month> <day> <year>".
                    gsub(/--- Day changed ... /, "", s);

                    # The first two-digit run in the remainder is the day of month.
                    match(s, /[0-9][0-9]/);
                    day = substr(s, RSTART, RLENGTH);

                    # The first four-digit run is the year.
                    match(s, /[0-9][0-9][0-9][0-9]/);
                    year = substr(s, RSTART, RLENGTH);

                    # Map the English month abbreviation to its number.  Each
                    # pattern swallows the entire string, so the first successful
                    # substitution leaves only two digits and the chain can stop.
                    # If no abbreviation is present, `month` keeps the stripped
                    # input string unchanged.
                    month = s;
                    sub(/.*Jan .*/, "01", month) ||
                    sub(/.*Feb .*/, "02", month) ||
                    sub(/.*Mar .*/, "03", month) ||
                    sub(/.*Apr .*/, "04", month) ||
                    sub(/.*May .*/, "05", month) ||
                    sub(/.*Jun .*/, "06", month) ||
                    sub(/.*Jul .*/, "07", month) ||
                    sub(/.*Aug .*/, "08", month) ||
                    sub(/.*Sep .*/, "09", month) ||
                    sub(/.*Oct .*/, "10", month) ||
                    sub(/.*Nov .*/, "11", month) ||
                    sub(/.*Dec .*/, "12", month);
            }
           27 
           # Return the nick that produced log line `s`.
           #
           # Two line shapes are recognised, both taken from the start of the line
           # so that "<", ">" or "* " inside the message text cannot be mistaken for
           # the nick:
           #   regular message:  "HH:MM <Xnick> text"   (X is one prefix character,
           #                                             e.g. a space or "@")
           #   action:           "HH:MM  * nick text"
           # For any other line the first space-delimited word is returned.
           function extract_user(s) {
                   # A regular message has "<" as the first of "<" / "*" on the
                   # line.  Test the argument itself, not the current record.
                   if (match(s, /^[^<*]*<[^>]*>/)) {
                           s = substr(s, RSTART, RLENGTH);  # "HH:MM <Xnick>"
                           sub(/^[^<]*<./, "", s);          # drop up to "<" + prefix char
                           sub(/>$/, "", s);                # drop closing ">"
                   } else {
                           sub(/^[^*]*\* /, "", s);         # drop up to the FIRST "* "
                           sub(/ .*/, "", s);               # keep only the first word
                   }
                   return s;
           }
           38 
           # Return the first "#tag" on line `s`, or "" when there is none.
           #
           # A tag is "#" followed by at least one letter or digit, and it must
           # either start the string or be preceded by a space.  This is the same
           # shape the calling rule triggers on, so an earlier stray "#" (as in
           # "issue#5" or "C#") is no longer returned by mistake.  The tag runs up
           # to the next space; all trailing sentence punctuation (! ? : , .) is
           # removed.
           function extract_tag(s) {
                   if (!match(s, /(^| )#[A-Za-z0-9][^ ]*/))
                           return "";
                   s = substr(s, RSTART, RLENGTH);
                   sub(/^ /, "", s);          # drop the separating space, if any
                   sub(/[!?:,.]+$/, "", s);   # "#tag!!" and "#tag." both become "#tag"
                   return s;
           }
           46 
           # Return the first HH:MM timestamp found in `s`, or "" if there is none.
           function extract_time(s) {
                   if (!match(s, /[0-2][0-9]:[0-5][0-9]/))
                           return "";
                   return substr(s, RSTART, RLENGTH);
           }
           51 
           # update date for subsequent entries
           # extract_date() stores the parsed date in the globals day/month/year;
           # n_days counts the day-change markers seen so far.
           /--- Day changed / {
                   extract_date($0);
                   n_days++;
           }

           # find tag in current line
           # Emits one tab-separated record per tagged line:
           #   n_days  YYYY-MM-DD  HH:MM  user  tag
           / #[A-Za-z0-9]+/ {
                   # Skip lines spoken by the nick "annna" and lines that mention the
                   # channel name #bitreich-en.
                   if (! /< annna>/ && ! /#bitreich-en/) {
                           tag = extract_tag($0);
                           # Tags containing "#nospoil" are not recorded.
                           if (tag !~ /#nospoil/) {
                                   # Plain "-" separators: "\-" is not a defined
                                   # escape sequence in AWK string literals.
                                   printf("%s\t%s-%s-%s\t%s\t%s\t%s\n",
                                              n_days, year, month, day,
                                              extract_time($0), extract_user($0), tag);
                           }
                   }
           }