# Exploratory look at a word list: how many distinct leading-character
# prefixes exist, and how many bits a sequence of such prefixes would carry.

library(data.table) # for fread and other data.table functions
library(tidyverse) # for as_tibble to feed into ggplot
library(stringr) # str_sub(); also attached by tidyverse, kept explicit

# NOTE(review): hard-coded local path; the column names V1/V2 used below are
# presumably fread's defaults for a header-less file -- confirm against the
# data.
dt01 <- fread("c:/u/beale.wordlist.asc.proc01.txt")

#' Count the distinct prefixes of a given length.
#'
#' @param words Character vector of words.
#' @param len Single prefix length (number of leading characters kept).
#' @return The number of distinct values of `str_sub(words, 1, len)`.
count_distinct_prefixes <- function(words, len) {
  stopifnot(length(len) == 1L)
  uniqueN(str_sub(words, 1L, len))
}

# exploratory ----
dt01
# ?stringr case   (interactive help lookup; not valid as a script statement)

str_sub(dt01$V2, 1, 2)
dt01[, .N, by = str_sub(V2, 1, 2)][, .N]
dt01[, str_sub(V2, 1, 2), by = str_sub(V2, 1, 2)]
dt01[, .N]
# NOTE(review): this one groups on V1 while the others use V2 -- confirm
# that is intended.
dt01[, .N, by = str_sub(V1, 1, 2)]
dt01[, .(V3 = str_sub(V2, 1, 2))][, .N, by = V3][order(-N)] # [,.N]

# Scratch arithmetic; 906 and 6667 are presumably counts read off the
# outputs above (TODO confirm).
log(906^6)
log2(906^6)
log10(906^6)
log2(6667^6)
6667^6
log10(6667^6)

# Bits carried by 8 symbols drawn from the distinct one-character prefixes.
log2(count_distinct_prefixes(dt01$V2, 1L)^8)

# For each prefix length x (1..8) and y = 8 - x, compute
# log2(<distinct prefixes of length x>^y).
# `by = x` is essential: without it `x` inside j is the whole vector 1:8, so
# str_sub() would get a vector of end positions instead of one length per row
# and every row would report the same (wrong) prefix count.
data.table(x = 1:8)[
  , .(x, y = 8 - x)
][
  , .(y, V3 = log2(count_distinct_prefixes(dt01$V2, x)^y)),
  by = x
]