#!/bin/sh # To unpack, sh this message. For more explanation, read the next few lines. # --This message is a .shar file, i.e., a shell archive. # --To unpack the files it contains into some empty directory DIR, # --first cd (change directory) to DIR. # --Then put this message into a file in DIR. # --(Use a FILENAME which differs from those to be created!) # --Remove from file FILENAME the lines prior to the "#!/bin/sh" line above. # --Finally execute "sh FILENAME". PATH=/bin:/usr/bin echo processing file icicle.r 1>&2 cat > icicle.r <<'CUT HERE------------ icicle.r' # icicle.r # This software is a RATFOR implementation of the method called ICICLE. # The authors of this software are Joseph B Kruskal and James M Landwehr. # Copyright (c) 1993 by AT&T. # Permission to use, copy, modify, and distribute this software for any # purpose without fee is hereby granted, provided that this entire # notice is included in all copies of any software which is or includes # a copy or modification of this software and in all copies of the # supporting documentation for such software. # THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMP- # LIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR AT&T MAKE ANY # REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY # OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. # This software comes from the SECOND MDS Package of AT&T Bell Laboratories # For explanation of the method this software implements, see # "Icicle Plots: Better Displays for Hierarchical Clustering" by # Joseph B Kruskal and James M Landwehr, in The American Statistician, # May 1983, Volume 37, Number 2, pages 162-168. # This file contains ICICLE proper, a program for printing ICICLE plots, # written in the RATFOR language. It consists of five subroutines (there # is no main routine). Its FORTRAN expansion is in the ICICLE.f file. # This file was created by Joseph B Kruskal, James M Landwehr, and Jean # E McRae at AT&T Bell Laboratories, Murray Hill, New Jersey, U.S.A. # It was completed in November, 1984. It has been thoroughly tested # by the authors. # Copyright November, 1984 by AT&T Bell Laboratories. # AT&T Bell Laboratories makes no warranties, express or implied, with # respect to this program. In particular, Bell Laboratories makes no # warranty of merchantability, fitness for a particular use, freedom # from infringement of any patent, copyright or trademark, nor any # warranty as to accuracy. Bell Laboratories assumes no obligation to # furnish any assistance of any kind whatsoever, or to furnish any # additional information or documentaion. #----+----@----+----@----+----@----+----@----+----@----+----@----+----@ # This program contains five subroutines, "icicle", "icilev", #"icipos", "icidis", "icires", separated by lines of #-#-#-... . # "icicle" actually prints out the ICICLE plot. #The other 4 subroutines are optional routines for "mode-setting" or #immediately returning extra information. Mode-setting modifies the #display mode used in subsequent calls to "icicle", as indicated here: # "icilev" can be used to change what clustering LEVels are displayed, # or to return to the calling routine those which were displayed. # "icipos" can be used to change the POSitions used for the objects, # or to return to the calling routine those which were used. # "icidis" can be used to change the DISplay appearance. # "icires" can be used to RESet all modes to default values. #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# #MAXN and MAXNLEV are also defined in the hiclust.r file . The values #there need not be the same as the values here. define MAXN 200 #MAXN means the maximum permitted number of objects (max nobj) define MAXNLEV 200 #MAXNLEV means maximum permitted number of levels for the display define NCHARMAX1 4 #NCHARMAX1 = 1+ max number of characters in internal labels #The following are based on a page width of 132 columns. define DISWIDTH 114 #DISWIDTH = (number of columns on page) - 18 define DISWIDTHALT 103 #DISWIDTHALT = (number of columns on page) - 29 define SPACER 18 define SPACERALT 29 #spacing prior to header labels define DISPLAYWIDTHTOTAL 600 #Conceptually, DISPLAYWIDTHTOTAL = maximum value permitted for #(number of objects) * (width) # 3 is used as the maximum value permitted for width. define DISRATIO 200.0 #Used in logarithmic spacing of display levels define NEWPAGE 12 #The formfeed (octal 014) character, for use in starting new page define TRUE 0 define FALSE 1 #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# #subroutine icicle, Ratfor version. #Produces the ICICLE plot subroutine icicle( nobj, joi1, joi2, joilev, sdissw, nlabcm, nlabch,_ extlab, title) #EXPLANATION OF THE ARGUMENTS, IN ORDER # nobj = input, number of objects # joi1(*) = input; "join 1" is first input array of join pointers # joi2(*) = input; "join 2" is second input array of join pointers # joilev(*) = input; "join level" has levels at which clusters join # sdissw = input; similarity-dissimilarity switch # +1.0=dissim, -1.0=simil # nlabcm = input; "no. of label char.s: max possible" is # number of rows in external label array # nlabch = input; "no. of label char.s" is # number of characters in longest external label # extlab(nlabcm,*) = input; external label = user labels for objects # title = input; user title for this run # EXPLANATION OF SOME OTHER VARIABLES # width = width per position in display. Default = min = 2 # distot = width of total display (may be several pages wide) # nchar1 = 1 + maximum number of characters in a label # labsw = label switch. # 0 = use object numbers, else use external labels # objll(MAXN) = left-most object of cluster at present join level # which contains indicated object # objr(MAXN) = right-hand neighbor of indicated object # objjoi(MAXN) = right-most object of left cluster making join # NCHARMAX1 = 1+ max number of characters in internal labels # wall(NCHARMAX1,MAXN) = array showing which potential walls are in use # label(MAXN) = internal array of labels for the objects # displa(DISPLAYWIDTHTOTAL)= display = one-line buffer for output # diswid = width of display on one page. # nstrip = number of strips = how many pages wide the display is #Explanation of logarithmic spacing of display levels # DISRATIO is compilation-time variable, defined below as 200 # Two constants, disadd and disrat , are calculated. # The additive constant disadd is chosen so that # (top display level + disadd)/(bottom display level + disadd)=DISRATIO # The multiplicative constant disrat is chosen so that # (display level + disadd) is uniformly spaced on log scale. # Thus each new display level is calculated by # (display level + disadd)/(next display level + disadd) = disrat #Common shared with the subroutines icilev, icipos, icidis common /iclev/ kdisle,ndisle,dislev common /icpos/ kobjpo,objpos common /icdis/ width, echar, fchar, jchar, bchar #Nonarray variables integer nobj, kdisle, ndisle, ndisl1, kobjpo, nlabcm, nlabch, nchar integer labsw, width, distot, digit, ldis, rdis, nchar1 real sdissw, disle character echar*1, jchar*1, fchar*1, bchar*1, schar*1 #Array variables which are arguments integer joi1(*), joi2(*), objpos(MAXN) real joilev(*) character extlab(nlabcm, *)*1 character title*80 #Array variables which are NOT arguments integer objll(MAXN), objr(MAXN), objjoi(MAXN) real dislev(MAXNLEV) character label(NCHARMAX1,MAXN)*1, wall(MAXN)*1 character displa(DISPLAYWIDTHTOTAL)*1 #Default values for the modes data kdisle /0/ #switch which controls use of levels data kobjpo /0/ #switch which controls positioning of objects # echar, fchar, jchar, bchar are the display characters data width, echar, fchar, jchar, bchar /2, "&", "&", "=", " "/ #EXECUTABLE CODE STARTS HERE #---------------------------------------------------------------------- #PART 1. INITIALIZATION distot=width*(nobj-1)+1 #distot = display width total (all strips) fnobj=float(nobj) #fnobj = floating number of objects nobjdi=1+ifix(alog10(fnobj+0.5)) #nobjdi = number of object digits if(nlabch!=0) {nchar=nlabch; labsw=1} #nchar = number of characters else {nchar=nobjdi; labsw=0} #labsw = label switch: 1=ext, 0=int nchar1=nchar + 1 npos=nobj #npos = number of object positions in display #npos is always equal to nobj. Distinction is for conceptual clarity. njoile=nobj-1 #njoile = number of join levels in display #Initialize handling of display levels. switch(kdisle) { case 2, 3: #Use join levels as display levels. #Therefore, ignore dislev. Instead, use one level for each join ndisl1=njoile case 1: #Use display levels provided in dislev #Use number of display levels from ndisle ndisl1=ndisle case 0: #Logarithmic spacing of display levels. if(ndisle != 0) ndisl1=ndisle else ndisl1=njoile #Calculate disrat and disadd, based on rationale explained above distop=joilev(njoile) disbot=joilev(1) disadd=(distop - DISRATIO * disbot)/(DISRATIO - 1.0) disrat=(1.0/DISRATIO)**(1.0/float(ndisl1-1)) } ndisle=ndisl1 #For use in icilev #---------------------------------------------------------------------- #PART 2. CALCULATE objll(), objr() RECURSIVELY. ALSO CALC objjoi() #Recursion follows the original merging process #At each stage of recursion, for clusters defined at that stage: # objll yields left-most object of largest current cluster containing # argument object. # objr yields right-hand neighbor of argument object in the largest # current cluster containing argument object. However, # if argument is right-most object, objr yields object itself # objjoi yields right-most object of left cluster of argument level. #Initialize objll(), objr(). Each initial cluster has 1 object. do iobj=1,nobj { objll(iobj)=iobj objr(iobj)=iobj } #Recursive calculation itself do ijoile=1,njoile { #Start of long do loop on ijoile i1obj=joi1(ijoile) #An object in first cluster being joined i1objl=objll(i1obj) #Find left-most object in first cluster #Find right-most object in first cluster i1objr=objr(i1obj) #right-hand neighbor of object WITHIN cluster while(i1objr!=objr(i1objr)) #Iterate till right end of cl reached { i1objr=objr(i1objr) if( !(1<=i1objr & i1objr<=nobj) ) { #i1objr should lie between 1 and nobj nerror = 2 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 2." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return } } #(Still in long do loop on ijoile) i2obj=joi2(ijoile) #An object in second cluster being joined i2objl=objll(i2obj) #Find left-most object in second cluster #Find right-most object in second cluster i2objr=objr(i2obj) #right-hand neighbor of object WITHIN cluster while(i2objr!=objr(i2objr)) #Iterate until right end of cl reached { i2objr=objr(i2objr) if( !(1<=i2objr & i2objr<=nobj) ) { #i2objr should lie between 1 and nobj nerror=3 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 3." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return } } #(Still in long do loop on ijoile) #Decide in which order to join the two clusters # joisid=1 means first cluster on left # joisid=2 means second cluster on left #Present version of program: # if using object position arrangement provided by calling routine # (kobjpo=1), chooses case as required # if creating object position arrangement (kobjpo=0) # always uses case 1 #Possible improvement of program: when creating arrangement (kobjpo=0), #make choice of case depend on cluster sizes or other things, so as #to obtain desirable ICICLE diagram. switch(kobjpo) { case 1: #Use arrangement given in objpos joisid=0 do ipos=1,npos { if(objpos(ipos)==i1objl) joisid=1 #first cluster on left if(objpos(ipos)==i2objl) joisid=2 #second cluster on left if(joisid!=0) break } if(joisid==0) { #joisid should be defined in preceding do loop nerror=4 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 4." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return } case 0: #Create arrangement of objects if kobjpo=0 joisid=1 #first cluster on left } #(Still in long do loop on ijoile) #Join the two clusters in order selected switch(joisid) { #Start of switch on joisid case 1: #first cluster on left objjoi(ijoile)=i1objr #Right-most object of left cluster objr(i1objr)=i2objl #Recursive updating of objr #Recursive updating of objll iobj=i2objl repeat { objll(iobj)=i1objl if(iobj==objr(iobj)) break else iobj=objr(iobj) if( !(1<=iobj & iobj<=nobj) ) { #iobj should lie between 1 and nobj nerror=5 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 5." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return } } case 2: #second cluster on left objjoi(ijoile)=i2objr #Right-most object of left cluster objr(i2objr)=i1objl #Recursive updating of objr #Recursive updating of objll iobj=i1objl repeat { objll(iobj)=i2objl if(iobj==objr(iobj)) break else iobj=objr(iobj) if( !(1<=iobj & iobj<=nobj) ) { #iobj should lie between 1 and nobj nerror=6 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 6." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return } } } #End of switch on joisid } #End of long do loop on ijoile #End of definition of objll, objr, objjoi lllobj=objll(1) #left-most object in entire set #---------------------------------------------------------------------- #PART 3. DEAL WITH OBJECT POSITION ARRAY, objpos() switch(kobjpo) { #Start of switch on kojbpo case 1: #If object position array was provided by calling program, check #consistency with clustering provided by the calling program. kcons=TRUE #To start with, assume consistent for(ipos=1; ipos=1; ichar=ichar-1) { i=iobj if(labsw!=0) label(ichar,ipos)=extlab(ichar,iobj) else { digit=mod(i,10) i=(i-digit)/10 write(schar,1) digit 1 format(i1) label(ichar,ipos)=schar } } label(nchar1,ipos)=echar iobj=objr(iobj) if( !(1<=iobj & iobj<=nobj) ) { #iobj should be between 1 and nobj nerror=7 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 7." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return } } do ichar=1,nchar #print object labels at top of columns { do ipos=1,npos { idis1=width*(ipos-1)+1 displa(idis1)=label(ichar,ipos) if(ipos==npos) break for(idis=idis1+1; idis<=width*ipos; idis=idis+1) { displa(idis)=" " } } if(kdisle!=2) print 22, (displa(idis),idis=ldis,rdis) else print 23, (displa(idis),idis=ldis,rdis) 22 format(SPACER_x, DISWIDTH_a1) 23 format(SPACERALT_x, DISWIDTHALT_a1) #change blanks to fill characters, in case some labels shorter do ipos=1,npos { if(label(ichar,ipos)==" ") label(ichar,ipos)=fchar } } print * , " " #blank line #PRINT BODY OF DISPLAY #Initialize join level counter, ijoile, and wall. ijoile=njoile do ipos=1,npos wall(ipos)=jchar #fill wall with join character wall(npos)=bchar #blank out last entry of wall #Initialize phase counter, ipha, and set number of phases, npha. ipha=0 npha=nchar1 #Initialize display level counter, idisle, and disl23. idisle=ndisl1 disl23 = 0.0 #Special indicator for cases 2 and 3 #Initialize display level, disle. switch(kdisle) { case 0: disle=joilev(njoile) #Start with root node join level case 1: disle=dislev(ndisl1) #Use display levels from calling program case 2, 3: disle=joilev(ndisl1) #Use join levels } dislev(ndisl1)=disle #Save disle in dislev repeat { #Start of repeat loop to print one strip. #Update ijoile, wall. while( (joilev(ijoile)-disle)*sdissw+disl23 > 0. & ijoile>=1) #Normally: Continue until joilev moves past disle. disl23=0.0 #Cases 2,3, on first entry: Skip loop. disl23=0.0 #Cases 2,3, later entry: Do loop just once. disl23=1.0 { #Start of while loop to update ijoile and wall. if(kdisle==2|kdisle==3)disl23 = 1.0 #Reset 2,3 indicator. #Find right-most object of left cluster making join iobj=objjoi(ijoile) #Find its position by counting from left-most object ipos=1 for(jobj=lllobj; jobj!=iobj; jobj=objr(jobj)) { ipos=ipos+1 if( !(1<=jobj & jobj<=nobj) ) { #jobj should be between 1 and nobj nerror=8 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 8." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return } } #Blank out the join character making this join wall(ipos)=bchar #Blank out new singleton groups. if(wall(ipos+1)==bchar) do ichar=1,nchar1 label(ichar,ipos+1)=bchar if( ( ipos>1 & wall(ipos-1)==bchar ) | ipos==1 ) do ichar=1,nchar1 label(ichar,ipos)=bchar #Update ijoile. ijoile=ijoile-1 if(disl23==1.0) break #Move just one join level in cases 2,3 } #End of while loop to update ijoile and wall. #Update ipha. ipha=ipha+1; if(ipha>npha) ipha=1 #Fill displa from label and wall. do ipos=1,npos { idis1=width*(ipos-1)+1 displa(idis1)=label(ipha,ipos) if(ipos==npos) break for(idis=idis1+1; idis<=width*ipos; idis=idis+1) { displa(idis)=wall(ipos) } } #THE PRIMARY OUTPUT STATEMENT #Print one line of display. if(kdisle!=2) print 32, idisle, disle, (displa(idis), idis=ldis,rdis) else if (ijoile==njoile) print 33,idisle, disle, (displa(idis), idis=ldis,rdis) else print 34, idisle, disle, joilev(idisle+1),_ (displa(idis), idis=ldis,rdis) 32 format(1x,i3,2x,g11.5,1x,DISWIDTH_a1) 33 format(1x,i3,2x,g11.5," No limit ",1x,DISWIDTHALT_a1) 34 format(1x,i3,2x,g11.5,g11.5,1x,DISWIDTHALT_a1) #update idisle and apply stopping rule idisle=idisle-1 if(idisle<1) break #Update disle and save disle in dislev switch(kdisle) { case 0: disle=(disle+disadd)*disrat - disadd #Calculate display level case 1: disle=dislev(idisle) #Use display level from calling program case 2, 3: disle=joilev(ijoile-1) #Use next join level as display level } dislev(idisle)=disle #Save disle } #End of repeat loop to print one strip. } #End of long for loop through strips. Index is ldis. #---------------------------------------------------------------------- return end #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# #subroutine icilev, Ratfor version. #sets the display levels according to user preference subroutine icilev(sw, array, n) #Argument description follows: # sw = input. It controls handling of levels, as described below. # It can be -1, 0 (default value) 1, 2, 3, or 4. # It changes the display mode, except when sw=-1. # array = input or output. It holds display levels. # It is used only with sw values -1, 1. # n = input. It indicates the number of display levels # It is used only with sw values -1, 0, 1. # What icilev does for different sw values. #-1 = Does NOT change display mode. Instead, has immediate effect: # display level values are returned in "array" ("array" is output) # If n!=0, n lowest levels returned; if n=0, all levels returned # 0 = Log spacing used for display levels. # If n!=0, n levels used; if n=0, no. of levels=number of objects. # 1 = n levels from "array" are used in display("array" is input). # 2 = One level is used for each join. Also, TWO display levels are # printed on each line, to show INTERVAL over which this # clustering is relevant. # 3 = Same, but as space saver only FIRST display level is printed. common /iclev/ kdisle, ndisle, dislev integer kdisle, n, n1, ndisle, sw real dislev(MAXNLEV), array(n) switch(sw) { case -1, 1: if(n==0) { n1=ndisle n=ndisle } else n1=n } switch(sw) { case -1: do i = 1,n1 array(i) = dislev(i) return case 1: do i = 1,n dislev(i) = array(i) case 0, 2, 3: default: #kdisle has invalid value print *, "----------------------------------------" print *,_ "ICICLE: Call to icilev ignored, because first argument is invalid" print *, "----------------------------------------" return } kdisle = sw #levels switch ndisle = n #number of levels return end #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# #subroutine icipos, Ratfor version. # Allows the user to set the position order of the objects in the # display (i.e., change the display mode) in subsequent calls to # icicle, or to find what position order was used during the last call # to icicle subroutine icipos(sw, array, n) # Argument description follows: # sw is input; controls handling of object positions (see below). # array is input or output; gives positions of objects. # It is used only with sw values -1 and 1. # n is input; number of positions in "array". # It is used only with sw values -1 and 1. # sw has three possible values (0 is default value): # -1 : Does NOT change display mode. Instead, has immediate effect: # ICICLE returns n object positions (from previous use) # in "array" ("array" is output). # 0 : ICICLE will supply its own object positions in future # 1 : ICICLE reads in n object positions from "array" (input). It # will use these object positions in future calls to icicle. # However, if they are not consistent with hierarchical # clustering, modifications are made as necessary (but the input # "array" is never changed). common /icpos/ kobjpo, objpos integer kobjpo,objpos(MAXN),sw,array(n),n switch(sw) { case -1: do i = 1,n array(i) = objpos(i) return case 1: do i = 1,n objpos(i) = array(i) case 0: default: #invalid value print *, "----------------------------------------" print *,_ "ICICLE: Call to icipos ignored, because first argument is invalid" print *, "----------------------------------------" return } kobjpo = sw return end #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# #subroutine icidis, Ratfor version. #Enters ICICLE display parameters subroutine icidis(widthi, echari, fchari, jchari, bchari) #If this subroutine is not called, ICICLE will use default values of # display parameters. #width = width per position = number of columns from one object column #to next in display, measured "center to center". minimum=default=2. #echar = end character. Default = "&". # Separates vertical repetitions of labels. #fchar = fill character. Default = "&". # Fills out shorter labels to length of longer ones. #jchar = join character. Default = "=". # Joins objects which belong to the same cluster. #bchar = blank character. Default = " ". # Used for white space which separates objects in # distinct clusters. common /icdis/ width, echar, fchar, jchar, bchar #display parameters integer width, widthi character echari*1, jchari*1, fchari*1, bchari*1 character echar*1, jchar*1, fchar*1, bchar*1 if( !( 2<=widthi & widthi<=5 ) ) { print *, "----------------------------------------" print *,_ "ICICLE: Width per position has been changed to between 2 and 5" print *, "----------------------------------------" } width = min(max(2, widthi),5) if(widthi>3) { print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *,_ "ICICLE: If width per position > 3, array 'displa' may overflow" print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" } echar = echari jchar = jchari fchar = fchari bchar = bchari return end #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# #subroutine icires, Ratfor version. #Resets all modes to their default values. subroutine icires integer dummyi real dummyr(1) call icilev(0,dummyr,0) call icipos(0,dummyr,dummyi) call icidis(2, "&", "&", "=", " ") #default values for all arguments return end CUT HERE------------ icicle.r echo processing file icicle.f 1>&2 cat > icicle.f <<'CUT HERE------------ icicle.f' c# icicle.f c# This software is the FORTRAN expansion of the program in icicle.r c- The authors of this software are Joseph B Kruskal and James M Landwehr. c- Copyright (c) 1993 by AT&T. c- Permission to use, copy, modify, and distribute this software for any c- purpose without fee is hereby granted, provided that this entire c- notice is included in all copies of any software which is or includes c- a copy or modification of this software and in all copies of the c- supporting documentation for such software. c- THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMP- c- LIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR AT&T MAKE ANY c- REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY c- OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. c- This software comes from the SECOND MDS Package of AT&T Bell Laboratories c- For explanation of the method this software implements, see c- "Icicle Plots: Better Displays for Hierarchical Clustering" by c- Joseph B Kruskal and James M Landwehr, in The American Statistician, c- May 1983, Volume 37, Number 2, pages 162-168. c- This file contains ICICLE proper, a program for printing ICICLE plots, c# in Fortran. It consists of five subroutines (there is no main routine) c# It is the Fortran expansion of the ICICLE.r file, c# except that the comments have been modified as appropriate. c- This file was created by Joseph B Kruskal, James M Landwehr, and Jean c- E McRae at AT&T Bell Laboratories, Murray Hill, New Jersey, U.S.A. c- It was completed in November, 1984. It has been thoroughly tested c- by the authors. c- Copyright November, 1984 by AT&T Bell Laboratories. c- AT&T Bell Laboratories makes no warranties, express or implied, with c- respect to this program. In particular, Bell Laboratories makes no c- warranty of merchantability, fitness for a particular use, freedom c- from infringement of any patent, copyright or trademark, nor any c- warranty as to accuracy. Bell Laboratories assumes no obligation to c- furnish any assistance of any kind whatsoever, or to furnish any c- additional information or documentaion. c# Explanation of the comment convention used: c# A comment that starts with "c-" is comment line in Ratfor version. c# A comment that starts with "c+" comes from line with statement on it, c# and belongs to the immediately following Fortran line, if any. c# A comment that starts with "c " corresponds to a Ratfor statement. c# A comment that starts with "c#" was added or altered after expansion. c#----+----@----+----@----+----@----+----@----+----@----+----@----+----@ c- This program contains five subroutines, "icicle", "icilev", c-"icipos", "icidis", "icires", separated by lines of #-#-#-... . c- "icicle" actually prints out the ICICLE plot. c-The other 4 subroutines are optional routines for "mode-setting" or c-immediately returning extra information. Mode-setting modifies the c-display mode used in subsequent calls to "icicle", as indicated here: c- "icilev" can be used to change what clustering LEVels are displayed, c- or to return to the calling routine those which were displayed. c- "icipos" can be used to change the POSitions used for the objects, c- or to return to the calling routine those which were used. c- "icidis" can be used to change the DISplay appearance. c- "icires" can be used to RESet all modes to default values. c--#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# c# The Ratfor DEFINE statements and their explanations have been deleted c# because there is nothing in Fortran that corresonds to DEFINE. c# The page width and all array sizes in this Fortran version are those c# which result from the DEFINE statements in the Ratfor version. c--#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# c-subroutine icicle, Fortran version. c-Produces the ICICLE plot subroutine icicle( nobj, joi1, joi2, joilev, sdissw, nlabcm, & nlabch, extlab, title) c-EXPLANATION OF THE ARGUMENTS, IN ORDER c- nobj = input, number of objects c- joi1(*) = input; "join 1" is first input array of join pointers c- joi2(*) = input; "join 2" is second input array of join pointers c- joilev(*) = input; "join level" has levels at which clusters join c- sdissw = input; similarity-dissimilarity switch c- +1.0=dissim, -1.0=simil c- nlabcm = input; "no. of label char.s: max possible" is c- number of rows in external label array c- nlabch = input; "no. of label char.s" is c- number of characters in longest external label c- extlab(nlabcm,*) = input; external label = user labels for objects c- title = input; user title for this run c- EXPLANATION OF SOME OTHER VARIABLES c- width = width per position in display. Default = min = 2 c- distot = width of total display (may be several pages wide) c- nchar1 = 1 + maximum number of characters in a label c- labsw = label switch. c- 0 = use object numbers, else use external labels c- objll(MAXN) = left-most object of cluster at present join level c- which contains indicated object c- objr(MAXN) = right-hand neighbor of indicated object c- objjoi(MAXN) = right-most object of left cluster making join c- NCHARMAX1 = 1+ max number of characters in internal labels c- wall(NCHARMAX1,MAXN) = array showing which potential walls are in use c- label(MAXN) = internal array of labels for the objects c- displa(DISPLAYWIDTHTOTAL)= display = one-line buffer for output c- diswid = width of display on one page. c- nstrip = number of strips = how many pages wide the display is c-Explanation of logarithmic spacing of display levels c- DISRATIO is compilation-time variable, defined below as 200 c- Two constants, disadd and disrat , are calculated. c- The additive constant disadd is chosen so that c- (top display level + disadd)/(bottom display level + disadd)=DISRATIO c- The multiplicative constant disrat is chosen so that c- (display level + disadd) is uniformly spaced on log scale. c- Thus each new display level is calculated by c- (display level + disadd)/(next display level + disadd) = disrat c-Common shared with the subroutines icilev, icipos, icidis common /iclev/ kdisle,ndisle,dislev common /icpos/ kobjpo,objpos common /icdis/ width, echar, fchar, jchar, bchar c-Nonarray variables integer nobj, kdisle, ndisle, ndisl1, kobjpo, nlabcm, nlabch, & nchar integer labsw, width, distot, digit, ldis, rdis, nchar1 real sdissw, disle character echar*1, jchar*1, fchar*1, bchar*1, schar*1 c-Array variables which are arguments integer joi1(*), joi2(*), objpos(200) real joilev(*) character extlab(nlabcm, *)*1 character title*80 c-Array variables which are NOT arguments integer objll(200), objr(200), objjoi(200) real dislev(200) character label(4,200)*1, wall(200)*1 character displa(600)*1 c-Default values for the modes c+switch which controls use of levels data kdisle /0/ c+switch which controls positioning of objects data kobjpo /0/ c- echar, fchar, jchar, bchar are the display characters data width, echar, fchar, jchar, bchar /2, "&", "&", "=", " "/ c-EXECUTABLE CODE STARTS HERE c----------------------------------------------------------------------- c-PART 1. INITIALIZATION c+distot = display width total (all strips) distot=width*(nobj-1)+1 c+fnobj = floating number of objects fnobj=float(nobj) c+nobjdi = number of object digits nobjdi=1+ifix(alog10(fnobj+0.5)) if(.not.(nlabch.ne.0))goto 23000 nchar=nlabch labsw=1 c+nchar = number of characters goto 23001 c else 23000 continue nchar=nobjdi labsw=0 23001 continue c+labsw = label switch: 1=ext, 0=int nchar1=nchar + 1 c+npos = number of object positions in display npos=nobj c- npos is always equal to nobj. Distinction is for conceptual clarity. c+njoile = number of join levels in display njoile=nobj-1 c-Initialize handling of display levels. c switch I23002 = (kdisle) if(.not.(I23002.eq.( 2).or.I23002.eq.( 3)))goto 23003 c- Use join levels as display levels. c- Therefore, ignore dislev. Instead, use one level for each join ndisl1=njoile goto 23002 23003 continue if(.not.(I23002.eq.( 1)))goto 23004 c- Use display levels provided in dislev c- Use number of display levels from ndisle ndisl1=ndisle goto 23002 23004 continue if(.not.(I23002.eq.( 0)))goto 23005 c- Logarithmic spacing of display levels. if(.not.(ndisle .ne. 0))goto 23006 ndisl1=ndisle goto 23007 c else 23006 continue ndisl1=njoile 23007 continue c- Calculate disrat and disadd, based on rationale explained above distop=joilev(njoile) disbot=joilev(1) disadd=(distop - 200.0 * disbot)/(200.0 - 1.0) disrat=(1.0/200.0)**(1.0/float(ndisl1-1)) 23005 continue 23002 continue c+For use in icilev ndisle=ndisl1 c----------------------------------------------------------------------- c-PART 2. CALCULATE objll(), objr() RECURSIVELY. ALSO CALC objjoi() c-Recursion follows the original merging process c-At each stage of recursion, for clusters defined at that stage: c- objll yields left-most object of largest current cluster containing c- argument object. c- objr yields right-hand neighbor of argument object in the largest c- current cluster containing argument object. However, c- if argument is right-most object, objr yields object itself c- objjoi yields right-most object of left cluster of argument level. c-Initialize objll(), objr(). Each initial cluster has 1 object. do 23008 iobj=1,nobj objll(iobj)=iobj objr(iobj)=iobj 23008 continue c-Recursive calculation itself do 23010 ijoile=1,njoile c+Start of long do loop on ijoile c+An object in first cluster being joined i1obj=joi1(ijoile) c+Find left-most object in first cluster i1objl=objll(i1obj) c-Find right-most object in first cluster c+right-hand neighbor of object WITHIN cluster i1objr=objr(i1obj) c while 23012 if(.not.(i1objr.ne.objr(i1objr)))goto 23013 c+Iterate till right end of cl reached i1objr=objr(i1objr) if(.not.( .not.(1.le.i1objr .and. i1objr.le.nobj) ))goto 230 & 14 c- i1objr should lie between 1 and nobj nerror = 2 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 2." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return 23014 continue goto 23012 c endwhile 23013 continue c-(Still in long do loop on ijoile) c+An object in second cluster being joined i2obj=joi2(ijoile) c+Find left-most object in second cluster i2objl=objll(i2obj) c-Find right-most object in second cluster c+right-hand neighbor of object WITHIN cluster i2objr=objr(i2obj) c while 23016 if(.not.(i2objr.ne.objr(i2objr)))goto 23017 c+Iterate until right end of cl reached i2objr=objr(i2objr) if(.not.( .not.(1.le.i2objr .and. i2objr.le.nobj) ))goto 230 & 18 c- i2objr should lie between 1 and nobj nerror=3 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 3." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return 23018 continue goto 23016 c endwhile 23017 continue c-(Still in long do loop on ijoile) c-Decide in which order to join the two clusters c- joisid=1 means first cluster on left c- joisid=2 means second cluster on left c-Present version of program: c- if using object position arrangement provided by calling routine c- (kobjpo=1), chooses case as required c- if creating object position arrangement (kobjpo=0) c- always uses case 1 c-Possible improvement of program: when creating arrangement (kobjpo=0), c-make choice of case depend on cluster sizes or other things, so as c-to obtain desirable ICICLE diagram. c switch I23020 = (kobjpo) if(.not.(I23020.eq.( 1)))goto 23021 c+Use arrangement given in objpos joisid=0 do 23022 ipos=1,npos if(.not.(objpos(ipos).eq.i1objl))goto 23024 c+first cluster on left joisid=1 23024 continue if(.not.(objpos(ipos).eq.i2objl))goto 23026 c+second cluster on left joisid=2 23026 continue if(.not.(joisid.ne.0))goto 23028 goto 23023 23028 continue 23022 continue 23023 continue if(.not.(joisid.eq.0))goto 23030 c- joisid should be defined in preceding do loop nerror=4 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 4." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return 23030 continue goto 23020 23021 continue if(.not.(I23020.eq.( 0)))goto 23032 c+Create arrangement of objects if kobjpo=0 c+first cluster on left joisid=1 23032 continue 23020 continue c-(Still in long do loop on ijoile) c-Join the two clusters in order selected c switch I23033 = (joisid) c+Start of switch on joisid if(.not.(I23033.eq.( 1)))goto 23034 c+first cluster on left c+Right-most object of left cluster objjoi(ijoile)=i1objr c+Recursive updating of objr objr(i1objr)=i2objl c- Recursive updating of objll iobj=i2objl c repeat 23035 continue objll(iobj)=i1objl if(.not.(iobj.eq.objr(iobj)))goto 23038 goto 23037 c else 23038 continue iobj=objr(iobj) 23039 continue if(.not.( .not.(1.le.iobj .and. iobj.le.nobj) ))goto 2304 & 0 c- iobj should lie between 1 and nobj nerror=5 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, & "ICICLE: Rest of output aborted, nerror = 5." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return 23040 continue 23036 goto 23035 23037 continue goto 23033 23034 continue if(.not.(I23033.eq.( 2)))goto 23042 c+second cluster on left c+Right-most object of left cluster objjoi(ijoile)=i2objr c+Recursive updating of objr objr(i2objr)=i1objl c- Recursive updating of objll iobj=i1objl c repeat 23043 continue objll(iobj)=i2objl if(.not.(iobj.eq.objr(iobj)))goto 23046 goto 23045 c else 23046 continue iobj=objr(iobj) 23047 continue if(.not.( .not.(1.le.iobj .and. iobj.le.nobj) ))goto 2304 & 8 c- iobj should lie between 1 and nobj nerror=6 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, & "ICICLE: Rest of output aborted, nerror = 6." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return 23048 continue 23044 goto 23043 23045 continue 23042 continue 23033 continue c+End of switch on joisid 23010 continue c+End of long do loop on ijoile c-End of definition of objll, objr, objjoi c+left-most object in entire set lllobj=objll(1) c----------------------------------------------------------------------- c-PART 3. DEAL WITH OBJECT POSITION ARRAY, objpos() c switch I23050 = (kobjpo) c+Start of switch on kojbpo if(.not.(I23050.eq.( 1)))goto 23051 c- If object position array was provided by calling program, check c- consistency with clustering provided by the calling program. c+To start with, assume consistent kcons=0 c for ipos=1 23052 if(.not.(ipos.lt.npos))goto 23054 if(.not.(objr(objpos(ipos)) .ne. objpos(ipos+1)))goto 23055 kcons=1 goto 23054 23055 continue ipos=ipos+1 goto 23052 c endfor 23054 continue if(.not.(kcons.eq.1))goto 23057 print *, "----------------------------------------" print *, & "ICICLE: Given object positions not consistent with given tree," print *, & "so the Icicle plot will use modified object positions." print *, "----------------------------------------" 23057 continue goto 23050 23051 continue if(.not.(I23050.eq.( 0)))goto 23059 c- Save object position iobj=lllobj c for ipos=1 23060 if(.not.(ipos.le.npos))goto 23062 objpos(ipos)=iobj iobj=objr(iobj) ipos=ipos+1 goto 23060 c endfor 23062 continue 23059 continue 23050 continue c+End of switch on kobjpo c----------------------------------------------------------------------- c-PART 4. PRINT ICICLE PLOT c-In general, display may be too wide to get on one page. c-It is broken up into a number of vertical strips, each one page wide. c-Entire display consists of several strips, to be placed side by side. if(.not.(kdisle.ne.2))goto 23063 c+diswid = display width which shows on one page diswid=114 goto 23064 c else 23063 continue diswid=103 23064 continue c+nstrip = number of strips nstrip=(distot+diswid-1)/diswid c-Loop through number of strips necessary to show whole display. c-During loop c- istrip is number of the strip c- ldis is object position of left edge of strip c- rdis is object position of right edge of strip c-When generating display line for one strip, create display line for c-entire width, but only use it from ldis to rdis istrip=0 c for ldis=1 23065 if(.not.(ldis.le.distot))goto 23067 c+Start of long for loop through strips. Index is ldis. rdis=min(distot, ldis+diswid-1) istrip=istrip+1 c-print header information at top of display print 7,12 7 format(a1) if(.not.(diswid.lt.distot))goto 23068 print 8, istrip, nstrip 23068 continue 8 format("strip",i3," of",i3," strips") print *, title c+output a blank line print *, " " c-initialize the label array, label(,) iobj=lllobj do 23070 ipos=1,npos c for ichar=nchar 23072 if(.not.(ichar.ge.1))goto 23074 i=iobj if(.not.(labsw.ne.0))goto 23075 label(ichar,ipos)=extlab(ichar,iobj) goto 23076 c else 23075 continue digit=mod(i,10) i=(i-digit)/10 write(schar,1) digit 1 format(i1) label(ichar,ipos)=schar 23076 continue ichar=ichar-1 goto 23072 c endfor 23074 continue label(nchar1,ipos)=echar iobj=objr(iobj) if(.not.( .not.(1.le.iobj .and. iobj.le.nobj) ))goto 23077 c- iobj should be between 1 and nobj nerror=7 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "ICICLE: Rest of output aborted, nerror = 7." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return 23077 continue 23070 continue c+print object labels at top of columns do 23079 ichar=1,nchar do 23081 ipos=1,npos idis1=width*(ipos-1)+1 displa(idis1)=label(ichar,ipos) if(.not.(ipos.eq.npos))goto 23083 goto 23082 23083 continue c for idis=idis1+1 23085 if(.not.(idis.le.width*ipos))goto 23087 displa(idis)=" " idis=idis+1 goto 23085 c endfor 23087 continue 23081 continue 23082 continue if(.not.(kdisle.ne.2))goto 23088 print 22, (displa(idis),idis=ldis,rdis) goto 23089 c else 23088 continue print 23, (displa(idis),idis=ldis,rdis) 23089 continue 22 format(18x, 114a1) 23 format(29x, 103a1) c- change blanks to fill characters, in case some labels shorter do 23090 ipos=1,npos if(.not.(label(ichar,ipos).eq." "))goto 23092 label(ichar,ipos)=fchar 23092 continue 23090 continue 23079 continue c+blank line print * , " " c-PRINT BODY OF DISPLAY c-Initialize join level counter, ijoile, and wall. ijoile=njoile do 23094 ipos=1,npos c+fill wall with join character wall(ipos)=jchar 23094 continue c+blank out last entry of wall wall(npos)=bchar c-Initialize phase counter, ipha, and set number of phases, npha. ipha=0 npha=nchar1 c-Initialize display level counter, idisle, and disl23. idisle=ndisl1 c+Special indicator for cases 2 and 3 disl23 = 0.0 c-Initialize display level, disle. c switch I23096 = (kdisle) if(.not.(I23096.eq.( 0)))goto 23097 c+Start with root node join level disle=joilev(njoile) goto 23096 23097 continue if(.not.(I23096.eq.( 1)))goto 23098 c+Use display levels from calling program disle=dislev(ndisl1) goto 23096 23098 continue if(.not.(I23096.eq.( 2).or.I23096.eq.( 3)))goto 23099 c+Use join levels disle=joilev(ndisl1) 23099 continue 23096 continue c+Save disle in dislev dislev(ndisl1)=disle c repeat 23100 continue c+Start of repeat loop to print one strip. c- Update ijoile, wall. c while 23103 if(.not.( (joilev(ijoile)-disle)*sdissw+disl23 .gt. 0. & .and. ijoile.ge.1))goto 23104 c- Normally: Continue until joilev moves past disle. disl23=0.0 c- Cases 2,3, on first entry: Skip loop. disl23=0.0 c- Cases 2,3, later entry: Do loop just once. disl23=1.0 c+Start of while loop to update ijoile and wall. if(.not.(kdisle.eq.2.or.kdisle.eq.3))goto 23105 c+Reset 2,3 indicator. disl23 = 1.0 c- Find right-most object of left cluster making join 23105 continue iobj=objjoi(ijoile) c- Find its position by counting from left-most object ipos=1 c for jobj=lllobj 23107 if(.not.(jobj.ne.iobj))goto 23109 ipos=ipos+1 if(.not.( .not.(1.le.jobj .and. jobj.le.nobj) ))goto 2 & 3110 c- jobj should be between 1 and nobj nerror=8 print *, & "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, & "ICICLE: Rest of output aborted, nerror = 8." print *, & "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" return 23110 continue jobj=objr(jobj) goto 23107 c endfor 23109 continue c- Blank out the join character making this join wall(ipos)=bchar c- Blank out new singleton groups. if(.not.(wall(ipos+1).eq.bchar))goto 23112 do 23114 ichar=1,nchar1 label(ichar,ipos+1)=bchar 23114 continue 23112 continue if(.not.( ( ipos.gt.1 .and. wall(ipos-1).eq.bchar ) .or. & ipos.eq.1 ))goto 23116 do 23118 ichar=1,nchar1 label(ichar,ipos)=bchar 23118 continue c- Update ijoile. 23116 continue ijoile=ijoile-1 if(.not.(disl23.eq.1.0))goto 23120 c+Move just one join level in cases 2,3 goto 23104 23120 continue goto 23103 c endwhile 23104 continue c+End of while loop to update ijoile and wall. c- Update ipha. ipha=ipha+1 if(.not.(ipha.gt.npha))goto 23122 ipha=1 c- Fill displa from label and wall. 23122 continue do 23124 ipos=1,npos idis1=width*(ipos-1)+1 displa(idis1)=label(ipha,ipos) if(.not.(ipos.eq.npos))goto 23126 goto 23125 23126 continue c for idis=idis1+1 23128 if(.not.(idis.le.width*ipos))goto 23130 displa(idis)=wall(ipos) idis=idis+1 goto 23128 c endfor 23130 continue 23124 continue 23125 continue c- THE PRIMARY OUTPUT STATEMENT c- Print one line of display. if(.not.(kdisle.ne.2))goto 23131 print 32, idisle, disle, (displa(idis), idis=ldis,rdis) goto 23132 c else 23131 continue if(.not.(ijoile.eq.njoile))goto 23133 print 33,idisle, disle, (displa(idis), idis=ldis,rdis) goto 23134 c else 23133 continue print 34, idisle, disle, joilev(idisle+1), (displa( & idis), idis=ldis,rdis) 23134 continue 23132 continue 32 format(1x,i3,2x,g11.5,1x,114a1) 33 format(1x,i3,2x,g11.5," No limit ",1x,103a1) 34 format(1x,i3,2x,g11.5,g11.5,1x,103a1) c- update idisle and apply stopping rule idisle=idisle-1 if(.not.(idisle.lt.1))goto 23135 goto 23102 c- Update disle and save disle in dislev 23135 continue c switch I23137 = (kdisle) if(.not.(I23137.eq.( 0)))goto 23138 c+Calculate display level disle=(disle+disadd)*disrat - disadd goto 23137 23138 continue if(.not.(I23137.eq.( 1)))goto 23139 c+Use display level from calling program disle=dislev(idisle) goto 23137 23139 continue if(.not.(I23137.eq.( 2).or.I23137.eq.( 3)))goto 23140 c+Use next join level as display level disle=joilev(ijoile-1) 23140 continue 23137 continue c+Save disle dislev(idisle)=disle c+End of repeat loop to print one strip. 23101 goto 23100 23102 continue ldis=ldis+diswid goto 23065 c endfor 23067 continue c+End of long for loop through strips. Index is ldis. c----------------------------------------------------------------------- return end c--#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# c-subroutine icilev, Fortran version. c-sets the display levels according to user preference subroutine icilev(sw, array, n) c-Argument description follows: c- sw = input. It controls handling of levels, as described below. c- It can be -1, 0 (default value) 1, 2, 3, or 4. c- It changes the display mode, except when sw=-1. c- array = input or output. It holds display levels. c- It is used only with sw values -1, 1. c- n = input. It indicates the number of display levels c- It is used only with sw values -1, 0, 1. c- What icilev does for different sw values. c--1 = Does NOT change display mode. Instead, has immediate effect: c- display level values are returned in "array" ("array" is output) c- If n!=0, n lowest levels returned; if n=0, all levels returned c- 0 = Log spacing used for display levels. c- If n!=0, n levels used; if n=0, no. of levels=number of objects. c- 1 = n levels from "array" are used in display("array" is input). c- 2 = One level is used for each join. Also, TWO display levels are c- printed on each line, to show INTERVAL over which this c- clustering is relevant. c- 3 = Same, but as space saver only FIRST display level is printed. common /iclev/ kdisle, ndisle, dislev integer kdisle, n, n1, ndisle, sw real dislev(200), array(n) c switch I23141 = (sw) if(.not.(I23141.eq.( -1).or.I23141.eq.( 1)))goto 23142 if(.not.(n.eq.0))goto 23143 n1=ndisle n=ndisle goto 23144 c else 23143 continue n1=n 23144 continue 23142 continue 23141 continue c switch I23145 = (sw) if(.not.(I23145.eq.( -1)))goto 23146 do 23147 i = 1,n1 array(i) = dislev(i) 23147 continue return 23146 continue if(.not.(I23145.eq.( 1)))goto 23149 do 23150 i = 1,n dislev(i) = array(i) 23150 continue goto 23145 23149 continue if(.not.(I23145.eq.( 0).or.I23145.eq.( 2).or.I23145.eq.( 3))) & goto 23152 goto 23145 23152 continue c default c+kdisle has invalid value print *, "----------------------------------------" print *, & "ICICLE: Call to icilev ignored, because first argument is invalid" print *, "----------------------------------------" return 23145 continue c+levels switch kdisle = sw c+number of levels ndisle = n return end c--#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# c-subroutine icipos, Fortran version. c- Allows the user to set the position order of the objects in the c- display (i.e., change the display mode) in subsequent calls to c- icicle, or to find what position order was used during the last call c- to icicle subroutine icipos(sw, array, n) c- Argument description follows: c- sw is input; controls handling of object positions (see below). c- array is input or output; gives positions of objects. c- It is used only with sw values -1 and 1. c- n is input; number of positions in "array". c- It is used only with sw values -1 and 1. c- sw has three possible values (0 is default value): c- -1 : Does NOT change display mode. Instead, has immediate effect: c- ICICLE returns n object positions (from previous use) c- in "array" ("array" is output). c- 0 : ICICLE will supply its own object positions in future c- 1 : ICICLE reads in n object positions from "array" (input). It c- will use these object positions in future calls to icicle. c- However, if they are not consistent with hierarchical c- clustering, modifications are made as necessary (but the input c- "array" is never changed). common /icpos/ kobjpo, objpos integer kobjpo,objpos(200),sw,array(n),n c switch I23153 = (sw) if(.not.(I23153.eq.( -1)))goto 23154 do 23155 i = 1,n array(i) = objpos(i) 23155 continue return 23154 continue if(.not.(I23153.eq.( 1)))goto 23157 do 23158 i = 1,n objpos(i) = array(i) 23158 continue goto 23153 23157 continue if(.not.(I23153.eq.( 0)))goto 23160 goto 23153 23160 continue c default c+invalid value print *, "----------------------------------------" print *, & "ICICLE: Call to icipos ignored, because first argument is invalid" print *, "----------------------------------------" return 23153 continue kobjpo = sw return end c--#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# c-subroutine icidis, Fortran version. c-Enters ICICLE display parameters subroutine icidis(widthi, echari, fchari, jchari, bchari) c-If this subroutine is not called, ICICLE will use default values of c- display parameters. c-width = width per position = number of columns from one object column c-to next in display, measured "center to center". minimum=default=2. c-echar = end character. Default = "&". c- Separates vertical repetitions of labels. c-fchar = fill character. Default = "&". c- Fills out shorter labels to length of longer ones. c-jchar = join character. Default = "=". c- Joins objects which belong to the same cluster. c-bchar = blank character. Default = " ". c- Used for white space which separates objects in c- distinct clusters. c+display parameters common /icdis/ width, echar, fchar, jchar, bchar integer width, widthi character echari*1, jchari*1, fchari*1, bchari*1 character echar*1, jchar*1, fchar*1, bchar*1 if(.not.( .not.( 2.le.widthi .and. widthi.le.5 ) ))goto 23161 print *, "----------------------------------------" print *, & "ICICLE: Width per position has been changed to between 2 and 5" print *, "----------------------------------------" 23161 continue width = min(max(2, widthi),5) if(.not.(widthi.gt.3))goto 23163 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, & "ICICLE: If width per position > 3, array 'displa' may overflow" print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" 23163 continue echar = echari jchar = jchari fchar = fchari bchar = bchari return end c--#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# c-subroutine icires, Fortran version. c-Resets all modes to their default values. subroutine icires integer dummyi real dummyr(1) call icilev(0,dummyr,0) call icipos(0,dummyr,dummyi) c+default values for all arguments call icidis(2, "&", "&", "=", " ") return end CUT HERE------------ icicle.f echo processing file hiclust.r 1>&2 cat > hiclust.r <<'CUT HERE------------ hiclust.r' # hiclust.r # This software is a RATFOR implementation of the method called HICLUST. # The authors of this software are Joseph B Kruskal and James M Landwehr. # Copyright (c) 1993 by AT&T. # Permission to use, copy, modify, and distribute this software for any # purpose without fee is hereby granted, provided that this entire # notice is included in all copies of any software which is or includes # a copy or modification of this software and in all copies of the # supporting documentation for such software. # THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMP- # LIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR AT&T MAKE ANY # REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY # OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. # This software comes from the SECOND MDS Package of AT&T Bell Laboratories # For explanation of the method this software implements, see # "Hierarchical Clustering Schemes" by Stephen C Johnson in Psychometrika, # 1967, Volume 32, pages 241-254. This clustering program is a modified # version of Johnson's program "hiclust". The same clustering method has # been independently invented and published several times, and is simple, # convenient, and widely used. # This file consists consists of a main routine written in the Ratfor # language. Its Fortran expansion is in the hiclust.f file. # Though this program is included primarily to illustrate how to # call ICICLE, you may find it of value because it is an improved # version of a widely used clustering program. # This version of HICLUST was modified in 1984 by J B Kruskal and # J E McRae. These are the main changes that were introduced: # --ICICLE is used for output in place of Johnson's original output. # --This version is written in Ratfor instead of Fortran IV. # --This version includes the averaging submethod of clustering besides # the single-link and complete-link submethods included by Johnson. # --Clustering terminology has been changed to agree with terminology # now most widely used in the clustering literature. # --The comments have been extensively modified. # --The input has been extended to permit use of ICICLE facilities. # An explanation of ICICLE plots may be found in The American # Statistician, May 1983, Volume 37, Number 2, pages 162-168. # This file was created by Joseph B Kruskal and Jean # E McRae at AT&T Bell Laboratories, Murray Hill, New Jersey, U.S.A. # It was completed in November, 1984. It has been thoroughly tested # by the authors. # Copyright November, 1984 by AT&T Bell Laboratories. # AT&T Bell Laboratories makes no warranties, express or implied, with # respect to this program. In particular, Bell Laboratories makes no # warranty of merchantability, fitness for a particular use, freedom # from infringement of any patent, copyright or trademark, nor any # warranty as to accuracy. Bell Laboratories assumes no obligation to # furnish any assistance of any kind whatsoever, or to furnish any # additional information or documentation. #----+----@----+----@----+----@----+----@----+----@----+----@----+----@ #NEWPAGE is also defined in the ICICLE.r file define NEWPAGE 12 #The formfeed (octal 014) character, for use in starting new page #MAXN and MAXNLEV are also defined in the ICICLE.r file. The values #there need not be the same as the values here. define MAXN 200 #MAXN means the maximum number of objects permitted (max n) define MAXNLEV 200 #MAXNLEV means the maximum number of levels permitted in the display define NLABCM 6 #"No. of LABel Characters: Max" is the max number of characters #permitted in a label. #---------------------------------------------------------------------- # e(i,j) is the array of input data (i.e., the "distances") # d, w, lki, lkj, xx are internal working storage # d(i,j) is the distance array as it changes during the merging process # w(i) ["weight"] is the number of points in cluster i at this stage # Initially, w(i)=1.0 for all i # If w(i)=0.0, i is no longer in use as a cluster at this stage # lki(ii) ["link i"] is one of the two clusters linked at stage ii. # lkj(ii) ["link j"] is one of the two clusters linked at stage ii. # For every ii, lki(ii) > lkj(ii) # xx(ii) is the distance between the two clusters linked at stage ii # label, level, objpos, title used merely to pass information on # label(NLABCM,MAXN) contains the user-supplied labels (if any) # level(MAXNLEV) contains the user-supplied levels (if any) for display # objpos(MAXN) contains the user-supplied object positions (if any) # title(3) contains titles for the 3 submethods #---------------------------------------------------------------------- real d(MAXN,MAXN),e(MAXN,MAXN),xx(MAXN),w(MAXN),level(MAXNLEV) real s,x integer lki(MAXN),lkj(MAXN),objpos(MAXN) integer method(3) integer i,j,k,l,ii,n,n1,ni,nj,is,i1,nlab,klev,nlev,kobjpo character*80 title(3),form character*1 label(NLABCM,MAXN) data title(1)/"Single-link Method"/ data title(2)/"Complete-link Method"/ data title(3)/"Averaging Method"/ #EXECUTABLE STATEMENTS START HERE #The use of "icidis" may be safely ignored by most users. It can be #used to change "width per position" and the characters in the ICICLE #display, which few users will want to do. call icidis(2, "&", "&", "=", " ") #This call is included only to illustrate how "icidis" is used. It #could be omitted without effect, since the values it specifies are #all the same as the default values. # 2 is no. of col.s from one object column to next (width per pos.) # "&" is the end char. It separates vertical repetitions of labels. # "&" is the fill character which fills out shorter labels. # "=" is the join char which joins objects belonging to same cluster. # " " is the blank char, used for the white space separating clusters. repeat { #Start of long repeat loop over data sets. Bracket depth 1. # read in: n = number of objects # n=0 means no more data. # n<0 means use last data set previously read in # is= +1 if matrix is matrix of dissimilarities or distances # -1 if matrix is matrix of similarities read 10, n, is 10 format(2i3) print 20, n, is 20 format(" Values that were read in: n= ",_ i2, " simil-dissim switch= ", i2) if(is!=0) s = float(is) if(n==0) stop #If n is 0, stop # If n>0, read triangular matrix without diagonal into e, # symmetrize, and provide 0 diagonal. if(n>0) { nlast=n n1=n-1 read 13, form 13 format(a80) e(1,1)=0.0 do i=2,n { e(i,i)=0.0 i1=i-1 read form,(e(i,j),j=1,i1) do j=1,i1 e(j,i)=e(i,j) } print *, " The data was read in using the following format" print 23, form 23 format(1x,a80) } if(n<0) n=nlast # Read in parameters # nlab: max number of char.s in any label, # but if 0, labels absent, # or if -1, use same labels as with preceding set of data. # klev: level options: -1, 0, 1, 2, 3 (first arg to "icilev"). # 0=use log spacing; -1=same, and return display levels; # 1=user provides levels; 2,3=one level for each join # Levels (sec. arg to "icilev") are present only if klev=1 # nlev: number of levels, but can use 0 and let program decide # (used if klev=-1,0,1) (third arg to "icilev") # kobjpo: if 1, user gives obj. positions; if 0, user doesn't; # if -1, return the object positions used # and choice of submethods: any subset of 1,2,3, in any order # (1=single-link, 2=complete-link, 3=average submethod) read 11, nlab, klev, nlev, kobjpo, (method(i),i=1,3) 11 format(7i3) print 21, nlab, klev, nlev, kobjpo, (method(i),i=1,3) 21 format(" Values that were read in:"/_ " maximum number of label characters=",i2/_ " level option", i3, " number of levels=", i3/_ " object position option", i3/_ " submethods chosen", 3i2) # Read new labels, or use old labels, or use no labels if(nlab>0) { nlabls=nlab read 13, form read form, ((label(i,j),i=1,nlab),j=1,n) print *, " Labels were read in according to the following format" print 23, form } if(nlab<0) nlab=nlabls if(nlab==0) nlabls=0 # If levels are present, read them in if(klev==1) { read 13, form read form, (level(i),i=1,nlev) print *, " Levels were read in according to the following format" print 23, form } # If the user is supplying object positions, read them in if(kobjpo==1) { read 13, form read form, (objpos(i),i=1,n) print *,_ " Object positions were read in according to the following format" print 23, form } # Count number of submethods selected nmeth=0 do i=1,3 if(method(i)!=0) nmeth=nmeth+1 if(nmeth==0) { print *, "----------------------------------------" print *, "Hiclust: No clustering submethods were selected." print *, "----------------------------------------" stop } # Loop through the methods do ki=1,3 { #Start of long LOOP over methods, indexed by k. Bracket depth 2. k=method(ki) if(k==0) next # Initially, every point is a singleton cluster. # The clustering process consists of a sequence of "merges", each of #which merges two clusters into a single merged cluster. If clusters #i and j are merged and i > j, then i is the new name for the #merged cluster, and j does not refer to any cluster. i is referred #to as a "cluster point" and j as an "noncluster point". # Initialize the clustering so that every point is a singleton cluster. # Initialize every value of array w to 1.0 # w(i) = weight (size) of the cluster i, if point i is a cluster point # = 0. if point i is a noncluster point # Thus w(i) # >= 1. indicates that point i is cluster point # == 0. indicates that point i is noncluster point # Initially, every point is a cluster point. # Initialize array d(,) to e(,). # e(i,j) is the input distance between points i and j. # d(i,j) = distance between clusters i and j. It changes during merging #Clear arrays lki, lkj, and xx which identify clusters merged, and # distances for each merge. do i=1,n { do j=1,n d(i,j)=e(i,j) w(i)=1.0 lki(i)=0.0 ; lkj(i)=0.0 ; xx(i)=0.0 } # ii counts the number of "merges" do ii=1,n1 { #Start of long loop over merges, indexed by ii. Bracket depth 3. # Find min pair distance # First, find two cluster points, ni and nj (with ni > nj), and # initialize the running minimum distance x to d(ni,nj). ni=n+1; nj=n+1 do j=1,n if(w(j)>0.0) {nj=j; break} if(nj>=n) { nerror=9 #Must be two cluster points left print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "Hiclust: Program error detected. nerror = 9." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" stop } nj1=nj+1 do i=nj1,n if( w(i)>0.0 ) {ni=i; break} if(ni>=n+1) { nerror=10 #Must be a cluster point left print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, "Hiclust: Program error detected. nerror = 10." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" stop } x=d(ni,nj) # Begin the real business of finding the closest pair # Only look at cluster points do i=2,n { #Start of outer loop over points, indexed by i. Bracket depth 4. if(w(i)==0.0) next #i noncluster point, so skip i1 = i-1 do j=1,i1 { #Start of inner loop over points, indexed by j. Br depth 5. if(w(j)==0.0) next #j noncluster point, so skip # This is the central compare statement if((d(i,j)-x)*s <= 0.0) { # Closer pair found. Update x, ni, nj. ni = i nj=j x=d(i,j) } } #End of inner loop over points, indexed by i. Br. depth 5. } #End of outer loop over points, indexed by i. Bracket depth 4. # We have now looked at every pair of points. # Store x in xx, and ni and nj in arrays lki and lkj. # Store new cluster weight in w(ni) and 0. in w(nj). xx(ii)=x lki(ii)=ni lkj(ii)=nj wni=w(ni) #Save the old (pre-merge) weight for ni wnj=w(nj) #Save the old (pre-merge) weight for nj w(ni)= w(ni) + w(nj) #weight of new cluster. ni still a cluster point. w(nj) = 0.0 #Point nj is now a noncluster point. # Because point nj is a noncluster point, it no longer has a row # of the matrix. The object(s) formerly in cluster nj are now part # of the new larger cluster ni. # Select cluster points l other than ni. do l=1,n { #Start loop over points, indexed by l. Update d(). Br depth 4. if(w(l)==0.0 | l==ni) next #Skip if l noncluster or ni switch(k) #Branch on method, and find d(ni,l) and d(l,ni). { #Start of BRANCH over methods, indexed by k. Br depth 5. case 1: # ******Single-link submethod****** # d(ni,l)= min(d(ni,l),d(nj,l)) if((d(ni,l)-d(nj,l))*s > 0.0) { d(ni,l)=d(nj,l) d(l,ni)=d(l,nj) } case 2: # ******Complete-link submethod****** # d(ni,l)= max(d(ni,l),d(nj,l)) if((d(ni,l)-d(nj,l))*s < 0.0) { d(ni,l)=d(nj,l) d(l,ni)=d(l,nj) } case 3: # ******Averaging submethod****** # d(ni,l)= average of d(ni,l) & d(nj,l) d(ni,l) = (wni*d(ni,l)+wnj*d(nj,l))/(wni+wnj) d(l,ni) = d(ni,l) } #End of BRANCH over methods, indexed by k. Bracket depth 5. } #End of loop over points indexed by l. Update d(). Br depth 4. # Now repeat process on modified d array } #End of long loop over merges, indexed by ii. Bracket depth 3. ##########call ICICLE print out routine########## klev0=max(klev,0) #If klev=-1, then use log spacing call icilev(klev0,level,nlev) #supply level information to ICICLE if(kobjpo==1) call icipos(kobjpo,objpos,n) #obj. information to ICICLE #Arrays lki, lkj (merge pointers) are entered as arguments #in reverse alphabetical order so that objects appear in #ascending order in the ICICLE plot. call icicle(n,lkj,lki,xx,s,NLABCM,nlab,label,title(k)) #call icilev if levels are to be returned if(klev==-1) { call icilev(klev, level, nwhat) print *, "The levels used by ICICLE are:" print *, (level(i),i=1,nwhat) } #call icipos if positions are to be returned if(kobjpo == -1) { call icipos(kobjpo,objpos,n) print *, "The object positions used by ICICLE are:" print *, (objpos(i),i=1,n) } # Go to next method. } #End of long LOOP over methods, indexed by k. Bracket depth 2. call icires #reset all modes to default values print 7, NEWPAGE 7 format(a1) # Return to beginning of loop to read more data. } #End of long repeat loop over data sets. Bracket depth 1. end CUT HERE------------ hiclust.r echo processing file hiclust.f 1>&2 cat > hiclust.f <<'CUT HERE------------ hiclust.f' c# hiclust.f c# This software is the FORTRAN expansion of the program in hiclust.r c- The authors of this software are Joseph B Kruskal and James M Landwehr. c# Note that this program is DIFFERENT from the same-named main entry in c# netlib/mds, though the two programs are very similar c- Copyright (c) 1993 by AT&T. c- Permission to use, copy, modify, and distribute this software for any c- purpose without fee is hereby granted, provided that this entire c- notice is included in all copies of any software which is or includes c- a copy or modification of this software and in all copies of the c- supporting documentation for such software. c- THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMP- c- LIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR AT&T MAKE ANY c- REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY c- OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. c- This software comes from the SECOND MDS Package of AT&T Bell Laboratories c- For explanation of the method this software implements, see c- "Hierarchical Clustering Schemes" by Stephen C Johnson in Psychometrika, c- 1967, Volume 32, pages 241-254. This clustering program is a modified c- version of Johnson's program "hiclust". The same clustering method has c- been independently invented and published several times, and is simple, c- convenient, and widely used. c# This is a program called hiclust, which performs clustering. It c# consists of a main routine, and is the Fortran expansion of the c# hiclust.r file, except that comments have been modified. c- Though this program is included primarily to illustrate how to c- call ICICLE, you may find it of value because it is an improved c- version of a widely used clustering program. c- This version of HICLUST was modified in 1984 by J B Kruskal and c- J E McRae. These are the main changes that were introduced: c- --ICICLE is used for output in place of Johnson's original output. c- --This version is written in Ratfor instead of Fortran IV. c- --This version includes the averaging submethod of clustering besides c- the single-link and complete-link submethods included by Johnson. c- --Clustering terminology has been changed to agree with terminology c- now most widely used in the clustering literature. c- --The comments have been extensively modified. c- --The input has been extended to permit use of ICICLE facilities. c- An explanation of ICICLE plots may be found in The American c- Statistician, May 1983, Volume 37, Number 2, pages 162-168. c- This file was created by Joseph B Kruskal and Jean c- E McRae at AT&T Bell Laboratories, Murray Hill, New Jersey, U.S.A. c- It was completed in November, 1984. It has been thoroughly tested c- by the authors. c- Copyright November, 1984 by AT&T Bell Laboratories. c- AT&T Bell Laboratories makes no warranties, express or implied, with c- respect to this program. In particular, Bell Laboratories makes no c- warranty of merchantability, fitness for a particular use, freedom c- from infringement of any patent, copyright or trademark, nor any c- warranty as to accuracy. Bell Laboratories assumes no obligation to c- furnish any assistance of any kind whatsoever, or to furnish any c- additional information or documentation. c# Explanation of the comment convention used: c# A comment that starts with "c-" is comment line in Ratfor version. c# A comment that starts with "c+" comes from line with statement on it, c# and belongs to the immediately following Fortran line, if any. c# A comment that starts with "c " corresponds to a Ratfor statement. c# A comment that starts with "c#" was added or altered after expansion. c#----+----@----+----@----+----@----+----@----+----@----+----@----+----@ c# The Ratfor DEFINE statements and their explanations have been deleted c# because there is nothing in Fortran that corresonds to DEFINE. c# All array sizes in this Fortran version are those c# which result from the DEFINE statements in the Ratfor version. c----------------------------------------------------------------------- c- e(i,j) is the array of input data (i.e., the "distances") c- d, w, lki, lkj, xx are internal working storage c- d(i,j) is the distance array as it changes during the merging process c- w(i) ["weight"] is the number of points in cluster i at this stage c- Initially, w(i)=1.0 for all i c- If w(i)=0.0, i is no longer in use as a cluster at this stage c- lki(ii) ["link i"] is one of the two clusters linked at stage ii. c- lkj(ii) ["link j"] is one of the two clusters linked at stage ii. c- For every ii, lki(ii) > lkj(ii) c- xx(ii) is the distance between the two clusters linked at stage ii c- label, level, objpos, title used merely to pass information on c- label(NLABCM,MAXN) contains the user-supplied labels (if any) c- level(MAXNLEV) contains the user-supplied levels (if any) for display c- objpos(MAXN) contains the user-supplied object positions (if any) c- title(3) contains titles for the 3 submethods c----------------------------------------------------------------------- real d(200,200),e(200,200),xx(200),w(200),level(200) real s,x integer lki(200),lkj(200),objpos(200) integer method(3) integer i,j,k,l,ii,n,n1,ni,nj,is,i1,nlab,klev,nlev,kobjpo character*80 title(3),form character*1 label(6,200) data title(1)/"Single-link Method"/ data title(2)/"Complete-link Method"/ data title(3)/"Averaging Method"/ c-EXECUTABLE STATEMENTS START HERE c-The use of "icidis" may be safely ignored by most users. It can be c-used to change "width per position" and the characters in the ICICLE c-display, which few users will want to do. call icidis(2, "&", "&", "=", " ") c-This call is included only to illustrate how "icidis" is used. It c-could be omitted without effect, since the values it specifies are c-all the same as the default values. c- 2 is no. of col.s from one object column to next (width per pos.) c- "&" is the end char. It separates vertical repetitions of labels. c- "&" is the fill character which fills out shorter labels. c- "=" is the join char which joins objects belonging to same cluster. c- " " is the blank char, used for the white space separating clusters. c repeat 23000 continue c+Start of long repeat loop over data sets. Bracket depth 1. c- read in: n = number of objects c- n=0 means no more data. c- n<0 means use last data set previously read in c- is= +1 if matrix is matrix of dissimilarities or distances c- -1 if matrix is matrix of similarities read 10, n, is 10 format(2i3) print 20, n, is 20 format(" Values that were read in: n= ", i2, & " simil-dissim switch= ", i2) if(.not.(is.ne.0))goto 23003 s = float(is) 23003 continue if(.not.(n.eq.0))goto 23005 c+If n is 0, stop stop c- If n>0, read triangular matrix without diagonal into e, c- symmetrize, and provide 0 diagonal. 23005 continue if(.not.(n.gt.0))goto 23007 nlast=n n1=n-1 read 13, form 13 format(a80) e(1,1)=0.0 do 23009 i=2,n e(i,i)=0.0 i1=i-1 read form,(e(i,j),j=1,i1) do 23011 j=1,i1 e(j,i)=e(i,j) 23011 continue 23009 continue print *, & " The data was read in using the following format" print 23, form 23 format(1x,a80) 23007 continue if(.not.(n.lt.0))goto 23013 n=nlast c- Read in parameters c- nlab: max number of char.s in any label, c- but if 0, labels absent, c- or if -1, use same labels as with preceding set of data. c- klev: level options: -1, 0, 1, 2, 3 (first arg to "icilev"). c- 0=use log spacing; -1=same, and return display levels; c- 1=user provides levels; 2,3=one level for each join c- Levels (sec. arg to "icilev") are present only if klev=1 c- nlev: number of levels, but can use 0 and let program decide c- (used if klev=-1,0,1) (third arg to "icilev") c- kobjpo: if 1, user gives obj. positions; if 0, user doesn't; c- if -1, return the object positions used c- and choice of submethods: any subset of 1,2,3, in any order c- (1=single-link, 2=complete-link, 3=average submethod) 23013 continue read 11, nlab, klev, nlev, kobjpo, (method(i),i=1,3) 11 format(7i3) print 21, nlab, klev, nlev, kobjpo, (method(i),i=1,3) 21 format(" Values that were read in:"/ & " maximum number of label characters=",i2/ & " level option", i3, " number of levels=", i3/ & " object position option", i3/" submethods chosen", 3i2) c- Read new labels, or use old labels, or use no labels if(.not.(nlab.gt.0))goto 23015 nlabls=nlab read 13, form read form, ((label(i,j),i=1,nlab),j=1,n) print *, & " Labels were read in according to the following format" print 23, form 23015 continue if(.not.(nlab.lt.0))goto 23017 nlab=nlabls 23017 continue if(.not.(nlab.eq.0))goto 23019 nlabls=0 c- If levels are present, read them in 23019 continue if(.not.(klev.eq.1))goto 23021 read 13, form read form, (level(i),i=1,nlev) print *, & " Levels were read in according to the following format" print 23, form c- If the user is supplying object positions, read them in 23021 continue if(.not.(kobjpo.eq.1))goto 23023 read 13, form read form, (objpos(i),i=1,n) print *, & " Object positions were read in according to the following format" print 23, form c- Count number of submethods selected 23023 continue nmeth=0 do 23025 i=1,3 if(.not.(method(i).ne.0))goto 23027 nmeth=nmeth+1 23027 continue 23025 continue if(.not.(nmeth.eq.0))goto 23029 print *, "----------------------------------------" print *, & "Hiclust: No clustering submethods were selected." print *, "----------------------------------------" stop c- Loop through the methods 23029 continue do 23031 ki=1,3 c+Start of long LOOP over methods, indexed by k. Bracket depth 2. k=method(ki) if(.not.(k.eq.0))goto 23033 goto 23031 c- Initially, every point is a singleton cluster. c- The clustering process consists of a sequence of "merges", each of c-which merges two clusters into a single merged cluster. If clusters c-i and j are merged and i > j, then i is the new name for the c-merged cluster, and j does not refer to any cluster. i is referred c-to as a "cluster point" and j as an "noncluster point". c- Initialize the clustering so that every point is a singleton cluster. c- Initialize every value of array w to 1.0 c- w(i) = weight (size) of the cluster i, if point i is a cluster point c- = 0. if point i is a noncluster point c- Thus w(i) c- >= 1. indicates that point i is cluster point c- == 0. indicates that point i is noncluster point c- Initially, every point is a cluster point. c- Initialize array d(,) to e(,). c- e(i,j) is the input distance between points i and j. c- d(i,j) = distance between clusters i and j. It changes during merging c-Clear arrays lki, lkj, and xx which identify clusters merged, and c- distances for each merge. 23033 continue do 23035 i=1,n do 23037 j=1,n d(i,j)=e(i,j) 23037 continue w(i)=1.0 lki(i)=0.0 lkj(i)=0.0 xx(i)=0.0 23035 continue c- ii counts the number of "merges" do 23039 ii=1,n1 c+Start of long loop over merges, indexed by ii. Bracket depth 3. c- Find min pair distance c- First, find two cluster points, ni and nj (with ni > nj), and c- initialize the running minimum distance x to d(ni,nj). ni=n+1 nj=n+1 do 23041 j=1,n if(.not.(w(j).gt.0.0))goto 23043 nj=j goto 23042 23043 continue 23041 continue 23042 continue if(.not.(nj.ge.n))goto 23045 c+Must be two cluster points left nerror=9 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, & "Hiclust: Program error detected. nerror = 9." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" stop 23045 continue nj1=nj+1 do 23047 i=nj1,n if(.not.( w(i).gt.0.0 ))goto 23049 ni=i goto 23048 23049 continue 23047 continue 23048 continue if(.not.(ni.ge.n+1))goto 23051 c+Must be a cluster point left nerror=10 print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" print *, & "Hiclust: Program error detected. nerror = 10." print *, "%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+" stop 23051 continue x=d(ni,nj) c- Begin the real business of finding the closest pair c- Only look at cluster points do 23053 i=2,n c+Start of outer loop over points, indexed by i. Bracket depth 4. if(.not.(w(i).eq.0.0))goto 23055 c+i noncluster point, so skip goto 23053 23055 continue i1 = i-1 do 23057 j=1,i1 c+Start of inner loop over points, indexed by j. Br depth 5. if(.not.(w(j).eq.0.0))goto 23059 c+j noncluster point, so skip goto 23057 c- This is the central compare statement 23059 continue if(.not.((d(i,j)-x)*s .le. 0.0))goto 23061 c- Closer pair found. Update x, ni, nj. ni = i nj=j x=d(i,j) 23061 continue 23057 continue c+End of inner loop over points, indexed by i. Br. depth 5. 23053 continue c+End of outer loop over points, indexed by i. Bracket depth 4. c- We have now looked at every pair of points. c- Store x in xx, and ni and nj in arrays lki and lkj. c- Store new cluster weight in w(ni) and 0. in w(nj). xx(ii)=x lki(ii)=ni lkj(ii)=nj c+Save the old (pre-merge) weight for ni wni=w(ni) c+Save the old (pre-merge) weight for nj wnj=w(nj) c+weight of new cluster. ni still a cluster point. w(ni)= w(ni) + w(nj) c+Point nj is now a noncluster point. w(nj) = 0.0 c- Because point nj is a noncluster point, it no longer has a row c- of the matrix. The object(s) formerly in cluster nj are now part c- of the new larger cluster ni. c- Select cluster points l other than ni. do 23063 l=1,n c+Start loop over points, indexed by l. Update d(). Br depth 4. if(.not.(w(l).eq.0.0 .or. l.eq.ni))goto 23065 c+Skip if l noncluster or ni goto 23063 23065 continue c switch I23067 = (k) c+Branch on method, and find d(ni,l) and d(l,ni). c+Start of BRANCH over methods, indexed by k. Br depth 5. if(.not.(I23067.eq.( 1)))goto 23068 c+ ******Single-link submethod****** c- d(ni,l)= min(d(ni,l),d(nj,l)) if(.not.((d(ni,l)-d(nj,l))*s .gt. 0.0))goto 23069 d(ni,l)=d(nj,l) d(l,ni)=d(l,nj) 23069 continue goto 23067 23068 continue if(.not.(I23067.eq.( 2)))goto 23071 c+ ******Complete-link submethod****** c- d(ni,l)= max(d(ni,l),d(nj,l)) if(.not.((d(ni,l)-d(nj,l))*s .lt. 0.0))goto 23072 d(ni,l)=d(nj,l) d(l,ni)=d(l,nj) 23072 continue goto 23067 23071 continue if(.not.(I23067.eq.( 3)))goto 23074 c+ ******Averaging submethod****** c- d(ni,l)= average of d(ni,l) & d(nj,l) d(ni,l) = (wni*d(ni,l)+wnj*d(nj,l))/(wni+wnj) d(l,ni) = d(ni,l) 23074 continue 23067 continue c+End of BRANCH over methods, indexed by k. Bracket depth 5. 23063 continue c+End of loop over points indexed by l. Update d(). Br depth 4. c- Now repeat process on modified d array 23039 continue c+End of long loop over merges, indexed by ii. Bracket depth 3. c-#########call ICICLE print out routine########## c+If klev=-1, then use log spacing klev0=max(klev,0) c+supply level information to ICICLE call icilev(klev0,level,nlev) if(.not.(kobjpo.eq.1))goto 23075 c+obj. information to ICICLE call icipos(kobjpo,objpos,n) c- Arrays lki, lkj (merge pointers) are entered as arguments c- in reverse alphabetical order so that objects appear in c- ascending order in the ICICLE plot. 23075 continue call icicle(n,lkj,lki,xx,s,6,nlab,label,title(k)) c- call icilev if levels are to be returned if(.not.(klev.eq.-1))goto 23077 call icilev(klev, level, nwhat) print *, "The levels used by ICICLE are:" print *, (level(i),i=1,nwhat) c- call icipos if positions are to be returned 23077 continue if(.not.(kobjpo .eq. -1))goto 23079 call icipos(kobjpo,objpos,n) print *, "The object positions used by ICICLE are:" print *, (objpos(i),i=1,n) c- Go to next method. 23079 continue 23031 continue c+End of long LOOP over methods, indexed by k. Bracket depth 2. c+reset all modes to default values call icires print 7, 12 7 format(a1) c- Return to beginning of loop to read more data. c+End of long repeat loop over data sets. Bracket depth 1. 23001 goto 23000 end CUT HERE------------ hiclust.f echo processing file icicle.data 1>&2 cat > icicle.data <<'CUT HERE------------ icicle.data' C icicle.data C SAMPLE INPUT AND OUTPUT FROM HICLUST AND ICICLE. C The authors of this software are Joseph B Kruskal and James M Landwehr. C Copyright (c) 1993 by AT&T. C Permission to use, copy, modify, and distribute this software for any C purpose without fee is hereby granted, provided that this entire C notice is included in all copies of any software which is or includes C a copy or modification of this software and in all copies of the C supporting documentation for such software. C THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMP- C LIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR AT&T MAKE ANY C REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY C OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. C This software comes from the SECOND MDS Package of AT&T Bell Laboratories C An explanation of ICICLE plots may be found in The American C Statistician, May 1983, Volume 37, Number 2, pages 162-168. C Sample input for the combination of hiclust and ICICLE, and the C output from this input. The data was used in the paper cited above. C This file was created by Joseph B Kruskal, James M Landwehr, and Jean C E McRae at AT&T Bell Laboratories, Murray Hill, New Jersey, U.S.A. C It was completed in November, 1984. It has been thoroughly tested C by the authors. C Copyright November, 1984 by AT&T Bell Laboratories. C AT&T Bell Laboratories makes no warranties, express or implied, with C respect to this program. In particular, Bell Laboratories makes no C warranty of merchantability, fitness for a particular use, freedom C from infringement of any patent, copyright or trademark, nor any C warranty as to accuracy. Bell Laboratories assumes no obligation to C furnish any assistance of any kind whatsoever, or to furnish any C additional information or documentaion. #----+----@----+----@----+----@----+----@----+----@----+----@----+----@ The input and output follow, separated by lines of #-#-#-... . In the input, the comments in the right-hand portion of some cards are added only for clarity, and are unnecessary but harmless in actual input. In the output, the formfeed character (octal 14), which is used to start a new page, has been replaced by "NEWPAGE" for clarity. #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# 7 -1 7 points, dissims. Data format follows. (7f4.1) 826 data 309 321 data 319 328 949 data 311 335 888 929 data 344 355 629 641 639 data 342 360 610 617 615 939 data 2 2 0 1 1 2 3 ch/lab, lev-opt, levs, pos-opt, three submeths (7(2a1,1x)) I1 I2 B2 B3 B1 W1 W2 The labels. Preceding is format to read them. (7i2) 5 3 4 6 7 1 2 Desired object order. Preceding is format to read them. 0 1 0 points, i.e., terminate computation. #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# Values that were read in: n= 7 simil-dissim switch= -1 The data was read in using the following format (7f4.1) Values that were read in: maximum number of label characters= 2 level option 2 number of levels= 0 object position option 1 submethods chosen 1 2 3 Labels were read in according to the following format (7(2a1,1x)) Object positions were read in according to the following format (7i2) NEWPAGE Single-link Method B B B W W I I 1 2 3 1 2 1 2 6 36.000 No limit B=B=B=W=W=I=I 5 64.100 36.000 1=2=3=1=2 1=2 4 82.600 64.100 &=&=& &=& &=& 3 92.900 82.600 B=B=B W=W 2 93.900 92.900 2=3 1=2 1 94.900 93.900 &=& NEWPAGE Complete-link Method B B B W W I I 1 2 3 1 2 1 2 6 30.900 No limit B=B=B=W=W=I=I 5 61.000 30.900 1=2=3=1=2 1=2 4 82.600 61.000 &=&=& &=& &=& 3 88.800 82.600 B=B=B W=W 2 93.900 88.800 2=3 1=2 1 94.900 93.900 &=& NEWPAGE Averaging Method B B B W W I I 1 2 3 1 2 1 2 6 33.240 No limit B=B=B=W=W=I=I 5 62.517 33.240 1=2=3=1=2 1=2 4 82.600 62.517 &=&=& &=& &=& 3 90.850 82.600 B=B=B W=W 2 93.900 90.850 2=3 1=2 1 94.900 93.900 &=& NEWPAGE Values that were read in: n= 0 simil-dissim switch= 1 CUT HERE------------ icicle.data #define END .