📄 nndvl

📁 This is the source code of a data mining algorithm.
💻
字号:
#!/bin/bash
#-----------------------------------------------------------------------
# Runs clustering experiments with different training modes and collects
# the results per data set.
#
# Usage: nndvl [sel [cnt [off [inc [add]]]]]
#   sel  experiment to run: all (default), iris, iris3, wsel3, wsel6,
#        wine3, wine6, breast, abalone3 or abalone6
#   cnt  number of trials / seed values per configuration (default 20)
#   off  offset of the seed values (default 1)
#   inc  increment between seed values (default 7)
#   add  additional options handed unchanged to cli (may be several words)
#
# Trial k uses the seed k*inc+off.
#
# External programs: cli, clc and gawk.  cli and clc are not defined in
# this file; presumably they are the cluster induction and the cluster
# comparison programs -- confirm against the project.  This script only
# relies on clc printing lines that start with "mean squared" and carry
# the value of interest in their last field.
#
# Files (all in the current directory): <data>.dom and <data>.tab are
# inputs; <data>.raw, <name>.trl and <name>.res are outputs; <data>.tmp
# and <data>.<number> are scratch files that get removed again.
#-----------------------------------------------------------------------
sel=${1:-all}
cnt=${2:-20}
off=${3:-1}
inc=${4:-7}
add=$5
explist="1.05 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0"
mmtlist="0.05 0.1 0.15 0.2 0.25 0.3 0.4 0.5 0.6 0.7 0.8 0.9"

#-----------------------------------------------------------------------
# genrefs -- generate reference results
# Arguments: $1 - data name           (default iris)
#            $2 - number of clusters  (default 2)
#            $3 - cluster shape flag  (may be empty; "s" is then used
#                 in the file name only, not in the cli option)
# Globals:   reads  cnt, inc, off, add
#            writes data, ncls, base, refs, args (base and refs are
#            needed by run() afterwards, so they must stay global)
# Outputs:   a status line on stdout; files <base>0, <base>1, ...
#-----------------------------------------------------------------------
genrefs() {
  local shape k i seed new

  data=${1:-iris}               # get the data name
  ncls=${2:-2}                  # get the number of clusters
  shape=${3:-s}                 # get the cluster shape
  base=$data$ncls.$shape        # construct base file name
  refs=0                        # initialize the result counter
  while [[ -f "$base$refs" ]]; do
    refs=$(( refs + 1 ))        # count existing reference results
  done
  if (( refs > 0 )); then       # if reference results exist,
    echo "found $refs reference result(s)"
    return                      # report their number and abort
  fi

  # $add is split into words on purpose: it may carry several options
  # or be empty, in which case it must vanish.
  # shellcheck disable=SC2206
  args=("-${3}c${ncls}" -e5000 $add "$data.dom" "$data.tab")
  for (( k = 0; k < cnt; k++ )); do
    seed=$(( k * inc + off ))   # compute the seed value
    cli "-S$seed" "${args[@]}" "$base$refs" 2> /dev/null
    echo "" > "$data.tmp"       # traverse existing reference results
    for (( i = 0; i < refs; i++ )); do
      clc -x "$base$refs" "$base$i" "$data.tab" 2> /dev/null >> "$data.tmp"
    done                        # compare reference results
    # new is 0 if the candidate is within 1e-9 of an existing reference
    new=$(gawk 'BEGIN { min = 1; }
      ($0 ~ "^mean squared") { if ($NF < min+0.0) min = $NF; }
      END { print ((min < 1e-9) ? 0 : 1) }' "$data.tmp")
    if (( new == 0 )); then     # if the current result is not new,
      rm -f -- "$base$refs"     # delete the result file
    else
      refs=$(( refs + 1 ))      # otherwise increment the counter
    fi
  done
  echo "generated $refs reference result(s)"
  rm -f -- "$data.tmp"          # delete temporary file
}  # genrefs ()

#-----------------------------------------------------------------------
# run -- run one clustering trial
# Arguments: $1 - training mode             (default none)
#            $2 - optional extra cli option (e.g. -g1.2 or -m0.1)
# Globals:   reads data, args, refs, base, nepo, step (set by modes()
#            and genrefs())
# Outputs:   appends a "mode:" header and "<epoch> <value>" lines to
#            <data>.raw; removes <data>.<number> and <data>.tmp
#-----------------------------------------------------------------------
run() {
  local mode=${1:-none}         # get the training mode
  local i cmp min best

  # the optional argument is only appended when it is non-empty, so
  # the record has no trailing blank
  printf '%s\n' "mode: $mode${2:+ $2}" >> "$data.raw"
  cli "-A$mode" ${2:+"$2"} "${args[@]}" 2> /dev/null  # do the clustering

  echo "" > "$data.tmp"         # traverse the reference results
  for (( i = 0; i < refs; i++ )); do
    clc -x "$data.$nepo" "$base$i" "$data.tab" 2> /dev/null >> "$data.tmp"
  done                          # find the best reference result
  cmp=$(gawk 'BEGIN { min = 1; best = n = 0; }
    ($0 ~ "^mean squared") {
      if ($NF < min+0.0) { min = $NF; best = n; } n++; }
    END { printf("%d %s", best, min); }' "$data.tmp")
  min=${cmp##* }                # text after  the last blank
  best=${cmp% *}                # text before the last blank

  rm -f -- "$data.tmp"          # traverse the training epochs
  for (( i = 0; i < nepo; i += step )); do
    clc -x "$data.$i" "$base$best" "$data.tab" 2> /dev/null >> "$data.tmp"
  done                          # compare clustering results
  gawk -v step="$step" 'BEGIN { n = 0 }
    ($0 ~ "^mean squared") { printf("%d %s\n", n, $NF); n += step; }
  ' "$data.tmp" >> "$data.raw"  # collect the comparison results
  printf '%s\n' "$nepo $min" >> "$data.raw"  # add final comparison result
  rm -f -- "$data".[0-9]* "$data.tmp"        # remove the result files
}  # run ()

#-----------------------------------------------------------------------
# modes -- try different training modes
# Arguments: $1 - data name          (default iris)
#            $2 - number of clusters (default 2)
#            $3 - number of epochs   (default 20)
#            $4 - cluster shape flag (optional)
#            $5 - second flag, appended to $4 (optional)
# Globals:   reads  cnt, inc, off, add, explist, mmtlist
#            writes data, ncls, nepo, step, args (all read by run())
# Outputs:   progress messages on stdout; appends to <data>.raw
#-----------------------------------------------------------------------
modes() {
  local k seed x
  local flags=$4$5

  data=${1:-iris}               # get the data name
  ncls=${2:-2}                  # get the number of clusters
  nepo=${3:-20}                 # get the number of epochs
  # choose the epoch step width for the comparisons in run()
  if   (( nepo >= 600 )); then step=50
  elif (( nepo >= 480 )); then step=40
  elif (( nepo >= 240 )); then step=20
  elif (( nepo >= 120 )); then step=10
  elif (( nepo >=  60 )); then step=5
  elif (( nepo >=  48 )); then step=4
  elif (( nepo >=  24 )); then step=2
  else                         step=1
  fi

  # record the cluster shape (no trailing blank if there is none)
  printf '%s\n' "shape:${flags:+ $flags}" >> "$data.raw"
  printf '%s\n' "processing $data $ncls${4:+ $4}${5:+ $5}"
  genrefs "$data" "$ncls" "$4"  # generate the reference results

  for (( k = 0; k < cnt; k++ )); do
    printf 'trial %s ... ' "$k" # traverse the trials
    seed=$(( k * inc + off ))   # compute the seed value
    # The last argument "<data>." looks like an output name prefix:
    # run() reads <data>.<epoch> afterwards -- confirm against cli.
    # $add is split into words on purpose (see genrefs).
    # shellcheck disable=SC2206
    args=("-S$seed" "-${flags}c${ncls}" "-Pe$nepo" $add
          "$data.dom" "$data.tab" "$data.")
    run none                    # start with a normal run
    for x in $explist; do       # try different expansion factors
      run expand "-g$x"
    done
    for x in $mmtlist; do       # try different momentum terms
      run momentum "-m$x"
    done
    run adaptive                # adaptive expansion factor
    run resilient               # resilient backpropagation
    run quick -g2               # quick backpropagation
    echo "done."
  done
  # rm -f $base[0-9]*           # delete the reference results
}  # modes ()

#-----------------------------------------------------------------------
# trials -- collect trial data
# Arguments: $1 - name of the raw file (read from $1.raw)
#            $2 - name of the output   (default $1; appends to $2.trl)
#            $3 - colon separated list of mode names to collect
#                 (default none:vnone:Vnone)
# Outputs:   per epoch the log10 value of each trial and the average
#            per mode; values that are not positive are stored as -16
#-----------------------------------------------------------------------
trials() {
  local list=${3:-"none:vnone:Vnone"}

  gawk -v list="$list" '
  BEGIN {
    max = -1; on = 0;           # initialize variables
    cnt = split(list, modes, ":");
  }                             # get the list of modes
  {
    if      ($1 == "shape:")    # if cluster shape specification
      shape = $2;               # note the cluster shape
    else if ($1 == "mode:") {   # if training mode specification
      if (($2 != "quick") && (NF > 2))
           m = shape substr($2, 1, 1) substr($3, 3);
      else m = shape $2;        # construct the mode name
      on = 0;                   # set the mode switch
      for (i = cnt+1; --i > 0; )
        if (m == modes[i]) { on = 1; break; } }
    else if (on) {              # if current mode matches
      if ($1 > max) max = $1;   # update the maximum epoch
      k = cnts[m,$1] +0;        # get the trial number
      v = ($2 > 0) ? log($2)/log(10) : -16;
      trials[m,$1,k] = v;       # store the trial value and
      sums[m,$1]    += v;       # sum it for the average
      cnts[m,$1]     = k+1;     # store the number of trials
      totals[$1]++;             # count the trial for the epoch
    }
  }
  END {                         # output collected data
    for (i = 0; i <= max; i++) {
      if (totals[i] <= 0) continue;
      printf("> %10d diff\n", i);
      for (n = 0; ++n <= cnt; ) {
        m = modes[n];
        if (cnts[m,i] <= 0) continue;
        for (k = 0; k < cnts[m,i]; k++)
          printf("%-12s %g\n", m sprintf("%02d", k), trials[m,i,k]);
        printf("%-12s %g\n", m, sums[m,i]/cnts[m,i]);
      }
    }
  }' "$1.raw" >> "${2:-$1}.trl"
}  # trials ()

#-----------------------------------------------------------------------
# average -- average data over trials
# Arguments: $1 - name of the raw file (read from $1.raw)
#            $2 - name of the output   (default $1; appends to $2.res)
# Outputs:   per epoch the average log10 value of every mode found in
#            the raw file, in order of first appearance
#-----------------------------------------------------------------------
average() {
  gawk '
  BEGIN { max = -1; n = 0; }
  {
    if      ($1 == "shape:")    # if cluster shape specification
      shape = $2;               # note the cluster shape
    else if ($1 == "mode:") {   # if training mode specification
      if (($2 != "quick") && (NF > 2))
           m = shape substr($2, 1, 1) substr($3, 3);
      else m = shape $2;        # construct the mode name
      for (i = n; --i >= 0; )   # add mode to the list if necessary
        if (modes[i] == m) break;
      if (i < 0) modes[i = n++] = m; }
    else {                      # if result line
      if ($1 > max) max = $1;   # update the maximum epoch
      sums[m,$1] += ($2 > 0) ? log($2)/log(10) : -16;
      cnts[m,$1]++;             # store the trial value and count it
      totals[$1]++;             # count the trial for the epoch
    }
  }
  END {                         # output collected data
    for (i = 0; i <= max; i++) {
      if (totals[i] <= 0) continue;
      printf("> %10d diff\n", i);
      for (k = 0; k < n; k++) {
        m = modes[k];
        if (cnts[m,i] <= 0) continue;
        printf("%-12s %g\n", m, sums[m,i]/cnts[m,i]);
      }
    }
  }' "$1.raw" >> "${2:-$1}.res"
}  # average ()

#-----------------------------------------------------------------------
# Experiments.  Each stanza starts from a fresh raw file, runs the five
# shape configurations, and regenerates the .trl and .res files.  Data
# sets that are used with two cluster counts rename their raw file at
# the end so that the next stanza does not overwrite it.
#-----------------------------------------------------------------------
if [[ $sel == "all" || $sel == "iris3" || $sel == "iris" ]]; then
  rm -f iris.raw
  modes iris 3 16
  modes iris 3 16 v K
  modes iris 3 16 v
  modes iris 3 80 V K
  modes iris 3 80 V
  rm -f iris.trl
  trials iris
  rm -f iris.res
  average iris
fi

if [[ $sel == "all" || $sel == "wsel3" ]]; then
  rm -f wsel.raw
  modes wsel 3 16
  modes wsel 3 16 v K
  modes wsel 3 16 v
  modes wsel 3 30 V K
  modes wsel 3 30 V
  rm -f wsel3.trl
  trials wsel wsel3
  rm -f wsel3.res
  average wsel wsel3
  mv wsel.raw wsel3.raw
fi

if [[ $sel == "all" || $sel == "wsel6" ]]; then
  rm -f wsel.raw
  modes wsel 6  40
  modes wsel 6  80 v K
  modes wsel 6  80 v
  modes wsel 6 120 V K
  modes wsel 6 120 V
  rm -f wsel6.trl
  trials wsel wsel6
  rm -f wsel6.res
  average wsel wsel6
  mv wsel.raw wsel6.raw
fi

if [[ $sel == "all" || $sel == "wine3" ]]; then
  rm -f wine.raw
  modes wine 3  16
  modes wine 3  18 v K
  modes wine 3  18 v
  modes wine 3 300 V K
  modes wine 3 300 V
  rm -f wine3.trl
  trials wine wine3
  rm -f wine3.res
  average wine wine3
  mv wine.raw wine3.raw
fi

if [[ $sel == "all" || $sel == "wine6" ]]; then
  rm -f wine.raw
  modes wine 6 200
  modes wine 6 200 v K
  modes wine 6 200 v
  modes wine 6 180 V K
  modes wine 6 180 V
  rm -f wine6.trl
  trials wine wine6
  rm -f wine6.res
  average wine wine6
  mv wine.raw wine6.raw
fi

if [[ $sel == "all" || $sel == "breast" ]]; then
  rm -f breast.raw
  modes breast 2   8
  modes breast 2  16 v K
  modes breast 2  16 v
  modes breast 2 100 V K
  modes breast 2 100 V
  rm -f breast.trl
  trials breast
  rm -f breast.res
  average breast
fi

if [[ $sel == "all" || $sel == "abalone3" ]]; then
  rm -f abalone.raw
  modes abalone 3  40
  modes abalone 3  40 v K
  modes abalone 3  40 v
  modes abalone 3 300 V K
  modes abalone 3 300 V
  rm -f abalone3.trl
  trials abalone abalone3
  rm -f abalone3.res
  average abalone abalone3
  mv abalone.raw abalone3.raw
fi

if [[ $sel == "all" || $sel == "abalone6" ]]; then
  rm -f abalone.raw
  modes abalone 6 150
  modes abalone 6 120 v K
  modes abalone 6 120 v
  modes abalone 6 500 V K
  modes abalone 6 500 V
  rm -f abalone6.trl
  trials abalone abalone6
  rm -f abalone6.res
  average abalone abalone6
  mv abalone.raw abalone6.raw
fi
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -