⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_cluster.py

📁 这是一篇关于 c-cluster 的技术文章，详细介绍了 cluster 的技术分类及算法等，是学习聚类的入门好教程。
💻 PY
📖 第 1 页 / 共 2 页
字号:
                    [ 1.7, 1.9 ],                    [ 5.7, 5.9 ],                    [ 5.7, 5.9 ],                    [ 3.1, 3.3 ],                    [ 5.4, 5.3 ],                    [ 5.1, 5.5 ],                    [ 5.0, 5.5 ],                    [ 5.1, 5.2 ]])  mask2 = array([[ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ]])  # Cluster assignments  c1 = [0]  c2 = [1,2]  c3 = [3]  print "First data set:"  print_matrix(data1, mask1)  print "Clusters are", c1, c2, c3  distance = clusterdistance(data1, mask=mask1, weight=weight1, index1=c1, index2=c2, dist='e', method='a', transpose=0);  print "Distance between cluster", c1, "and", c2, "is %7.3f." % distance  distance = clusterdistance(data1, mask=mask1, weight=weight1, index1=c1, index2=c3, dist='e', method='a', transpose=0);  print "Distance between cluster", c1, "and", c3, "is %7.3f." % distance  distance = clusterdistance(data1, mask=mask1, weight=weight1, index1=c2, index2=c3, dist='e', method='a', transpose=0);  print "Distance between cluster", c2, "and", c3, "is %7.3f." % distance  # Cluster assignments  c1 = [ 0, 1, 2, 3 ]  c2 = [ 4, 5, 6, 7 ]  c3 = [ 8 ]  print "Second data set:"  print_matrix(data2, mask2)  print "Clusters are", c1, c2, c3  distance = clusterdistance(data2, mask=mask2, weight=weight2, index1=c1, index2=c2, dist='e', method='a', transpose=0);  print "Distance between cluster", c1, "and", c2, "is %7.3f." % distance  distance = clusterdistance(data2, mask=mask2, weight=weight2, index1=c1, index2=c3, dist='e', method='a', transpose=0);  print "Distance between cluster", c1, "and", c3, "is %7.3f." 
% distance  distance = clusterdistance(data2, mask=mask2, weight=weight2, index1=c2, index2=c3, dist='e', method='a', transpose=0);  print "Distance between cluster", c2, "and", c3, "is %7.3f." % distance  printdef test_treecluster(module):  if module=='Bio.Cluster':    from Bio.Cluster import treecluster  elif module=='Pycluster':    from Pycluster import treecluster  else:    raise 'Unknown module name', module  print "test_treecluster:"  # First data set  weight1 =  [ 1,1,1,1,1 ]  data1   =  array([[  1.1, 2.2, 3.3, 4.4, 5.5],                     [  3.1, 3.2, 1.3, 2.4, 1.5],                     [  4.1, 2.2, 0.3, 5.4, 0.5],                     [ 12.1, 2.0, 0.0, 5.0, 0.0]])  mask1 = array([[ 1, 1, 1, 1, 1],                  [ 1, 1, 1, 1, 1],                  [ 1, 1, 1, 1, 1],                  [ 1, 1, 1, 1, 1]])  # Second data set  weight2 =  [ 1,1 ]  data2 = array([[ 0.8223, 0.9295 ],                 [ 1.4365, 1.3223 ],                 [ 1.1623, 1.5364 ],                 [ 2.1826, 1.1934 ],                 [ 1.7763, 1.9352 ],                 [ 1.7215, 1.9912 ],                 [ 2.1812, 5.9935 ],                 [ 5.3290, 5.9452 ],                 [ 3.1491, 3.3454 ],                 [ 5.1923, 5.3156 ],                 [ 4.7735, 5.4012 ],                 [ 5.1297, 5.5645 ],                 [ 5.3934, 5.1823 ]])  mask2 = array([[ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ]])  # test first data set  print "First data set:"  print_matrix(data1, mask1)  print "Pairwise average-linkage clustering"  result, linkdist = treecluster(data=data1, mask=mask1, weight=weight1, transpose=0, method='a', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data1)-1)  print "Number of link 
distances is %d (should be %d)" % (len(linkdist), len(data1)-1)  for i in range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  print "Pairwise single-linkage clustering"  result, linkdist = treecluster(data=data1, mask=mask1, weight=weight1, transpose=0, method='s', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data1)-1)  print "Number of link distances is %d (should be %d)" % (len(linkdist), len(data1)-1)  for i in range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  print "Pairwise centroid-linkage clustering"  result, linkdist = treecluster(data=data1, mask=mask1, weight=weight1, transpose=0, method='c', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data1)-1)  print "Number of link distances is %d (should be %d)" % (len(linkdist), len(data1)-1)  for i in range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  print "Pairwise maximum-linkage clustering"  result, linkdist = treecluster(data=data1, mask=mask1, weight=weight1, transpose=0, method='m', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data1)-1)  print "Number of link distances is %d (should be %d)" % (len(linkdist), len(data1)-1)  for i in range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  # Test second data set  print "Second data set:"  print "Pairwise average-linkage clustering"  result, linkdist = treecluster(data=data2, mask=mask2, weight=weight2, transpose=0, method='a', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data2)-1)  print "Number of link distances is %d (should be %d)" % (len(linkdist), len(data2)-1)  for i in 
range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  print "Pairwise single-linkage clustering"  result, linkdist = treecluster(data=data2, mask=mask2, weight=weight2, transpose=0, method='s', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data2)-1)  print "Number of link distances is %d (should be %d)" % (len(linkdist), len(data2)-1)  for i in range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  print "Pairwise centroid-linkage clustering"  result, linkdist = treecluster(data=data2, mask=mask2, weight=weight2, transpose=0, method='c', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data2)-1)  print "Number of link distances is %d (should be %d)" % (len(linkdist), len(data2)-1)  for i in range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  print "Pairwise maximum-linkage clustering"  result, linkdist = treecluster(data=data2, mask=mask2, weight=weight2, transpose=0, method='m', dist='e')  print "Number of nodes is %d (should be %d)" % (len(result), len(data2)-1)  print "Number of link distances is %d (should be %d)" % (len(linkdist), len(data2)-1)  for i in range(len(result)):    print "Node %3d joins node %3d with node %3d; link distance is %7.3f" % (i, result[i][0], result[i][1], linkdist[i])  printdef test_somcluster(module):  if module=='Bio.Cluster':    from Bio.Cluster import somcluster  elif module=='Pycluster':    from Pycluster import somcluster  else:    raise 'Unknown module name', module  print "test_somcluster:"  # First data set  weight1 = [ 1,1,1,1,1 ]  data1 = array([[  1.1, 2.2, 3.3, 4.4, 5.5],                  [  3.1, 3.2, 1.3, 2.4, 1.5],                  [  4.1, 2.2, 0.3, 5.4, 0.5],                  [ 12.1, 2.0, 0.0, 5.0, 0.0]])  
mask1 = array([[ 1, 1, 1, 1, 1],                  [ 1, 1, 1, 1, 1],                  [ 1, 1, 1, 1, 1],                  [ 1, 1, 1, 1, 1]])  # Second data set  weight2 =  [ 1,1 ]  data2 = array([[ 1.1, 1.2 ],                 [ 1.4, 1.3 ],                 [ 1.1, 1.5 ],                 [ 2.0, 1.5 ],                 [ 1.7, 1.9 ],                 [ 1.7, 1.9 ],                 [ 5.7, 5.9 ],                 [ 5.7, 5.9 ],                 [ 3.1, 3.3 ],                 [ 5.4, 5.3 ],                 [ 5.1, 5.5 ],                 [ 5.0, 5.5 ],                 [ 5.1, 5.2 ]])  mask2 = array([[ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ],                 [ 1, 1 ]])  print "First data set:"  clusterid, celldata = somcluster(data=data1, mask=mask1, weight=weight1, transpose=0, nxgrid=10, nygrid=10, inittau=0.02, niter=100, dist='e')  print "Number of cluster ids is %d (should be %d)" % (len(clusterid), len(data1))  print "Grid is %d-dimensional (should be 2-dimensional)" % len(clusterid[0])  print "Second data set:"  clusterid, celldata = somcluster(data=data2, mask=mask2, weight=weight2, transpose=0, nxgrid=10, nygrid=10, inittau=0.02, niter=100, dist='e')  print "Number of cluster ids is %d (should be %d)" % (len(clusterid), len(data2))  print "Grid is %d-dimensional (should be 2-dimensional)" % len(clusterid[0])  printdef run_tests(module="Pycluster"):  if module==[]: module = "Bio.Cluster"  test_mean_median(module)  test_matrix_parse(module)  test_kcluster(module)  test_clusterdistance(module)  test_treecluster(module)  test_somcluster(module)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -