⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convert.cpp

📁 CLIQUE code with a sample data set. CLIQUE is a data clustering algorithm which follows hierarchical c…
💻 CPP
📖 第 1 页 / 共 2 页
字号:
    // Restart the tuple counter for the conversion pass.
    i = 0;
    // Second pass: Actual conversion using the maps created before
    // (the code that fills attrMap, labelMap and columnOffset is not part of
    // this excerpt).  Every pass through the loop writes one record to stdout:
    //   <i> <numberOfTuples> <one mapped value per kept column> <i>
    // as space-separated text when use_ascii is set, otherwise as raw int bytes.
    // NOTE(review): the loop is driven by !eof(), so one more record may be
    // attempted when only trailing whitespace is left in ifile2 -- confirm
    // whether that extra record is intended.
    while(!ifile2.eof()){
      outColumn = 0;
      currentColumn = 0;
      buffer[0] = '\0';

      // Record header: running tuple index, then the tuple count.
      if (use_ascii) cout << i << " ";
      else cout.write((char*)&i, sizeof(int));
      if (use_ascii) cout << numberOfTuples << " ";
      else cout.write((char*)&numberOfTuples, sizeof(int));

      // cout << "Writing " << i << endl;

      // Keep consuming input tokens until outColumns values have been emitted.
      while(outColumn < outColumns){
	ifile2 >> buffer;
	entry = buffer;

	// The token in the label column is translated through labelMap and
	// written, shifted by -1, to ofile on its own line.
	if(currentColumn == labelColumn){
	  // cout << "Label is " << labelMap[entry] - 1 << endl;
	  ofile << labelMap[entry] - 1 << endl;
	}

	// Tokens that start with a newline or a comma are dropped without
	// advancing either column counter.
	if(buffer[0] == '\n' || buffer[0] == ','){
	}
	else{
	  // Check if this column is blocked
	  for(columnIt = ignoredColumns.begin(); columnIt != ignoredColumns.end(); columnIt++){
	    if(currentColumn == *columnIt)
	      break;
	  }

	  
	  // columnIt reaches end() only if currentColumn is not in ignoredColumns.
	  if(columnIt == ignoredColumns.end()){
	    // Mapped value for this token (shifted by -1) plus the offset of
	    // the current output column.
	    outValue = columnOffset[outColumn] + attrMap[outColumn][entry] - 1;
	    // cout << "OV in " << outColumn << " = " << outValue << endl;
	    if (use_ascii) cout << outValue << " ";
            else cout.write((char*)&outValue, sizeof(int));
	    // cout << "DONE" << endl;
	    outColumn++;
	  }
	  currentColumn++;
	}
      }
      // Read and discard tokens until totalColumns input columns were consumed.
      while(currentColumn < totalColumns){
	ifile2 >> buffer;
	currentColumn++;
      }
      // Record trailer: the tuple index again (ASCII mode also ends the line).
      if (use_ascii) cout << i << endl;
      else cout.write((char*)&i, sizeof(int));

      outValue = -1;
      i++;
    }

    // Finally, create the mapping file so we know what the original attribute
    // values were
    //for(attrMapIt = labelMap.begin(); attrMapIt != labelMap.end(); attrMapIt++){
    //  ofile2 << "Label " << ": '" << attrMapIt->first << "' = " << attrMapIt->second << endl;
    //}

    // One section per output column: a "*A* <column>" header line followed by
    // one "<original value> <mapped value>" line per map entry.  An entry whose
    // original value is the empty string is written with "-" in its place.
    for(i = 0; i < outColumns; i++){
      ofile2 << "*A* " << i << endl;

      for(attrMapIt = attrMap[i].begin(); attrMapIt != attrMap[i].end(); attrMapIt++){
	if(attrMapIt->first != ""){
	  ofile2 << attrMapIt->first << " " << attrMapIt->second << endl;
	}
	else{
	  ofile2 << "- " << attrMapIt->second << endl;
	}
      }
    }

    // NOTE(review): ofile2 is not closed anywhere in this excerpt.
    ifile2.close();
    ofile.close();
    delete[] attrMap;

    break;
  case CLICKTOROCK:
    if(argc < 4){
      printf("convert: You need to specify the number of values you want in the output file\n");
      printf("convert: CLICKTOROCK <sourcefile> <itemlimit>\n");
      exit(1);
    }

    items = NULL;

    ifile.read((char*)&numberOfTuples, sizeof(int));
    
    ifile.read((char*)&numItems, sizeof(int));
    
    items = new int[numItems];
    ifile.read((char*)items, numItems*sizeof(int));
    delete items;    
    items = new int[numItems + 3];

    int effectiveTuples;
    effectiveTuples = atoi(argv[3]) < numberOfTuples ? atoi(argv[3]) : numberOfTuples;

    tuples = new int[effectiveTuples * (numItems + 3)];
    ifile.read((char*)tuples, effectiveTuples * (numItems+3)*sizeof(int));
      
    cout << "1 " << effectiveTuples << " 1" << endl;
    for(i = 0; i < effectiveTuples - 1; i++){
      for(j = i+1; j < effectiveTuples; j++){
	common = total = 0;
	for(k = 2; k < numItems + 2; k++){
	  if(tuples[i * (numItems+3) + k] == tuples[j * (numItems + 3) + k]){
	    common++;
	    total++;
	  }
	  else{
	    total += 2;
	  }
	}
	
	if(total != 0){
	  cout << (double)common / double(total) << " " << i+1 << " " << j+1 << endl;
	}
	else{
	  cout << 0.0 << " " << i+1 << " " << j+1 << endl;
	}
      }
    }
    
   
    break;

  case CONFUSION:
    // Builds a confusion matrix from two cluster-assignment files and prints it
    // to stdout as a LaTeX table of percentages.
    //
    // ifile supplies the first confusion file (it is opened outside this case),
    // argv[3] the second one and argv[4] a mapping file of "<from> <to>" cluster
    // number pairs.  Each whitespace-delimited token of a confusion file is a
    // comma-separated list of cluster numbers.  All cluster numbers are shifted
    // by +2 internally.
    //
    // NOTE(review): the mapping is applied to the numbers of BOTH files even
    // though the usage text calls it the "mapping file for 1" -- confirm.
    // NOTE(review): confusionMap is indexed with values taken straight from the
    // input files; whether that is bounds-safe depends on its type, which is
    // declared outside this excerpt.

    if(argc < 5){
      printf("convert: You need to give two confusion files and a mapping file.\n");
      printf("convert: CONFUSION <confusion1> <confusion2> <mapping file for 1>\n");
      exit(1);
    }

    ifile2.open(argv[3]);
    if(!ifile2.is_open()){
      printf("Could not open confusion file '%s'\n", argv[3]);
      ifile.close();
      exit(1);
    }

    ifile3.open(argv[4]);
    if(!ifile3.is_open()){
      printf("Could not open mapping file '%s'\n", argv[4]);
      ifile.close();
      ifile2.close();
      exit(1);
    }

    // First read the cluster number mapping
    while(!ifile3.eof()){
      ifile3 >> buffer;
      mapFrom = atoi(buffer);
      ifile3 >> buffer;
      mapTo = atoi(buffer);

      // Identity pairs need no entry.
      if(mapFrom == mapTo)
        continue;

      //cout << "convert: Mapping " << mapFrom << " to " << mapTo << endl;

      // Confusion files start counting at -1 (for "no cluster") and we need the
      // 0 as reserved value for "no mapping specified"
      confusionMap[mapFrom + 2] = mapTo + 2;

    }
    ifile3.close();


    numberOfTuples = 0;
    maxDim1 = maxDim2 = 0;

    while(!ifile.eof()){
      if(ifile2.eof()){
        printf("Warning: Confusion files have different length\n");
        break;
      }

      // Cluster list of the current tuple in the first file.
      tokenizer.clear();
      clusters1.clear();
      ifile >> tokenizer;

      // Nothing left in the first file.
      if(tokenizer.empty())
        break;

      while(!tokenizer.empty()){
        // NOTE(review): the ">= 0" test only detects "no comma" if position is
        // a signed type; its declaration is outside this excerpt.
        if((position = tokenizer.find_first_of(",")) >= 0){
          clusterNumber = atoi(tokenizer.substr(0, position).c_str()) + 2;
          tokenizer.erase(0, position + 1);
        }
        else{
          clusterNumber = atoi(tokenizer.c_str()) + 2;
          tokenizer.clear();
        }
        // A zero entry means "no mapping specified".
        if(confusionMap[clusterNumber] != 0)
          clusterNumber = confusionMap[clusterNumber];

        clusters1.push_back(clusterNumber);

        if(clusterNumber > maxDim1)
          maxDim1 = clusterNumber;
      }

      // Cluster list of the same tuple in the second file.
      tokenizer.clear();
      clusters2.clear();
      ifile2 >> tokenizer;

      while(!tokenizer.empty()){
        if((position = tokenizer.find_first_of(",")) >= 0){
          clusterNumber = atoi(tokenizer.substr(0, position).c_str()) + 2;
          tokenizer.erase(0, position + 1);
        }
        else{
          clusterNumber = atoi(tokenizer.c_str()) + 2;
          tokenizer.clear();
        }
        if(confusionMap[clusterNumber] != 0)
          clusterNumber = confusionMap[clusterNumber];

        clusters2.push_back(clusterNumber);

        if(clusterNumber > maxDim2)
          maxDim2 = clusterNumber;
      }

      // The second file ran out while the first still had a tuple.  Without
      // this guard the fallback below would read clusters2[0] of an empty
      // vector.
      if(clusters2.empty()){
        printf("Warning: Confusion files have different length\n");
        break;
      }

      numberOfTuples++;

      // See if there's a case where the actual class and the predicted class
      // match. If not so, take the first (actual, predicted) pair and add it to the
      // confusion matrix
      // The break only leaves the inner loop, so when several entries of
      // clusters1 have a match the last such entry is the one kept.

      cluster1 = -1;
      cluster2 = -1;
      for(i = 0; i < clusters1.size(); i++){
        for(j = 0; j < clusters2.size(); j++){
          if(clusters1[i] == clusters2[j]){
            cluster1 = clusters1[i];
            cluster2 = clusters2[j];
            break;
          }
        }
      }

      if(cluster1 == -1 && cluster2 == -1){
        cluster1 = clusters1[0];
        cluster2 = clusters2[0];
      }

      // Reject anything that does not index into the matrix, including
      // negative values produced by malformed input.
      if(cluster1 < 0 || cluster2 < 0 || cluster1 >= MAXCLUSTERS || cluster2 >= MAXCLUSTERS){
        cout << "Warning: The maximal size of the confusion matrix is " << MAXCLUSTERS << "^2." << endl;
        continue;
      }

      confusionMatrix[cluster1][cluster2]++;
    }

    ifile2.close();

    if(numberOfTuples == 0){
      cout << "No tuples in the confusion files.\n" << endl;
      exit(1);
    }

    // maxDim1/maxDim2 were recorded before the MAXCLUSTERS check above, so they
    // can exceed the matrix bounds; clamp them so the table loops stay inside
    // confusionMatrix.
    if(maxDim1 >= MAXCLUSTERS)
      maxDim1 = MAXCLUSTERS - 1;
    if(maxDim2 >= MAXCLUSTERS)
      maxDim2 = MAXCLUSTERS - 1;

    cout << "\\begin{table}" << endl;
    cout << "  \\begin{center}" << endl;
    cout << "    \\begin{tabular}";

    // Column specification: one row-label column plus maxDim2 data columns.
    for(j = 1; j <= maxDim2+1; j++){
      if(j == 1){
        cout << "{c";
      }
      else if(j == maxDim2+1){
        cout << "|c}";
      }
      else{
        cout << "|c";
      }
    }
    cout << endl;

    // Header row: index 1 is labelled "None", index j > 1 is labelled C_(j-1).
    for(j = 1; j<= maxDim2; j++){
      if(j == 1){
        cout << " & None ";
      }
      else{
        cout << " & $C_" << j-1 << "$";
      }
    }
    cout << " \\\\" << endl << "\\hline" << endl;


    // Body rows: each cell is that cell's share of all counted tuples, in
    // percent.  The last row is not terminated with "\\".
    for(i = 1; i <= maxDim1; i++){
      if(i == 1){
        cout << " None ";
      }
      else{
        cout << "$C_" << i-1 << "$ ";
      }
      for(j = 1; j <= maxDim2; j++){
        printf("& %.1f\\%% ", 100 * (double)confusionMatrix[i][j] / (double)numberOfTuples);
      }
      if(i < maxDim1){
        cout << "\\\\" << endl << "\\hline" << endl;
      }
      else{
        cout << endl;
      }
    }

    cout << "    \\end{tabular}" << endl;
    cout << "  \\end{center}" << endl;
    cout << "  \\caption{Confusion Matrix}" << endl;
    cout << "\\end{table}" << endl;



    break;
 
  // Any selector value without a case of its own does nothing.
  default:
    break;
  }

  // Reached by every case that leaves the switch through break.
  ifile.close();

  return 0;

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -