⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convert.cpp

📁 clique code with sample data set. clique is a data clustering algorithm which follows hierarchical c
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <cstdio>
#include <cstring>
#include <cstdlib>

#include <iostream>
#include <fstream>

#include <map>
#include <vector>
#include <string>

using namespace std;

// Conversion modes.
// main() compares argv[1] against the identically spelled strings
// ("OUTCACTUS", "CSVTOCACTUS", ...) and stores the matching constant in
// `mode`, which then selects the branch of its switch statement.

// Dump a binary record file (id, item count, items, trailing int) as text.
#define OUTCACTUS 1
// Encode each distinct token of a text file as a per-column integer id and
// write the records to stdout (binary by default, text with "ascii").
#define CSVTOCACTUS 2

// Dump a binary file consisting of a header (tuple count, attribute count,
// one int per attribute) followed by fixed-size integer records, as text.
#define OUTCLICK 3
// Two-pass encoding of a text file; also opens two extra output files and
// takes a label column argument.
#define CSVTOCLICK 4
// NOTE(review): the handling of the next two modes is outside the visible
// part of this file; their exact behavior should be confirmed there.
#define CLICKTOROCK 5
#define CONFUSION 6

// Side length of the square confusionMatrix array declared in main().
#define MAXCLUSTERS 1000

int main(int argc, char *argv[]) 
{ 

  int mode;

  int   numItems, id, i, j, k;
  int*  items;

  char buffer[1024];

  ifstream ifile;
  ifstream ifile2;
  ifstream ifile3;
  ofstream ofile;
  ofstream ofile2;

  int currentColumn, outColumn, totalColumns, recordID, outColumns, outValue;
  int labelColumn, numberOfTuples;

  int common, total;

  vector<int> ignoredColumns;
  vector<int>::const_iterator columnIt;
  map<string,int>* attrMap;
  map<string,int>::iterator attrMapIt;

  map<string,int> labelMap;
  int highestLabel;
  vector<int> highestOutValue;
  vector<int> columnOffset;

  map<int, int> confusionMap;
  int mapFrom, mapTo;
  int confusionMatrix[MAXCLUSTERS][MAXCLUSTERS];

  int cluster1, cluster2;  
  string entry;

  int maxDim1, maxDim2;
  int position;
  string tokenizer;
  int clusterNumber;

  int ignore_st= 0;
  bool use_ascii = false;
  
  vector<int> clusters1;
  vector<int> clusters2;
    
  int* tuples;

  if(argc < 2){
    printf("convert: You must give a conversion mode.\n");
    return(1);
  }


  if(!strcmp(argv[1], "OUTCACTUS"))
    mode = OUTCACTUS;
  else if(!strcmp(argv[1], "CSVTOCACTUS"))
    mode = CSVTOCACTUS;
  else if(!strcmp(argv[1], "CSVTOCLICK"))
    mode = CSVTOCLICK;
  else if(!strcmp(argv[1], "CLICKTOROCK"))
    mode = CLICKTOROCK;
  else if(!strcmp(argv[1], "CONFUSION"))
    mode = CONFUSION;
  else if(!strcmp(argv[1], "OUTCLICK"))
    mode = OUTCLICK;
  else{
    printf("convert: I don't know that mode you specified.\n");
    return(1);
  }
  
  ifile.open(argv[2]);
  
  if(!ifile){
    printf("convert: Cannot read input file.\n");
    return(1);
  }
  
  switch(mode){
  case OUTCACTUS:
    items = NULL;

    while(ifile.read((char*)&id, sizeof(int))){
      printf("%d ", id);
      ifile.read((char*)&numItems, sizeof(int));
      if(!items){
	items = new int[numItems];
      }
      
      ifile.read((char*)items, numItems*sizeof(int));
      for(i = 0; i < numItems; ++i){
	printf("%d ", items[i]);
      }
      printf("\n");
 
      // EOL
      ifile.read((char*)&id, sizeof(int));
    }
    
    if(items)
      delete items;

    break;
  case OUTCLICK:
    items = NULL;

    ifile.read((char*)&numberOfTuples, sizeof(int));
    printf("Total %d tuples in the file, ", numberOfTuples);
    
    ifile.read((char*)&numItems, sizeof(int));
    printf("with %d attributes.\n", numItems);
    
    items = new int[numItems];
    ifile.read((char*)items, numItems*sizeof(int));
    printf("Distinct attribute values are\n");
    for(i = 0; i < numItems; i++){
      printf("A%d (%d), ", i+1, items[i]);
    }
    printf("\n");
    delete items;
    
    items = new int[numItems + 3];

    while(ifile.read((char*)items, (numItems+3)*sizeof(int))){
      for(i = 0; i < numItems + 2; ++i){
	printf("%d ", items[i]);
      }
      printf("\n");
    }
    
    break;
  case CSVTOCACTUS:
    if(argc < 4){
      printf("convert: You need to specify the number of columns in the input file\n");
      printf("convert: CSVTOCACTUS <file> <totalColumns> [ascii] {ignoredColumn}*\n");
      exit(1);
    }
    
    use_ascii = false;
    totalColumns = atoi(argv[3]);
    ignore_st = 4;
    if (strcmp(argv[4], "ascii") == 0){
       ignore_st = 5;
       use_ascii = true;
    }
    
    for(i = ignore_st; i < argc; i++){
      ignoredColumns.push_back(atoi(argv[i]));
      // printf("Ignoring %d\n", atoi(argv[i]));
    }

    recordID = 1;
    outColumns = totalColumns - ignoredColumns.size();
    attrMap = new map<string,int>[outColumns];

    // printf("OutColumns = %d\n", outColumns);

    highestOutValue.assign(outColumns, 0);

    while(!ifile.eof()){
      currentColumn = 1;
      outColumn = 0;
      buffer[0] = '\0';

      if (use_ascii) cout << recordID << " ";
      else cout.write((char*)&recordID, sizeof(int));
      recordID++;
      if (use_ascii) cout << outColumns << " ";
      else cout.write((char*)&outColumns, sizeof(int));
      while(outColumn < outColumns){
	ifile >> buffer;
	//printf("'%s' is current column %d, out column %d\n", buffer, currentColumn, outColumn);
	if(buffer[0] == '\n' || buffer[0] == ','){
	}
	else{
	  // Check if this column is blocked
	  for(columnIt = ignoredColumns.begin(); columnIt != ignoredColumns.end(); columnIt++){
	    if(currentColumn == *columnIt)
	      break;
	  }

	  if(columnIt == ignoredColumns.end()){
	    entry = buffer;
	    outValue = attrMap[outColumn][entry];
   
	    if(outValue == 0){
	      outValue = ++highestOutValue[outColumn];
	      attrMap[outColumn][entry] = outValue;
	    }
	    
	    // outValue += outColumn * 100;
            if (use_ascii) cout << outValue << " ";
            else cout.write((char*)&outValue, sizeof(int));
	    
	    outColumn++;
	  }
	  currentColumn++;
	}
      }

      outValue = -1;
      if (use_ascii) cout << outValue << endl;
      else cout.write((char*)&outValue, sizeof(int));
    }
    delete attrMap;
    break;

  case CSVTOCLICK:
    if(argc < 7){
      printf("convert: You need to specify the number of columns in the input file and the label column\n");
      printf("convert: CSVTOCACTUS <sourcefile> <confusionfile> <mappingfile> <totalColumns> <label column> [ascii] {ignoredColumn}*\n");
      exit(1);
    }

    ofile.open(argv[3]);
    ofile2.open(argv[4]);

    if(!ofile.is_open() || !ofile2.is_open()){
      cout << "convert: The confusion file or mapping file could nout be opened." << endl;
      exit(1);
    }

    totalColumns = atoi(argv[5]);
    labelColumn = atoi(argv[6]);

    use_ascii = false;
    ignore_st = 7;
    if (strcmp(argv[7], "ascii") == 0){
       ignore_st = 8;
       use_ascii = true;
    }

    for(i = ignore_st; i < argc; i++){
      ignoredColumns.push_back(atoi(argv[i]));
    }

    outColumns = totalColumns - ignoredColumns.size();
    attrMap = new map<string,int>[outColumns];
    highestOutValue.assign(outColumns, 0);
    columnOffset.assign(outColumns, 0);
    highestLabel = 0;
    numberOfTuples = 0;

    // First pass: Compute attribute -> value mapping and count the number of
    // tuples
    while(!ifile.eof()){
      currentColumn = 0;
      outColumn = 0;
      buffer[0] = '\0';

      while(outColumn < outColumns){
	ifile >> buffer;
	entry = buffer;

	if(currentColumn == labelColumn){
	  if(labelMap[entry] == 0)
	    labelMap[entry] = ++highestLabel;
	}

	if(buffer[0] == '\n' || buffer[0] == ','){
	}
	else{
	  // Check if this column is blocked
	  for(columnIt = ignoredColumns.begin(); columnIt != ignoredColumns.end(); columnIt++){
	    if(currentColumn == *columnIt)
	      break;
	  }

	  if(columnIt == ignoredColumns.end()){
	    outValue = attrMap[outColumn][entry];
	    if(outValue == 0){
	      outValue = ++highestOutValue[outColumn];
	      attrMap[outColumn][entry] = outValue;
	    }
	    outColumn++;
	  }
	  currentColumn++;
	}
      }
      while(currentColumn < totalColumns){
	ifile >> buffer;
	currentColumn++;
      }
      outValue = -1;
      numberOfTuples++;
    }

    // cout << "Number of tuples " << numberOfTuples << endl;

    ifile2.open(argv[2]);

    if (use_ascii) cout << numberOfTuples << " ";
    else cout.write((char*)&numberOfTuples, sizeof(int));
    if (use_ascii) cout << outColumns << " ";
    else cout.write((char*)&outColumns, sizeof(int));
    for(i = 0; i < outColumns; ++i){
      outValue = highestOutValue[i];
      if (use_ascii) cout << outValue << " ";
      else cout.write((char*)&(outValue), sizeof(int));
    }
    if (use_ascii) cout << endl;

    for(i = 1; i < outColumns; i++){
      columnOffset[i] = columnOffset[i-1] + highestOutValue[i-1];
    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -