📄 fastcommunity_mh.cc
字号:
cout << "\nThis program runs the fast community structure inference algorithm due to "; cout << "Clauset, Newman and Moore on an input graph in the .pairs format. This version "; cout << "is the full max-heap version originally described in cond-mat/0408187. The program "; cout << "requires the input network connectivity to be formatted in the following specific "; cout << "way: the graph must be simple and connected, where each edge is written on "; cout << "a line in the format 'u v' (e.g., 3481 3483).\n"; cout << "To run the program, you must specify the input network file (-f file.pairs). "; cout << "Additionally, you can differentiate runs on the same input file with a label "; cout << "(-l test_run) which is imbedded in all corresponding output files. "; cout << "Because the algorithm is deterministic, you can specify a point (-c C) at which to "; cout << "cut the dendrogram; the program will write out various information about the clustered "; cout << "network: a list of clusters, the clustered connectivity, and the cluster size "; cout << "distribution. Typically, one wants this value to be the time at which modularity Q "; cout << "was maximized (that time is recorded in the .info file for a given run).\n"; cout << "Examples:\n"; cout << " ./FastCommunity -f network.pairs -l test_run\n"; cout << " ./FastCommunity -f network.pairs -l test_run -c 1232997\n"; cout << "\n"; return false; } while (argct < argc) { temp = argv[argct]; if (temp == "-files") { cout << "\nBasic files generated:\n"; cout << "-- .INFO\n"; cout << " Various information about the program's running. Includes a listing of "; cout << "the files it generates, number of vertices and edges processed, the maximum "; cout << "modularity found and the corresponding step (you can re-run the program with "; cout << "this value in the -c argument to have it output the contents of the clusters, "; cout << "etc. when it reaches that step again (not the most efficient solution, but it "; cout << "works)), start/stop time, and when -c is used, it records some information about "; cout << "the distribution of cluster sizes.\n"; cout << "-- .JOINS\n"; cout << " The dendrogram and modularity information from the algorithm. The file format "; cout << "is tab-delimited columns of data, where the columns are:\n"; cout << " 1. the community index which absorbs\n"; cout << " 2. the community index which was absorbed\n"; cout << " 3. the modularity value Q after the join\n"; cout << " 4. the time step of the join\n"; cout << "\nOptional files generated (at time t=C when -c C argument used):\n"; cout << "-- .WPAIRS\n"; cout << " The connectivity of the clustered graph in a .wpairs file format "; cout << "(i.e., weighted edges). The edge weights should be the dQ values associated "; cout << "with that clustered edge at time C. From this format, it's easy to "; cout << "convert into another for visualization (e.g., pajek's .net format).\n"; cout << "-- .HIST\n"; cout << " The size distribution of the clusters.\n"; cout << "-- .GROUPS\n"; cout << " A list of each group and the names of the vertices which compose it (this is "; cout << "particularly useful for verifying that the clustering makes sense - tedious but "; cout << "important).\n"; cout << "\n"; return false; } else if (temp == "-f") { // input file name argct++; temp = argv[argct]; ext = ".pairs"; pos = temp.find(ext,0); if (pos == string::npos) { cout << " Error: Input file must have terminating .pairs extension.\n"; return false; } ext = "/"; count = 0; pos = string::npos; for (int i=0; i < temp.size(); i++) { if (temp[i] == '/') { pos = i; } } if (pos == string::npos) { ioparm.d_in = ""; ioparm.filename = temp; } else { ioparm.d_in = temp.substr(0, pos+1); ioparm.filename = temp.substr(pos+1,temp.size()-pos-1); } ioparm.d_out = ioparm.d_in; // now grab the filename sans extension for building outputs files for (int i=0; i < ioparm.filename.size(); i++) { if (ioparm.filename[i] == '.') { pos = i; } } ioparm.s_scratch = ioparm.filename.substr(0,pos); } else if (temp == "-l") { // s_label argct++; if (argct < argc) { ioparm.s_label = argv[argct]; } else { " Warning: missing modifier for -l argument; using default.\n"; } } else if (temp == "-t") { // timer value argct++; if (argct < argc) { along = strtol(argv[argct],endptr,10); ioparm.timer = atoi(argv[argct]); cout << ioparm.timer << endl; if (ioparm.timer == 0 || strlen(argv[argct]) > temp.length()) { cout << " Warning: malformed modifier for -t; using default.\n"; argct--; ioparm.timer = 20; } } else { cout << " Warning: missing modifier for -t argument; using default.\n"; argct--; } } else if (temp == "-c") { // cut value argct++; if (argct < argc) {// along = strtol(argv[argct],endptr,10); ioparm.cutstep = atoi(argv[argct]); if (ioparm.cutstep == 0) { cout << " Warning: malformed modifier for -c; disabling output.\n"; argct--; } } else { cout << " Warning: missing modifier for -t argument; using default.\n"; argct--; } } else if (temp == "-s") { ioparm.suppFlag = true; } else if (temp == "-v") { ioparm.textFlag = 1; } else if (temp == "--v") { ioparm.textFlag = 2; } else if (temp == "---v") { ioparm.textFlag = 3; } else { cout << "Unknown commandline argument: " << argv[argct] << endl; } argct++; } return true;}// ------------------------------------------------------------------------------------void readInputFile() { // temporary variables for this function int numnodes = 0; int numlinks = 0; int s,f,t; edge **last; edge *newedge; edge *current; // pointer for checking edge existence bool existsFlag; // flag for edge existence time_t t1; t1 = time(&t1); time_t t2; t2 = time(&t2); // First scan through the input file to discover the largest node id. We need to // do this so that we can allocate a properly sized array for the sparse matrix // representation. cout << " scanning input file for basic information." << endl; cout << " edgecount: [0]"<<endl; ifstream fscan(ioparm.f_input.c_str(), ios::in); while (fscan >> s >> f) { // read friendship pair (s,f) numlinks++; // count number of edges if (f < s) { t = s; s = f; f = t; } // guarantee s < f if (f > numnodes) { numnodes = f; } // track largest node index if (t2-t1>ioparm.timer) { // check timer; if necessarsy, display cout << " edgecount: ["<<numlinks<<"]"<<endl; t1 = t2; // ioparm.timerFlag = true; // } // t2=time(&t2); // } fscan.close(); cout << " edgecount: ["<<numlinks<<"] total (first pass)"<<endl; gparm.maxid = numnodes+2; // store maximum index elist = new edge [2*numlinks]; // create requisite number of edges int ecounter = 0; // index of next edge of elist to be used // Now that we know numnodes, we can allocate the space for the sparse matrix, and // then reparse the file, adding edges as necessary. cout << " allocating space for network." << endl; e = new edge [gparm.maxid]; // (unordered) sparse adjacency matrix last = new edge* [gparm.maxid]; // list of pointers to the last edge in each row numnodes = 0; // numnodes now counts number of actual used node ids numlinks = 0; // numlinks now counts number of bi-directional edges created ioparm.timerFlag = false; // reset timer cout << " reparsing the input file to build network data structure." << endl; cout << " edgecount: [0]"<<endl; ifstream fin(ioparm.f_input.c_str(), ios::in); while (fin >> s >> f) { s++; f++; // increment s,f to prevent using e[0] if (f < s) { t = s; s = f; f = t; } // guarantee s < f numlinks++; // increment link count (preemptive) if (e[s].so == 0) { // if first edge with s, add s and (s,f) e[s].so = s; // e[s].si = f; // last[s] = &e[s]; // point last[s] at self numnodes++; // increment node count } else { // try to add (s,f) to s-edgelist current = &e[s]; // existsFlag = false; // while (current != NULL) { // check if (s,f) already in edgelist if (current->si==f) { // existsFlag = true; // link already exists numlinks--; // adjust link-count downward break; // } // current = current->next; // look at next edge } // if (!existsFlag) { // if not already exists, append it newedge = &elist[ecounter++]; // grab next-free-edge newedge -> so = s; // newedge -> si = f; // last[s] -> next = newedge; // append newedge to [s]'s list last[s] = newedge; // point last[s] to newedge } // } // if (e[f].so == 0) { // if first edge with f, add f and (f,s) e[f].so = f; // e[f].si = s; // last[f] = &e[f]; // point last[s] at self numnodes++; // increment node count } else { // try to add (f,s) to f-edgelist if (!existsFlag) { // if (s,f) wasn't in s-edgelist, then newedge = &elist[ecounter++]; // (f,s) not in f-edgelist newedge -> so = f; // newedge -> si = s; // last[f] -> next = newedge; // append newedge to [f]'s list last[f] = newedge; // point last[f] to newedge } // } existsFlag = false; // reset existsFlag if (t2-t1>ioparm.timer) { // check timer; if necessarsy, display cout << " edgecount: ["<<numlinks<<"]"<<endl; t1 = t2; // ioparm.timerFlag = true; // } // t2=time(&t2); // } cout << " edgecount: ["<<numlinks<<"] total (second pass)"<<endl; fin.close(); // Now we record our work in the parameters file, and exit. ofstream fout(ioparm.f_parm.c_str(), ios::app); fout << "---NET_STATS----\n"; fout << "MAXID-----:\t" << gparm.maxid-2 << "\n"; fout << "NUMNODES--:\t" << numnodes << "\n"; fout << "NUMEDGES--:\t" << numlinks << "\n"; fout.close(); gparm.m = numlinks; // store actual number of edges created gparm.n = numnodes; // store actual number of nodes used return;}// ------------------------------------------------------------------------------------// records the agglomerated list of indices for each valid community void recordGroupLists() { list *current; ofstream fgroup(ioparm.f_group.c_str(), ios::trunc); for (int i=0; i<gparm.maxid; i++) { if (c[i].valid) { fgroup << "GROUP[ "<<i-1<<" ][ "<<c[i].size<<" ]\n"; // external format current = c[i].members; while (current != NULL) { fgroup << current->index-1 << "\n"; // external format current = current->next; } } } fgroup.close(); return;}// ------------------------------------------------------------------------------------// records the network as currently agglomeratedvoid recordNetwork() { dpair *list, *current, *temp; ofstream fnet(ioparm.f_net.c_str(), ios::trunc); for (int i=0; i<gparm.maxid; i++) { if (dq[i].heap_ptr != NULL) { list = dq[i].v->returnTreeAsList(); // get a list of items in dq[i].v current = list; // store ptr to head of list while (current != NULL) { // source target weight (external representation) fnet << i-1 << "\t" << current->x-1 << "\t" << current->y << "\n"; temp = current; // clean up memory and move to next current = current->next; delete temp; } } } fnet.close(); return;}// ------------------------------------------------------------------------------------// ------------------------------------------------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -