📄 clustermain.cpp
字号:
// Array Weight is weight for each column in clustering
// Default is 1.0
ArrayWeight = new double[2*Columns-1]; // allow for possibility of
ArrayOrder = new double[2*Columns-1]; // clustering arrays
for (i=0;i<Columns;i++)
{
ArrayWeight[i] = 1;
ArrayOrder[i] = 1;
}
// Now Figure out if there is an Array Weight ROW
Rows = List->Count - 1;
int LineOffset = 1;
Line = List->Strings[1];
LineList->Clear();
while ((Field = NextString(&Line)) != "DONE")
{
LineList->Insert(0,Field);
}
if ( (LineList->Strings[0] == "EWEIGHT") ||
(LineList->Strings[0] == "WEIGHT") )
{
Rows--;
LineOffset++;
index = 0;
for (j=0;j<min(LineList->Count,Headers->Count);j++)
{
if (IsData[j] == true)
{
try
{
double Val = LineList->Strings[j].ToDouble();
ArrayWeight[index] = Val;
}
catch (EConvertError &E)
{
ArrayWeight[index] = 0; // Default to zero if row is
// present but cell empty
}
index ++;
}
}
}
Line = List->Strings[2];
LineList->Clear();
while ((Field = NextString(&Line)) != "DONE")
{
LineList->Insert(0,Field);
}
if ( (LineList->Strings[0] == "EORDER") ||
(LineList->Strings[0] == "ORDER") )
{
Rows--;
LineOffset ++;
index = 0;
for (j=0;j<min(LineList->Count,Headers->Count);j++)
{
if (IsData[j] == true)
{
try
{
double Val = LineList->Strings[j].ToDouble();
ArrayOrder[index] = Val;
}
catch (EConvertError &E)
{
ArrayOrder[index] = 0; // Default to zero if row is
// present but cell empty
}
index ++;
}
}
}
GeneData = new double*[2*Rows-1];
GeneMask = new bool*[2*Rows-1];
UniqID = new AnsiString[2*Rows-1];
GeneName = new AnsiString[2*Rows-1];
GeneWeight = new double[2*Rows-1];
GeneOrder = new double[2*Rows-1];
for (i=0;i<2*Rows-1;i++)
{
GeneData[i] = new double[Columns];
GeneMask[i] = new bool[Columns];
for (j=0;j<Columns;j++)
{
GeneMask[i][j] = false;
}
}
for (i=LineOffset;i<List->Count;i++)
{
LineList->Clear();
l = i - LineOffset;
//Update user on status
StatusBar1->SimpleText = "Loading Gene " + AnsiString(l + 1) + " of "
+ AnsiString(List->Count - LineOffset);
Application->ProcessMessages();
//Get current line
Line = List->Strings[i];
//Parse Line into fields
while ((Field = NextString(&Line)) != "DONE")
{
LineList->Insert(0,Field);
}
//UniqID always columns 0
UniqID[l] = LineList->Strings[0];
if (NameIndex > -1)
{
GeneName[l] = LineList->Strings[NameIndex];
}
else
{
GeneName[l] = UniqID[l];
}
if (GeneWeightIndex > -1)
{
try
{
GeneWeight[l] = LineList->Strings[GeneWeightIndex].ToDouble();
}
catch (EConvertError &E)
{
}
}
else
{
GeneWeight[l] = 1.0;
}
if (GeneOrderIndex > -1)
{
try
{
GeneOrder[l] = LineList->Strings[GeneOrderIndex].ToDouble();
}
catch (EConvertError &E)
{
}
}
else
{
GeneOrder[l] = 1.0;
}
index = 0;
for (j=0;j<min(LineList->Count,Headers->Count);j++)
{
if (IsData[j] == true)
{
if (LineList->Strings[j].Length() > 0)
{
try
{
double Val = LineList->Strings[j].ToDouble();
GeneData[l][index] = Val;
GeneMask[l][index] = true;
}
catch (EConvertError &E)
{
}
}
index ++;
}
}
}
delete LineList;
StatusBar1->SimpleText = "Done Loading Data";
}
catch (Exception &E)
{
Application->MessageBox("Could not Open File\nIf the file is open in another program (e.g. Excel)\nClose it and try again","Could Not Open File",
MB_OK);
}
delete List;
}
/* Tab-delimited parsing utility for LoadFile.

   Removes the LAST tab-separated field from *String and returns it; *String
   is shortened to the text that preceded that final tab. When *String holds
   no tab, the whole remainder is returned and *String is emptied. Once
   *String is empty and holds no tab, the sentinel "DONE" is returned instead.

   Fields therefore come out right-to-left; the parsing loops in this file
   insert each one at index 0 of their list to restore left-to-right order.

   String : in/out - the unparsed remainder of the line. */
AnsiString TMainForm::NextString(AnsiString* String)
{
    AnsiString &Remaining = *String;
    const int TabPos = Remaining.LastDelimiter("\t");

    if (TabPos != 0)
    {
        // A tab exists: the field is everything after it (possibly empty),
        // and the remainder is everything before it.
        const AnsiString Tail =
            Remaining.SubString(TabPos + 1, Remaining.Length() - TabPos);
        Remaining = Remaining.SubString(0, TabPos - 1);
        return Tail;
    }

    // No tab left: hand back whatever remains and clear the input.
    const AnsiString Last = Remaining;
    Remaining = Remaining.SubString(0, 0);

    if (Last.Length() == 0)
    {
        // Nothing remained at all - signal end of line to the caller.
        return AnsiString("DONE");
    }
    return Last;
}
/* Write the currently loaded data set to FileName as tab-delimited text.

   Lines produced, in order:
     1. Headers->Strings[0], "NAME", "GWEIGHT", followed by the header of
        each data column (looked up through InColumn[]).
     2. "EWEIGHT", two empty cells, then ArrayWeight[] for each data column.
     3. One line per row: UniqID, GeneName, GeneWeight, then the row's
        values. A cell whose GeneMask entry is false is written empty.

   Values are formatted with FloatToStrF(ffGeneral, precision 4, digits 2).

   FileName : path handed to TStringList::SaveToFile.

   Any exception raised while building or saving the list is rethrown to
   the caller after the temporary TStringList has been freed, so a failed
   save no longer leaks it. */
void __fastcall TMainForm::SaveData(AnsiString FileName)
{
    TStringList *DataFile = new TStringList();
    try
    {
        const TFloatFormat Format = ffGeneral;

        // Header line: ID column name, NAME, GWEIGHT, then data column headers.
        AnsiString OutString = "";
        OutString += Headers->Strings[0] + AnsiString("\t");
        OutString += AnsiString("NAME") + AnsiString("\t");
        OutString += "GWEIGHT";
        for (int i = 0; i < Columns; i++)
        {
            OutString += "\t";
            OutString += AnsiString(Headers->Strings[InColumn[i]]);
        }
        DataFile->Add(OutString);

        // EWEIGHT line: the NAME and GWEIGHT cells are left empty so the
        // weights line up under the data columns.
        OutString = AnsiString("EWEIGHT");
        OutString += AnsiString("\t");
        OutString += AnsiString("\t");
        for (int i = 0; i < Columns; i++)
        {
            OutString += "\t" + AnsiString(ArrayWeight[i]);
        }
        DataFile->Add(OutString);

        // One line per row of data.
        for (int Row = 0; Row < Rows; Row++)
        {
            OutString = "";
            OutString += AnsiString(UniqID[Row]) + "\t" + AnsiString(GeneName[Row]);
            OutString += "\t" + AnsiString(GeneWeight[Row]);
            for (int Column = 0; Column < Columns; Column++)
            {
                if (GeneMask[Row][Column] == true)
                {
                    OutString += "\t" + AnsiString(FloatToStrF(GeneData[Row][Column],Format,4,2));
                }
                else
                {
                    // Missing value: emit the delimiter only.
                    OutString += "\t";
                }
            }
            DataFile->Add(OutString);
        }

        DataFile->SaveToFile(FileName);
    }
    catch (...)
    {
        // Release the list before letting the error reach the caller.
        delete DataFile;
        throw;
    }
    delete DataFile;
}
/* Save button clicked: pre-fill the save dialog with "<job name>.txt"
   (job name taken from JobNameEdit) and, if the dialog's Execute() returns
   true, write the data to the chosen file via SaveData. */
void __fastcall TMainForm::SaveButtonClick(TObject *Sender)
{
    SaveDataDialog->FileName = JobNameEdit->Text + ".txt";

    if (!SaveDataDialog->Execute())
    {
        // Dialog was not confirmed - nothing to write.
        return;
    }

    SaveData(SaveDataDialog->FileName);
}
//---------------------------------------------------------------------------
/* File Format Help Button Clicked: calls Show() on FileFormatForm.
   Sender is not used. */
void __fastcall TMainForm::FileHelpClick(TObject *Sender)
{
FileFormatForm->Show();
}
/* Manual button clicked: hand the Cluster/TreeView manual's URL to
   ShellExecute with the "open" verb, using this form's window Handle as the
   parent. The return value of ShellExecute is not checked. Sender is not
   used. */
void __fastcall TMainForm::ManualButtonClick(TObject *Sender)
{
    const char *const Verb = "open";
    const char *const ManualUrl =
        "http://rana.stanford.edu/software/manuals/ClusterTreeView.pdf";

    // No parameters and no working directory are supplied.
    ShellExecute(Handle, Verb, ManualUrl, 0, 0, SW_SHOWNORMAL);
}
//---------------------------------------------------------------------------
/* Filter data. Apply user selected criteria to flag (for subsequent removal)
   rows that fail to pass tests. Note that filters are assessed here and applied
   separately so the user can adjust parameters to get appropriate number
   of rows passing.

   For every row the present (GeneMask == true) values are summarised and the
   enabled filters are evaluated:
     - FilterPercentCheckBox : percentage of present values >= FilterPercentVal
     - FilterSDCheckBox      : sample standard deviation >= FilterSDVal
                               (rows with fewer than two values fail)
     - FilterAbsValCheckBox  : at least FilterAbsValCount values with
                               |value| >= FilterAbsValVal
     - FilterMaxMinCheckBox  : Max - Min >= FilterMaxMinVal
   Results are stored in Use[] (one flag per row) and counted in UseRows;
   the label and the Accept button are then shown. */
void __fastcall TMainForm::FilterClick(TObject *Sender)
{
    FilterResultsLabel->Visible = false;
    AcceptFilterButton->Visible = false;

    /* Store results in boolean Use */
    // Use is allocated with new[] just below, so it must be released with
    // delete[]; plain delete on an array is undefined behaviour.
    delete [] Use;
    Use = new bool[Rows];
    UseRows = 0;

    for (int Row = 0; Row < Rows; Row++)
    {
        StatusBar1->SimpleText = "Assessing Filters for Gene " + AnsiString(Row);

        int Count = 0;
        int CountAbs = 0;
        double Sum = 0;
        double Sum2 = 0;
        // Placeholders that only survive when the row has no present values;
        // Max - Min is then hugely negative, as it was before this change.
        double Min = 10000000;
        double Max = -10000000;

        /* Compute some row stats */
        for (int Column = 0; Column < Columns; Column++)
        {
            if (!GeneMask[Row][Column])
            {
                continue;
            }
            const double Value = GeneData[Row][Column];
            Sum += Value;
            Sum2 += pow(Value,2);
            if (Count == 0)
            {
                // Seed the range from the first real value so data lying
                // entirely beyond +/-1e7 still yields a correct Max - Min.
                Min = Value;
                Max = Value;
            }
            else
            {
                Min = min(Value,Min);
                Max = max(Value,Max);
            }
            Count++;
            if (fabs(Value) >= FilterAbsValVal)
            {
                CountAbs++;
            }
        }

        Use[Row] = true;

        /* Filter based on percent values present; remove rows
           with too many missing values */
        if (FilterPercentCheckBox->Checked)
        {
            const double PercentPresent =
                static_cast<double>(Count) / static_cast<double>(Columns);
            if ( (100 * PercentPresent) < FilterPercentVal )
            {
                Use[Row] = false;
            }
        }

        /* Remove rows with low SD */
        if (FilterSDCheckBox->Checked)
        {
            if (Count > 1)
            {
                const double Ave = Sum / static_cast<double>(Count);
                double Arg = (Sum2 - 2 * Ave * Sum + static_cast<double>(Count) * pow(Ave,2))
                             / static_cast<double>(Count - 1);
                // Rounding can leave a tiny negative variance for (near-)constant
                // rows; clamp so sqrt stays defined and the row is judged as SD 0.
                if (Arg < 0)
                {
                    Arg = 0;
                }
                const double SD = sqrt(Arg);
                if (SD < FilterSDVal)
                {
                    Use[Row] = false;
                }
            }
            else
            {
                // SD is undefined for fewer than two values.
                Use[Row] = false;
            }
        }

        /* Remove rows with too few extreme values */
        if (FilterAbsValCheckBox->Checked)
        {
            if (CountAbs < FilterAbsValCount)
            {
                Use[Row] = false;
            }
        }

        /* Remove rows with too small Max-Min */
        if (FilterMaxMinCheckBox->Checked)
        {
            if (Max - Min < FilterMaxMinVal)
            {
                Use[Row] = false;
            }
        }

        if (Use[Row])
        {
            UseRows++;
        }
    }

    FilterResultsLabel->Visible = true;
    /* Tell user how many rows passed */
    FilterResultsLabel->Caption = AnsiString(UseRows) + " passed out of " +
                                  AnsiString(Rows);
    AcceptFilterButton->Visible = true;
    StatusBar1->SimpleText = "Done Analyzing Filters";
}
//---------------------------------------------------------------------------
/* Accept results of last filtering */
void __fastcall TMainForm::AcceptFilterButtonClick(TObject *Sender)
{
AcceptFilterButton->Visible = false;
AnsiString *TempID;
AnsiString *TempName;
double *TempOrder;
double *TempWeight;
double **TempData;
bool **TempMask;
TempID = new AnsiString[Rows];
TempName = new AnsiString[Rows];
TempOrder = new double[Rows];
TempWeight = new double[Rows];
TempData = new double*[Rows];
TempMask = new bool*[Rows];
for (int Row=0;Row<Rows;Row++)
{
TempData[Row] = new double[Columns];
TempMask[Row] = new bool[Columns];
for (int Column=0;Column<Columns;Column++)
{
TempData[Row][Column] = GeneData[Row][Column];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -