⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 clustermain.cpp

📁 著名的基因芯片表达数据聚类软件。Eisen 编写
💻 CPP
📖 第 1 页 / 共 5 页
字号:
            for (i=0; i<DataColumns; i++)
            {
                if ( (Mask[elem1][i] == true) && (Mask[elem2][i] == true) )
                {
                    k++;
                    Vector1[k] = Data[elem1][i];
                    Vector2[k] = Data[elem2][i];
                }
            }

            if (k > 1)
            {
                kendl1(Vector1, Vector2, k, &tau, &tauz, &taup);
            }
            else
            {
                tau = 0;
            }

            free_vector(Vector1,1,DataColumns);
            free_vector(Vector2,1,DataColumns);
#endif

            return (unsigned short) (16384.0 * (1.0 - tau));
            break;
    }
}

/*
 * Pearson Correlation
 *
 * Computes a weighted Pearson correlation between rows elem1 and elem2 of
 * Data and returns it as a scaled integer:
 *
 *   Absolute == false :  (unsigned short)(16384.0 * (1.0 - Corr))
 *   Absolute == true  :  (unsigned short)(32768.0 * (1.0 - fabs(Corr)))
 *
 * Parameters
 *   Data, Mask   row-indexed tables; column i of the two rows contributes
 *                only when Mask[elem1][i] and Mask[elem2][i] are both true.
 *   Weight       per-column weight applied to every accumulated term.
 *   Centered     true: subtract the weighted means (Sum/Count) of each row;
 *                false: use means of 0 (uncentered correlation).
 *   Absolute     selects which of the two scalings above is returned.
 *   elem1, elem2 row indices to compare.
 *   DataColumns  number of columns scanned (indices 0 .. DataColumns-1).
 *
 * Special cases
 *   - If the accumulated weight is not positive (e.g. no column is present
 *     in both rows) the function returns 0.
 *   - If the normalisation term is not positive, Corr keeps its default of
 *     -1.0, which yields 32768 when Absolute is false and 0 when it is true.
 *     NOTE(review): those two results sit at opposite ends of the scale;
 *     confirm that this asymmetry is intended.
 */

unsigned short TMainForm::Correlation(double **Data, bool **Mask, double *Weight,
                bool Centered, bool Absolute, int elem1, int elem2, int DataColumns)
{
    double Sum1 = 0, Sum2 = 0;      // weighted sums of each row
    double Sum11 = 0, Sum22 = 0;    // weighted sums of squares
    double Sum12 = 0;               // weighted sum of cross products
    double Count = 0;               // total weight of the columns used

    for (int i = 0; i < DataColumns; i++)
    {
        try
        {
            if (Mask[elem1][i] && Mask[elem2][i])
            {
                const double w  = Weight[i];
                const double wx = w * Data[elem1][i];
                const double wy = w * Data[elem2][i];

                Sum1  += wx;
                Sum2  += wy;
                Sum11 += wx * Data[elem1][i];
                Sum22 += wy * Data[elem2][i];
                Sum12 += wx * Data[elem2][i];
                Count += w;
            }
        }
        catch (Exception &)
        {
            // NOTE(review): only Sum1 is reset here and the other
            // accumulators are left as they were; kept as-is because the
            // intended recovery is not evident from this file.
            Sum1 = 0;
        }
    }

    // Written as !(Count > 0) so that a NaN total also takes this exit.
    if (!(Count > 0))
    {
        return 0;
    }

    double Ave1 = 0;
    double Ave2 = 0;

    if (Centered)
    {
        Ave1 = Sum1 / Count;
        Ave2 = Sum2 / Count;
    }

    // Norm starts at 0 so that, should the guarded computation below be
    // abandoned by an exception, the test that follows reads a defined
    // value (and leaves Corr at its default) instead of an indeterminate one.
    double Norm = 0.0;

    try
    {
        // max(0.0, ...) keeps rounding error from producing a negative
        // argument to sqrt.
        Norm = sqrt(
             max(0.0, Sum11 - Count * Ave1 * Ave1)
             *
             max(0.0, Sum22 - Count * Ave2 * Ave2)
             );
    }
    catch (Exception &)
    {
        // Norm keeps the value 0 assigned above.
    }

    double Corr = -1.0;

    if (Norm > 0)
    {
        Corr = (Sum12 - Count * Ave1 * Ave2) / Norm;
    }

    if (Absolute)
    {
        return (unsigned short) (32768.0 * (1.0 - fabs(Corr)));
    }

    return (unsigned short) (16384.0 * (1.0 - Corr));
}

/*
 * Pearson Correlation without Centered and Absolute options
 *
 * Computes a weighted correlation between rows elem1 and elem2 of Data and
 * returns the raw coefficient as a float (no integer scaling).
 *
 * Centered is fixed to false inside the function, so both means are taken
 * as 0 and the result is the uncentered correlation.
 *
 * Parameters
 *   Data, Mask   row-indexed tables; column i of the two rows contributes
 *                only when Mask[elem1][i] and Mask[elem2][i] are both true.
 *   Weight       per-column weight applied to every accumulated term.
 *   elem1, elem2 row indices to compare.
 *   DataColumns  number of columns scanned (indices 0 .. DataColumns-1).
 *
 * Special cases
 *   - If the accumulated weight is not positive (e.g. no column is present
 *     in both rows) the function returns 0.
 *   - If either row's norm is not positive, the default of -1.0 is returned.
 */

float TMainForm::Correlation(double **Data, bool **Mask, double *Weight,
                int elem1, int elem2, int DataColumns)
{
    const bool Centered = false;    // this overload never centers the data

    double Sum1 = 0, Sum2 = 0;      // weighted sums of each row
    double Sum11 = 0, Sum22 = 0;    // weighted sums of squares
    double Sum12 = 0;               // weighted sum of cross products
    double Count = 0;               // total weight of the columns used

    for (int i = 0; i < DataColumns; i++)
    {
        try
        {
            if (Mask[elem1][i] && Mask[elem2][i])
            {
                const double w  = Weight[i];
                const double wx = w * Data[elem1][i];
                const double wy = w * Data[elem2][i];

                Sum1  += wx;
                Sum2  += wy;
                Sum11 += wx * Data[elem1][i];
                Sum22 += wy * Data[elem2][i];
                Sum12 += wx * Data[elem2][i];
                Count += w;
            }
        }
        catch (Exception &)
        {
            // NOTE(review): only Sum1 is reset here and the other
            // accumulators are left as they were; kept as-is because the
            // intended recovery is not evident from this file.
            Sum1 = 0;
        }
    }

    // Written as !(Count > 0) so that a NaN total also takes this exit.
    if (!(Count > 0))
    {
        return 0;
    }

    double Ave1 = 0;
    double Ave2 = 0;

    if (Centered)
    {
        Ave1 = Sum1 / Count;
        Ave2 = Sum2 / Count;
    }

    // Both norms start at 0 so that, should the guarded computation below
    // be abandoned by an exception, the test that follows reads defined
    // values (and leaves Corr at its default) instead of indeterminate ones.
    double Norm1 = 0.0;
    double Norm2 = 0.0;

    try
    {
        // max(0.0, ...) keeps rounding error from producing a negative
        // argument to sqrt.
        Norm1 = sqrt(max(0.0, Sum11 - 2 * Ave1 * Sum1 + Count * Ave1 * Ave1));
        Norm2 = sqrt(max(0.0, Sum22 - 2 * Ave2 * Sum2 + Count * Ave2 * Ave2));
    }
    catch (Exception &)
    {
        // Any norm not yet computed keeps the value 0 assigned above.
    }

    float Corr = -1.0;

    if ( (Norm1 > 0) && (Norm2 > 0) )
    {
        Corr = (Sum12 - Sum1 * Ave2 - Sum2 * Ave1 + Count * Ave1 * Ave2)
                / (Norm1 * Norm2);
    }

    return Corr;
}

/* Button Pressed for Average Linkage Hierarchical Clustering: Setup and Execute */

void __fastcall TMainForm::AverageLinkageClusterButtonClick(TObject *Sender)
{
    int i,j;
    StatusBar1->SimpleText = "Initializing";

    // Get ClusterID for file names from user input

    JobName = AnsiString(ClusterName);

    if (JobNameEdit->Text.Length() > 0)
    {
        JobName = JobNameEdit->Text;
    }

    bool ClusterGenes = ClusterGenesCheckBox->Checked;
    bool ClusterArrays = ClusterArraysCheckBox->Checked;
    bool CalculateGeneWeights = CalculateGeneWeightsCheckBox->Checked;
    bool CalculateArrayWeights = CalculateArrayWeightsCheckBox->Checked;

    int *ColumnOrder = new int[Columns];
    AnsiString *ArrayID = new AnsiString[2*Columns-1];

    for (i=0;i<Columns;i++)
    {
        ColumnOrder[i] = i;
        ArrayID[i] = "ARRY" + AnsiString(i) + "X";
    }

    if (CalculateGeneWeights == true)
    {

        // Generate array data structures
        // This is a wasteful way of doing it, but makes
        // the coding easier for me

        double **ArrayData = new double*[2*Columns-1];
        bool   **ArrayMask = new bool*[2*Columns-1];

        for (i=0;i<2*Columns-1;i++)
        {
            ArrayData[i] = new double[Rows];
            ArrayMask[i] = new bool[Rows];
            if (i<Columns)
            {
               for (j=0;j<Rows;j++)
               {
                   ArrayData[i][j] = GeneData[j][i];
                   ArrayMask[i][j] = GeneMask[j][i];
               }
            }
        }

        TStringList *ArrayTreeFile = new TStringList();
        TStringList *ArrayClusterOrder = new TStringList();

        Cluster(ArrayData,ArrayMask,Columns,Rows,
            ArrayTreeFile,ArrayOrder,ArrayWeight,GeneWeight,ArrayID,
            ArrayMetricComboBox->ItemIndex,
            true,ArrayWeightCutoff,ArrayWeightExp,
            ArrayClusterOrder,StatusBar1,false,NULL);

        delete ArrayClusterOrder;

        for (i=0;i<2*Columns-1;i++)
        {
            delete ArrayData[i];
            delete ArrayMask[i];
        }

        delete ArrayData;
        delete ArrayMask;
    }

    int *RowOrder = new int[Rows];

    AnsiString *GeneID = new AnsiString[2*Rows-1];

    for (i=0;i<Rows;i++)
    {
        RowOrder[i] = i;
        GeneID[i] = AnsiString("GENE") + AnsiString(i) + AnsiString("X");
    }

    if (ClusterGenes == true)
    {
        TStringList *GeneTreeFile = new TStringList();
        TStringList *GeneClusterOrder = new TStringList();

        if (CalculateArrayWeights == true)
        {
            Cluster(GeneData,GeneMask,Rows,Columns,
                GeneTreeFile,GeneOrder,GeneWeight, ArrayWeight,GeneID,
                GeneMetricComboBox->ItemIndex,
                true,GeneWeightCutoff,GeneWeightExp,
                GeneClusterOrder,StatusBar1,false,NULL);
        }
        else
        {
            double *NullGeneWeight = new double[Rows];

            Cluster(GeneData,GeneMask,Rows,Columns,
                GeneTreeFile,GeneOrder,NullGeneWeight,ArrayWeight,GeneID,
                GeneMetricComboBox->ItemIndex,
                false,1.0,1.0,
                GeneClusterOrder,StatusBar1,false,NULL);

            delete NullGeneWeight;
        }

        for (i=0;i<GeneClusterOrder->Count;i++)
        {
            RowOrder[i] = GeneClusterOrder->Strings[i].ToInt();
        }

        AnsiString TreeFileName = JobName + ".gtr";
        GeneTreeFile->SaveToFile(TreeFileName);
    }

    if (ClusterArrays == true)
    {

        // Generate array data structures
        // This is a wasteful way of doing it, but makes
        // the coding easier for me

        double **ArrayData = new double*[2*Columns-1];
        bool   **ArrayMask = new bool*[2*Columns-1];

        for (i=0;i<2*Columns-1;i++)
        {
            ArrayData[i] = new double[Rows];
            ArrayMask[i] = new bool[Rows];
            if (i<Columns)
            {
            for (j=0;j<Rows;j++)
            {
                ArrayData[i][j] = GeneData[j][i];
                ArrayMask[i][j] = GeneMask[j][i];
            }
            }
        }

        TStringList *ArrayTreeFile = new TStringList();
        TStringList *ArrayClusterOrder = new TStringList();

        Cluster(ArrayData,ArrayMask,Columns,Rows,
            ArrayTreeFile,ArrayOrder,ArrayWeight,GeneWeight,ArrayID,
            ArrayMetricComboBox->ItemIndex,
            false,1.0,1.0,ArrayClusterOrder,StatusBar1,false,NULL);

        for (i=0;i<ArrayClusterOrder->Count;i++)
        {
            ColumnOrder[i] = ArrayClusterOrder->Strings[i].ToInt();
        }

        delete ArrayClusterOrder;

        for (i=0;i<2*Columns-1;i++)
        {
            delete ArrayData[i];
            delete ArrayMask[i];
        }

        delete ArrayData;
        delete ArrayMask;

        AnsiString ArrayTreeFileName = JobName + ".atr";
        ArrayTreeFile->SaveToFile(ArrayTreeFileName);
        delete ArrayTreeFile;
    }

    AnsiString OutString;

    TStringList *DataFile = new TStringList();

    // Now make output .cdt file

    OutString = "";

    if (ClusterGenes == true)
    {
        OutString += AnsiString("GID") + AnsiString("\t");
    }

    OutString += Headers->Strings[0] + AnsiString("\t");
    OutString += AnsiString("NAME") + AnsiString("\t");
    OutString += "GWEIGHT";

    // Now add headers for data columns

    for (i=0;i<Columns;i++)
    {
        OutString += "\t";
        OutString += AnsiString(Headers->Strings[InColumn[ColumnOrder[i]]]);
    }

    DataFile->Add(OutString);

    if (ClusterArrays == true)
    {
        OutString = AnsiString("AID");
        if (ClusterGenes == true)
        {
            OutString += AnsiString("\t");
        }

        OutString += AnsiString("\t");
        OutString += AnsiString("\t");

        for (i=0;i<Columns;i++)
        {
            OutString += "\t";
            OutString += ArrayID[ColumnOrder[i]];
        }

        DataFile->Add(OutString);
    }


    {
        OutString = AnsiString("EWEIGHT");
        if (ClusterGenes == true)
        {
            OutString += AnsiString("\t");
        }

        OutString += AnsiString("\t");
        OutString += AnsiString("\t");

        for (i=0;i<Columns;i++)
        {
            OutString += "\t";
            OutString += ArrayWeight[ColumnOrder[i]];
        }
    }

    DataFile->Add(OutString);

    int index;

    TFloatFormat Format = ffGeneral;

    for (i=0;i<Rows;i++)
    {
        index = RowOrder[i];

        OutString = "";
        if (ClusterGenes == true)
        {
            OutString += GeneID[index] + "\t";
        }

        OutString += AnsiString(UniqID[index]) + "\t" + AnsiString(GeneName[index]);
        OutString += "\t" + AnsiString(GeneWeight[index]);

        for (j=0;j<Columns;j++)
        {
            if (GeneMask[index][ColumnOrder[j]] == true)
            {
                OutString += "\t" + AnsiString(FloatToStrF(GeneData[index][ColumnOrder[j]],Format,4,2));
            }
            else
            {
                OutString += "\t";
            }
        }
        DataFile->Add(OutString);
    }

    AnsiString DataFileName = JobName + ".cdt";
    DataFile->SaveToFile(DataFileName);

    for (i=0;i<2*Columns-1;i++)
    {
        ArrayID[i] = "";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -