alnvec.cpp

来自「ncbi源码」· C++ 代码 · 共 1,047 行 · 第 1/3 页

CPP
1,047
字号
    TSignedSeqPos start1, start2;    string        buff1, buff2;    bool          isAA1, isAA2;    int           score = 0;    TSeqPos       len;        isAA1 = GetBioseqHandle(row1).GetBioseqCore()        ->GetInst().GetMol() == CSeq_inst::eMol_aa;    isAA2 = GetBioseqHandle(row2).GetBioseqCore()        ->GetInst().GetMol() == CSeq_inst::eMol_aa;    CSeqVector&   seq_vec1 = x_GetSeqVector(row1);    TSeqPos       size1    = seq_vec1.size();    CSeqVector &  seq_vec2 = x_GetSeqVector(row2);    TSeqPos       size2    = seq_vec2.size();    for (TNumseg seg = 0; seg < m_NumSegs; seg++) {        start1 = m_Starts[index1];        start2 = m_Starts[index2];        if (start1 >=0  &&  start2 >= 0) {            len = m_Lens[seg];            if (IsPositiveStrand(row1)) {                seq_vec1.GetSeqData(start1,                                    start1 + len,                                    buff1);            } else {                seq_vec1.GetSeqData(size1 - (start1 + len),                                    size1 - start1,                                    buff1);            }            if (IsPositiveStrand(row2)) {                seq_vec2.GetSeqData(start2,                                    start2 + len,                                    buff2);            } else {                seq_vec2.GetSeqData(size2 - (start2 + len),                                    size2 - start2,                                    buff2);            }            score += CalculateScore(buff1, buff2, isAA1, isAA2);        }        index1 += numrows;        index2 += numrows;    }    return score;}string& CAlnVec::GetColumnVector(string& buffer,                                 TSeqPos aln_pos,                                 TResidueCount * residue_count,                                 bool gaps_in_count) const{    if (aln_pos > GetAlnStop()) {        aln_pos = GetAlnStop(); // out-of-range adjustment    }    TNumseg seg   = GetSeg(aln_pos);    TSeqPos delta = aln_pos - GetAlnStart(seg);    TSeqPos len   = GetLen(seg);    TSignedSeqPos pos;    for (TNumrow row = 0; row < m_NumRows; row++) {        pos = GetStart(row, seg);        if (pos >= 0) {            // it's a sequence residue            bool plus = IsPositiveStrand(row);            if (plus) {                pos += delta;            } else {                pos += len - 1 - delta;            }                        CSeqVector& seq_vec = x_GetSeqVector(row);            if (GetWidth(row) == 3) {                string na_buff, aa_buff;                if (plus) {                    seq_vec.GetSeqData(pos, pos + 3, na_buff);                } else {                    TSeqPos size = seq_vec.size();                    seq_vec.GetSeqData(size - pos - 3, size - pos, na_buff);                }                TranslateNAToAA(na_buff, aa_buff);                buffer[row] = aa_buff[0];            } else {                buffer[row] = seq_vec[plus ? pos : seq_vec.size() - pos - 1];            }            if (residue_count) {                (*residue_count)[FromIupac(buffer[row])]++;            }        } else {            // it's a gap or endchar                        if (GetEndChar() != (buffer[row] = GetGapChar(row))) {                // need to check the where the segment is                // only if endchar != gap                // this saves a check if there're the same                TSegTypeFlags type = GetSegType(row, seg);                if (type & fNoSeqOnLeft  ||  type & fNoSeqOnRight) {                    buffer[row] = GetEndChar();                }            }            if (gaps_in_count) {                (*residue_count)[FromIupac(buffer[row])]++;            }        }    } // for row    return buffer;}int CAlnVec::CalculatePercentIdentity(TSeqPos aln_pos) const{    string column;    column.resize(m_NumRows);    TResidueCount residue_cnt;    residue_cnt.resize(16, 0);    GetColumnVector(column, aln_pos, &residue_cnt);        int max = 0, total = 0;    ITERATE (TResidueCount, i_res, residue_cnt) {        if (*i_res > max) {            max = *i_res;        }        total += *i_res;    }    return 100 * max / total;}END_objects_SCOPE // namespace ncbi::objects::END_NCBI_SCOPE/** ===========================================================================** $Log: alnvec.cpp,v $* Revision 1000.2  2004/06/01 19:40:49  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.59** Revision 1.59  2004/05/21 21:42:51  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.58  2004/03/30 16:39:09  todorov* -redundant statement** Revision 1.57  2003/12/22 19:14:12  todorov* Only left constructors that accept CScope&** Revision 1.56  2003/12/22 18:30:37  todorov* ObjMgr is no longer created internally. Scope should be passed as a reference in the ctor** Revision 1.55  2003/12/18 19:47:51  todorov* + GetColumnVector & CalculatePercentIdentity** Revision 1.54  2003/12/10 17:17:39  todorov* CalcScore now const** Revision 1.53  2003/10/21 14:41:35  grichenk* Fixed type convertion** Revision 1.52  2003/09/26 16:58:34  todorov* Fixed the length of c_buff** Revision 1.51  2003/09/23 21:29:34  todorov* Rearranged variables & fixed a bug** Revision 1.50  2003/09/23 18:37:24  todorov* bug fix in GetWholeAlnSeqString** Revision 1.49  2003/09/22 21:00:16  todorov* Consolidated adjacent inserts in GetWholeAlnSeqString** Revision 1.48  2003/09/22 19:03:30  todorov* Use the new x_GetSeq{Left,Right}Seg methods** Revision 1.47  2003/09/17 15:48:06  jianye* Added missing [] when de-allocating c_buff** Revision 1.46  2003/09/17 14:46:39  todorov* Performance optimization: Use char * instead of string** Revision 1.45  2003/09/12 16:16:50  todorov* TSeqPos->TSignedSeqPos bug fix** Revision 1.44  2003/08/29 18:18:58  dicuccio* Changed CreateConsensus() API to return a new dense-seg instead of altering the* current alignment manager** Revision 1.43  2003/08/27 21:19:55  todorov* using raw_scoremat.h** Revision 1.42  2003/08/25 16:34:59  todorov* exposed GetWidth** Revision 1.41  2003/08/20 17:50:52  todorov* resize + direct string access rather than appending** Revision 1.40  2003/08/20 17:23:54  ucko* TranslateNAToAA: append to strings with += rather than push_back* (which MSVC lacks); fix a typo while I'm at it.** Revision 1.39  2003/08/20 14:34:58  todorov* Support for NA2AA Densegs** Revision 1.38  2003/07/23 20:26:14  todorov* fixed an unaligned pieces coords problem in GetWhole..** Revision 1.37  2003/07/23 20:24:39  todorov* +aln_starts for the inserts in GetWhole...** Revision 1.36  2003/07/22 19:18:37  todorov* fixed a 1st seg check in GetWhole...** Revision 1.35  2003/07/21 21:29:39  todorov* cleaned an expression** Revision 1.34  2003/07/21 17:08:50  todorov* fixed calc of remaining nscrns in GetWhole...** Revision 1.33  2003/07/18 22:12:51  todorov* Fixed an anchor bug in GetWholeAlnSeqString** Revision 1.32  2003/07/17 22:45:56  todorov* +GetWholeAlnSeqString** Revision 1.31  2003/07/15 21:13:54  todorov* rm bioseq_handle ref** Revision 1.30  2003/07/15 20:54:01  todorov* exception type fixed** Revision 1.29  2003/07/15 20:46:09  todorov* Exception if bioseq handle is null** Revision 1.28  2003/06/05 19:03:12  todorov* Added const refs to Dense-seg members as a speed optimization** Revision 1.27  2003/06/02 16:06:40  dicuccio* Rearranged src/objects/ subtree.  This includes the following shifts:*     - src/objects/asn2asn --> arc/app/asn2asn*     - src/objects/testmedline --> src/objects/ncbimime/test*     - src/objects/objmgr --> src/objmgr*     - src/objects/util --> src/objmgr/util*     - src/objects/alnmgr --> src/objtools/alnmgr*     - src/objects/flat --> src/objtools/flat*     - src/objects/validator --> src/objtools/validator*     - src/objects/cddalignview --> src/objtools/cddalignview* In addition, libseq now includes six of the objects/seq... libs, and libmmdb* replaces the three libmmdb? libs.** Revision 1.26  2003/04/24 16:15:57  vasilche* Added missing includes and forward class declarations.** Revision 1.25  2003/04/15 14:21:27  vasilche* Added missing include file.** Revision 1.24  2003/03/29 07:07:31  todorov* deallocation bug fixed** Revision 1.23  2003/03/05 16:18:17  todorov* + str len err check** Revision 1.22  2003/02/11 21:32:44  todorov* fMinGap optional merging algorithm** Revision 1.21  2003/01/29 20:54:37  todorov* CalculateScore speed optimization** Revision 1.20  2003/01/27 22:48:41  todorov* Changed CreateConsensus accordingly too** Revision 1.19  2003/01/27 22:30:30  todorov* Attune to seq_vector interface change** Revision 1.18  2003/01/23 21:31:08  todorov* Removed the original, inefficient GetXXXString methods** Revision 1.17  2003/01/23 16:31:34  todorov* Added calc score methods** Revision 1.16  2003/01/17 19:25:04  ucko* Clear buffer with erase(), as G++ 2.9x lacks string::clear.** Revision 1.15  2003/01/17 18:16:53  todorov* Added a better-performing set of GetXXXString methods** Revision 1.14  2003/01/16 20:46:17  todorov* Added Gap/EndChar set flags** Revision 1.13  2003/01/08 16:50:56  todorov* Fixed TGetChunkFlags in GetAlnSeqString** Revision 1.12  2002/11/04 21:29:08  grichenk* Fixed usage of const CRef<> and CRef<> constructor** Revision 1.11  2002/10/21 19:15:20  todorov* added GetAlnSeqString** Revision 1.10  2002/10/08 18:03:15  todorov* added the default m_EndChar value** Revision 1.9  2002/10/01 14:13:22  dicuccio* Added handling of strandedness in creation of consensus sequence.** Revision 1.8  2002/09/25 20:20:24  todorov* x_GetSeqVector uses the strand info now** Revision 1.7  2002/09/25 19:34:54  todorov* "un-inlined" x_GetSeqVector** Revision 1.6  2002/09/25 18:16:29  dicuccio* Reworked computation of consensus sequence - this is now stored directly* in the underlying CDense_seg* Added exception class; currently used only on access of non-existent* consensus.** Revision 1.5  2002/09/19 18:24:15  todorov* New function name for GetSegSeqString to avoid confusion** Revision 1.4  2002/09/19 17:40:16  todorov* fixed m_Anchor setting in case of consensus** Revision 1.3  2002/09/05 19:30:39  dicuccio* - added ability to reference a consensus sequence for a given alignment* - added caching for CSeqVector objects (big performance gain)* - many small bugs fixed** Revision 1.2  2002/08/29 18:40:51  dicuccio* added caching mechanism for CSeqVector - this greatly improves speed in* accessing sequence data.** Revision 1.1  2002/08/23 14:43:52  ucko* Add the new C++ alignment manager to the public tree (thanks, Kamen!)*** ===========================================================================*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?