flat_gff_formatter.cpp

来自「ncbi源码」· C++ 代码 · 共 436 行 · 第 1/2 页

CPP
436
字号
                    (s, m_Context->GetHandle(), *tentative_stop, cds);                if (s == "*") {                    x_AddFeature(l, *tentative_stop, source, "stop_codon",                                 "." /* score */, 0, attrs, gtf);                }            }        }    }    m_Stream->AddParagraph(l, &f, &seqfeat);}void CFlatGFFFormatter::FormatDataHeader(const CFlatDataHeader& dh){    if ( !(m_GFFFlags & fShowSeq) )        return;    list<string> l;    l.push_back("##" + m_SeqType + ' ' + m_Context->GetAccession());    m_Stream->AddParagraph(l, &dh);    m_EndSequence = "##end-" + m_SeqType;}void CFlatGFFFormatter::FormatData(const CFlatData& data){    if ( !(m_GFFFlags & fShowSeq) )        return;    list<string> l;    CSeqVector v = m_Context->GetHandle().GetSequenceView        (data.GetLoc(), CBioseq_Handle::eViewConstructed,         CBioseq_Handle::eCoding_Iupac);    CSeqVector_CI vi(v);    while (vi) {        string s;        vi.GetSeqData(s, 70);        l.push_back("##" + s);    }    m_Stream->AddParagraph(l, &data, &data.GetLoc());}void CFlatGFFFormatter::EndSequence(void){    if ( !m_EndSequence.empty() ) {        list<string> l;        l.push_back(m_EndSequence);        m_Stream->AddParagraph(l);    }}string CFlatGFFFormatter::x_GetGeneID(const CFlatFeature& feat,                                      const string& gene){    const CSeq_feat& seqfeat = feat.GetFeat();    string               main_acc;    if (m_Context->InSegSet()) {        const CSeq_id& id = *m_Context->GetSegMaster()->GetId().front();        main_acc = m_Context->GetPreferredSynonym(id).GetSeqIdString(true);    } else {        main_acc = m_Context->GetAccession();    }    string               gene_id   = main_acc + ':' + gene;    CConstRef<CSeq_feat> gene_feat = sequence::GetBestOverlappingFeat        (seqfeat.GetLocation(), CSeqFeatData::e_Gene,         sequence::eOverlap_Interval, *m_Scope);        TFeatVec&                v  = m_Genes[gene_id];    TFeatVec::const_iterator it = find(v.begin(), v.end(), gene_feat);    int                      n;    if (it == v.end()) {        n = v.size();        v.push_back(gene_feat);    } else {        n = it - v.begin();    }    if (n > 0) {        gene_id += '.' + NStr::IntToString(n + 1);    }    return gene_id;}string CFlatGFFFormatter::x_GetSourceName(const IFlattishFeature&){    // XXX - get from annot name (not presently available from IFF)?    switch (m_Context->GetPrimaryID().Which()) {    case CSeq_id::e_Local:                           return "Local";    case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt:    case CSeq_id::e_Giim:   case CSeq_id::e_Gi:      return "GenInfo";    case CSeq_id::e_Genbank:                         return "Genbank";    case CSeq_id::e_Swissprot:                       return "SwissProt";    case CSeq_id::e_Patent:                          return "Patent";    case CSeq_id::e_Other:                           return "RefSeq";    case CSeq_id::e_General:        return m_Context->GetPrimaryID().GetGeneral().GetDb();    default:    {        string source            (CSeq_id::SelectionName(m_Context->GetPrimaryID().Which()));        return NStr::ToUpper(source);    }    }}void CFlatGFFFormatter::x_AddFeature(list<string>& l, const CSeq_loc& loc,                                     const string& source, const string& key,                                     const string& score, int frame,                                     const string& attrs, bool gtf){    int exon_number = 1;    for (CSeq_loc_CI it(loc);  it;  ++it) {        TSeqPos from   = it.GetRange().GetFrom(), to = it.GetRange().GetTo();        char    strand = '+';        if (IsReverse(it.GetStrand())) {            strand = '-';        } else if (it.GetRange().IsWhole()                   ||  (m_Strandedness <= CSeq_inst::eStrand_ss                        &&  m_Context->GetMol() != CSeq_inst::eMol_dna)) {            strand = '.'; // N/A        }        if (it.GetRange().IsWhole()) {            to = sequence::GetLength(it.GetSeq_id(), m_Scope) - 1;        }        string extra_attrs;        if (gtf  &&  attrs.find("exon_number ") == NPOS) {            CSeq_loc       loc2;            CSeq_interval& si = loc2.SetInt();            si.SetFrom(from);            si.SetTo(to);            si.SetStrand(it.GetStrand());            si.SetId(const_cast<CSeq_id&>(it.GetSeq_id()));            CConstRef<CSeq_feat> exon = sequence::GetBestOverlappingFeat                (loc2, CSeqFeatData::eSubtype_exon,                 sequence::eOverlap_Contains, *m_Scope);            if (exon.NotEmpty()  &&  exon->IsSetQual()) {                ITERATE (CSeq_feat::TQual, q, exon->GetQual()) {                    if ( !NStr::CompareNocase((*q)->GetQual(), "number") ) {                        int n = NStr::StringToNumeric((*q)->GetVal());                        if (n >= exon_number) {                            exon_number = n;                            break;                        }                    }                }            }            extra_attrs = " exon_number \"" + NStr::IntToString(exon_number)                + "\";";            ++exon_number;        }        if ( sequence::IsSameBioseq(it.GetSeq_id(), m_Context->GetPrimaryID(),                                    m_Scope) ) {            // conditionalize printing, but update state regardless            l.push_back(m_Context->GetAccession() + '\t'                        + source + '\t'                        + key + '\t'                        + NStr::UIntToString(from + 1) + '\t'                        + NStr::UIntToString(to + 1) + '\t'                        + score + '\t'                        + strand + '\t'                        + (frame >= 0 ? char(frame + '0') : '.') + "\t"                        + attrs + extra_attrs);        }        if (frame >= 0) {            frame = (frame + to - from + 1) % 3;        }    }}END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: flat_gff_formatter.cpp,v $* Revision 1000.3  2004/06/01 19:43:14  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6** Revision 1.6  2004/05/21 21:42:53  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.5  2003/12/03 20:53:53  ucko* Also quote #s in values to avoid trouble with naive comment recognizers.** Revision 1.4  2003/11/04 20:00:28  ucko* Edit " \b" sequences (used as hints for wrapping) out from qualifier values** Revision 1.3  2003/10/18 01:36:34  ucko* Tweak to work around MSVC stupidity.** Revision 1.2  2003/10/17 21:06:35  ucko* Reworked GTF mode per Wratko's critique:*  - Now handles multi-exonic(!) start and stop codons.*  - Treats all RNA features on DNA as exons.*  - Sets exon_number attribute for GTF 1 compatibility.*  - Quotes other attributes better.** Revision 1.1  2003/10/08 21:11:45  ucko* New GFF/GTF formatter*** ===========================================================================*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?