gff_formatter.cpp

来自「ncbi源码」· C++ 代码 · 共 628 行 · 第 1/2 页

CPP
628
字号
        return;    CBioseqContext& ctx = *seq.GetContext();    list<string> l;    CSeqVector v = seq.GetSequence();    v.SetCoding(CBioseq_Handle::eCoding_Iupac);    CSeqVector_CI vi(v);    string s;    while (vi) {        s.erase();        vi.GetSeqData(s, 70);        l.push_back("##" + s);    }    text_os.AddParagraph(l, ctx.GetHandle().GetCompleteBioseq());}// Privatestring CGFFFormatter::x_GetGeneID(const CFlatFeature& feat, const string& gene, CBioseqContext& ctx) const{    const CSeq_feat& seqfeat = feat.GetFeat();    string               main_acc;    if (ctx.IsPart()) {        const CSeq_id& id = *(ctx.GetMaster().GetHandle().GetSeqId());        main_acc = ctx.GetPreferredSynonym(id).GetSeqIdString(true);    } else {        main_acc = ctx.GetAccession();    }    string               gene_id   = main_acc + ':' + gene;    CConstRef<CSeq_feat> gene_feat = sequence::GetBestOverlappingFeat        (seqfeat.GetLocation(), CSeqFeatData::e_Gene,         sequence::eOverlap_Interval, ctx.GetScope());    return gene_id;}string CGFFFormatter::x_GetTranscriptID(const CFlatFeature& feat, const string& gene_id, CBioseqContext& ctx) const{    const CSeq_feat& seqfeat = feat.GetFeat();    // if our feature already is an mRNA, we need look no further    CConstRef<CSeq_feat> mrna_feat;    if (seqfeat.GetData().GetSubtype() == CSeqFeatData::eSubtype_mRNA) {        mrna_feat.Reset(&seqfeat);    } else {        // search for a best overlapping mRNA        // we start with a scan through the product accessions because we need        // to insure that the chosen transcript does indeed match what we want        if (seqfeat.IsSetProduct()) {            try {                // this may throw, if the product spans multiple sequences                // this would be extremely unlikely, but we catch anyway                const CSeq_id& product_id =                    sequence::GetId(seqfeat.GetProduct());                SAnnotSelector sel;                sel.SetOverlapIntervals()                    .ExcludeNamedAnnots("SNP")                    .SetResolveAll()                    .SetFeatSubtype(CSeqFeatData::eSubtype_mRNA);                CFeat_CI feat_iter(ctx.GetScope(), seqfeat.GetLocation(), sel);                for ( ;  feat_iter  &&  !mrna_feat;  ++feat_iter) {                    // we grab the mRNA product, if available, and scan it for                    // a CDS feature.  the CDS feature should point to the same                    // product as our current feature.                    const CSeq_feat& mrna = feat_iter->GetOriginalFeature();                    if ( !mrna.IsSetProduct() ) {                        continue;                    }                    // make sure the feature contains our feature of interest                    sequence::ECompare comp =                        sequence::Compare(mrna.GetLocation(),                                          seqfeat.GetLocation());                    if (comp != sequence::eContains  &&                        comp != sequence::eSame) {                        continue;                    }                    CBioseq_Handle handle =                        ctx.GetScope().GetBioseqHandle(mrna.GetProduct());                    if ( !handle ) {                        continue;                    }                    SAnnotSelector cds_sel(sel);                    cds_sel.SetFeatSubtype(CSeqFeatData::eSubtype_cdregion);                    CFeat_CI other_iter(ctx.GetScope(),                                        mrna.GetProduct(),                                        cds_sel);                    for ( ;  other_iter  &&  !mrna_feat;  ++other_iter) {                        const CSeq_feat& cds = other_iter->GetOriginalFeature();                        if ( !cds.IsSetProduct() ) {                            continue;                        }                        CBioseq_Handle prot_handle =                            ctx.GetScope().GetBioseqHandle(cds.GetProduct());                        if ( !prot_handle ) {                            continue;                        }                        if (prot_handle.IsSynonym(product_id)) {                            // got it!                            mrna_feat.Reset(&mrna);                            break;                        }                    }                }            }            catch (...) {            }        }        //        // try to find the best by overlaps alone        //        if ( !mrna_feat ) {            mrna_feat = sequence::GetBestOverlappingFeat                (seqfeat.GetLocation(), CSeqFeatData::eSubtype_mRNA,                 sequence::eOverlap_CheckIntervals, ctx.GetScope());        }    }    //    // check if the mRNA feature we found has a product    //    if (mrna_feat.GetPointer()  &&  mrna_feat->IsSetProduct()) {        try {            const CSeq_id& id = sequence::GetId(mrna_feat->GetProduct());            string transcript_id =                ctx.GetPreferredSynonym(id).GetSeqIdString(true);            return transcript_id;        }        catch (...) {        }    }    //    // nothing found, so fake it    //    // failed to get transcript id, so we fake a globally unique one based    // on the gene id    m_Transcripts[gene_id].push_back(CConstRef<CSeq_feat>(&seqfeat));    string transcript_id = gene_id;    transcript_id += ":unknown_transcript_";    transcript_id += NStr::IntToString(m_Transcripts[gene_id].size());    return transcript_id;}string CGFFFormatter::x_GetSourceName(CBioseqContext& ctx) const{    // XXX - get from annot name (not presently available from IFF)?    switch ( ctx.GetPrimaryId()->Which() ) {    case CSeq_id::e_Local:                           return "Local";    case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt:    case CSeq_id::e_Giim:   case CSeq_id::e_Gi:      return "GenInfo";    case CSeq_id::e_Genbank:                         return "Genbank";    case CSeq_id::e_Swissprot:                       return "SwissProt";    case CSeq_id::e_Patent:                          return "Patent";    case CSeq_id::e_Other:                           return "RefSeq";    case CSeq_id::e_General:        return ctx.GetPrimaryId()->GetGeneral().GetDb();    default:    {        string source            (CSeq_id::SelectionName(ctx.GetPrimaryId()->Which()));        return NStr::ToUpper(source);    }    }}void CGFFFormatter::x_AddFeature(list<string>& l, const CSeq_loc& loc, const string& source, const string& key, const string& score, int frame, const string& attrs, bool gtf, CBioseqContext& ctx, bool tentative_stop) const{        int num_exons = 0;    for (CSeq_loc_CI it(loc);  it;  ++it) {        ++num_exons;    }    int exon_number = 1;    for (CSeq_loc_CI it(loc);  it;  ++it) {        TSeqPos from   = it.GetRange().GetFrom(), to = it.GetRange().GetTo();        char    strand = '+';        if (IsReverse(it.GetStrand())) {            strand = '-';        } else if (it.GetRange().IsWhole()                   ||  (m_Strandedness <= CSeq_inst::eStrand_ss                        &&  ctx.GetMol() != CSeq_inst::eMol_dna)) {            strand = '.'; // N/A        }        if (it.GetRange().IsWhole()) {            to = sequence::GetLength(it.GetSeq_id(), &ctx.GetScope()) - 1;        }        if ( tentative_stop  &&  (exon_number == num_exons) ) {            to -= 3;        }        string extra_attrs;        if (gtf  &&  attrs.find("exon_number ") == NPOS) {            CSeq_loc       loc2;            CSeq_interval& si = loc2.SetInt();            si.SetFrom(from);            si.SetTo(to);            si.SetStrand(it.GetStrand());            si.SetId(const_cast<CSeq_id&>(it.GetSeq_id()));            CConstRef<CSeq_feat> exon = sequence::GetBestOverlappingFeat                (loc2, CSeqFeatData::eSubtype_exon,                 sequence::eOverlap_Contains, ctx.GetScope());            if (exon.NotEmpty()  &&  exon->IsSetQual()) {                ITERATE (CSeq_feat::TQual, q, exon->GetQual()) {                    if ( !NStr::CompareNocase((*q)->GetQual(), "number") ) {                        int n = NStr::StringToNumeric((*q)->GetVal());                        if (n >= exon_number) {                            exon_number = n;                            break;                        }                    }                }            }            extra_attrs = " exon_number \"" + NStr::IntToString(exon_number)                + "\";";            ++exon_number;        }        if ( sequence::IsSameBioseq(it.GetSeq_id(), *ctx.GetPrimaryId(),                                    &ctx.GetScope()) ) {            // conditionalize printing, but update state regardless            l.push_back(ctx.GetAccession() + '\t'                        + source + '\t'                        + key + '\t'                        + NStr::UIntToString(from + 1) + '\t'                        + NStr::UIntToString(to + 1) + '\t'                        + score + '\t'                        + strand + '\t'                        + (frame >= 0 ? char(frame + '0') : '.') + "\t"                        + attrs + extra_attrs);        }        if (frame >= 0) {            frame = (frame + to - from + 1) % 3;        }    }}END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: gff_formatter.cpp,v $* Revision 1000.1  2004/06/01 19:44:46  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7** Revision 1.7  2004/05/21 21:42:54  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.6  2004/05/08 12:12:33  dicuccio* Altered handling of transcript_id - make transcript_id globally unique and* trackable across CDS features as well as mRNA features** Revision 1.5  2004/05/06 17:54:29  shomrat* Use CConstRef instead of reference** Revision 1.4  2004/04/22 16:02:12  shomrat* fixed stop_codon** Revision 1.3  2004/03/25 20:42:41  shomrat* Master returns a CBioseq_Handle instead of a CBioseq** Revision 1.2  2004/02/11 16:59:11  shomrat* removed unused variable** Revision 1.1  2004/01/14 16:07:29  shomrat* Initial Revision*** ===========================================================================*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?