feature_item.cpp

来自「ncbi源码」· C++ 代码 · 共 2,018 行 · 第 1/5 页

CPP
2,018
字号
  "tRNA-Gly",  "tRNA-His",  "tRNA-Ile",  "tRNA-Lys",  "tRNA-Leu",  "tRNA-Met",  "tRNA-Asn",  "tRNA-Pro",  "tRNA-Gln",  "tRNA-Arg",  "tRNA-Ser",  "tRNA-Thr",  "tRNA-Sec",  "tRNA-Val",  "tRNA-Trp",  "tRNA-OTHER",  "tRNA-Tyr",  "tRNA-Glx",  "tRNA-TERM"};static const string& s_AaName(int aa){    int shift = 0, idx = 255;    if ( aa <= 74 ) {        shift = 0;    } else if (aa > 79) {        shift = 2;    } else {        shift = 1;    }    if (aa != '*') {        idx = aa - (64 + shift);    } else {        idx = 25;    }    if ( idx > 0 && idx < 26 ) {        return s_TrnaList [idx];    }    return kEmptyStr;}static int s_ToIupacaa(int aa){    vector<char> n(1, static_cast<char>(aa));    vector<char> i;    CSeqConvert::Convert(n, CSeqUtil::e_Ncbieaa, 0, 1, i, CSeqUtil::e_Iupacaa);    return i.front();}void CFeatureItem::x_AddRnaQuals(const CSeq_feat& feat, CBioseqContext& ctx, bool& pseudo) const{    const CRNA_ref& rna = feat.GetData().GetRna();    const CFlatFileConfig& cfg = ctx.Config();    if ( rna.CanGetPseudo()  &&  rna.GetPseudo() ) {        pseudo = true;    }    CRNA_ref::TType rna_type = rna.CanGetType() ?        rna.GetType() : CRNA_ref::eType_unknown;    switch ( rna_type ) {    case CRNA_ref::eType_tRNA:    {        if ( rna.CanGetExt() ) {            const CRNA_ref::C_Ext& ext = rna.GetExt();            switch ( ext.Which() ) {            case CRNA_ref::C_Ext::e_Name:            {                // amino acid could not be parsed into structured form                if ( !cfg.DropIllegalQuals() ) {                    x_AddQual(eFQ_product,                              new CFlatStringQVal(ext.GetName()));                } else {                    x_AddQual(eFQ_product,                              new CFlatStringQVal("tRNA-OTHER"));                }                break;            }            case CRNA_ref::C_Ext::e_TRNA:            {                const CTrna_ext& trna = ext.GetTRNA();                int aa = 0;                if ( trna.CanGetAa()  &&  trna.GetAa().IsNcbieaa() ) {                    aa = trna.GetAa().GetNcbieaa();                } else {                    // !!!                    return;                }                if ( cfg.IupacaaOnly() ) {                    aa = s_ToIupacaa(aa);                }                const string& aa_str = s_AaName(aa);                if ( !aa_str.empty() ) {                    x_AddQual(eFQ_product, new CFlatStringQVal(aa_str));                    if ( trna.CanGetAnticodon()  &&  !aa_str.empty() ) {                        x_AddQual(eFQ_anticodon,                            new CFlatAnticodonQVal(trna.GetAnticodon(),                                                   aa_str.substr(5, NPOS)));                    }                }                if ( trna.IsSetCodon() ) {                    x_AddQual(eFQ_trna_codons, new CFlatTrnaCodonsQVal(trna));                }                break;            }            default:                break;            } // end of internal switch        }        break;    }    case CRNA_ref::eType_mRNA:    {        try {            if ( feat.CanGetProduct() ) {                const CSeq_id& id = GetId(feat.GetProduct(), &ctx.GetScope());                CBioseq_Handle prod =                     ctx.GetScope().GetBioseqHandleFromTSE(id, ctx.GetHandle());                EFeatureQualifier slot =                     (ctx.IsRefSeq()  ||  cfg.IsModeDump()  ||  cfg.IsModeGBench()) ?                    eFQ_transcript_id : eFQ_transcript_id_note;                if ( prod ) {                    x_AddProductIdQuals(prod, slot);                } else {                    x_AddQual(slot, new CFlatSeqIdQVal(id));                    if ( id.IsGi() ) {                        x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(id, true));                    }                }            }        } catch (CNotUnique&) {}        if ( !pseudo  &&  cfg.ShowTranscript() ) {            CSeqVector vec(feat.GetLocation(), ctx.GetScope());            vec.SetCoding(CBioseq_Handle::eCoding_Iupac);            string transcription;            vec.GetSeqData(0, vec.size(), transcription);            x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription));        }        // intentional fall through    }    default:        if ( rna.CanGetExt()  &&  rna.GetExt().IsName() ) {            x_AddQual(eFQ_product, new CFlatStringQVal(rna.GetExt().GetName()));        }        break;    } // end of switch}void CFeatureItem::x_AddGeneQuals(const CSeq_feat& gene, CScope& scope) const{    _ASSERT(gene.GetData().Which() == CSeqFeatData::e_Gene);    x_AddQuals(gene.GetData().GetGene());    CConstRef<CSeq_feat> operon =        GetOverlappingOperon(gene.GetLocation(), scope);    if ( operon ) {        ITERATE (CSeq_feat::TQual, it, operon->GetQual()) {            if ( (*it)->CanGetQual()  &&  (*it)->GetQual() == "operon"  &&                 (*it)->CanGetVal() ) {                x_AddQual(eFQ_operon, new CFlatStringQVal((*it)->GetVal()));            }        }    }}void CFeatureItem::x_AddCdregionQuals(const CSeq_feat& cds, CBioseqContext& ctx, bool& pseudo, bool& had_prot_desc) const{    CScope& scope = ctx.GetScope();    const CFlatFileConfig& cfg = ctx.Config();        x_AddQuals(cds.GetData().GetCdregion());    if ( ctx.IsProt()  &&  IsMappedFromCDNA() ) {        x_AddQual(eFQ_coded_by, new CFlatSeqLocQVal(m_Feat->GetLocation()));    } else {        // protein qualifiers        if ( m_Feat->CanGetProduct() ) {            const CSeq_id* prot_id = 0;            // protein id            try {                prot_id = &GetId(m_Feat->GetProduct(), &scope);                CBioseq_Handle h = scope.GetBioseqHandle(*prot_id);                if (h) {                    prot_id = &GetId(h, eGetId_Best);                }            } catch (CException&) {                prot_id = 0;            }                        if ( prot_id != 0 ) {                CBioseq_Handle prot;                if ( !cfg.AlwaysTranslateCDS() ) {                    // by default only show /translation if product bioseq is within                    // entity, but flag can override and force far /translation                    prot = cfg.ShowFarTranslations() ?                         scope.GetBioseqHandle(*prot_id) :                         scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());                }                const CProt_ref* pref = 0;                if ( prot ) {                    // Add protein quals (comment, note, names ...)                     pref = x_AddProteinQuals(prot);                } else {                    x_AddQual(eFQ_protein_id, new CFlatSeqIdQVal(*prot_id));                    if ( prot_id->IsGi() ) {                        x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*prot_id, true));                    }                }                                // protein xref overrides names, but should not prevent /protein_id, etc.                const CProt_ref* p = m_Feat->GetProtXref();                if ( p != 0 ) {                    pref = p;                }                if ( pref != 0 ) {                    if ( !pref->GetName().empty() ) {                        CProt_ref::TName names = pref->GetName();                        x_AddQual(eFQ_cds_product, new CFlatStringQVal(names.front()));                        names.pop_front();                        if ( !names.empty() ) {                            x_AddQual(eFQ_prot_names, new CFlatStringListQVal(names));                        }                    }                    if ( pref->CanGetDesc() ) {                        x_AddQual(eFQ_prot_desc, new CFlatStringQVal(pref->GetDesc()));                        had_prot_desc = true;                    }                    if ( !pref->GetActivity().empty() ) {                        x_AddQual(eFQ_prot_activity,                            new CFlatStringListQVal(pref->GetActivity()));                    }                    if ( !pref->GetEc().empty() ) {                        x_AddQual(eFQ_prot_EC_number,                            new CFlatStringListQVal(pref->GetEc()));                    }                }                // translation                if ( !pseudo ) {                    string translation;                    if ( (!prot  &&  cfg.TranslateIfNoProduct())  ||                        cfg.AlwaysTranslateCDS() ) {                        CCdregion_translate::TranslateCdregion(translation, *m_Feat,                            scope);                    } else if ( prot ) {                        TSeqPos len = GetLength(cds.GetProduct(), &scope);                        if ( len > 0 ){                            CSeqVector seqv =                                 prot.GetSeqVector(CBioseq_Handle::eCoding_Ncbi);                            if ( cfg.IupacaaOnly() ) {                                seqv.SetCoding(CSeq_data::e_Iupacaa);                            } else {                                seqv.SetCoding(CSeq_data::e_Ncbieaa);                            }                            seqv.GetSeqData(0, seqv.size(), translation);                        }                    }                    if ( !translation.empty() ) {                        x_AddQual(eFQ_translation, new CFlatStringQVal(translation));                    } else {                        // !!! release mode error                    }                }            }        }    }}const CProt_ref* CFeatureItem::x_AddProteinQuals(CBioseq_Handle& prot) const{    _ASSERT(prot);    x_AddProductIdQuals(prot, eFQ_protein_id);    CSeqdesc_CI comm(prot, CSeqdesc::e_Comment, 1);    if ( comm  &&  !comm->GetComment().empty() ) {        x_AddQual(eFQ_prot_comment, new CFlatStringQVal(comm->GetComment()));    }    CSeqdesc_CI mi(prot, CSeqdesc::e_Molinfo);    if ( mi ) {        CMolInfo::TTech prot_tech = mi->GetMolinfo().GetTech();        if ( prot_tech >  CMolInfo::eTech_standard       &&             prot_tech != CMolInfo::eTech_concept_trans  &&             prot_tech != CMolInfo::eTech_concept_trans_a ) {            if ( !GetTechString(prot_tech).empty() ) {                x_AddQual(eFQ_prot_method,                     new CFlatStringQVal("Method: " + GetTechString(prot_tech)));            }        }    }    const CProt_ref* pref = 0;    CFeat_CI prot_feat(prot, 0, 0, CSeqFeatData::e_Prot);    if ( prot_feat ) {        pref = &(prot_feat->GetData().GetProt());        if ( prot_feat->IsSetComment() ) {            if ( pref->GetProcessed() == CProt_ref::eProcessed_not_set  ||                 pref->GetProcessed() == CProt_ref::eProcessed_preprotein ) {                x_AddQual(eFQ_prot_note,                     new CFlatStringQVal(prot_feat->GetComment()));            }        }    }    bool maploc = false, fig = false;    for ( CSeqdesc_CI it(prot, CSeqdesc::e_Pub); it; ++it ) {        const CPubdesc& pub = it->GetPub();        if ( !maploc  &&  pub.CanGetMaploc() ) {            string mapstr = "Map location " + pub.GetMaploc();            RemovePeriodFromEnd(mapstr);            x_AddQual(eFQ_maploc, new CFlatStringQVal(mapstr));            maploc = true;        }        if ( !fig  &&  pub.CanGetFig() ) {            string figstr = "This sequence comes from " + pub.GetFig();            RemovePeriodFromEnd(figstr);            x_AddQual(eFQ_figure, new CFlatStringQVal(figstr));            fig = true;        }    }    return pref;}static const string s_ValidExceptionText[] = {  "RNA editing",  "reasons given in citation"};static const string s_ValidRefSeqExceptionText[] = {    "RNA editing",    "alternative processing",    "alternative start codon",    "artificial frameshift",    "modified codon recognition",    "nonconsensus splice site",    "rearrangement required for product",    "reasons given in citation",    "ribosomal slippage",    "trans-splicing",    "unclassified transcription discrepancy",    "unclassified translation discrepancy"};static bool s_IsValidExceptionText(const string& text){    CStaticArraySet<string> legal_text(s_ValidExceptionText,        sizeof(s_ValidExceptionText));    return legal_text.find(text) != legal_text.end();}static bool s_IsValidRefSeqExceptionText(const string& text){    CStaticArraySet<string> legal_refseq_text(s_ValidRefSeqExceptionText,        sizeof(s_ValidRefSeqExceptionText));    return legal_refseq_text.find(text) != legal_refseq_text.end();}static void s_ParseException(const string& original, string& except, string& note, CBioseqContext& ctx){    if ( original.empty() ) {        return;    }    except.erase();    note.erase();    if ( !ctx.Config().DropIllegalQuals() ) {        except = original;        return;    }    list<string> l;    NStr::Split(original, ",", l);    NON_CONST_ITERATE (list<string>, it, l) {        NStr::TruncateSpaces(*it);    }    list<string> except_list, note_list;    ITERATE (list<string>, it, l) {        if ( s_IsValidExceptionText(*it)  ||

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?