feature_item.cpp
来自「ncbi源码」· C++ 代码 · 共 2,018 行 · 第 1/5 页
CPP
2,018 行
"tRNA-Gly", "tRNA-His", "tRNA-Ile", "tRNA-Lys", "tRNA-Leu", "tRNA-Met", "tRNA-Asn", "tRNA-Pro", "tRNA-Gln", "tRNA-Arg", "tRNA-Ser", "tRNA-Thr", "tRNA-Sec", "tRNA-Val", "tRNA-Trp", "tRNA-OTHER", "tRNA-Tyr", "tRNA-Glx", "tRNA-TERM"};static const string& s_AaName(int aa){ int shift = 0, idx = 255; if ( aa <= 74 ) { shift = 0; } else if (aa > 79) { shift = 2; } else { shift = 1; } if (aa != '*') { idx = aa - (64 + shift); } else { idx = 25; } if ( idx > 0 && idx < 26 ) { return s_TrnaList [idx]; } return kEmptyStr;}static int s_ToIupacaa(int aa){ vector<char> n(1, static_cast<char>(aa)); vector<char> i; CSeqConvert::Convert(n, CSeqUtil::e_Ncbieaa, 0, 1, i, CSeqUtil::e_Iupacaa); return i.front();}void CFeatureItem::x_AddRnaQuals(const CSeq_feat& feat, CBioseqContext& ctx, bool& pseudo) const{ const CRNA_ref& rna = feat.GetData().GetRna(); const CFlatFileConfig& cfg = ctx.Config(); if ( rna.CanGetPseudo() && rna.GetPseudo() ) { pseudo = true; } CRNA_ref::TType rna_type = rna.CanGetType() ? rna.GetType() : CRNA_ref::eType_unknown; switch ( rna_type ) { case CRNA_ref::eType_tRNA: { if ( rna.CanGetExt() ) { const CRNA_ref::C_Ext& ext = rna.GetExt(); switch ( ext.Which() ) { case CRNA_ref::C_Ext::e_Name: { // amino acid could not be parsed into structured form if ( !cfg.DropIllegalQuals() ) { x_AddQual(eFQ_product, new CFlatStringQVal(ext.GetName())); } else { x_AddQual(eFQ_product, new CFlatStringQVal("tRNA-OTHER")); } break; } case CRNA_ref::C_Ext::e_TRNA: { const CTrna_ext& trna = ext.GetTRNA(); int aa = 0; if ( trna.CanGetAa() && trna.GetAa().IsNcbieaa() ) { aa = trna.GetAa().GetNcbieaa(); } else { // !!! return; } if ( cfg.IupacaaOnly() ) { aa = s_ToIupacaa(aa); } const string& aa_str = s_AaName(aa); if ( !aa_str.empty() ) { x_AddQual(eFQ_product, new CFlatStringQVal(aa_str)); if ( trna.CanGetAnticodon() && !aa_str.empty() ) { x_AddQual(eFQ_anticodon, new CFlatAnticodonQVal(trna.GetAnticodon(), aa_str.substr(5, NPOS))); } } if ( trna.IsSetCodon() ) { x_AddQual(eFQ_trna_codons, new CFlatTrnaCodonsQVal(trna)); } break; } default: break; } // end of internal switch } break; } case CRNA_ref::eType_mRNA: { try { if ( feat.CanGetProduct() ) { const CSeq_id& id = GetId(feat.GetProduct(), &ctx.GetScope()); CBioseq_Handle prod = ctx.GetScope().GetBioseqHandleFromTSE(id, ctx.GetHandle()); EFeatureQualifier slot = (ctx.IsRefSeq() || cfg.IsModeDump() || cfg.IsModeGBench()) ? eFQ_transcript_id : eFQ_transcript_id_note; if ( prod ) { x_AddProductIdQuals(prod, slot); } else { x_AddQual(slot, new CFlatSeqIdQVal(id)); if ( id.IsGi() ) { x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(id, true)); } } } } catch (CNotUnique&) {} if ( !pseudo && cfg.ShowTranscript() ) { CSeqVector vec(feat.GetLocation(), ctx.GetScope()); vec.SetCoding(CBioseq_Handle::eCoding_Iupac); string transcription; vec.GetSeqData(0, vec.size(), transcription); x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription)); } // intentional fall through } default: if ( rna.CanGetExt() && rna.GetExt().IsName() ) { x_AddQual(eFQ_product, new CFlatStringQVal(rna.GetExt().GetName())); } break; } // end of switch}void CFeatureItem::x_AddGeneQuals(const CSeq_feat& gene, CScope& scope) const{ _ASSERT(gene.GetData().Which() == CSeqFeatData::e_Gene); x_AddQuals(gene.GetData().GetGene()); CConstRef<CSeq_feat> operon = GetOverlappingOperon(gene.GetLocation(), scope); if ( operon ) { ITERATE (CSeq_feat::TQual, it, operon->GetQual()) { if ( (*it)->CanGetQual() && (*it)->GetQual() == "operon" && (*it)->CanGetVal() ) { x_AddQual(eFQ_operon, new CFlatStringQVal((*it)->GetVal())); } } }}void CFeatureItem::x_AddCdregionQuals(const CSeq_feat& cds, CBioseqContext& ctx, bool& pseudo, bool& had_prot_desc) const{ CScope& scope = ctx.GetScope(); const CFlatFileConfig& cfg = ctx.Config(); x_AddQuals(cds.GetData().GetCdregion()); if ( ctx.IsProt() && IsMappedFromCDNA() ) { x_AddQual(eFQ_coded_by, new CFlatSeqLocQVal(m_Feat->GetLocation())); } else { // protein qualifiers if ( m_Feat->CanGetProduct() ) { const CSeq_id* prot_id = 0; // protein id try { prot_id = &GetId(m_Feat->GetProduct(), &scope); CBioseq_Handle h = scope.GetBioseqHandle(*prot_id); if (h) { prot_id = &GetId(h, eGetId_Best); } } catch (CException&) { prot_id = 0; } if ( prot_id != 0 ) { CBioseq_Handle prot; if ( !cfg.AlwaysTranslateCDS() ) { // by default only show /translation if product bioseq is within // entity, but flag can override and force far /translation prot = cfg.ShowFarTranslations() ? scope.GetBioseqHandle(*prot_id) : scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle()); } const CProt_ref* pref = 0; if ( prot ) { // Add protein quals (comment, note, names ...) pref = x_AddProteinQuals(prot); } else { x_AddQual(eFQ_protein_id, new CFlatSeqIdQVal(*prot_id)); if ( prot_id->IsGi() ) { x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*prot_id, true)); } } // protein xref overrides names, but should not prevent /protein_id, etc. const CProt_ref* p = m_Feat->GetProtXref(); if ( p != 0 ) { pref = p; } if ( pref != 0 ) { if ( !pref->GetName().empty() ) { CProt_ref::TName names = pref->GetName(); x_AddQual(eFQ_cds_product, new CFlatStringQVal(names.front())); names.pop_front(); if ( !names.empty() ) { x_AddQual(eFQ_prot_names, new CFlatStringListQVal(names)); } } if ( pref->CanGetDesc() ) { x_AddQual(eFQ_prot_desc, new CFlatStringQVal(pref->GetDesc())); had_prot_desc = true; } if ( !pref->GetActivity().empty() ) { x_AddQual(eFQ_prot_activity, new CFlatStringListQVal(pref->GetActivity())); } if ( !pref->GetEc().empty() ) { x_AddQual(eFQ_prot_EC_number, new CFlatStringListQVal(pref->GetEc())); } } // translation if ( !pseudo ) { string translation; if ( (!prot && cfg.TranslateIfNoProduct()) || cfg.AlwaysTranslateCDS() ) { CCdregion_translate::TranslateCdregion(translation, *m_Feat, scope); } else if ( prot ) { TSeqPos len = GetLength(cds.GetProduct(), &scope); if ( len > 0 ){ CSeqVector seqv = prot.GetSeqVector(CBioseq_Handle::eCoding_Ncbi); if ( cfg.IupacaaOnly() ) { seqv.SetCoding(CSeq_data::e_Iupacaa); } else { seqv.SetCoding(CSeq_data::e_Ncbieaa); } seqv.GetSeqData(0, seqv.size(), translation); } } if ( !translation.empty() ) { x_AddQual(eFQ_translation, new CFlatStringQVal(translation)); } else { // !!! release mode error } } } } }}const CProt_ref* CFeatureItem::x_AddProteinQuals(CBioseq_Handle& prot) const{ _ASSERT(prot); x_AddProductIdQuals(prot, eFQ_protein_id); CSeqdesc_CI comm(prot, CSeqdesc::e_Comment, 1); if ( comm && !comm->GetComment().empty() ) { x_AddQual(eFQ_prot_comment, new CFlatStringQVal(comm->GetComment())); } CSeqdesc_CI mi(prot, CSeqdesc::e_Molinfo); if ( mi ) { CMolInfo::TTech prot_tech = mi->GetMolinfo().GetTech(); if ( prot_tech > CMolInfo::eTech_standard && prot_tech != CMolInfo::eTech_concept_trans && prot_tech != CMolInfo::eTech_concept_trans_a ) { if ( !GetTechString(prot_tech).empty() ) { x_AddQual(eFQ_prot_method, new CFlatStringQVal("Method: " + GetTechString(prot_tech))); } } } const CProt_ref* pref = 0; CFeat_CI prot_feat(prot, 0, 0, CSeqFeatData::e_Prot); if ( prot_feat ) { pref = &(prot_feat->GetData().GetProt()); if ( prot_feat->IsSetComment() ) { if ( pref->GetProcessed() == CProt_ref::eProcessed_not_set || pref->GetProcessed() == CProt_ref::eProcessed_preprotein ) { x_AddQual(eFQ_prot_note, new CFlatStringQVal(prot_feat->GetComment())); } } } bool maploc = false, fig = false; for ( CSeqdesc_CI it(prot, CSeqdesc::e_Pub); it; ++it ) { const CPubdesc& pub = it->GetPub(); if ( !maploc && pub.CanGetMaploc() ) { string mapstr = "Map location " + pub.GetMaploc(); RemovePeriodFromEnd(mapstr); x_AddQual(eFQ_maploc, new CFlatStringQVal(mapstr)); maploc = true; } if ( !fig && pub.CanGetFig() ) { string figstr = "This sequence comes from " + pub.GetFig(); RemovePeriodFromEnd(figstr); x_AddQual(eFQ_figure, new CFlatStringQVal(figstr)); fig = true; } } return pref;}static const string s_ValidExceptionText[] = { "RNA editing", "reasons given in citation"};static const string s_ValidRefSeqExceptionText[] = { "RNA editing", "alternative processing", "alternative start codon", "artificial frameshift", "modified codon recognition", "nonconsensus splice site", "rearrangement required for product", "reasons given in citation", "ribosomal slippage", "trans-splicing", "unclassified transcription discrepancy", "unclassified translation discrepancy"};static bool s_IsValidExceptionText(const string& text){ CStaticArraySet<string> legal_text(s_ValidExceptionText, sizeof(s_ValidExceptionText)); return legal_text.find(text) != legal_text.end();}static bool s_IsValidRefSeqExceptionText(const string& text){ CStaticArraySet<string> legal_refseq_text(s_ValidRefSeqExceptionText, sizeof(s_ValidRefSeqExceptionText)); return legal_refseq_text.find(text) != legal_refseq_text.end();}static void s_ParseException(const string& original, string& except, string& note, CBioseqContext& ctx){ if ( original.empty() ) { return; } except.erase(); note.erase(); if ( !ctx.Config().DropIllegalQuals() ) { except = original; return; } list<string> l; NStr::Split(original, ",", l); NON_CONST_ITERATE (list<string>, it, l) { NStr::TruncateSpaces(*it); } list<string> except_list, note_list; ITERATE (list<string>, it, l) { if ( s_IsValidExceptionText(*it) ||
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?