feature_item.cpp

来自「ncbi源码」· C++ 代码 · 共 2,018 行 · 第 1/5 页

CPP
2,018
字号
             s_IsValidRefSeqExceptionText(*it) ) {            except_list.push_back(*it);        } else {            note_list.push_back(*it);        }    }    except = NStr::Join(except_list, ", ");    note = NStr::Join(note_list, ", ");}void CFeatureItem::x_AddExceptionQuals(CBioseqContext& ctx) const{    string except_text, note_text;    const CFlatFileConfig& cfg = ctx.Config();    if ( m_Feat->CanGetExcept_text()  &&  !m_Feat->GetExcept_text().empty() ) {        except_text = m_Feat->GetExcept_text();    }        // /exception currently legal only on cdregion    if ( m_Feat->GetData().IsCdregion()  ||  !cfg.DropIllegalQuals() ) {        // exception flag is set, but no exception text supplied        if ( except_text.empty()  &&             m_Feat->CanGetExcept()  &&  m_Feat->GetExcept() ) {            // if no /exception text, use text in comment, remove from /note            if ( x_HasQual(eFQ_seqfeat_note) ) {                const CFlatStringQVal* qval =                     dynamic_cast<const CFlatStringQVal*>(x_GetQual(eFQ_seqfeat_note).first->second.GetPointerOrNull());                if ( qval != 0 ) {                    const string& seqfeat_note = qval->GetValue();                    if ( !cfg.DropIllegalQuals()  ||                        s_IsValidExceptionText(seqfeat_note) ) {                        except_text = seqfeat_note;                        x_RemoveQuals(eFQ_seqfeat_note);                    }                }            } else {                except_text = "No explanation supplied";            }            // if DropIllegalQuals is set, check CDS list here as well            if ( cfg.DropIllegalQuals()  &&                 !s_IsValidExceptionText(except_text) ) {                except_text.erase();            }        }                if ( cfg.DropIllegalQuals() ) {            string except = except_text;            s_ParseException(except, except_text, note_text, ctx);        }    } else if ( !except_text.empty() ) {        note_text = except_text;        except_text.erase();    }    if ( !except_text.empty() ) {        x_AddQual(eFQ_exception, new CFlatStringQVal(except_text));    }    if ( !note_text.empty() ) {        x_AddQual(eFQ_exception_note, new CFlatStringQVal(note_text));    }}void CFeatureItem::x_AddProductIdQuals(CBioseq_Handle& prod, EFeatureQualifier slot) const{    if ( !prod ) {        return;    }    const CBioseq::TId& ids = prod.GetBioseqCore()->GetId();    if ( ids.empty() ) {        return;    }    // the product id (transcript or protein) is set to the best id    const CSeq_id* best = FindBestChoice(ids, CSeq_id::Score);    if ( best != 0 ) {        switch ( best->Which() ) {        case CSeq_id::e_Genbank:        case CSeq_id::e_Embl:        case CSeq_id::e_Ddbj:        case CSeq_id::e_Gi:        case CSeq_id::e_Other:        case CSeq_id::e_General:        case CSeq_id::e_Tpg:        case CSeq_id::e_Tpe:        case CSeq_id::e_Tpd:            // these are the only types we allow as product ids            break;        default:            best = 0;        }    }    if ( best == 0 ) {        return;    }    x_AddQual(slot, new CFlatSeqIdQVal(*best));    ITERATE (CBioseq::TId, it, ids) {        const CSeq_id& id = **it;        CSeq_id::E_Choice choice = id.Which();        if ( choice != CSeq_id::e_Genbank  &&             choice != CSeq_id::e_Embl  &&             choice != CSeq_id::e_Ddbj  &&             choice != CSeq_id::e_Gi  &&             choice != CSeq_id::e_Other  &&             choice != CSeq_id::e_General  &&             choice != CSeq_id::e_Tpg  &&             choice != CSeq_id::e_Tpe  &&             choice != CSeq_id::e_Tpd ) {            continue;        }        if ( &id == best  &&  !id.IsGi() ) {            // we've already done 'best'.             continue;        }        if ( id.IsGeneral() ) {            const CDbtag& dbt = id.GetGeneral();            if ( dbt.GetType() != CDbtag::eDbtagType_PID ) {                if ( ids.size() == 1 ) {                    x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(id, id.IsGi()));                }            }        } else {            x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(id, id.IsGi()));        }    }}void CFeatureItem::x_AddRegionQuals(const CSeq_feat& feat, CBioseqContext& ctx) const{    const string& region = feat.GetData().GetRegion();    if ( region.empty() ) {        return;    }    if ( ctx.IsProt()  &&         feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_region ) {        x_AddQual(eFQ_region_name, new CFlatStringQVal(region));    } else {        x_AddQual(eFQ_region, new CFlatStringQVal("Region: " + region));    }}void CFeatureItem::x_AddExtQuals(const CSeq_feat::TExt& ext) const{    ITERATE (CUser_object::TData, it, ext.GetData()) {        const CUser_field& field = **it;        if ( !field.CanGetData() ) {            continue;        }        if ( field.GetData().IsObject() ) {            const CUser_object& obj = field.GetData().GetObject();            x_AddExtQuals(obj);            return;        } else if ( field.GetData().IsObjects() ) {            ITERATE (CUser_field::C_Data::TObjects, o, field.GetData().GetObjects()) {                x_AddExtQuals(**o);            }            return;        }    }    if ( ext.CanGetType()  &&  ext.GetType().IsStr() ) {        const string& oid = ext.GetType().GetStr();        if ( oid == "ModelEvidence" ) {            x_AddQual(eFQ_modelev, new CFlatModelEvQVal(ext));        } else if ( oid == "GeneOntology" ) {            x_AddGoQuals(ext);        }    }}void CFeatureItem::x_AddGoQuals(const CUser_object& uo) const{    ITERATE (CUser_object::TData, uf_it, uo.GetData()) {        const CUser_field& field = **uf_it;        if ( field.IsSetLabel()  &&  field.GetLabel().IsStr() ) {            const string& label = field.GetLabel().GetStr();            EFeatureQualifier slot = eFQ_none;            if ( label == "Process" ) {                slot = eFQ_go_process;            } else if ( label == "Component" ) {                               slot = eFQ_go_component;            } else if ( label == "Function" ) {                slot = eFQ_go_function;            }            if ( slot == eFQ_none ) {                continue;            }            ITERATE (CUser_field::TData::TFields, it, field.GetData().GetFields()) {                if ( (*it)->GetData().IsFields() ) {                    x_AddQual(slot, new CFlatGoQVal(**it));                }            }        }    }}void CFeatureItem::x_AddQuals(const CGene_ref& gene) const{    const string* locus = (gene.IsSetLocus()  &&  !gene.GetLocus().empty()) ?        &gene.GetLocus() : 0;        const string* desc = (gene.IsSetDesc() &&  !gene.GetDesc().empty()) ?        &gene.GetDesc() : 0;    const CGene_ref::TSyn* syn = (gene.IsSetSyn()  &&  !gene.GetSyn().empty()) ?        &gene.GetSyn() : 0;    const string* locus_tag =         (gene.IsSetLocus_tag()  &&  !gene.GetLocus_tag().empty()) ?        &gene.GetLocus_tag() : 0;    if ( locus ) {        x_AddQual(eFQ_gene, new CFlatStringQVal(*locus));        if ( locus_tag ) {            x_AddQual(eFQ_locus_tag, new CFlatStringQVal(*locus_tag));        }        if ( desc ) {            x_AddQual(eFQ_gene_desc, new CFlatStringQVal(*desc));        }        if ( syn ) {            x_AddQual(eFQ_gene_syn, new CFlatStringListQVal(*syn));        }    } else if ( locus_tag ) {        x_AddQual(eFQ_locus_tag, new CFlatStringQVal(*locus_tag));        if ( desc ) {            x_AddQual(eFQ_gene_desc, new CFlatStringQVal(*desc));        }        if ( syn ) {            x_AddQual(eFQ_gene_syn, new CFlatStringListQVal(*syn));        }    } else if ( desc ) {        x_AddQual(eFQ_gene, new CFlatStringQVal(*desc));        if ( syn ) {            x_AddQual(eFQ_gene_syn, new CFlatStringListQVal(*syn));        }    } else if ( syn ) {        CGene_ref::TSyn syns = *syn;        x_AddQual(eFQ_gene, new CFlatStringQVal(syns.front()));        syns.pop_front();        if ( !syns.empty() ) {            x_AddQual(eFQ_gene_syn, new CFlatStringListQVal(syns));        }    }    if ( gene.IsSetAllele()  &&  !gene.GetAllele().empty() ) {        x_AddQual(eFQ_gene_allele, new CFlatStringQVal(gene.GetAllele()));    }    if ( gene.IsSetMaploc()  &&  !gene.GetMaploc().empty() ) {        x_AddQual(eFQ_gene_map, new CFlatStringQVal(gene.GetMaploc()));    }    if ( gene.IsSetDb() ) {        x_AddQual(eFQ_gene_xref, new CFlatXrefQVal(gene.GetDb()));    }    if ( gene.GetPseudo() ) {        x_AddQual(eFQ_pseudo, new CFlatBoolQVal(true));    }}void CFeatureItem::x_AddQuals(const CCdregion& cds) const{    CBioseqContext& ctx = *GetContext();    CScope& scope = ctx.GetScope();    CCdregion::TFrame frame = cds.GetFrame();    // code break    if ( cds.IsSetCode_break() ) {        // set selenocysteine quals        ITERATE (CCdregion::TCode_break, it, cds.GetCode_break()) {            if ( !(*it)->IsSetAa() ) {                continue;            }            const CCode_break::C_Aa& cbaa = (*it)->GetAa();            bool is_U = false;            switch ( cbaa.Which() ) {            case CCode_break::C_Aa::e_Ncbieaa:                is_U = (cbaa.GetNcbieaa() == 'U');                break;            case CCode_break::C_Aa::e_Ncbi8aa:                is_U = (cbaa.GetNcbieaa() == 'U');            case CCode_break::C_Aa::e_Ncbistdaa:                is_U = (cbaa.GetNcbieaa() == 24);                break;            default:                break;            }                        if ( is_U ) {                if ( ctx.Config().SelenocysteineToNote() ) {                    x_AddQual(eFQ_selenocysteine_note,                        new CFlatStringQVal("selenocysteine"));                } else {                    x_AddQual(eFQ_selenocysteine, new CFlatBoolQVal(true));                }                break;            }        }    }    // translation table    if ( cds.CanGetCode() ) {        int gcode = cds.GetCode().GetId();        if ( gcode > 0  &&  gcode != 255 ) {            // show code 1 only in GBSeq format.            if ( ctx.Config().IsFormatGBSeq()  ||  gcode > 1 ) {                x_AddQual(eFQ_transl_table, new CFlatIntQVal(gcode));            }        }    }    if ( !(ctx.IsProt()  && IsMappedFromCDNA()) ) {        // frame        if ( frame == CCdregion::eFrame_not_set ) {            frame = CCdregion::eFrame_one;        }        x_AddQual(eFQ_codon_start, new CFlatIntQVal(frame));        // translation exception        if ( cds.IsSetCode_break() ) {            x_AddQual(eFQ_transl_except,                 new CFlatCodeBreakQVal(cds.GetCode_break()));        }                // protein conflict        static const string conflic_msg =             "Protein sequence is in conflict with the conceptual translation";        bool has_prot = m_Feat->IsSetProduct()  &&                        (GetLength(m_Feat->GetProduct(), &scope) > 0);        if ( cds.CanGetConflict()  &&  cds.GetConflict()  &&  has_prot ) {            x_AddQual(eFQ_prot_conflict, new CFlatStringQVal(conflic_msg));        }    } else {        // frame        if ( frame > 1 ) {            x_AddQual(eFQ_codon_start, new CFlatIntQVal(frame));        }    }}void CFeatureItem::x_AddProtQuals(const CSeq_feat& feat, CBioseqContext& ctx,  bool& pseudo, bool& had_prot_desc, string& precursor_comment) const{    const CProt_ref& pref = feat.GetData().GetProt();    CProt_ref::TProcessed processed = pref.GetProcessed();    if ( ctx.IsNuc()  ||  (ctx.IsProt()  &&  !IsMappedFromProt()) ) {        if ( pref.IsSetName()  &&  !pref.GetName().empty() ) {            CProt_ref::TName names = pref.GetName();            x_AddQual(eFQ_product, new CFlatStringQVal(names.front()));            names.pop_front();            if ( !names.empty() ) {                x_AddQual(eFQ_prot_names, new CFlatStringListQVal(names));            }        }        if ( pref.CanGetDesc()  &&  !pref.GetDesc().empty() ) {            if ( !ctx.IsProt() ) {                x_AddQual(eFQ_prot_desc, new CFlatStringQVal(pref.GetDesc()));                had_prot_desc = true;            } else {                x_AddQual(eFQ_prot_name, new CFlatStringQVal(pref.GetDesc()));            }        }        if ( pref.IsSetActivity()  &&  !pref.GetActivity().empty() ) {            if ( ctx.IsNuc()  ||  processed != CProt_ref::eProcessed_mature ) {                x_AddQual(eFQ_prot_activity,                     new CFlatStringListQVal(pref.GetActivity()));            }        }        if ( feat.CanGetProduct() ) {            CBioseq_Handle prot =                 ctx.GetScope().GetBioseqHandle(feat.GetProduct());            if ( prot ) {                x_AddProductIdQuals(prot, eFQ_protein_id);            } else {                try {                    const CSeq_id& prod_id =                         GetId(feat.GetProduct(), &ctx.GetScope());                    if ( ctx.IsRefSeq()  ||  !ctx.Config().ForGBRelease() ) {                        x_AddQual(eFQ_protein_id, new CFlatSeqIdQVal(prod_id));                    }                } catch (CNotUnique&) {}            }        }    } else { // protein feature on subpeptide bioseq        x_AddQual(eFQ_derived_from, new CFlatSeqLocQVal(m_Feat->GetLocation()));        // check precursor_comment        CConstRef<CSeq_feat> prot =             GetBestOverlappingFeat(m_Feat->GetProduct(),                                   CSeqFeatData::e_Prot,

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?