seqtitle.cpp

来自「ncbi源码」· C++ 代码 · 共 1,141 行 · 第 1/3 页

CPP
1,141
字号
                    break;                }            }        }        if (is_draft  &&  title.find("WORKING DRAFT") == NPOS) {            suffix = ", WORKING DRAFT SEQUENCE";        } else if (!is_draft  &&  !cancelled                   &&  title.find("SEQUENCING IN") == NPOS) {            suffix = ", *** SEQUENCING IN PROGRESS ***";        }                string un;        if (tech == CMolInfo::eTech_htgs_1) {            un = "un";        }        if (core->GetInst().GetRepr() == CSeq_inst::eRepr_delta) {            // We need the full bioseq here...            const CBioseq& seq = hnd.GetBioseq();            unsigned int pieces = 1;            ITERATE (CDelta_ext::Tdata, it,                     seq.GetInst().GetExt().GetDelta().Get()) {                switch ((*it)->Which()) {                case CDelta_seq::e_Loc:                    if ( (*it)->GetLoc().IsNull() ) {                        pieces++;                    }                    break;                case CDelta_seq::e_Literal:                    if ( !(*it)->GetLiteral().IsSetSeq_data() ) {                        pieces++;                    }                    break;                default:                    break;                }            }            if (pieces == 1) {                // suffix += (", 1 " + un + "ordered piece");            } else {                suffix += (", " + NStr::IntToString(pieces)                           + ' ' + un + "ordered pieces");            }        } else {            // suffix += ", in " + un + "ordered pieces";        }        break;    }    case CMolInfo::eTech_htgs_3:        if (title.find("complete sequence") == NPOS) {            suffix = ", complete sequence";        }        break;    case CMolInfo::eTech_est:        if (title.find("mRNA sequence") == NPOS) {            suffix = ", mRNA sequence";        }        break;    case CMolInfo::eTech_sts:        if (title.find("sequence tagged site") == NPOS) {            suffix = ", sequence tagged site";        }        break;    case CMolInfo::eTech_survey:        if (title.find("genomic survey sequence") == NPOS) {            suffix = ", genomic survey sequence";        }        break;    case CMolInfo::eTech_wgs:        if (wgs_master) {            if (title.find("whole genome shotgun sequencing project") == NPOS){                suffix = ", whole genome shotgun sequencing project";            }                    } else {            if (title.find("whole genome shotgun sequence") == NPOS) {                suffix = ", whole genome shotgun sequence";            }        }        break;    }    if (flags & fGetTitle_Organism) {        CConstRef<COrg_ref> org;        if (source) {            org = &source->GetOrg();        } else {            CSeqdesc_CI it(hnd, CSeqdesc::e_Org);            for (;  it;  ++it) {                org = &it->GetOrg();                BREAK(it);            }        }        if (organism.empty()  &&  org.NotEmpty()  &&  org->IsSetTaxname()) {            organism = org->GetTaxname();        }        if ( !organism.empty()  &&  title.find(organism) == NPOS) {            suffix += " [" + organism + ']';        }    }    return prefix + title + suffix;}static string s_DescribeClones(const string& clone){    SIZE_TYPE count = 1;    for (SIZE_TYPE pos = clone.find(';');  pos != NPOS;         pos = clone.find(';', pos + 1)) {        ++count;    }    if (count > 3) {        return ", " + NStr::IntToString(count) + " clones,";    } else {        return " clone " + clone;    }}static string s_TitleFromBioSource(const CBioSource& source,                                   const string&     suffix){    string          name, chromosome, clone, map_, strain, sfx;    const COrg_ref& org = source.GetOrg();    if (org.IsSetTaxname()) {        name = org.GetTaxname();    }    if (source.IsSetSubtype()) {        ITERATE (CBioSource::TSubtype, it, source.GetSubtype()) {            switch ((*it)->GetSubtype()) {            case CSubSource::eSubtype_chromosome:                chromosome = " chromosome " + (*it)->GetName();                break;            case CSubSource::eSubtype_clone:                clone = s_DescribeClones((*it)->GetName());                break;            case CSubSource::eSubtype_map:                map_ = " map " + (*it)->GetName();                break;            }        }    }    if (org.IsSetOrgname()  &&  org.GetOrgname().IsSetMod()) {        ITERATE (COrgName::TMod, it, org.GetOrgname().GetMod()) {            if ((*it)->GetSubtype() == COrgMod::eSubtype_strain                &&  !NStr::EndsWith(name,                 (*it)->GetSubname(), NStr::eNocase)) {                strain = " strain " + (*it)->GetSubname();            }        }    }    if (suffix.size() > 0) {        sfx = ' ' + suffix;    }    string title = NStr::TruncateSpaces(name + chromosome + clone + map_                                        + strain + sfx);    if (islower(title[0])) {        title[0] = toupper(title[0]);    }    return title;}static string x_TitleFromChromosome(const CBioSource& source,                                    const CMolInfo&   mol_info){    string name, chromosome, segment, plasmid_name, orgnl;    string seq_tag, gen_tag;    bool   is_plasmid = false, is_virus = false;    if (source.GetOrg().IsSetTaxname()) {        name = source.GetOrg().GetTaxname();    } else {        return kEmptyStr;    }    string lc_name = name;    NStr::ToLower(lc_name);    if (lc_name.find("virus") != NPOS) {        is_virus = true;    }    if (source.IsSetSubtype()) {        ITERATE (CBioSource::TSubtype, it, source.GetSubtype()) {            switch ((*it)->GetSubtype()) {            case CSubSource::eSubtype_chromosome:                chromosome = (*it)->GetName();                break;            case CSubSource::eSubtype_segment:                segment = (*it)->GetName();                break;            case CSubSource::eSubtype_plasmid_name:            {                plasmid_name = (*it)->GetName();                string lc_plasmid = plasmid_name;                NStr::ToLower(lc_plasmid);                if (lc_plasmid.find("plasmid") == NPOS                    &&  lc_plasmid.find("element") == NPOS) {                    plasmid_name = "plasmid " + plasmid_name;                }                break;            }            }        }    }    switch (source.GetGenome()) {        // unknown, genomic    case CBioSource::eGenome_chloroplast:  orgnl = "chloroplast";   break;    case CBioSource::eGenome_chromoplast:  orgnl = "chromoplast";   break;    case CBioSource::eGenome_kinetoplast:  orgnl = "kinetoplast";   break;    case CBioSource::eGenome_mitochondrion:        orgnl = plasmid_name.empty() ? "mitochondrion" : "mitochondrial";        break;    case CBioSource::eGenome_plastid:      orgnl = "plastid";       break;    case CBioSource::eGenome_macronuclear: orgnl = "macronuclear";  break;    case CBioSource::eGenome_extrachrom:   orgnl = "extrachromosomal"; break;    case CBioSource::eGenome_plasmid:        orgnl = "plasmid";        is_plasmid = true;        break;        // transposon, insertion-seq    case CBioSource::eGenome_cyanelle:     orgnl = "cyanelle";      break;    case CBioSource::eGenome_proviral:        if (!is_virus) {            orgnl = plasmid_name.empty() ? "provirus" : "proviral";        }        break;    case CBioSource::eGenome_virion:        if (!is_virus) {            orgnl = "virion";        }        break;    case CBioSource::eGenome_nucleomorph:  orgnl = "nucleomorph";   break;    case CBioSource::eGenome_apicoplast:   orgnl = "apicoplast";    break;    case CBioSource::eGenome_leucoplast:   orgnl = "leucoplast";    break;    case CBioSource::eGenome_proplastid:   orgnl = "protoplast";    break;        // endogenous-virus    }    switch (mol_info.GetCompleteness()) {    case CMolInfo::eCompleteness_partial:    case CMolInfo::eCompleteness_no_left:    case CMolInfo::eCompleteness_no_right:    case CMolInfo::eCompleteness_no_ends:        seq_tag = ", partial sequence";        gen_tag = ", genome";        break;    default:        seq_tag = ", complete sequence";        gen_tag = ", complete genome";        break;    }    if (lc_name.find("plasmid") != NPOS) {        return name + seq_tag;            } else if (is_plasmid) {        if (plasmid_name.empty()) {            return name + " unnamed plasmid" + seq_tag;        } else {            return name + ' ' + plasmid_name + seq_tag;        }    } else if ( !plasmid_name.empty() ) {        if (orgnl.empty()) {            return name + ' ' + plasmid_name + seq_tag;        } else {            return name + ' ' + orgnl + ' ' + plasmid_name + seq_tag;        }    } else if ( !orgnl.empty() ) {        if ( chromosome.empty() ) {            return name + ' ' + orgnl + gen_tag;        } else {            return name + ' ' + orgnl + " chromosome " + chromosome + seq_tag;        }    } else if ( !segment.empty() ) {        if (segment.find("DNA") == NPOS  &&  segment.find("RNA") == NPOS            &&  segment.find("segment") == NPOS            &&  segment.find("Segment") == NPOS) {            return name + " segment " + segment + seq_tag;        } else {            return name + ' ' + segment + seq_tag;        }    } else if ( !chromosome.empty() ) {        return name + " chromosome " + chromosome + seq_tag;    } else {        return name + gen_tag;    }}static string s_TitleFromChromosome(const CBioSource& source,                                    const CMolInfo&   mol_info){    string result = x_TitleFromChromosome(source, mol_info);    result = NStr::Replace(result, "Plasmid", "plasmid");    result = NStr::Replace(result, "Element", "element");    if (!result.empty()) {        result[0] = toupper(result[0]);    }    return result;}static CConstRef<CSeq_feat> s_FindLongestFeature(const CSeq_loc& location,                                                 CScope& scope,                                                 CSeqFeatData::E_Choice type,                                                 CFeat_CI::EFeat_Location lt                                                   = CFeat_CI::e_Location){    CConstRef<CSeq_feat> result;    TSeqPos best_length = 0;    CFeat_CI it(scope, location, type, SAnnotSelector::eOverlap_Intervals,                SAnnotSelector::eResolve_TSE, lt);    for (;  it;  ++it) {        if (it->GetLocation().IsWhole()) {            // kludge; length only works on a Seq-loc of type "whole"            // if its Seq-id points to an object manager, which may not            // be the case here.            result = &it->GetMappedFeature();            BREAK(it);        } else if (GetLength(it->GetLocation(), &scope) > best_length) {            best_length = GetLength(it->GetLocation(), &scope);            result = &it->GetMappedFeature();        }    }    return result;}static string s_TitleFromProtein(const CBioseq_Handle& handle, CScope& scope,                                 string& organism){    CConstRef<CProt_ref> prot;    CConstRef<CSeq_loc>  cds_loc;    CConstRef<CGene_ref> gene;    CBioseq_Handle::TBioseqCore  core = handle.GetBioseqCore();    string               result;    CSeq_loc everywhere;    everywhere.SetWhole().Assign(*core->GetId().front());    {{        CConstRef<CSeq_feat> prot_feat            = s_FindLongestFeature(everywhere, scope, CSeqFeatData::e_Prot);        if (prot_feat) {            prot = &prot_feat->GetData().GetProt();        }    }}    {{        CConstRef<CSeq_feat> cds_feat            = s_FindLongestFeature(everywhere, scope, CSeqFeatData::e_Cdregion,                                   CFeat_CI::e_Product);        if (cds_feat) {            cds_loc = &cds_feat->GetLocation();        }    }}    if (cds_loc) {        CConstRef<CSeq_feat> gene_feat            = s_FindLongestFeature(*cds_loc, scope, CSeqFeatData::e_Gene);        if (gene_feat) {            gene = &gene_feat->GetData().GetGene();        }    }    if (prot.NotEmpty()  &&  prot->IsSetName()  &&  !prot->GetName().empty()) {        bool first = true;        ITERATE (CProt_ref::TName, it, prot->GetName()) {            if (!first) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?