seqtitle.cpp
来自「ncbi源码」· C++ 代码 · 共 1,141 行 · 第 1/3 页
CPP
1,141 行
break; } } } if (is_draft && title.find("WORKING DRAFT") == NPOS) { suffix = ", WORKING DRAFT SEQUENCE"; } else if (!is_draft && !cancelled && title.find("SEQUENCING IN") == NPOS) { suffix = ", *** SEQUENCING IN PROGRESS ***"; } string un; if (tech == CMolInfo::eTech_htgs_1) { un = "un"; } if (core->GetInst().GetRepr() == CSeq_inst::eRepr_delta) { // We need the full bioseq here... const CBioseq& seq = hnd.GetBioseq(); unsigned int pieces = 1; ITERATE (CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) { switch ((*it)->Which()) { case CDelta_seq::e_Loc: if ( (*it)->GetLoc().IsNull() ) { pieces++; } break; case CDelta_seq::e_Literal: if ( !(*it)->GetLiteral().IsSetSeq_data() ) { pieces++; } break; default: break; } } if (pieces == 1) { // suffix += (", 1 " + un + "ordered piece"); } else { suffix += (", " + NStr::IntToString(pieces) + ' ' + un + "ordered pieces"); } } else { // suffix += ", in " + un + "ordered pieces"; } break; } case CMolInfo::eTech_htgs_3: if (title.find("complete sequence") == NPOS) { suffix = ", complete sequence"; } break; case CMolInfo::eTech_est: if (title.find("mRNA sequence") == NPOS) { suffix = ", mRNA sequence"; } break; case CMolInfo::eTech_sts: if (title.find("sequence tagged site") == NPOS) { suffix = ", sequence tagged site"; } break; case CMolInfo::eTech_survey: if (title.find("genomic survey sequence") == NPOS) { suffix = ", genomic survey sequence"; } break; case CMolInfo::eTech_wgs: if (wgs_master) { if (title.find("whole genome shotgun sequencing project") == NPOS){ suffix = ", whole genome shotgun sequencing project"; } } else { if (title.find("whole genome shotgun sequence") == NPOS) { suffix = ", whole genome shotgun sequence"; } } break; } if (flags & fGetTitle_Organism) { CConstRef<COrg_ref> org; if (source) { org = &source->GetOrg(); } else { CSeqdesc_CI it(hnd, CSeqdesc::e_Org); for (; it; ++it) { org = &it->GetOrg(); BREAK(it); } } if (organism.empty() && org.NotEmpty() && org->IsSetTaxname()) { organism = org->GetTaxname(); } if ( !organism.empty() && title.find(organism) == NPOS) { suffix += " [" + organism + ']'; } } return prefix + title + suffix;}static string s_DescribeClones(const string& clone){ SIZE_TYPE count = 1; for (SIZE_TYPE pos = clone.find(';'); pos != NPOS; pos = clone.find(';', pos + 1)) { ++count; } if (count > 3) { return ", " + NStr::IntToString(count) + " clones,"; } else { return " clone " + clone; }}static string s_TitleFromBioSource(const CBioSource& source, const string& suffix){ string name, chromosome, clone, map_, strain, sfx; const COrg_ref& org = source.GetOrg(); if (org.IsSetTaxname()) { name = org.GetTaxname(); } if (source.IsSetSubtype()) { ITERATE (CBioSource::TSubtype, it, source.GetSubtype()) { switch ((*it)->GetSubtype()) { case CSubSource::eSubtype_chromosome: chromosome = " chromosome " + (*it)->GetName(); break; case CSubSource::eSubtype_clone: clone = s_DescribeClones((*it)->GetName()); break; case CSubSource::eSubtype_map: map_ = " map " + (*it)->GetName(); break; } } } if (org.IsSetOrgname() && org.GetOrgname().IsSetMod()) { ITERATE (COrgName::TMod, it, org.GetOrgname().GetMod()) { if ((*it)->GetSubtype() == COrgMod::eSubtype_strain && !NStr::EndsWith(name, (*it)->GetSubname(), NStr::eNocase)) { strain = " strain " + (*it)->GetSubname(); } } } if (suffix.size() > 0) { sfx = ' ' + suffix; } string title = NStr::TruncateSpaces(name + chromosome + clone + map_ + strain + sfx); if (islower(title[0])) { title[0] = toupper(title[0]); } return title;}static string x_TitleFromChromosome(const CBioSource& source, const CMolInfo& mol_info){ string name, chromosome, segment, plasmid_name, orgnl; string seq_tag, gen_tag; bool is_plasmid = false, is_virus = false; if (source.GetOrg().IsSetTaxname()) { name = source.GetOrg().GetTaxname(); } else { return kEmptyStr; } string lc_name = name; NStr::ToLower(lc_name); if (lc_name.find("virus") != NPOS) { is_virus = true; } if (source.IsSetSubtype()) { ITERATE (CBioSource::TSubtype, it, source.GetSubtype()) { switch ((*it)->GetSubtype()) { case CSubSource::eSubtype_chromosome: chromosome = (*it)->GetName(); break; case CSubSource::eSubtype_segment: segment = (*it)->GetName(); break; case CSubSource::eSubtype_plasmid_name: { plasmid_name = (*it)->GetName(); string lc_plasmid = plasmid_name; NStr::ToLower(lc_plasmid); if (lc_plasmid.find("plasmid") == NPOS && lc_plasmid.find("element") == NPOS) { plasmid_name = "plasmid " + plasmid_name; } break; } } } } switch (source.GetGenome()) { // unknown, genomic case CBioSource::eGenome_chloroplast: orgnl = "chloroplast"; break; case CBioSource::eGenome_chromoplast: orgnl = "chromoplast"; break; case CBioSource::eGenome_kinetoplast: orgnl = "kinetoplast"; break; case CBioSource::eGenome_mitochondrion: orgnl = plasmid_name.empty() ? "mitochondrion" : "mitochondrial"; break; case CBioSource::eGenome_plastid: orgnl = "plastid"; break; case CBioSource::eGenome_macronuclear: orgnl = "macronuclear"; break; case CBioSource::eGenome_extrachrom: orgnl = "extrachromosomal"; break; case CBioSource::eGenome_plasmid: orgnl = "plasmid"; is_plasmid = true; break; // transposon, insertion-seq case CBioSource::eGenome_cyanelle: orgnl = "cyanelle"; break; case CBioSource::eGenome_proviral: if (!is_virus) { orgnl = plasmid_name.empty() ? "provirus" : "proviral"; } break; case CBioSource::eGenome_virion: if (!is_virus) { orgnl = "virion"; } break; case CBioSource::eGenome_nucleomorph: orgnl = "nucleomorph"; break; case CBioSource::eGenome_apicoplast: orgnl = "apicoplast"; break; case CBioSource::eGenome_leucoplast: orgnl = "leucoplast"; break; case CBioSource::eGenome_proplastid: orgnl = "protoplast"; break; // endogenous-virus } switch (mol_info.GetCompleteness()) { case CMolInfo::eCompleteness_partial: case CMolInfo::eCompleteness_no_left: case CMolInfo::eCompleteness_no_right: case CMolInfo::eCompleteness_no_ends: seq_tag = ", partial sequence"; gen_tag = ", genome"; break; default: seq_tag = ", complete sequence"; gen_tag = ", complete genome"; break; } if (lc_name.find("plasmid") != NPOS) { return name + seq_tag; } else if (is_plasmid) { if (plasmid_name.empty()) { return name + " unnamed plasmid" + seq_tag; } else { return name + ' ' + plasmid_name + seq_tag; } } else if ( !plasmid_name.empty() ) { if (orgnl.empty()) { return name + ' ' + plasmid_name + seq_tag; } else { return name + ' ' + orgnl + ' ' + plasmid_name + seq_tag; } } else if ( !orgnl.empty() ) { if ( chromosome.empty() ) { return name + ' ' + orgnl + gen_tag; } else { return name + ' ' + orgnl + " chromosome " + chromosome + seq_tag; } } else if ( !segment.empty() ) { if (segment.find("DNA") == NPOS && segment.find("RNA") == NPOS && segment.find("segment") == NPOS && segment.find("Segment") == NPOS) { return name + " segment " + segment + seq_tag; } else { return name + ' ' + segment + seq_tag; } } else if ( !chromosome.empty() ) { return name + " chromosome " + chromosome + seq_tag; } else { return name + gen_tag; }}static string s_TitleFromChromosome(const CBioSource& source, const CMolInfo& mol_info){ string result = x_TitleFromChromosome(source, mol_info); result = NStr::Replace(result, "Plasmid", "plasmid"); result = NStr::Replace(result, "Element", "element"); if (!result.empty()) { result[0] = toupper(result[0]); } return result;}static CConstRef<CSeq_feat> s_FindLongestFeature(const CSeq_loc& location, CScope& scope, CSeqFeatData::E_Choice type, CFeat_CI::EFeat_Location lt = CFeat_CI::e_Location){ CConstRef<CSeq_feat> result; TSeqPos best_length = 0; CFeat_CI it(scope, location, type, SAnnotSelector::eOverlap_Intervals, SAnnotSelector::eResolve_TSE, lt); for (; it; ++it) { if (it->GetLocation().IsWhole()) { // kludge; length only works on a Seq-loc of type "whole" // if its Seq-id points to an object manager, which may not // be the case here. result = &it->GetMappedFeature(); BREAK(it); } else if (GetLength(it->GetLocation(), &scope) > best_length) { best_length = GetLength(it->GetLocation(), &scope); result = &it->GetMappedFeature(); } } return result;}static string s_TitleFromProtein(const CBioseq_Handle& handle, CScope& scope, string& organism){ CConstRef<CProt_ref> prot; CConstRef<CSeq_loc> cds_loc; CConstRef<CGene_ref> gene; CBioseq_Handle::TBioseqCore core = handle.GetBioseqCore(); string result; CSeq_loc everywhere; everywhere.SetWhole().Assign(*core->GetId().front()); {{ CConstRef<CSeq_feat> prot_feat = s_FindLongestFeature(everywhere, scope, CSeqFeatData::e_Prot); if (prot_feat) { prot = &prot_feat->GetData().GetProt(); } }} {{ CConstRef<CSeq_feat> cds_feat = s_FindLongestFeature(everywhere, scope, CSeqFeatData::e_Cdregion, CFeat_CI::e_Product); if (cds_feat) { cds_loc = &cds_feat->GetLocation(); } }} if (cds_loc) { CConstRef<CSeq_feat> gene_feat = s_FindLongestFeature(*cds_loc, scope, CSeqFeatData::e_Gene); if (gene_feat) { gene = &gene_feat->GetData().GetGene(); } } if (prot.NotEmpty() && prot->IsSetName() && !prot->GetName().empty()) { bool first = true; ITERATE (CProt_ref::TName, it, prot->GetName()) { if (!first) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?