utils.cpp
来自「ncbi源码」· C++ 代码 · 共 595 行 · 第 1/2 页
CPP
595 行
/*
 * ===========================================================================
 * PRODUCTION $Log: utils.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:45:35  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: utils.cpp,v 1000.1 2004/06/01 19:45:35 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Mati Shomrat, NCBI
*
* File Description:
*   shared utility functions
*
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objects/general/Date.hpp>
#include <objects/general/User_object.hpp>
#include <objects/general/User_field.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Delta_ext.hpp>
#include <objects/seq/Delta_seq.hpp>
#include <objects/seq/Seq_literal.hpp>
#include <objects/seq/MolInfo.hpp>
#include <objects/seq/seqport_util.hpp>
#include <objects/seqloc/Seq_loc.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/bioseq_handle.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/util/sequence.hpp>

#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Return a copy of 's' with every '~' rewritten according to 'style':
//   eTilde_tilde   - the string is returned unchanged;
//   eTilde_space   - '~' becomes a space, unless it is directly followed by
//                    a digit, or by ' ' or '(' and then a digit, in which
//                    case the '~' is kept;
//   eTilde_newline - "~~" becomes a single '~', a lone '~' becomes '\n';
//   anything else  - the '~' is kept as is.
string ExpandTildes(const string& s, ETildeStyle style)
{
    if ( style == eTilde_tilde ) {
        return s;
    }

    const SIZE_TYPE length = s.length();
    SIZE_TYPE start = 0;
    SIZE_TYPE tilde = NPOS;
    string result;

    while ( (start < length)  &&  (tilde = s.find('~', start)) != NPOS ) {
        // copy the text preceding the tilde verbatim
        result.append(s, start, tilde - start);

        // 'next' is 0 when the tilde is the last character.
        // The <cctype> functions require a value representable as
        // unsigned char, hence the explicit conversions below.
        const unsigned char next = (tilde + 1 < length) ?
            static_cast<unsigned char>(s[tilde + 1]) : 0;

        switch ( style ) {
        case eTilde_space:
            if ( isdigit(next)  ||
                 (tilde + 2 < length  &&
                  (next == ' '  ||  next == '(')  &&
                  isdigit(static_cast<unsigned char>(s[tilde + 2]))) ) {
                result += '~';
            } else {
                result += ' ';
            }
            start = tilde + 1;
            break;

        case eTilde_newline:
            if ( next == '~' ) {
                // escaped tilde: consume both characters
                result += '~';
                start = tilde + 2;
            } else {
                result += '\n';
                start = tilde + 1;
            }
            break;

        default:
            // just keep it, for lack of better ideas
            result += '~';
            start = tilde + 1;
            break;
        }
    }

    // whatever follows the last tilde ('start' never exceeds 'length')
    result.append(s, start, NPOS);
    return result;
}


// Compress 'str' in place:
//   - a run of spaces/tabs following a space, a tab or a '(' is dropped;
//   - a space or tab that ends up directly before ')' or ',' is dropped too.
//
// The previous implementation advanced the iterator both inside the loop
// body and in the loop header, which skipped characters and could step past
// (and dereference) str.end(); the inner whitespace scan was unbounded as
// well.  All iterator movement is now explicit and bounds-checked.
void StripSpaces(string& str)
{
    if ( str.empty() ) {
        return;
    }

    // Only existing elements are overwritten, so the iterators stay valid
    // until the final erase().
    const string::iterator end = str.end();
    string::iterator it = str.begin();
    string::iterator new_str = it;    // write position, never ahead of 'it'

    while ( it != end ) {
        *new_str++ = *it;
        if ( (*it == ' ')  ||  (*it == '\t')  ||  (*it == '(') ) {
            // skip the whitespace run that follows
            for ( ++it;
                  it != end  &&  (*it == ' '  ||  *it == '\t');
                  ++it ) {
                continue;
            }
            if ( it != end  &&  (*it == ')'  ||  *it == ',') ) {
                // Drop the blank just written, but never an opening
                // parenthesis: "( )" must not turn into ")".
                if ( *(new_str - 1) != '(' ) {
                    --new_str;
                }
            }
        } else {
            ++it;
        }
    }
    str.erase(new_str, str.end());
}


// Remove one trailing '.' from 'str'.
// When 'keep_ellipsis' is true a string ending in "..." is left untouched.
// Returns true if a character was removed.
bool RemovePeriodFromEnd(string& str, bool keep_ellipsis)
{
    static const string period = ".";
    static const string ellipsis = "...";

    if ( !NStr::EndsWith(str, period) ) {
        return false;
    }
    if ( keep_ellipsis  &&  NStr::EndsWith(str, ellipsis) ) {
        return false;
    }
    str.erase(str.length() - 1);
    return true;
}


// Tell whether the text starting at 'pos' in 'str' begins a word, i.e. it
// is at the start of the string or is preceded by whitespace/punctuation.
// 'word' is not inspected (see the note below).
static bool s_IsWholeWord(const string& str, size_t pos, const string& word)
{
    // NB: To preserve the behavior of the C toolkit we only test on the left.
    // This was an old bug in the C toolkit that was never fixed and by now
    // has become the expected behavior.
    if ( pos == 0 ) {
        return true;
    }
    // <cctype> functions are undefined for negative char values
    const unsigned char prev = static_cast<unsigned char>(str[pos - 1]);
    return isspace(prev)  ||  ispunct(prev);
}


// Append 'str' to 'to', separated by 'prefix', unless 'to' already contains
// 'str' at a word start (as defined by s_IsWholeWord).
// An empty 'str' is ignored; if 'to' is empty, 'str' is copied without the
// prefix.
void JoinNoRedund(string& to, const string& prefix, const string& str)
{
    if ( str.empty() ) {
        return;
    }

    if ( to.empty() ) {
        to += str;
        return;
    }

    // Look for an occurrence of 'str' that starts a word.  Every candidate
    // must come from an actual search: the old code merely added
    // str.length() to the position, which tested arbitrary offsets and
    // could index past the end of 'to'.
    SIZE_TYPE pos = to.find(str);
    while ( pos != NPOS  &&  !s_IsWholeWord(to, pos, str) ) {
        pos = to.find(str, pos + 1);
    }

    if ( pos == NPOS ) {
        to += prefix;
        to += str;
    }
}


// Join the elements of 'l' with 'delim', skipping elements that are already
// present in the accumulated result (see the three-argument overload).
// Returns kEmptyStr for an empty list.
string JoinNoRedund(const list<string>& l, const string& delim)
{
    if ( l.empty() ) {
        return kEmptyStr;
    }

    string result = l.front();
    list<string>::const_iterator it = l.begin();
    while ( ++it != l.end() ) {
        JoinNoRedund(result, delim, *it);
    }

    return result;
}


// Validate the correct format of an accession string.
//
// The string must be non-empty, shorter than 16 characters and start with an
// uppercase letter.  After an optional "NZ_" prefix it must consist of
// letters, then underscores, then digits, and end there or be followed by
// ' ' or '.'.  Accepted letter/digit counts:
//   no underscore  : 1+5, 2+6, 3+5 or 4+8;
//   one underscore : 2 letters and 6, 8 or 9 digits, where the first two
//                    characters of 'acc' are one of
//                    NC NG NM NR NP NW NT XM XR XP ZP.
bool ValidateAccession(const string& acc)
{
    if ( acc.empty() ) {
        return false;
    }

    if ( acc.length() >= 16 ) {
        return false;
    }

    // first character must be uppercase letter
    const unsigned char lead = static_cast<unsigned char>(acc[0]);
    if ( !(isalpha(lead)  &&  isupper(lead)) ) {
        return false;
    }

    size_t num_alpha = 0, num_undersc = 0, num_digits = 0;

    const char* ptr = acc.c_str();
    if ( NStr::StartsWith(acc, "NZ_") ) {
        ptr += 3;
    }
    // c_str() is NUL-terminated, and NUL fails every test below, so the
    // scans cannot run off the end of the buffer.
    for ( ; isalpha(static_cast<unsigned char>(*ptr)); ++ptr, ++num_alpha );
    for ( ; *ptr == '_'; ++ptr, ++num_undersc );
    for ( ; isdigit(static_cast<unsigned char>(*ptr)); ++ptr, ++num_digits );

    if ( (*ptr != '\0')  &&  (*ptr != ' ')  &&  (*ptr != '.') ) {
        return false;
    }

    switch ( num_undersc ) {
    case 0:
        return (num_alpha == 1  &&  num_digits == 5)  ||
               (num_alpha == 2  &&  num_digits == 6)  ||
               (num_alpha == 3  &&  num_digits == 5)  ||
               (num_alpha == 4  &&  num_digits == 8);

    case 1:
        {{
            // RefSeq accession
            if ( (num_alpha != 2)  ||
                 (num_digits != 6  &&  num_digits != 8  &&  num_digits != 9) ) {
                return false;
            }

            const char first_letter  = acc[0];
            const char second_letter = acc[1];

            switch ( first_letter ) {
            case 'N':
                return second_letter == 'C'  ||  second_letter == 'G'  ||
                       second_letter == 'M'  ||  second_letter == 'R'  ||
                       second_letter == 'P'  ||  second_letter == 'W'  ||
                       second_letter == 'T';
            case 'X':
                return second_letter == 'M'  ||  second_letter == 'R'  ||
                       second_letter == 'P';
            case 'Z':
                return second_letter == 'P';
            default:
                return false;
            }
        }}

    default:
        return false;
    }
}


// Format 'date' via CDate::GetDate(), convert the text to upper case and
// append it to 'str'.
// 'is_cit_sub' selects the format string; the two differ only in the
// literals embedded after "%|" ("01"/"JUN" versus "??"/"???").
// NOTE(review): those literals look like fallbacks for a missing day/month;
// confirm against the CDate::GetDate() documentation.
void DateToString(const CDate& date, string& str, bool is_cit_sub)
{
    static const string regular_format = "%{%2D%|01%}-%{%3N%|JUN%}-%Y";
    static const string cit_sub_format = "%{%2D%|??%}-%{%3N%|???%}-%Y";

    const string& format = is_cit_sub ? cit_sub_format : regular_format;

    string date_str;
    date.GetDate(&date_str, format);
    NStr::ToUpper(date_str);

    str.append(date_str);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?