utils.cpp

来自「ncbi源码」· C++ 代码 · 共 595 行 · 第 1/2 页

CPP
595
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: utils.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:45:35  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: utils.cpp,v 1000.1 2004/06/01 19:45:35 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Mati Shomrat, NCBI
*
* File Description:
*   shared utility functions
*
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objects/general/Date.hpp>
#include <objects/general/User_object.hpp>
#include <objects/general/User_field.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Delta_ext.hpp>
#include <objects/seq/Delta_seq.hpp>
#include <objects/seq/Seq_literal.hpp>
#include <objects/seq/MolInfo.hpp>
#include <objects/seq/seqport_util.hpp>
#include <objects/seqloc/Seq_loc.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/bioseq_handle.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/util/sequence.hpp>

#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Return a copy of 's' with every '~' rewritten according to 'style':
//   eTilde_tilde   - the string is returned unchanged.
//   eTilde_space   - a tilde becomes a space, unless it is directly followed
//                    by a digit, or by ' ' or '(' and then a digit, in which
//                    case the tilde is kept.
//   eTilde_newline - "~~" collapses to a single '~'; a lone '~' becomes '\n'.
//   anything else  - tildes are kept as they are.
string ExpandTildes(const string& s, ETildeStyle style)
{
    if ( style == eTilde_tilde ) {
        return s;
    }

    SIZE_TYPE start = 0, tilde, length = s.length();
    string result;
    while ( (start < length)  &&  (tilde = s.find('~', start)) != NPOS ) {
        // copy the text between the previous tilde and this one
        result.append(s, start, tilde - start);

        // character following the tilde (0 when the tilde is last);
        // unsigned char so that the <cctype> calls below are well defined
        const unsigned char next = (tilde + 1) < length ?
            static_cast<unsigned char>(s[tilde + 1]) : 0;

        switch ( style ) {
        case eTilde_space:
            if ( (tilde + 1 < length  &&  isdigit(next))  ||
                 (tilde + 2 < length  &&  (next == ' '  ||  next == '(')  &&
                  isdigit(static_cast<unsigned char>(s[tilde + 2]))) ) {
                result += '~';
            } else {
                result += ' ';
            }
            start = tilde + 1;
            break;

        case eTilde_newline:
            if ( tilde + 1 < length  &&  s[tilde + 1] == '~' ) {
                result += '~';
                start = tilde + 2;  // consume both tildes
            } else {
                result += '\n';
                start = tilde + 1;
            }
            break;

        default: // just keep it, for lack of better ideas
            result += '~';
            start = tilde + 1;
            break;
        }
    }
    // whatever follows the last tilde ('start' never exceeds 'length')
    result += s.substr(start);
    return result;
}


// Compress runs of blanks in place.
//
// Every character is copied forward; after a ' ', '\t' or '(' any directly
// following spaces and tabs are dropped.  If the character ending such a run
// is ')' or ',', the blank that was just copied is dropped as well, so
// "a , b" becomes "a, b" and "f( x )" becomes "f(x)".
//
// The loop is written out by hand rather than with NON_CONST_ITERATE because
// the body advances the read iterator itself; an additional increment from
// the macro would skip characters and could step past end().
void StripSpaces(string& str)
{
    if ( str.empty() ) {
        return;
    }

    const string::iterator end = str.end();
    string::iterator it      = str.begin();  // read position
    string::iterator new_str = it;           // write position, never ahead of 'it'

    while ( it != end ) {
        *new_str++ = *it;
        if ( (*it == ' ')  ||  (*it == '\t')  ||  (*it == '(') ) {
            // skip the blanks that follow, never reading past the end
            for ( ++it;  it != end  &&  (*it == ' '  ||  *it == '\t');  ++it ) {
                continue;
            }
            if ( it != end  &&  (*it == ')'  ||  *it == ',') ) {
                // Drop the blank written above, but never an opening
                // parenthesis: "( )" must not lose its '('.
                if ( *(new_str - 1) != '(' ) {
                    --new_str;
                }
            }
        } else {
            ++it;
        }
    }
    str.erase(new_str, str.end());
}


// Remove a single trailing '.' from 'str'.
// When 'keep_ellipsis' is true a string ending in "..." is left untouched.
// Returns true if a character was removed.
bool RemovePeriodFromEnd(string& str, bool keep_ellipsis)
{
    static const string period = ".";
    static const string ellipsis = "...";

    if ( !NStr::EndsWith(str, period) ) {
        return false;
    }
    if ( keep_ellipsis  &&  NStr::EndsWith(str, ellipsis) ) {
        return false;
    }
    str.erase(str.length() - 1);
    return true;
}


// Tell whether the text at offset 'pos' of 'str' starts a word, i.e. it is at
// the very beginning or is preceded by whitespace or punctuation.
// 'word' is not examined (see note below).
static bool s_IsWholeWord(const string& str, size_t pos, const string& word)
{
    // NB: To preserve the behavior of the C toolkit we only test on the left.
    // This was an old bug in the C toolkit that was never fixed and by now
    // has become the expected behavior.
    if ( pos == 0 ) {
        return true;
    }
    // <cctype> functions require a value representable as unsigned char
    const unsigned char prev = static_cast<unsigned char>(str[pos - 1]);
    return isspace(prev)  ||  ispunct(prev);
}


// Append 'prefix' + 'str' to 'to' unless 'str' already occurs in 'to' at a
// word start (as defined by s_IsWholeWord).
// An empty 'str' is ignored; an empty 'to' simply receives 'str' without
// the prefix.
void JoinNoRedund(string& to, const string& prefix, const string& str)
{
    if ( str.empty() ) {
        return;
    }

    if ( to.empty() ) {
        to += str;
        return;
    }

    // Look at every occurrence of 'str' until one starts on a word boundary.
    // Each miss triggers a fresh search from the next offset, so 'pos' is
    // always either NPOS or the position of a real occurrence inside 'to'.
    size_t pos = to.find(str);
    while ( pos != NPOS  &&  !s_IsWholeWord(to, pos, str) ) {
        pos = to.find(str, pos + 1);
    }

    if ( pos == NPOS ) {
        to += prefix;
        to += str;
    }
}


// Join the strings of 'l' using 'delim', skipping any element that the
// three-argument JoinNoRedund considers already present in the result.
// Returns kEmptyStr for an empty list.
string JoinNoRedund(const list<string>& l, const string& delim)
{
    if ( l.empty() ) {
        return kEmptyStr;
    }

    string result = l.front();
    list<string>::const_iterator it = l.begin();
    while ( ++it != l.end() ) {
        JoinNoRedund(result, delim, *it);
    }

    return result;
}


// Validate the correct format of an accession string.
//
// The string must be non-empty, shorter than 16 characters and start with an
// uppercase letter.  After an optional leading "NZ_" it must consist of
// letters, then underscores, then digits, and end there or be followed by a
// space or a '.'.  Accepted shapes:
//   no underscore : 1 letter  + 5 digits, 2 letters + 6 digits,
//                   3 letters + 5 digits, 4 letters + 8 digits
//   one underscore: 2 letters + 6, 8 or 9 digits, where the first two
//                   characters of the string are one of
//                   NC NG NM NR NP NW NT, XM XR XP, ZP
// Anything else is rejected.
bool ValidateAccession(const string& acc)
{
    if ( acc.empty() ) {
        return false;
    }

    if ( acc.length() >= 16 ) {
        return false;
    }

    // first character must be uppercase letter
    const unsigned char first = static_cast<unsigned char>(acc[0]);
    if ( !(isalpha(first)  &&  isupper(first)) ) {
        return false;
    }

    size_t num_alpha   = 0,
           num_undersc = 0,
           num_digits  = 0;

    const char* ptr = acc.c_str();
    if ( NStr::StartsWith(acc, "NZ_") ) {
        ptr += 3;  // the prefix does not take part in the counts below
    }
    // The terminating '\0' of c_str() stops each scan.
    for ( ; isalpha(static_cast<unsigned char>(*ptr)); ++ptr, ++num_alpha );
    for ( ; *ptr == '_'; ++ptr, ++num_undersc );
    for ( ; isdigit(static_cast<unsigned char>(*ptr)); ++ptr, ++num_digits );

    if ( (*ptr != '\0')  &&  (*ptr != ' ')  &&  (*ptr != '.') ) {
        return false;
    }

    switch ( num_undersc ) {
    case 0:
        {{
            if ( (num_alpha == 1  &&  num_digits == 5)  ||
                 (num_alpha == 2  &&  num_digits == 6)  ||
                 (num_alpha == 3  &&  num_digits == 5)  ||
                 (num_alpha == 4  &&  num_digits == 8) ) {
                return true;
            }
        }}
        break;

    case 1:
        {{
            // RefSeq accession
            if ( (num_alpha != 2)  ||
                 (num_digits != 6  &&  num_digits != 8  &&  num_digits != 9) ) {
                return false;
            }

            // NB: taken from the start of the string, also when an "NZ_"
            // prefix was skipped above.
            char first_letter = acc[0];
            char second_letter = acc[1];

            if ( first_letter == 'N' ) {
                if ( second_letter == 'C'  ||  second_letter == 'G'  ||
                     second_letter == 'M'  ||  second_letter == 'R'  ||
                     second_letter == 'P'  ||  second_letter == 'W'  ||
                     second_letter == 'T' ) {
                    return true;
                }
            } else if ( first_letter == 'X' ) {
                if ( second_letter == 'M'  ||  second_letter == 'R'  ||
                     second_letter == 'P' ) {
                    return true;
                }
            } else if ( first_letter == 'Z' ) {
                if ( second_letter == 'P' ) {
                    return true;
                }
            }
        }}
        break;

    default:
        return false;
    }

    return false;
}


// Format 'date' via CDate::GetDate, convert the text to upper case and
// append it to 'str' (existing contents of 'str' are kept).
// 'is_cit_sub' selects the second of the two format strings below.
// NOTE(review): the "%|...%" parts look like the text substituted for a
// missing day/month ("01"/"JUN" vs "??"/"???") - confirm against the
// CDate::GetDate format documentation.
void DateToString(const CDate& date, string& str,  bool is_cit_sub)
{
    static const string regular_format  = "%{%2D%|01%}-%{%3N%|JUN%}-%Y";
    static const string cit_sub_format = "%{%2D%|??%}-%{%3N%|???%}-%Y";

    const string& format = is_cit_sub ? cit_sub_format : regular_format;

    string date_str;
    date.GetDate(&date_str, format);
    NStr::ToUpper(date_str);

    str.append(date_str);
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?