📄 seqport_util.hpp

📁 ncbi源码
💻 HPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * =========================================================================== * PRODUCTION $Log: seqport_util.hpp,v $ * PRODUCTION Revision 1000.2  2003/11/21 18:20:24  gouriano * PRODUCTION PRODUCTION: UPGRADED [ORIGINAL] Dev-tree R1.10 * PRODUCTION * =========================================================================== */#ifndef OBJECTS_SEQ___SEQPORT_UTIL__HPP#define OBJECTS_SEQ___SEQPORT_UTIL__HPP/*  $Id: seqport_util.hpp,v 1000.2 2003/11/21 18:20:24 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. 
* * =========================================================================== * * Author:  Clifford Clausen *          (also reviewed/fixed/groomed by Denis Vakatov and Aaron Ucko) * * File Description: */   #include <corelib/ncbi_limits.hpp>#include <objects/seq/Seq_data.hpp>#include <objects/seqcode/Seq_code_type.hpp>#include <util/random_gen.hpp>#include <memory>#include <vector>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE// CSeqportUtil is a wrapper for a hidden object of class// CSeqportUtil_implementation.class CSeqportUtil_implementation;class NCBI_SEQ_EXPORT CSeqportUtil{public:    // TypeDefs    typedef unsigned int TIndex;    typedef pair<TIndex, TIndex> TPair;    // Classes thrown as errors    struct NCBI_SEQ_EXPORT CBadIndex : public runtime_error    {        CBadIndex(TIndex idx, string method)            : runtime_error("CSeqportUtil::" + method +            " -- bad index specified: " + NStr::UIntToString(idx)) {}    };    struct NCBI_SEQ_EXPORT CBadSymbol : public runtime_error     {        CBadSymbol(string code, string method)            : runtime_error("CSeqportUtil::" + method +            " -- bad symbol specified: " + code) {}    };    struct NCBI_SEQ_EXPORT CBadType : public runtime_error     {        CBadType(string method)            : runtime_error("CSeqportUtil::" + method +            " -- specified code or code combination not supported") {}    };        // Alphabet conversion function. Function returns the    // number of converted codes.    
static TSeqPos Convert(const CSeq_data&       in_seq,                           CSeq_data*             out_seq,                           CSeq_data::E_Choice    to_code,                           TSeqPos                uBeginIdx = 0,                           TSeqPos                uLength   = 0,                           bool                   bAmbig    = false,                           CRandom::TValue        seed      = 17734276);    // Function to provide maximum in-place packing of na    // sequences without loss of information. Iupacna    // can always be packed to ncbi4na without loss. Iupacna    // can sometimes be packed to ncbi2na. Ncbi4na can    // sometimes be packed to ncbi2na. Returns number of    // residues packed. If in_seq cannot be packed, the    // original in_seq is returned unchanged and the return value    // from Pack is 0    static TSeqPos Pack(CSeq_data*   in_seq,                                TSeqPos uLength = ncbi::numeric_limits<TSeqPos>::max());    // Performs fast validation of CSeq_data. If all data in the    // sequence represent valid elements of a biological sequence, then    // FastValidate returns true. Otherwise it returns false    static bool FastValidate(const CSeq_data&   in_seq,                             TSeqPos            uBeginIdx = 0,                             TSeqPos            uLength   = 0);    // Performs validation of CSeq_data. Returns a list of indices    // corresponding to data that does not represent a valid element    // of a biological sequence.    static void Validate(const CSeq_data&   in_seq,                         vector<TSeqPos>*   badIdx,                         TSeqPos            uBeginIdx = 0,                         TSeqPos            uLength   = 0);    // Get ambiguous bases. out_indices returns    // the indices relative to in_seq of ambiguous bases.    // out_seq returns the ambiguous bases. 
Note, there are    // only ambiguous bases for iupacna->ncbi2na and    // ncbi4na->ncbi2na conversions.    static TSeqPos GetAmbigs(const CSeq_data&    in_seq,                             CSeq_data*          out_seq,                             vector<TSeqPos>*    out_indices,                             CSeq_data::E_Choice to_code = CSeq_data::e_Ncbi2na,                             TSeqPos             uBeginIdx = 0,                             TSeqPos             uLength   = 0);    // Get a copy of CSeq_data. No conversion is done. uBeginIdx of the    // biological sequence in in_seq will be in position    // 0 of out_seq. Usually, uLength bases will be copied    // from in_seq to out_seq. If uLength goes beyond the end of    // in_seq, it will be shortened to go to the end of in_seq.    // For packed sequence formats (ncbi2na and ncbi4na),    // only uLength bases are valid copies. For example,    // in an ncbi4na encoded sequence, if uLength is odd, the last    // sequence returned will be uLength+1 because 2 bases are encoded    // per byte in ncbi4na. However, in this case, uLength will be returned    // unchanged (it will remain odd unless it goes beyond the end    // of in_seq). If uLength=0, then a copy from uBeginIdx to the end    // of in_seq is returned.    static TSeqPos GetCopy(const CSeq_data&   in_seq,                           CSeq_data*         out_seq,                           TSeqPos            uBeginIdx = 0,                           TSeqPos            uLength   = 0);    // Method to keep only a contiguous piece of a sequence beginning    // at uBeginIdx and uLength residues long. Does bit shifting as    // needed to put uBeginIdx of original sequence at position zero on output.    // Similar to GetCopy(), but done in place.  Returns length of    // kept sequence.    
static TSeqPos Keep(CSeq_data*   in_seq,                        TSeqPos      uBeginIdx = 0,                        TSeqPos      uLength   = 0);    // Append in_seq2 to the end of in_seq1. Both in seqs must be    // in the same alphabet or this method will throw a runtime_error.    // The result of the append will be put into out_seq.    // For packed sequences ncbi2na and ncbi4na, Append will shift and    // append so as to remove any jaggedness at the append point.    static TSeqPos Append(CSeq_data*         out_seq,                          const CSeq_data&   in_seq1,                          TSeqPos            uBeginIdx1,                          TSeqPos            uLength1,                          const CSeq_data&   in_seq2,                          TSeqPos            uBeginIdx2,                          TSeqPos            uLength2);    // Create a biological complement of an na sequence.    // Attempts to complement an aa sequence will throw    // a runtime_error. Returns length of complemented sequence.    // Complement the input sequence in place    static TSeqPos Complement(CSeq_data*   in_seq,                              TSeqPos      uBeginIdx = 0,                              TSeqPos      uLength   = 0);    // Complement the input sequence and put the result in    // the output sequence    static TSeqPos Complement(const CSeq_data&   in_seq,                              CSeq_data*         out_seq,                              TSeqPos            uBeginIdx = 0,                              TSeqPos            uLength   = 0);    // Create a biological sequence that is the reverse
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -