📄 sequtil_manip.cpp
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: sequtil_manip.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:42:20  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.3
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: sequtil_manip.cpp,v 1000.1 2004/06/01 19:42:20 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Mati Shomrat
 *
 * File Description:
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <corelib/ncbistr.hpp>
#include <vector>
#include <algorithm>

#include <util/sequtil/sequtil.hpp>
#include <util/sequtil/sequtil_expt.hpp>
#include <util/sequtil/sequtil_manip.hpp>
#include <util/sequtil/sequtil_convert.hpp>

#include "sequtil_shared.hpp"
#include "sequtil_tables.hpp"


BEGIN_NCBI_SCOPE


/////////////////////////////////////////////////////////////////////////////
//
// Reverse

// When reversing a sequence the packed formats ncbi2na and ncbi4na
// get "special" treatment, since the requested interval might not
// fall on a byte boundary.
// Other formats perform a simple conversion on the sequence. Note that
// if the original sequence is erroneous (e.g. lower case) the reverse
// isn't "fixed".


// Reverse `length` residues of a packed ncbi2na sequence (4 residues per
// byte), starting at residue index `pos` of `src`, writing the packed
// result to `dst`.  Returns `length`.
//
// The per-byte work is done through lookup tables obtained from
// C2naReverse::GetTable(offset), where `offset` is the position of the
// last requested residue inside its byte:
//   - offset == 3: the interval ends on a byte boundary, so each source
//     byte maps to exactly one output byte (one table entry per byte).
//   - otherwise: every output byte is assembled from two adjacent source
//     bytes using the entries at index 2*b and 2*b + 1.
// Unused trailing bits of the last output byte are cleared.
static SIZE_TYPE s_2naReverse
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // Nothing to do; also keeps `pos + length - 1` below from wrapping.
    if ( length == 0 ) {
        return 0;
    }

    const char* begin = src + (pos / 4);
    const char* end   = src + ((pos + length - 1) / 4) + 1;
    const char* iter  = end;

    const size_t  offset   = (pos + length - 1) % 4;
    const TSeqPos overhang = length % 4;  // residues in a partial last byte
    const Uint1*  table    = C2naReverse::GetTable(offset);

    if ( offset == 3 ) {  // byte boundary when viewed from the end
        for ( ; iter != begin; ++dst ) {
            *dst = table[static_cast<Uint1>(*--iter)];
        }
        // step back onto the last byte written
        --dst;
    } else {
        --iter;
        for ( size_t count = length / 4; count; --count, ++dst ) {
            *dst = table[static_cast<Uint1>(*iter) * 2 + 1] |
                   table[static_cast<Uint1>(*(iter - 1)) * 2];
            --iter;
        }

        // handle the overhang
        if ( overhang != 0 ) {
            *dst = table[static_cast<Uint1>(*iter) * 2 + 1];
            if ( iter != begin ) {
                --iter;
                *dst |= table[static_cast<Uint1>(*iter) * 2];
            }
        }
    }

    // Clear the unused low bits of a partial last byte.  With no overhang
    // there is nothing to clear, and in the unaligned branch `dst` would
    // point one past the output, so the byte must not be touched.
    if ( overhang != 0 ) {
        *dst &= (0xFF << ((4 - overhang) * 2));
    }

    return length;
}


// Reverse `length` residues of a packed ncbi4na sequence (2 residues per
// byte), starting at residue index `pos` of `src`, writing the packed
// result to `dst`.  Returns `length`.
//
// If the last requested residue occupies the second half of its byte the
// interval ends on a byte boundary and whole bytes are translated through
// C4naReverse::GetTable(); otherwise each output byte is stitched together
// from the high nibble of one source byte and the low nibble of the
// preceding one.
static SIZE_TYPE s_4naReverse
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // Nothing to do; also keeps `pos + length - 1` below from wrapping.
    if ( length == 0 ) {
        return 0;
    }

    const size_t start_offset = (pos + length - 1) % 2;
    const Uint1* table = C4naReverse::GetTable();

    const char* begin = src + (pos / 2);
    const char* end   = src + ((pos + length - 1) / 2) + 1;
    const char* iter  = end;

    switch ( start_offset ) {
    case 1:  // byte boundary
        {{
            for ( ; iter != begin; ++dst ) {
                *dst = table[static_cast<Uint1>(*--iter)];
            }
            // step back onto the last byte written
            --dst;
            if ( length % 2 != 0 ) {
                // only the high nibble of the last byte is meaningful
                *dst &= 0xF0;
            }
        }}
        break;

    case 0:
        {{
            for ( size_t count = length / 2; count; --count, ++dst ) {
                --iter;
                *dst = (static_cast<Uint1>(*iter) & 0xF0) |
                       (static_cast<Uint1>(*(iter - 1)) & 0x0F);
            }

            if ( length % 2 != 0 ) {
                --iter;
                *dst = static_cast<Uint1>(*iter) & 0xF0;
            }
        }}
        break;
    }

    return length;
}


// Container front end shared by the string and vector<char> overloads of
// CSeqManip::Reverse.  Returns 0 for an empty source or a zero length;
// otherwise adjusts `length` via AdjustLength(), sizes `dst` via
// ResizeDst() and forwards to the raw-pointer overload.
template <typename SrcCont, typename DstCont>
SIZE_TYPE s_Reverse
(const SrcCont& src,
 CSeqUtil::TCoding src_coding,
 TSeqPos pos,
 TSeqPos length,
 DstCont& dst)
{
    _ASSERT(!OutOfRange(pos, src, src_coding));

    if ( src.empty()  ||  (length == 0) ) {
        return 0;
    }

    AdjustLength(src, src_coding, pos, length);
    ResizeDst(dst, src_coding, length);

    return CSeqManip::Reverse(&*src.begin(), src_coding, pos, length,
                              &*dst.begin());
}


// Reverse a sub-sequence held in a string.
SIZE_TYPE CSeqManip::Reverse
(const string& src,
 TCoding src_coding,
 TSeqPos pos,
 TSeqPos length,
 string& dst)
{
    // call the templated version
    return s_Reverse(src, src_coding, pos, length, dst);
}


// Reverse a sub-sequence held in a vector<char>.
SIZE_TYPE CSeqManip::Reverse
(const vector<char>& src,
 TCoding coding,
 TSeqPos pos,
 TSeqPos length,
 vector<char>& dst)
{
    // call the templated version
    return s_Reverse(src, coding, pos, length, dst);
}


// Reverse `length` residues of `src`, starting at residue `pos`, into the
// caller-supplied buffer `dst`.  Both pointers must be non-null (asserted).
// The packed codings ncbi2na and ncbi4na are delegated to dedicated
// helpers; every other coding is treated as one residue per char and
// reversed with reverse_copy.  Returns `length`.
SIZE_TYPE CSeqManip::Reverse
(const char* src,
 TCoding src_coding,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    _ASSERT((dst != 0)  &&  (src != 0));

    switch ( src_coding ) {
    // "special" treatment
    case CSeqUtil::e_Ncbi2na:
        return s_2naReverse(src, pos, length, dst);

    case CSeqUtil::e_Ncbi4na:
        return s_4naReverse(src, pos, length, dst);

    default:
        break;
    }

    // a simple reverse
    reverse_copy(src + pos, src + pos + length, dst);
    return length;
}


/////////////////////////////////////////////////////////////////////////////
//
// Complement

// Container front end shared by the string and vector<char> overloads of
// CSeqManip::Complement.  Returns 0 for an empty source or a zero length;
// otherwise adjusts `length` via AdjustLength(), sizes `dst` via
// ResizeDst() and forwards to the raw-pointer overload.
template <typename SrcCont, typename DstCont>
SIZE_TYPE s_Complement
(const SrcCont& src,
 CSeqUtil::TCoding src_coding,
 TSeqPos pos,
 TSeqPos length,
 DstCont& dst)
{
    _ASSERT(!OutOfRange(pos, src, src_coding));

    if ( src.empty()  ||  (length == 0) ) {
        return 0;
    }

    AdjustLength(src, src_coding, pos, length);
    ResizeDst(dst, src_coding, length);

    return CSeqManip::Complement(&*src.begin(), src_coding, pos, length,
                                 &*dst.begin());
}


// Complement a sub-sequence held in a string.
SIZE_TYPE CSeqManip::Complement
(const string& src,
 TCoding coding,
 TSeqPos pos,
 TSeqPos length,
 string& dst)
{
    // call the templated version
    return s_Complement(src, coding, pos, length, dst);
}


// Complement a sub-sequence held in a vector<char>.
SIZE_TYPE CSeqManip::Complement
(const vector<char>& src,
 TCoding coding,
 TSeqPos pos,
 TSeqPos length,
 vector<char>& dst)
{
    // call the templated version
    return s_Complement(src, coding, pos, length, dst);
}


// Complement `length` residues of a packed ncbi2na sequence (4 residues
// per byte), starting at residue index `pos` of `src`, writing the packed
// result to `dst`.  Returns `length`.
//
// When `pos` is byte aligned the complement is the bitwise NOT of each
// source byte.  Otherwise each output byte is assembled from two adjacent
// source bytes via the table returned by C2naCmp::GetTable(pos % 4)
// (entries at index 2*b and 2*b + 1).
// Unused trailing bits of the last output byte are cleared.
static SIZE_TYPE s_Ncbi2naComplement
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // Nothing to do; also keeps `pos + length - 1` below from wrapping.
    if ( length == 0 ) {
        return 0;
    }

    const char* iter = src + (pos / 4);
    const char* end  = src + ((pos + length - 1) / 4) + 1;
    const TSeqPos overhang = length % 4;  // residues in a partial last byte

    if ( pos % 4 == 0 ) {
        for ( ; iter != end; ++iter, ++dst ) {
            *dst = ~(*iter);
        }
        if ( overhang != 0 ) {
            // step back onto the partial last byte so it can be masked
            --dst;
        }
    } else {
        const Uint1* table = C2naCmp::GetTable(pos % 4);
        for ( size_t count = length / 4; count; --count, ++dst, ++iter ) {
            *dst = table[static_cast<Uint1>(*iter) * 2] |
                   table[static_cast<Uint1>(*(iter + 1)) * 2 + 1];
        }

        // handle the overhang
        if ( overhang != 0 ) {
            *dst = table[static_cast<Uint1>(*iter) * 2];
            if ( ++iter != end ) {
                *dst |= table[static_cast<Uint1>(*iter) * 2 + 1];
            }
        }
    }

    // Clear the unused low bits of a partial last byte.  Without an
    // overhang `dst` points one past the output and must not be touched.
    if ( overhang != 0 ) {
        *dst &= (0xFF << ((4 - overhang) * 2));
    }

    return length;
}


// Complement `length` residues of an expanded ncbi2na sequence (one
// residue per char), starting at index `pos`: each output value is
// 3 minus the input value.  Returns `length`.
static SIZE_TYPE s_Ncbi2naExpandComplement
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    const char* end  = src + pos + length;
    const char* iter = src + pos;

    for ( ; iter != end; ++iter, ++dst ) {
        *dst = 3 - static_cast<Uint1>(*iter);
    }

    return length;
}


// Complement `length` residues of a packed ncbi4na sequence (2 residues
// per byte), starting at residue index `pos` of `src`, writing the packed
// result to `dst`.  Returns `length`.
//
// The table from C4naCmp::GetTable(pos % 2) is indexed by byte value when
// `pos` is byte aligned, and by 2*b / 2*b + 1 (combining two adjacent
// source bytes per output byte) when it is not.
static SIZE_TYPE s_Ncbi4naComplement
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // Nothing to do; also keeps `pos + length - 1` below from wrapping.
    if ( length == 0 ) {
        return 0;
    }

    const char* iter = src + (pos / 2);
    const char* end  = src + (pos + length - 1) / 2 + 1;

    const Uint1* table = C4naCmp::GetTable(pos % 2);

    switch ( pos % 2 ) {
    case 0:
        {{
            for ( ; iter != end; ++iter, ++dst ) {
                *dst = table[static_cast<Uint1>(*iter)];
            }

            if ( length % 2 != 0 ) {
                // `dst` is now one past the last byte written; the byte
                // holding the single trailing residue is dst[-1].
                *(dst - 1) &= 0xF0;
            }
        }}
        break;

    case 1:
        {{
            for ( size_t count = length / 2; count; --count, ++iter, ++dst ) {
                *dst = table[static_cast<Uint1>(*iter) * 2] |
                       table[static_cast<Uint1>(*(iter + 1)) * 2 + 1];
            }

            if ( length % 2 != 0 ) {
                *dst = table[static_cast<Uint1>(*iter) * 2];
            }
        }}
        break;
    }

    return length;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -