📄 sequtil_convert_imp.cpp
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: sequtil_convert_imp.cpp,v $
 * PRODUCTION Revision 1000.1 2004/06/01 19:42:17 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.2
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: sequtil_convert_imp.cpp,v 1000.1 2004/06/01 19:42:17 gouriano Exp $
 * ===========================================================================
 *
 * PUBLIC DOMAIN NOTICE
 * National Center for Biotechnology Information
 *
 * This software/database is a "United States Government Work" under the
 * terms of the United States Copyright Act. It was written as part of
 * the author's official duties as a United States Government employee and
 * thus cannot be copyrighted. This software/database is freely available
 * to the public for use. The National Library of Medicine and the U.S.
 * Government have not placed any restriction on its use or reproduction.
 *
 * Although all reasonable efforts have been taken to ensure the accuracy
 * and reliability of the software and data, the NLM and the U.S.
 * Government do not and cannot warrant the performance or results that
 * may be obtained by using this software or data. The NLM and the U.S.
 * Government disclaim all warranties, express or implied, including
 * warranties of performance, merchantability or fitness for any particular
 * purpose.
 *
 * Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Mati Shomrat
 *
 * File Description:
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <util/sequtil/sequtil_expt.hpp>
#include "sequtil_convert_imp.hpp"
#include "sequtil_shared.hpp"
#include "sequtil_tables.hpp"


BEGIN_NCBI_SCOPE


/////////////////////////////////////////////////////////////////////////////
//
// Conversions

// NB: We try to use conversion tables wherever possible, minimizing bit
// shifting or any other operation within the main conversion loop.

// All conversion functions take the following parameters:
// src - input sequence
// pos - starting position in sequence coordinates
// length - number of residues to convert
// dst - an output container


// Convert `length` residues of `src`, starting at residue `pos`, from
// `src_coding` to `dst_coding`, writing the result to `dst`.
//
// Behaviour visible in this function:
//  - `src` and `dst` must be non-null and both codings must be of the same
//    coding type (checked with _ASSERT).
//  - a zero `length` returns 0 without touching `dst`.
//  - identical codings are delegated to Subseq().
//  - every other supported (source, destination) pair is delegated to the
//    matching x_Convert*() helper and its return value is passed through.
//  - any pair not listed below throws CSeqUtilException (eInvalidCoding).
SIZE_TYPE CSeqConvert_imp::Convert
(const char* src,
 TCoding src_coding,
 TSeqPos pos,
 TSeqPos length,
 char* dst,
 TCoding dst_coding)
{
    _ASSERT((dst != 0) && (src != 0));
    _ASSERT(CSeqUtil::GetCodingType(src_coding) ==
            CSeqUtil::GetCodingType(dst_coding));

    if ( length == 0 ) {
        return 0;
    }

    // conversion from a coding to itself.
    if ( src_coding == dst_coding ) {
        return Subseq(src, src_coding, pos, length, dst);
    }

    // all other conversions
    // (every `default:` below simply leaves the switch so that control
    //  reaches the throw at the end of the function)
    switch ( src_coding ) {

    // --- NA conversions

    // iupacna -> ...
    case CSeqUtil::e_Iupacna:
        switch ( dst_coding ) {
        case CSeqUtil::e_Ncbi2na:
            return x_ConvertIupacnaTo2na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi2na_expand:
            return x_ConvertIupacnaTo2naExpand(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na:
            return x_ConvertIupacnaTo4na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na_expand:
        case CSeqUtil::e_Ncbi8na:
            return x_ConvertIupacnaTo8na(src, pos, length, dst);
        default:
            break;
        }
        break;

    // ncbi2na -> ...
    case CSeqUtil::e_Ncbi2na:
        switch ( dst_coding ) {
        case CSeqUtil::e_Iupacna:
            return x_Convert2naToIupacna(src, pos, length, dst);
        case CSeqUtil::e_Ncbi2na_expand:
            return x_Convert2naTo2naExpand(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na:
            return x_Convert2naTo4na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na_expand:
        case CSeqUtil::e_Ncbi8na:
            return x_Convert2naTo8na(src, pos, length, dst);
        default:
            break;
        }
        break;

    // ncbi2na_expand -> ...
    case CSeqUtil::e_Ncbi2na_expand:
        switch ( dst_coding ) {
        case CSeqUtil::e_Iupacna:
            return x_Convert2naExpandToIupacna(src, pos, length, dst);
        case CSeqUtil::e_Ncbi2na:
            return x_Convert2naExpandTo2na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na:
            return x_Convert2naExpandTo4na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na_expand:
        case CSeqUtil::e_Ncbi8na:
            return x_Convert2naExpandTo8na(src, pos, length, dst);
        default:
            break;
        }
        break;

    // ncbi4na -> ...
    case CSeqUtil::e_Ncbi4na:
        switch ( dst_coding ) {
        case CSeqUtil::e_Iupacna:
            return x_Convert4naToIupacna(src, pos, length, dst);
        case CSeqUtil::e_Ncbi2na:
            return x_Convert4naTo2na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi2na_expand:
            return x_Convert4naTo2naExpand(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na_expand:
        case CSeqUtil::e_Ncbi8na:
            return x_Convert4naTo8na(src, pos, length, dst);
        default:
            break;
        }
        break;

    // ncbi8na / ncbi4na_expand -> ...
    case CSeqUtil::e_Ncbi8na:
    case CSeqUtil::e_Ncbi4na_expand:
        switch ( dst_coding ) {
        case CSeqUtil::e_Iupacna:
            return x_Convert8naToIupacna(src, pos, length, dst);
        case CSeqUtil::e_Ncbi2na:
            return x_Convert8naTo2na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi2na_expand:
            return x_Convert8naTo2naExpand(src, pos, length, dst);
        case CSeqUtil::e_Ncbi4na:
            return x_Convert8naTo4na(src, pos, length, dst);
        case CSeqUtil::e_Ncbi8na:
        case CSeqUtil::e_Ncbi4na_expand:
            return Subseq(src, src_coding, pos, length, dst);
        default:
            break;
        }
        break;

    // --- AA conversions
    // NB: currently ncbi8aa is the same as ncbistdaa.

    // iupacaa -> ...
    case CSeqUtil::e_Iupacaa:
        switch ( dst_coding ) {
        case CSeqUtil::e_Ncbieaa:
            return x_ConvertIupacaaToEaa(src, pos, length, dst);
        case CSeqUtil::e_Ncbistdaa:
        case CSeqUtil::e_Ncbi8aa:
            return x_ConvertIupacaaToStdaa(src, pos, length, dst);
        default:
            break;
        }
        break;

    // ncbieaa -> ...
    case CSeqUtil::e_Ncbieaa:
        switch ( dst_coding ) {
        case CSeqUtil::e_Iupacaa:
            return x_ConvertEaaToIupacaa(src, pos, length, dst);
        case CSeqUtil::e_Ncbistdaa:
        case CSeqUtil::e_Ncbi8aa:
            return x_ConvertEaaToStdaa(src, pos, length, dst);
        default:
            break;
        }
        break;

    // ncbistdaa / ncbi8aa -> ...
    case CSeqUtil::e_Ncbi8aa:
    case CSeqUtil::e_Ncbistdaa:
        switch ( dst_coding ) {
        case CSeqUtil::e_Ncbieaa:
            return x_ConvertStdaaToEaa(src, pos, length, dst);
        case CSeqUtil::e_Iupacaa:
            return x_ConvertStdaaToIupacaa(src, pos, length, dst);
        case CSeqUtil::e_Ncbi8aa:
        case CSeqUtil::e_Ncbistdaa:
            return Subseq(src, src_coding, pos, length, dst);
        default:
            break;
        }
        break;

    default:
        break;
    }

    // We should never reach here
    NCBI_THROW(CSeqUtilException, eInvalidCoding, "Unknown conversion.");
}


// --- NA conversions:

// from IUPACna to ...
//===========================================================================

// IUPACna -> IUPACna
// This is not a simple copy since we handle conversion of lower to upper
// case and conversion of 'U'('u') to 'T'
SIZE_TYPE CSeqConvert_imp::x_ConvertIupacnaToIupacna
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // call the shared implementation for converting 1 byte to another byte
    // given a specific conversion table.
    // the iupacna to iupacna table converts upper and lower case to upper case
    // and U (u) to T
    return convert_1_to_1(src, pos, length, dst,
                          CIupacnaToIupacna::GetTable());
}


// IUPACna -> NCBI2na
// convert 4 IUPACna characters into a single NCBI2na byte
//
// Reads `length` characters starting at src[pos] and ORs the table entries
// of every group of 4 characters into one byte of `dst`; a final partial
// group (length % 4 characters) is packed into one additional byte.
// Returns `length`.
SIZE_TYPE CSeqConvert_imp::x_ConvertIupacnaTo2na
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // The iupacna to ncbi2na table is constructed such that each row
    // corresponds to an iupacna letter and each column corresponds to
    // that letter being in one of the 4 possible offsets within the
    // ncbi2na byte
    const Uint1* table = CIupacnaTo2na::GetTable();

    const char* src_i = src + pos;

    // Every character is converted to Uint1 before it is used as a row
    // index: plain `char` may be signed, in which case a byte >= 0x80 would
    // otherwise yield a negative index and read in front of the table.
    // The overhang loop below has always indexed this way.
    // NOTE(review): this presumes the table has a row for each of the 256
    // byte values, as the overhang indexing already does - confirm against
    // sequtil_tables.
    for ( size_t count = length / 4; count; --count ) {
        *dst = table[static_cast<Uint1>(*src_i) * 4] |
               table[static_cast<Uint1>(*(src_i + 1)) * 4 + 1] |
               table[static_cast<Uint1>(*(src_i + 2)) * 4 + 2] |
               table[static_cast<Uint1>(*(src_i + 3)) * 4 + 3];
        src_i += 4;
        ++dst;
    }

    // Handle overhang
    if ( length % 4 != 0 ) {
        *dst = 0x0;
        for( size_t i = 0; i < (length % 4); ++i, ++src_i ) {
            *dst |= table[static_cast<Uint1>(*src_i) * 4 + i];
        }
    }

    return length;
}


// IUPACna -> NCBI2na_expand
// convert a single IUPACna character into a single NCBI2na_expand byte.
SIZE_TYPE CSeqConvert_imp::x_ConvertIupacnaTo2naExpand
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // call the shared implementation for converting 1 byte to another byte
    // given a specific conversion table.
    // the iupacna to ncbi2na_expand table converts upper and lower case IUPACna
    // into a single ncbi2na_expand byte.
    return convert_1_to_1(src, pos, length, dst,
                          CIupacnaTo2naExpand::GetTable());
}


// IUPACna -> NCBI4na
// convert 2 IUPACna characters into a single NCBI4na byte
//
// Reads `length` characters starting at src[pos] and ORs the table entries
// of every pair of characters into one byte of `dst`; an odd trailing
// character is written to one additional byte using column 0 of its row.
// Returns `length`.
SIZE_TYPE CSeqConvert_imp::x_ConvertIupacnaTo4na
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // The iupacna to ncbi4na table is constructed such that each row
    // corresponds to an iupacna letter and each column corresponds to
    // that letter being in one of the 2 possible offsets within the
    // ncbi4na byte
    const Uint1* table = CIupacnaTo4na::GetTable();

    const char* src_i = src + pos;

    // Index rows through Uint1 so that a byte >= 0x80 cannot become a
    // negative index where plain `char` is signed (the overhang case below
    // has always indexed this way).
    for ( size_t count = length / 2; count; --count ) {
        *dst = table[static_cast<Uint1>(*src_i) * 2] |
               table[static_cast<Uint1>(*(src_i + 1)) * 2 + 1];
        src_i += 2;
        ++dst;
    }

    // handle overhang
    if ( length % 2 != 0 ) {
        *dst = table[static_cast<Uint1>(*src_i) * 2];
    }

    return length;
}


// IUPACna -> NCBI8na (NCBI4na_expand)
// convert a single IUPACna character into a single NCBI8na byte.
SIZE_TYPE CSeqConvert_imp::x_ConvertIupacnaTo8na
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    // call the shared implementation for converting 1 byte to another byte
    // given a specific conversion table.
    // the iupacna to ncbi8na table converts upper and lower case IUPACna
    // into a single ncbi8na byte (which is the same as ncbi4na_expand)
    return convert_1_to_1(src, pos, length, dst,
                          CIupacnaTo8na::GetTable());
}


// from NCBI2na to ...
//===========================================================================

// NCBI2na -> IUPACna
// convert a NCBI2na byte into 4 IUPACna characters.
SIZE_TYPE CSeqConvert_imp::x_Convert2naToIupacna
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    return convert_1_to_4(src, pos, length, dst,
                          C2naToIupacna::GetTable());
}


// NCBI2na -> NCBI2na_expand
// convert a NCBI2na byte into 4 NCBI2na_expand characters.
SIZE_TYPE CSeqConvert_imp::x_Convert2naTo2naExpand
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst)
{
    return convert_1_to_4(src, pos, length, dst,
                          C2naTo2naExpand::GetTable());
}


// NCBI2na -> NCBI4na
// convert a NCBI2na byte into 2 NCBI4na bytes.
SIZE_TYPE CSeqConvert_imp::x_Convert2naTo4na(const char* src, TSeqPos pos,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -