📄 sequtil_convert_imp.cpp
字号:
TSeqPos length, char* dst){ const Uint1* table = C2naTo4na::GetTable(pos % 2 == 0); const Uint2* table2 = reinterpret_cast<const Uint2*>(table); const char* iter = src + (pos / 4); size_t size = length; // NB: branch within the inner loop as devastating consequences // on performance. // we handle two different cases, the first being offsets 0 and 2. // the second are offsets 1 and 3. // for offsets 0,2 we use a 2 column table, where the first column // corresponds to the lower 4 bits of the ncbi2na coding (entry 0) // and the second column corresponds to the upper 4 bits (entry 1). // in this case once we set the initial entry progress is the same. // the overhang for this case is either 0 or 1. // the 1\3 offset is a more complex one. for it we use a 3 column table. // the first column corresponds to the lower 2 bits of the ncbi2na // coding, the second corresponds to the middle 4 bits and the third // correspond to the upper 2 bits. // we handle all cases as offset 3. for offset one we simply handle // the first 4 bits, which will being us to offset 3. // as handling the middle 4 bits or the combination of the 2 lower // ones and 2 upper ones are done differently, we handle 4 letters (8 bits) // at a time, in oredr to prevent branching withing the inner loop. // overhang for this case is 1, 2 or 3. switch ( pos % 4 ) { // --- table entry size for offsets 0,2 is 2 case 2: {{ *dst = table[static_cast<Uint1>(*iter) * 2 + 1]; if ( length == 1 ) { *dst &= 0xf0; return length; } size -= 2; ++iter; ++dst; }} // intentional fall through case 0: {{ // "trick" the compiler so that each assignment will // be of 2 bytes. Uint2* dst2 = reinterpret_cast<Uint2*>(dst); for ( size_t i = size / 4; i; --i , ++dst2, ++iter ) { *dst2 = table2[static_cast<Uint1>(*iter)]; } dst = reinterpret_cast<char*>(dst2); }} // handle overhang if ( (size % 4) != 0 ) { switch ( size % 4 ) { case 1: *dst = table[static_cast<Uint1>(*iter) * 2] & 0xf0; break; case 2: *dst = table[static_cast<Uint1>(*iter) * 2]; break; case 3: *dst = table[static_cast<Uint1>(*iter) * 2]; ++dst; *dst = table[static_cast<Uint1>(*iter) * 2 + 1] & 0xf0; break; } } break; // --- table entry size for offsets 1,3 is 3 case 3: {{ if ( length == 1 ) { *dst = table[static_cast<Uint1>(*iter) * 3 + 2]; return length; } else { *dst = table[static_cast<Uint1>(*iter) * 3 + 2] | table[static_cast<Uint1>(*(iter + 1)) * 3]; ++dst; ++iter; size -= 2; } }} // intentional fall through case 1: {{ for ( size_t i = size / 4; i; --i, ++iter ) { *dst = table[static_cast<Uint1>(*iter) * 3 + 1]; ++dst; *dst = table[static_cast<Uint1>(*iter) * 3 + 2] | table[static_cast<Uint1>(*(iter + 1)) * 3]; ++dst; } }} // handle overhang if ( size % 4 != 0 ) { switch ( size % 4 ) { case 1: *dst = table[static_cast<Uint1>(*iter) * 3 + 1] & 0xF0; break; case 2: *dst = table[static_cast<Uint1>(*iter) * 3 + 1]; break; case 3: *dst = table[static_cast<Uint1>(*iter) * 3 + 1]; ++dst; *dst = table[static_cast<Uint1>(*iter) * 3 + 2]; break; } } break; } // end of switch ( offset ) return length;} // NCBI2na -> NCBI8na (NCBI4na_expand)// convert a ncbi2na byte into 4 ncbi4na bytes.SIZE_TYPE CSeqConvert_imp::x_Convert2naTo8na(const char* src, TSeqPos pos, TSeqPos length, char* dst){ return convert_1_to_4(src, pos, length, dst, C2naTo8na::GetTable());}// from NCBI2na_expand to ...//===========================================================================// NCBI2na_expand -> IUPACna// convert a single NCBI2na_expand byte into a single IUPACna byte.SIZE_TYPE CSeqConvert_imp::x_Convert2naExpandToIupacna(const char* src, TSeqPos pos, TSeqPos length, char* dst){ return convert_1_to_1(src, pos, length, dst, C2naExpandToIupacna::GetTable());}// NCBI2na_expand -> NCBI2na// convert 4 NCBI2na_expand bytes to a single NCBI2na one.SIZE_TYPE CSeqConvert_imp::x_Convert2naExpandTo2na(const char* src, TSeqPos pos, TSeqPos length, char* dst){ const char* iter = src + pos; // main loop. pack 4 ncbi2na_expand bytes into a single bye and add it // to the output container for ( size_t i = length / 4; i; --i, ++dst ) { *dst = (*iter << 6) | (*(iter + 1) << 4) | (*(iter + 2) << 2) | (*(iter + 3)); iter += 4; } switch ( length % 4 ) { case 1: *dst = (*iter << 6); break; case 2: *dst = (*iter << 6) | (*(iter + 1) << 4); break; case 3: *dst = (*iter << 6) | (*(iter + 1) << 4) | (*(iter + 2) << 2); break; } return length;}// NCBI2na_expand -> NCBI4na// convert 2 NCBI2na_expand bytes into a single NCBI4na byte.SIZE_TYPE CSeqConvert_imp::x_Convert2naExpandTo4na(const char* src, TSeqPos pos, TSeqPos length, char* dst){ // A simple conversion table that converts a ncbi2na_expand byte // into a "half" ncbi4na byte, based on the position of the // ncbi2na_expand byte within the ncbi4na byte // positions 0 and 1 corresponds to the lower and upper 4 bits in // a ncbi4na byte respectively. static Uint1 table[8] = { // 0 1 0x10, 0x01, // A 0x20, 0x02, // C 0x40, 0x04, // G 0x80, 0x08 // T }; const char* iter = src + pos; for ( size_t i = length / 2; i; --i, ++dst ) { *dst = table[*iter * 2] | table[*(iter + 1) * 2 + 1]; iter += 2; } if ( length % 2 != 0 ) { // == 1 *dst = table[*iter * 2]; } return length;}// NCBI2na_expand -> NCBI8na (NCBI4na_expand)// convert a single NCBI2na_expand byte into a single NCBI8na one.SIZE_TYPE CSeqConvert_imp::x_Convert2naExpandTo8na(const char* src, TSeqPos pos, TSeqPos length, char* dst){ // simple static Uint1 table[4] = { 0x01, // A 0 -> 1 0x02, // C 1 -> 2 0x04, // G 2 -> 4 0x08 // T 3 -> 8 }; return convert_1_to_1(src, pos, length, dst, table);}// from NCBI4na to ...//===========================================================================// NCBI4na -> IUPACna// convert a NCBI4na byte into 2 IUPACna characters.SIZE_TYPE CSeqConvert_imp::x_Convert4naToIupacna(const char* src, TSeqPos pos, TSeqPos length, char* dst){ return convert_1_to_2(src, pos, length, dst, C4naToIupacna::GetTable());}// NCBI4na -> NCBI2na// convert 2 NCBI4na bytes into a NCBI2na byte.SIZE_TYPE CSeqConvert_imp::x_Convert4naTo2na(const char* src, TSeqPos pos, TSeqPos length, char* dst){ Uint1 offset = pos % 2; const Uint1* table = C4naTo2na::GetTable(offset); size_t overhang = length % 4; const char* iter = src + (pos / 2); switch ( offset ) { case 0: // aligned copy {{ for ( size_t i = length / 4; i; --i, ++dst ) { *dst = table[static_cast<Uint1>(*iter) * 2] | table[static_cast<Uint1>(*(iter + 1)) * 2 + 1]; iter += 2; } // handle overhang if ( overhang != 0 ) { switch ( overhang ) { case 1: // leave just the 2 lower bits *dst = (table[static_cast<Uint1>(*iter) * 2]) & 0xC0; break; case 2: // leave just the 4 lower bits *dst = (table[static_cast<Uint1>(*iter) * 2]) & 0xF0; break; case 3: *dst = table[static_cast<Uint1>(*iter) * 2] | table[static_cast<Uint1>(*(iter + 1)) * 2 + 1] & 0xFC; break; } } }} break; case 1: // unaligned copy {{ for ( size_t i = length / 4; i; --i, ++dst ) { *dst = table[static_cast<Uint1>(*iter) * 3] | table[static_cast<Uint1>(*(iter + 1)) * 3 + 1] | table[static_cast<Uint1>(*(iter + 2)) * 3 + 2]; iter += 2; } // handle overhang if ( overhang != 0 ) { switch ( overhang ) { case 1: *dst = table[static_cast<Uint1>(*iter) * 3] & 0xC0; break; case 2: *dst = table[static_cast<Uint1>(*iter) * 3] | table[static_cast<Uint1>(*(iter + 1)) * 3 + 1] & 0xF0; break; case 3: *dst = table[static_cast<Uint1>(*iter) * 3] | table[static_cast<Uint1>(*(iter + 1)) * 3 + 1] & 0xFC; break; } } }} break; } return length;}// NCBI4na -> NCBI2na_expand// convert a NCBI4na byte into 2 NCBI2na_expand bytes.SIZE_TYPE CSeqConvert_imp::x_Convert4naTo2naExpand(const char* src, TSeqPos pos, TSeqPos length, char* dst){ return convert_1_to_2(src, pos, length, dst, C4naTo2naExpand::GetTable());}// NCBI4na -> NCBI8na (NCBI4na_expand)// convert a NCBI2na byte into 4 IUPACna characters.SIZE_TYPE CSeqConvert_imp::x_Convert4naTo8na(const char* src, TSeqPos pos, TSeqPos length, char* dst){ return convert_1_to_2(src, pos, length, dst, C4naTo8na::GetTable());}// from NCBI8na (NCBI4na_expand) to ...//===========================================================================// NCBI8na -> IUPACna
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -