📄 pattern.c
字号:
Int4 x; Int4 i; /*index on words*/ for (i = 0; i < patternSearch->numWords; i++) { x = (a[i] << 1) + b; if (x >= OVERFLOW1) { a[i] = x - OVERFLOW1; b = 1; } else { a[i] = x; b = 0; } }} /*Do a word-by-word bit-wise or of a and b and put the result back in a*/static void or(Int4 *a, Int4 *b, patternSearchItems *patternSearch){ Int4 i; /*index over words*/ for (i = 0; i < patternSearch->numWords; i++) a[i] = (a[i] | b[i]);}/*Do a word-by-word bit-wise or of a and b and put the result in result; return 1 if there are any non-zero words*/static Int4 and(Int4 *result, Int4 *a, Int4 *b, patternSearchItems *patternSearch){ Int4 i; /*index over words*/ Int4 returnValue = 0; for (i = 0; i < patternSearch->numWords; i++) if ( (result[i] = (a[i] & b[i]) ) ) returnValue = 1; return returnValue;}/*Let F be the number of the first bit in s that is 1 Let G be the first bit in mask that is one such that G < F; Else let G = -1; Returns F - G*/static Int4 lenofL(Int4 *s, Int4 *mask, patternSearchItems *patternSearch){ Int4 bitIndex; /*loop index over bits in a word*/ Int4 wordIndex; /*loop index over words*/ Int4 firstOneInMask; firstOneInMask = -1; for (wordIndex = 0; wordIndex < patternSearch->numWords; wordIndex++) { for (bitIndex = 0; bitIndex < BITS_PACKED_PER_WORD; bitIndex++) { if ((s[wordIndex] >> bitIndex) % 2 == 1) return wordIndex*BITS_PACKED_PER_WORD+bitIndex-firstOneInMask; if ((mask[wordIndex] >> bitIndex) %2 == 1) firstOneInMask = wordIndex*BITS_PACKED_PER_WORD+bitIndex; } } /*ErrPostEx(SEV_FATAL, 1, 0, "wrong\n");*/ return(-1);}/* Finds places where pattern matches seq and returns them as pairs of positions in consecutive entries of hitArray; similar to find_hitsS hitArray is array of hits to return seq is the input sequence and len1 is its length patternSearch carries all the pattern variables return twice the number of hits*/static Int4 find_hitsL(Int4 *hitArray, const Uint1* seq, Int4 len1, patternSearchItems *patternSearch){ Int4 wordIndex; /*index on words in mask*/ Int4 i; /*loop index on seq */ Int4 *prefixMatchedBitPattern; /*see similar variable in find_hitsS*/ Int4 twiceNumHits = 0; /*counter for hitArray*/ Int4 *mask; /*local copy of match_maskL version of pattern indicates after which positions a match can be declared*/ Int4 *matchResult; /*Array of words to hold the result of the final test for a match*/ matchResult = (Int4 *) calloc(patternSearch->numWords, sizeof(Int4)); mask = (Int4 *) calloc(patternSearch->numWords, sizeof(Int4)); prefixMatchedBitPattern = (Int4 *) calloc(patternSearch->numWords, sizeof(Int4)); for (wordIndex = 0; wordIndex < patternSearch->numWords; wordIndex++) { mask[wordIndex] = patternSearch->match_maskL[wordIndex]; prefixMatchedBitPattern[wordIndex] = 0; } /*This is a multiword version of the algorithm in find_hitsS*/ lmove(mask, 1, patternSearch); for (i = 0; i < len1; i++) { lmove(prefixMatchedBitPattern, 0, patternSearch); or(prefixMatchedBitPattern, mask, patternSearch); and(prefixMatchedBitPattern, prefixMatchedBitPattern, patternSearch->bitPatternByLetter[seq[i]], patternSearch); if (and(matchResult, prefixMatchedBitPattern, patternSearch->match_maskL, patternSearch)) { hitArray[twiceNumHits++] = i; hitArray[twiceNumHits++] = i-lenofL(matchResult, patternSearch->match_maskL, patternSearch)+1; } } sfree(prefixMatchedBitPattern); sfree(matchResult); sfree(mask); return twiceNumHits;}/*find matches when pattern is very long, hitArray is used to pass back pairs of end position. start position for hits seq is the sequence; len is its length is_dna indicates if the sequence is DNA or protein*/static Int4 find_hitsLL(Int4 *hitArray, const Uint1* seq, Int4 len, Boolean is_dna, patternSearchItems *patternSearch){ Int4 twiceNumHits; /*twice the number of matches*/ Int4 twiceHitsOneCall; /*twice the number of hits in one call to find_hitsS */ Int4 wordIndex; /*index over words in pattern*/ Int4 start; /*start position in sequence for calls to find_hitsS */ Int4 hitArray1[MAX_HIT]; /*used to get hits against different words*/ Int4 nextPosInHitArray; /*next available position in hitArray1 */ Int4 hitIndex1, hitIndex2; /*indices over hitArray1*/ patternSearch->whichPositionPtr = patternSearch->SLL[patternSearch->whichMostSpecific]; patternSearch->match_mask = patternSearch->match_maskL[patternSearch->whichMostSpecific]; if (is_dna) { patternSearch->DNAwhichPrefixPosPtr = patternSearch->DNAprefixSLL[patternSearch->whichMostSpecific]; patternSearch->DNAwhichSuffixPosPtr = patternSearch->DNAsuffixSLL[patternSearch->whichMostSpecific]; } /*find matches to most specific word of pattern*/ twiceNumHits = find_hitsS_head(hitArray, seq, 0, len, is_dna, patternSearch); if (twiceNumHits < 2) return 0; /*extend matches word by word*/ for (wordIndex = patternSearch->whichMostSpecific+1; wordIndex < patternSearch->numWords; wordIndex++) { patternSearch->whichPositionPtr = patternSearch->SLL[wordIndex]; patternSearch->match_mask = patternSearch->match_maskL[wordIndex]; if (is_dna) { patternSearch->DNAwhichPrefixPosPtr = patternSearch->DNAprefixSLL[wordIndex]; patternSearch->DNAwhichSuffixPosPtr = patternSearch->DNAsuffixSLL[wordIndex]; } nextPosInHitArray = 0; for (hitIndex2 = 0; hitIndex2 < twiceNumHits; hitIndex2 += 2) { twiceHitsOneCall = find_hitsS_head(&hitArray1[nextPosInHitArray], seq, hitArray[hitIndex2]+1, MIN(len-hitArray[hitIndex2]-1, patternSearch->spacing[wordIndex-1] + patternSearch->numPlacesInWord[wordIndex]), is_dna, patternSearch); for (hitIndex1 = 0; hitIndex1 < twiceHitsOneCall; hitIndex1+= 2) { hitArray1[nextPosInHitArray+hitIndex1] = hitArray[hitIndex2]+hitArray1[nextPosInHitArray+hitIndex1]+1; hitArray1[nextPosInHitArray+hitIndex1+1] = hitArray[hitIndex2+1]; } nextPosInHitArray += twiceHitsOneCall; } twiceNumHits = nextPosInHitArray; if (twiceNumHits < 2) return 0; /*copy back matches that extend */ for (hitIndex2 = 0; hitIndex2 < nextPosInHitArray; hitIndex2++) hitArray[hitIndex2] = hitArray1[hitIndex2]; } /*extend each match back one word at a time*/ for (wordIndex = patternSearch->whichMostSpecific-1; wordIndex >=0; wordIndex--) { patternSearch->whichPositionPtr = patternSearch->SLL[wordIndex]; patternSearch->match_mask = patternSearch->match_maskL[wordIndex]; if (is_dna) { patternSearch->DNAwhichPrefixPosPtr = patternSearch->DNAprefixSLL[wordIndex]; patternSearch->DNAwhichSuffixPosPtr = patternSearch->DNAsuffixSLL[wordIndex]; } nextPosInHitArray = 0; for (hitIndex2 = 0; hitIndex2 < twiceNumHits; hitIndex2 += 2) { start = hitArray[hitIndex2+1]-patternSearch->spacing[wordIndex]-patternSearch->numPlacesInWord[wordIndex]; if (start < 0) start = 0; twiceHitsOneCall = find_hitsS_head(&hitArray1[nextPosInHitArray], seq, start, hitArray[hitIndex2+1]-start, is_dna, patternSearch); for (hitIndex1 = 0; hitIndex1 < twiceHitsOneCall; hitIndex1+= 2) { hitArray1[nextPosInHitArray+hitIndex1] = hitArray[hitIndex2]; hitArray1[nextPosInHitArray+hitIndex1+1] = start + hitArray1[nextPosInHitArray+hitIndex1+1]; } nextPosInHitArray += twiceHitsOneCall; } twiceNumHits = nextPosInHitArray; if (twiceNumHits < 2) return 0; /*copy back matches that extend*/ for (hitIndex2 = 0; hitIndex2 < nextPosInHitArray; hitIndex2++) hitArray[hitIndex2] = hitArray1[hitIndex2]; } return twiceNumHits;}Int4 FindPatternHits(Int4 *hitArray, const Uint1* seq, Int4 len, Boolean is_dna, patternSearchItems * patternSearch){ if (patternSearch->flagPatternLength == ONE_WORD_PATTERN) return find_hitsS_head(hitArray, seq, 0, len, is_dna, patternSearch); if (patternSearch->flagPatternLength == MULTI_WORD_PATTERN) return find_hitsL(hitArray, seq, len, patternSearch); return find_hitsLL(hitArray, seq, len, is_dna, patternSearch);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -