⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pattern.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 2 页
字号:
    Int4 x;    Int4 i; /*index on words*/    for (i = 0; i < patternSearch->numWords; i++) {      x = (a[i] << 1) + b;      if (x >= OVERFLOW1) {	a[i] = x - OVERFLOW1; 	b = 1;      }      else { 	a[i] = x; 	b = 0;      }    }}  /*Do a word-by-word bit-wise or of a and b and put the result back in a*/static void or(Int4 *a, Int4 *b, patternSearchItems *patternSearch){    Int4 i; /*index over words*/    for (i = 0; i < patternSearch->numWords; i++) 	a[i] = (a[i] | b[i]);}/*Do a word-by-word bit-wise or of a and b and put the result in  result; return 1 if there are any non-zero words*/static Int4 and(Int4 *result, Int4 *a, Int4 *b, patternSearchItems *patternSearch){    Int4 i; /*index over words*/    Int4 returnValue = 0;    for (i = 0; i < patternSearch->numWords; i++)       if ( (result[i] = (a[i] & b[i]) ) ) 	returnValue = 1;    return returnValue;}/*Let F be the number of the first bit in s that is 1  Let G be the first bit in mask that is one such that G < F;  Else let G = -1;  Returns F - G*/static Int4 lenofL(Int4 *s, Int4 *mask, patternSearchItems *patternSearch){    Int4 bitIndex; /*loop index over bits in a word*/    Int4 wordIndex;  /*loop index over words*/    Int4 firstOneInMask;    firstOneInMask = -1;    for (wordIndex = 0; wordIndex < patternSearch->numWords; wordIndex++) {      for (bitIndex = 0; bitIndex < BITS_PACKED_PER_WORD; bitIndex++) { 	if ((s[wordIndex] >> bitIndex) % 2  == 1) 	  return wordIndex*BITS_PACKED_PER_WORD+bitIndex-firstOneInMask;	if ((mask[wordIndex] >> bitIndex) %2  == 1) 	  firstOneInMask = wordIndex*BITS_PACKED_PER_WORD+bitIndex;      }    }    /*ErrPostEx(SEV_FATAL, 1, 0, "wrong\n");*/    return(-1);}/* Finds places where pattern matches seq and returns them as   pairs of positions in consecutive entries of hitArray;   similar to find_hitsS   hitArray is array of hits to return   seq is the input sequence and len1 is its length   patternSearch carries all the pattern variables   return twice the number of hits*/static Int4 find_hitsL(Int4 *hitArray, const Uint1* seq, Int4 len1, 		       patternSearchItems *patternSearch){    Int4 wordIndex; /*index on words in mask*/    Int4 i; /*loop index on seq */    Int4  *prefixMatchedBitPattern; /*see similar variable in                                      find_hitsS*/    Int4 twiceNumHits = 0; /*counter for hitArray*/    Int4 *mask; /*local copy of match_maskL version of pattern                  indicates after which positions a match can be declared*/    Int4 *matchResult; /*Array of words to hold the result of the                         final test for a match*/    matchResult = (Int4 *) calloc(patternSearch->numWords, sizeof(Int4));    mask = (Int4 *) calloc(patternSearch->numWords, sizeof(Int4));    prefixMatchedBitPattern = (Int4 *) calloc(patternSearch->numWords, sizeof(Int4));    for (wordIndex = 0; wordIndex < patternSearch->numWords; wordIndex++) {      mask[wordIndex] = patternSearch->match_maskL[wordIndex];      prefixMatchedBitPattern[wordIndex] = 0;    }    /*This is a multiword version of the algorithm in find_hitsS*/    lmove(mask, 1, patternSearch);    for (i = 0; i < len1; i++) {      lmove(prefixMatchedBitPattern, 0, patternSearch);      or(prefixMatchedBitPattern, mask, patternSearch);       and(prefixMatchedBitPattern, prefixMatchedBitPattern, patternSearch->bitPatternByLetter[seq[i]], patternSearch);      if (and(matchResult, prefixMatchedBitPattern, patternSearch->match_maskL, patternSearch)) { 	hitArray[twiceNumHits++] = i; 	hitArray[twiceNumHits++] = i-lenofL(matchResult, patternSearch->match_maskL, patternSearch)+1;      }    }    sfree(prefixMatchedBitPattern);     sfree(matchResult);     sfree(mask);    return twiceNumHits;}/*find matches when pattern is very long,  hitArray is used to pass back pairs of end position. start position for hits  seq is the sequence; len is its length  is_dna indicates if the sequence is DNA or protein*/static Int4 find_hitsLL(Int4 *hitArray, const Uint1* seq, Int4 len, Boolean is_dna,		 patternSearchItems *patternSearch){    Int4 twiceNumHits; /*twice the number of matches*/    Int4 twiceHitsOneCall; /*twice the number of hits in one call to                                  find_hitsS */    Int4 wordIndex;  /*index over words in pattern*/    Int4 start; /*start position in sequence for calls to find_hitsS */    Int4 hitArray1[MAX_HIT]; /*used to get hits against different words*/    Int4 nextPosInHitArray; /*next available position in hitArray1 */    Int4 hitIndex1, hitIndex2;  /*indices over hitArray1*/    patternSearch->whichPositionPtr =       patternSearch->SLL[patternSearch->whichMostSpecific];     patternSearch->match_mask =       patternSearch->match_maskL[patternSearch->whichMostSpecific];    if (is_dna) {      patternSearch->DNAwhichPrefixPosPtr = patternSearch->DNAprefixSLL[patternSearch->whichMostSpecific];      patternSearch->DNAwhichSuffixPosPtr = patternSearch->DNAsuffixSLL[patternSearch->whichMostSpecific];    }    /*find matches to most specific word of pattern*/    twiceNumHits = find_hitsS_head(hitArray, seq, 0, len, is_dna, patternSearch);    if (twiceNumHits < 2)       return 0;    /*extend matches word by word*/    for (wordIndex = patternSearch->whichMostSpecific+1; 	 wordIndex < patternSearch->numWords; wordIndex++) {	patternSearch->whichPositionPtr = 	  patternSearch->SLL[wordIndex]; 	patternSearch->match_mask = patternSearch->match_maskL[wordIndex];	if (is_dna) {	  patternSearch->DNAwhichPrefixPosPtr = patternSearch->DNAprefixSLL[wordIndex]; 	  patternSearch->DNAwhichSuffixPosPtr = patternSearch->DNAsuffixSLL[wordIndex];	}	nextPosInHitArray = 0;	for (hitIndex2 = 0; hitIndex2 < twiceNumHits; hitIndex2 += 2) {	  twiceHitsOneCall = find_hitsS_head(&hitArray1[nextPosInHitArray], seq,        hitArray[hitIndex2]+1, MIN(len-hitArray[hitIndex2]-1,        patternSearch->spacing[wordIndex-1] + patternSearch->numPlacesInWord[wordIndex]), is_dna, patternSearch);	  for (hitIndex1 = 0; hitIndex1 < twiceHitsOneCall; hitIndex1+= 2) {	    hitArray1[nextPosInHitArray+hitIndex1] = 	      hitArray[hitIndex2]+hitArray1[nextPosInHitArray+hitIndex1]+1;	    hitArray1[nextPosInHitArray+hitIndex1+1] = hitArray[hitIndex2+1];	  }	  nextPosInHitArray += twiceHitsOneCall;	}	twiceNumHits = nextPosInHitArray;	if (twiceNumHits < 2) 	  return 0;        /*copy back matches that extend */	for (hitIndex2 = 0; hitIndex2 < nextPosInHitArray; hitIndex2++) 	  hitArray[hitIndex2] = hitArray1[hitIndex2];    }    /*extend each match back one word at a time*/    for (wordIndex = patternSearch->whichMostSpecific-1; wordIndex >=0; 	 wordIndex--) {      patternSearch->whichPositionPtr = 	patternSearch->SLL[wordIndex];       patternSearch->match_mask = patternSearch->match_maskL[wordIndex];      if (is_dna) {	patternSearch->DNAwhichPrefixPosPtr = patternSearch->DNAprefixSLL[wordIndex]; 	patternSearch->DNAwhichSuffixPosPtr = patternSearch->DNAsuffixSLL[wordIndex];      }      nextPosInHitArray = 0;      for (hitIndex2 = 0; hitIndex2 < twiceNumHits; hitIndex2 += 2) {	start = hitArray[hitIndex2+1]-patternSearch->spacing[wordIndex]-patternSearch->numPlacesInWord[wordIndex];	if (start < 0) 	  start = 0;	twiceHitsOneCall = find_hitsS_head(&hitArray1[nextPosInHitArray], seq, start, 			    hitArray[hitIndex2+1]-start, is_dna, patternSearch);	for (hitIndex1 = 0; hitIndex1 < twiceHitsOneCall; hitIndex1+= 2) {	  hitArray1[nextPosInHitArray+hitIndex1] = hitArray[hitIndex2];	  hitArray1[nextPosInHitArray+hitIndex1+1] = start + 	    hitArray1[nextPosInHitArray+hitIndex1+1];	}	nextPosInHitArray += twiceHitsOneCall;      }      twiceNumHits = nextPosInHitArray;      if (twiceNumHits < 2) 	return 0;      /*copy back matches that extend*/      for (hitIndex2 = 0; hitIndex2 < nextPosInHitArray; hitIndex2++) 	hitArray[hitIndex2] = hitArray1[hitIndex2];    }    return twiceNumHits;}Int4 FindPatternHits(Int4 *hitArray, const Uint1* seq, Int4 len,                Boolean is_dna, patternSearchItems * patternSearch){    if (patternSearch->flagPatternLength == ONE_WORD_PATTERN)       return find_hitsS_head(hitArray, seq, 0, len, is_dna, patternSearch);    if (patternSearch->flagPatternLength == MULTI_WORD_PATTERN)       return find_hitsL(hitArray, seq, len, patternSearch);    return find_hitsLL(hitArray, seq, len, is_dna, patternSearch);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -