⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_lookup.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 3 页
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_lookup.c,v $ * PRODUCTION Revision 1000.3  2004/06/01 18:07:25  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.25 * PRODUCTION * =========================================================================== *//* $Id: blast_lookup.c,v 1000.3 2004/06/01 18:07:25 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's offical duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== *//** @file blast_lookup.c * @todo FIXME needs file description */#include <algo/blast/core/blast_def.h>#include <algo/blast/core/blast_options.h>#include <algo/blast/core/blast_lookup.h>#include <algo/blast/core/blast_rps.h>#include <algo/blast/core/lookup_util.h>#include <algo/blast/core/blast_encoding.h>#include "blast_inline.h"static char const rcsid[] =     "$Id: blast_lookup.c,v 1000.3 2004/06/01 18:07:25 gouriano Exp $";static void AddWordHits( LookupTable *lookup,			 Int4** matrix,			 Uint1* word,			 Int4 offset,                         Int4 query_bias);static void AddPSSMWordHits( LookupTable *lookup,			 Int4** matrix,			 Int4 offset,                         Int4 query_bias);Int4 BlastAaLookupNew(const LookupTableOptions* opt,		      LookupTable* * lut){  return LookupTableNew(opt, lut, TRUE);}Int4 RPSLookupTableNew(const RPSInfo *info,		      RPSLookupTable* * lut){   Int4 i;   RPSLookupFileHeader *lookup_header;   RPSProfileHeader *profile_header;   RPSLookupTable* lookup = *lut =       (RPSLookupTable*) calloc(1, sizeof(RPSLookupTable));   Int4* pssm_start;   Int4 num_profiles;   Int4 num_pssm_rows;   Int4 longest_chain;   ASSERT(info != NULL);   /* Fill in the lookup table information. */   lookup_header = info->lookup_header;   if (lookup_header->magic_number != RPS_MAGIC_NUM)      return -1;   lookup->rps_aux_info = (RPSAuxInfo *)(&info->aux_info);   lookup->wordsize = BLAST_WORDSIZE_PROT;   lookup->alphabet_size = PSI_ALPHABET_SIZE;   lookup->charsize = ilog2(lookup->alphabet_size) + 1;   lookup->backbone_size = 1 << (lookup->wordsize * lookup->charsize);   lookup->mask = lookup->backbone_size - 1;   lookup->rps_backbone = (RPSBackboneCell *)((Uint1 *)lookup_header +                           lookup_header->start_of_backbone);   lookup->overflow = (Int4 *)((Uint1 *)lookup_header +    			lookup_header->start_of_backbone + 			(lookup->backbone_size + 1)* sizeof(RPSBackboneCell));   lookup->overflow_size = lookup_header->overflow_hits;   /* fill in the pv_array */      lookup->pv = (PV_ARRAY_TYPE *)      calloc((lookup->backbone_size >> PV_ARRAY_BTS) , sizeof(PV_ARRAY_TYPE));   longest_chain = 0;   for (i = 0; i < lookup->backbone_size; i++) {      if (lookup->rps_backbone[i].num_used > 0) {	 PV_SET(lookup,i);      }      if (lookup->rps_backbone[i].num_used > longest_chain) {         longest_chain = lookup->rps_backbone[i].num_used;      }   }   lookup->longest_chain = longest_chain;   /* Fill in the PSSM information */   profile_header = info->profile_header;   if (profile_header->magic_number != RPS_MAGIC_NUM)      return -2;   lookup->rps_seq_offsets = profile_header->start_offsets;   num_profiles = profile_header->num_profiles;   num_pssm_rows = lookup->rps_seq_offsets[num_profiles];   lookup->rps_pssm = (Int4 **)malloc((num_pssm_rows+1) * sizeof(Int4 *));   pssm_start = profile_header->start_offsets + num_profiles + 1;   for (i = 0; i < num_pssm_rows + 1; i++) {      lookup->rps_pssm[i] = pssm_start;      pssm_start += lookup->alphabet_size;   }   return 0;}Int4 LookupTableNew(const LookupTableOptions* opt,		      LookupTable* * lut,		      Boolean is_protein){   LookupTable* lookup = *lut =       (LookupTable*) calloc(1, sizeof(LookupTable));   ASSERT(lookup != NULL);  if (is_protein) {    Int4 i;    lookup->charsize = ilog2(opt->alphabet_size) + 1;    lookup->wordsize = opt->word_size;    lookup->backbone_size = 0;    for(i=0;i<lookup->wordsize;i++)        lookup->backbone_size |= (opt->alphabet_size - 1) << (i * lookup->charsize);    lookup->backbone_size += 1;    lookup->mask = makemask(opt->word_size * lookup->charsize);  } else {     lookup->word_length = opt->word_size;     lookup->scan_step = opt->scan_step;     lookup->charsize = ilog2(opt->alphabet_size / COMPRESSION_RATIO);      lookup->wordsize =         (opt->word_size - MIN(lookup->scan_step,COMPRESSION_RATIO) + 1)         / COMPRESSION_RATIO;     lookup->reduced_wordsize = (lookup->wordsize >= 2) ? 2 : 1;     lookup->backbone_size =        iexp(2,lookup->reduced_wordsize*lookup->charsize*COMPRESSION_RATIO);     lookup->mask = lookup->backbone_size - 1;  }  lookup->alphabet_size = opt->alphabet_size;  lookup->exact_matches=0;  lookup->neighbor_matches=0;  lookup->threshold = opt->threshold;  lookup->thin_backbone =      (Int4**) calloc(lookup->backbone_size , sizeof(Int4*));  ASSERT(lookup->thin_backbone != NULL);  lookup->use_pssm = opt->use_pssm;  lookup->neighbors=NULL;  lookup->overflow=NULL;  return 0;}Int4 BlastAaLookupAddWordHit(LookupTable* lookup, /* in/out: the lookup table */                             Uint1* w,			     Int4 query_offset){  Int4 index=0;  Int4 chain_size = 0; /* total number of elements in the chain */  Int4 hits_in_chain = 0; /* number of occupied elements in the chain, not including the zeroth and first positions */   Int4 * chain = NULL;      /* compute its index, */  _ComputeIndex(lookup->wordsize,lookup->charsize,lookup->mask, w, &index);  ASSERT(index < lookup->backbone_size);  /* if backbone cell is null, initialize a new chain */  if (lookup->thin_backbone[index] == NULL)    {      chain_size = 8;      hits_in_chain = 0;      chain = (Int4*) calloc( chain_size, sizeof(Int4) );      ASSERT(chain != NULL);      chain[0] = chain_size;      chain[1] = hits_in_chain;      lookup->thin_backbone[index] = chain;    }  else    /* otherwise, use the existing chain */    {      chain = lookup->thin_backbone[index];      chain_size = chain[0];      hits_in_chain = chain[1];    }    /* if the chain is full, allocate more room */  if ( (hits_in_chain + 2) == chain_size )    {      chain_size = chain_size * 2;      chain = (Int4*) realloc(chain, chain_size * sizeof(Int4) );      ASSERT(chain != NULL);      lookup->thin_backbone[index] = chain;      chain[0] = chain_size;    }    /* add the hit */  chain[ chain[1] + 2 ] = query_offset;  chain[1] += 1;  return 0;}Int4 _BlastAaLookupFinalize(LookupTable* lookup){  Int4 i;  Int4 overflow_cells_needed=0;  Int4 overflow_cursor = 0;  Int4 backbone_occupancy=0;  Int4 thick_backbone_occupancy=0;  Int4 num_overflows=0;  Int4 longest_chain=0;  /* allocate the new lookup table */ lookup->thick_backbone = (LookupBackboneCell *)    calloc(lookup->backbone_size , sizeof(LookupBackboneCell));    ASSERT(lookup->thick_backbone != NULL); /* allocate the pv_array */ lookup->pv = (PV_ARRAY_TYPE *)    calloc((lookup->backbone_size >> PV_ARRAY_BTS) + 1 , sizeof(PV_ARRAY_TYPE));  ASSERT(lookup->pv != NULL); /* find out how many cells have >3 hits */ for(i=0;i<lookup->backbone_size;i++)   if (lookup->thin_backbone[i] != NULL)     {     if (lookup->thin_backbone[i][1] > HITS_ON_BACKBONE)       overflow_cells_needed += lookup->thin_backbone[i][1];     if (lookup->thin_backbone[i][1] > longest_chain)       longest_chain = lookup->thin_backbone[i][1];     } lookup->longest_chain = longest_chain; /* allocate the overflow array */ lookup->overflow = (Int4*) calloc( overflow_cells_needed, sizeof(Int4) ); ASSERT(lookup->overflow != NULL);/* for each position in the lookup table backbone, */for(i=0;i<lookup->backbone_size;i++)    {	    /* if there are hits there, */    if ( lookup->thin_backbone[i] != NULL )        {        /* set the corresponding bit in the pv_array */	  PV_SET(lookup,i);	  backbone_occupancy++;        /* if there are three or fewer hits, */        if ( (lookup->thin_backbone[i])[1] <= HITS_ON_BACKBONE )            /* copy them into the thick_backbone cell */            {            Int4 j;	    thick_backbone_occupancy++;	    lookup->thick_backbone[i].num_used = lookup->thin_backbone[i][1];            for(j=0;j<lookup->thin_backbone[i][1];j++)                lookup->thick_backbone[i].payload.entries[j] = lookup->thin_backbone[i][j+2];            }        else	  /* more than three hits; copy to overflow array */            {	      Int4 j;	      num_overflows++;	      lookup->thick_backbone[i].num_used = lookup->thin_backbone[i][1];	      lookup->thick_backbone[i].payload.overflow_cursor = overflow_cursor;	      for(j=0;j<lookup->thin_backbone[i][1];j++)		{                lookup->overflow[overflow_cursor] = lookup->thin_backbone[i][j+2];		overflow_cursor++;		}            }    /* done with this chain- free it */        sfree(lookup->thin_backbone[i]);	lookup->thin_backbone[i]=NULL;        }    else        /* no hits here */        {        lookup->thick_backbone[i].num_used=0;        }    } /* end for */ lookup->overflow_size = overflow_cursor;/* done copying hit info- free the backbone */ sfree(lookup->thin_backbone); lookup->thin_backbone=NULL;#ifdef LOOKUP_VERBOSE printf("backbone size : %d\nbackbone occupancy: %d (%f%%)\nthick_backbone occupancy: %d (%f%%)\nnum_overflows: %d\noverflow size: %d\nlongest chain: %d\n",lookup->backbone_size, backbone_occupancy, 100.0 * (float) backbone_occupancy/ (float) lookup->backbone_size, thick_backbone_occupancy, 100.0 * (float) thick_backbone_occupancy / (float) lookup->backbone_size, num_overflows, overflow_cells_needed,longest_chain); printf("exact matches : %d\nneighbor matches : %d\n",lookup->exact_matches,lookup->neighbor_matches);#endif return 0;}Int4 BlastAaScanSubject(const LookupTableWrap* lookup_wrap,                        const BLAST_SequenceBlk *subject,                        Int4* offset,                        Uint4 * NCBI_RESTRICT query_offsets,                        Uint4 * NCBI_RESTRICT subject_offsets,                        Int4 array_size		   ){  Int4 index=0;  Uint1* s=NULL;  Uint1* s_first=NULL;  Uint1* s_last=NULL;  Int4 numhits = 0; /* number of hits found for a given subject offset */  Int4 totalhits = 0; /* cumulative number of hits found */  LookupTable* lookup = lookup_wrap->lut;  s_first = subject->sequence + *offset;  s_last  = subject->sequence + subject->length - lookup->wordsize;   _ComputeIndex(lookup->wordsize - 1, /* prime the index */		lookup->charsize,		lookup->mask,		s_first,		&index);  for(s=s_first; s <= s_last; s++)    {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -