⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_filter.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_filter.c,v $ * PRODUCTION Revision 1000.3  2004/06/01 18:07:16  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.45 * PRODUCTION * =========================================================================== *//* $Id: blast_filter.c,v 1000.3 2004/06/01 18:07:16 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's offical duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_filter.c * All code related to query sequence masking/filtering for BLAST */static char const rcsid[] =     "$Id: blast_filter.c,v 1000.3 2004/06/01 18:07:16 gouriano Exp $";#include <algo/blast/core/blast_def.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/blast_filter.h>#include <algo/blast/core/blast_dust.h>#include <algo/blast/core/blast_seg.h>#ifdef CC_FILTER_ALLOWED#include <algo/blast/core/urkpcc.h>#endif/* The following function will replace BlastSetUp_CreateSSeqRange */BlastSeqLoc* BlastSeqLocNew(Int4 from, Int4 to){   BlastSeqLoc* loc = (BlastSeqLoc*) calloc(1, sizeof(BlastSeqLoc));   SSeqRange* di = (SSeqRange*) malloc(sizeof(SSeqRange));   di->left = from;   di->right = to;   loc->ptr = di;   return loc;}BlastSeqLoc* BlastSeqLocFree(BlastSeqLoc* loc){   SSeqRange* dintp;   BlastSeqLoc* next_loc;   while (loc) {      next_loc = loc->next;      dintp = (SSeqRange*) loc->ptr;      sfree(dintp);      sfree(loc);      loc = next_loc;   }   return NULL;}BlastMaskLoc* BlastMaskLocNew(Int4 index, BlastSeqLoc *loc_list){      BlastMaskLoc* retval = (BlastMaskLoc *) calloc(1, sizeof(BlastMaskLoc));      retval->index = index;      retval->loc_list = loc_list;      return retval;}BlastMaskLoc* BlastMaskLocFree(BlastMaskLoc* mask_loc){   BlastMaskLoc* next_loc;   while (mask_loc) {      next_loc = mask_loc->next;      BlastSeqLocFree(mask_loc->loc_list);      sfree(mask_loc);      mask_loc = next_loc;   }   return NULL;}/** Used for qsort, compares two SeqLoc's by starting position. */static int SSeqRangeSortByStartPosition(const void *vp1, const void *vp2){   ListNode* v1 = *((ListNode**) vp1);   ListNode* v2 = *((ListNode**) vp2);   SSeqRange* loc1 = (SSeqRange*) v1->ptr;   SSeqRange* loc2 = (SSeqRange*) v2->ptr;      if (loc1->left < loc2->left)      return -1;   else if (loc1->left > loc2->left)      return 1;   else      return 0;}/* This will go in place of CombineSeqLocs to combine filtered locations */Int2CombineMaskLocations(BlastSeqLoc* mask_loc, BlastSeqLoc* *mask_loc_out,                     Int4 link_value){   Int2 status=0;		/* return value. */   Int4 start, stop;	/* USed to merge overlapping SeqLoc's. */   SSeqRange* di = NULL,* di_next = NULL,* di_tmp=NULL;   BlastSeqLoc* loc_head=NULL,* last_loc=NULL,* loc_var=NULL;   BlastSeqLoc* new_loc = NULL,* new_loc_last = NULL;      if (!mask_loc) {      *mask_loc_out = NULL;      return 0;   }   /* Put all the SeqLoc's into one big linked list. */   loc_head = last_loc =       (BlastSeqLoc*) BlastMemDup(mask_loc, sizeof(BlastSeqLoc));            /* Copy all locations, so loc points at the end of the chain */   while (last_loc->next) {		      last_loc->next =          (BlastSeqLoc*) BlastMemDup(last_loc->next, sizeof(BlastSeqLoc));      last_loc = last_loc->next;   }      /* Sort them by starting position. */   loc_head = (BlastSeqLoc*)       ListNodeSort ((ListNode*) loc_head,                    SSeqRangeSortByStartPosition);      di = (SSeqRange*) loc_head->ptr;   start = di->left;   stop = di->right;   loc_var = loc_head;      while (loc_var) {      di = loc_var->ptr;      if (loc_var->next)         di_next = loc_var->next->ptr;      if (di_next && ((stop + link_value) > di_next->left)) {         stop = MAX(stop, di_next->right);      } else {         di_tmp = (SSeqRange*) malloc(sizeof(SSeqRange));         di_tmp->left = start;         di_tmp->right = stop;         if (!new_loc)            new_loc_last = ListNodeAddPointer(&new_loc, 0, di_tmp);         else            new_loc_last = ListNodeAddPointer(&new_loc_last, 0, di_tmp);         if (loc_var->next) {             start = di_next->left;             stop = di_next->right;         }      }      loc_var = loc_var->next;      di_next = NULL;   }   *mask_loc_out = new_loc;         /* Free memory allocated for the temporary list of SeqLocs */   while (loc_head) {      loc_var = loc_head->next;      sfree(loc_head);      loc_head = loc_var;   }   return status;}Int2 BLAST_ComplementMaskLocations(Uint1 program_number,    BlastQueryInfo* query_info,    BlastMaskLoc* mask_loc, BlastSeqLoc* *complement_mask) {   Int4 start_offset, end_offset, filter_start, filter_end;   Int4 context, index;   BlastSeqLoc* loc,* last_loc = NULL,* start_loc = NULL;   SSeqRange* double_int = NULL,* di;   Boolean first;	/* Specifies beginning of query. */   Boolean last_interval_open=TRUE; /* if TRUE last interval needs to be closed. */   Boolean reverse = FALSE;   const Boolean k_is_na = (program_number == blast_type_blastn);   if (complement_mask == NULL)	return -1;   *complement_mask = NULL;   for (context = query_info->first_context;         context <= query_info->last_context; ++context) {      start_offset = query_info->context_offsets[context];      end_offset = query_info->context_offsets[context+1] - 2;      /* For blastn: check if this strand is not searched at all */      if (end_offset < start_offset)          continue;      index = (k_is_na ? context / 2 : context);      reverse = (k_is_na && ((context & 1) != 0));      first = TRUE;      if (!reverse) {         for ( ; mask_loc && mask_loc->index < index;                mask_loc = mask_loc->next);      }      if (!mask_loc || (mask_loc->index > index) ||          !mask_loc->loc_list) {         /* No masks for this context */         double_int = (SSeqRange*) calloc(1, sizeof(SSeqRange));         double_int->left = start_offset;         double_int->right = end_offset;         if (!last_loc)            last_loc = ListNodeAddPointer(complement_mask, 0, double_int);         else             last_loc = ListNodeAddPointer(&last_loc, 0, double_int);         continue;      }            if (reverse) {         BlastSeqLoc* prev_loc = NULL;         /* Reverse the order of the locations */         for (start_loc = mask_loc->loc_list; start_loc;               start_loc = start_loc->next) {            loc = (BlastSeqLoc*) BlastMemDup(start_loc, sizeof(BlastSeqLoc));            loc->next = prev_loc;            prev_loc = loc;         }         /* Save where this list starts, so it can be freed later */         start_loc = loc;      } else {         loc = mask_loc->loc_list;      }      for ( ; loc; loc = loc->next) {         di = loc->ptr;         if (reverse) {            filter_start = end_offset - di->right;            filter_end = end_offset - di->left;         } else {            filter_start = start_offset + di->left;            filter_end = start_offset + di->right;         }         /* The canonical "state" at the top of this             while loop is that a SSeqRange has been             created and one field was filled in on the             last iteration. The first time this loop is             entered in a call to the funciton this is not            true and the following "if" statement moves             everything to the canonical state. */         if (first) {            last_interval_open = TRUE;            first = FALSE;            double_int = (SSeqRange*) calloc(1, sizeof(SSeqRange));                        if (filter_start > start_offset) {               /* beginning of sequence not filtered */               double_int->left = start_offset;            } else {               /* beginning of sequence filtered */               double_int->left = filter_end + 1;               continue;            }         }         double_int->right = filter_start - 1;         if (!last_loc)            last_loc = ListNodeAddPointer(complement_mask, 0, double_int);         else             last_loc = ListNodeAddPointer(&last_loc, 0, double_int);         if (filter_end >= end_offset) {            /* last masked region at end of sequence */            last_interval_open = FALSE;            break;         } else {            double_int = (SSeqRange*) calloc(1, sizeof(SSeqRange));               double_int->left = filter_end + 1;         }      }      if (reverse) {         start_loc = ListNodeFree(start_loc);      }            if (last_interval_open) {         /* Need to finish SSeqRange* for last interval. */         double_int->right = end_offset;         if (!last_loc)            last_loc = ListNodeAddPointer(complement_mask, 0, double_int);         else             last_loc = ListNodeAddPointer(&last_loc, 0, double_int);         last_interval_open = FALSE;      }   }   return 0;}#define BLASTOPTIONS_BUFFER_SIZE 128/** Parses options used for dust. * @param ptr buffer containing instructions. [in] * @param level sets level for dust. [out] * @param window sets window for dust [out] * @param cutoff sets cutoff for dust. [out]  * @param linker sets linker for dust. [out]*/static Int2parse_dust_options(const char *ptr, Int2* level, Int2* window,	Int2* cutoff, Int2* linker){	char buffer[BLASTOPTIONS_BUFFER_SIZE];	Int4 arg, index, index1, window_pri=-1, linker_pri=-1, level_pri=-1, cutoff_pri=-1;	long	tmplong;	arg = 0;	index1 = 0;	for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++)	{		if (*ptr == ' ' || *ptr == NULLB)		{			buffer[index1] = NULLB;			index1 = 0;			switch(arg) {				case 0:					sscanf(buffer, "%ld", &tmplong);					level_pri = tmplong;					break;				case 1:					sscanf(buffer, "%ld", &tmplong);					window_pri = tmplong;					break;				case 2:					sscanf(buffer, "%ld", &tmplong);					cutoff_pri = tmplong;					break;				case 3:					sscanf(buffer, "%ld", &tmplong);					linker_pri = tmplong;					break;				default:					break;			}			arg++;			while (*ptr == ' ')				ptr++;			/* end of the buffer. */			if (*ptr == NULLB)				break;		}		else		{			buffer[index1] = *ptr; ptr++;			index1++;		}	}	*level = level_pri; 	*window = window_pri; 	*cutoff = cutoff_pri; 	*linker = linker_pri; 	return 0;}/** parses a string to set three seg options.  * @param ptr buffer containing instructions [in] * @param window returns "window" for seg algorithm. [out] * @param locut returns "locut" for seg. [out] * @param hicut returns "hicut" for seg. [out]*/static Int2parse_seg_options(const char *ptr, Int4* window, double* locut, double* hicut){	char buffer[BLASTOPTIONS_BUFFER_SIZE];	Int4 arg, index, index1; 	long	tmplong;	double	tmpdouble;	arg = 0;	index1 = 0;	for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++)	{		if (*ptr == ' ' || *ptr == NULLB)		{			buffer[index1] = NULLB;			index1 = 0;			switch(arg) {				case 0:					sscanf(buffer, "%ld", &tmplong);					*window = tmplong;					break;				case 1:					sscanf(buffer, "%le", &tmpdouble);					*locut = tmpdouble;					break;				case 2:					sscanf(buffer, "%le", &tmpdouble);					*hicut = tmpdouble;					break;				default:					break;			}			arg++;			while (*ptr == ' ')				ptr++;			/* end of the buffer. */			if (*ptr == NULLB)				break;		}		else		{			buffer[index1] = *ptr; ptr++;			index1++;		}	}	return 0;}#ifdef CC_FILTER_ALLOWED/** Coiled-coiled algorithm parameters.  * @param ptr buffer containing instructions. [in] * @param window returns window for coil-coiled algorithm. [out] * @param cutoff cutoff for coil-coiled algorithm [out] * @param linker returns linker [out]*/static Int2parse_cc_options(const char *ptr, Int4* window, double* cutoff, Int4* linker){	char buffer[BLASTOPTIONS_BUFFER_SIZE];	Int4 arg, index, index1;	long	tmplong;	double	tmpdouble;	arg = 0;	index1 = 0;	for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++)	{		if (*ptr == ' ' || *ptr == NULLB)		{			buffer[index1] = NULLB;			index1 = 0;			switch(arg) {				case 0:					sscanf(buffer, "%ld", &tmplong);					*window = tmplong;					break;				case 1:					sscanf(buffer, "%le", &tmpdouble);					*cutoff = tmpdouble;					break;				case 2:					sscanf(buffer, "%ld", &tmplong);					*linker = tmplong;					break;				default:					break;			}			arg++;			while (*ptr == ' ')				ptr++;			/* end of the buffer. */			if (*ptr == NULLB)				break;		}		else		{			buffer[index1] = *ptr; ptr++;			index1++;		}	}	return 0;}#endif/** Copies filtering commands for one filtering algorithm from "instructions" to * "buffer".  * ";" is a delimiter for the commands for different algorithms, so it stops * copying when a ";" is found.  * Example filtering string: "m L; R -d rodents.lib" * @param instructions filtering commands [in]  * @param buffer filled with filtering commands for one algorithm. [out]*/static const char *BlastSetUp_load_options_to_buffer(const char *instructions, char* buffer){	Boolean not_started=TRUE;	char* buffer_ptr;	const char *ptr;	Int4 index;	ptr = instructions;	buffer_ptr = buffer;	for (index=0; index<BLASTOPTIONS_BUFFER_SIZE && *ptr != NULLB; index++)	{		if (*ptr == ';')		{	/* ";" is a delimiter for different filtering algorithms. */			ptr++;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -