📄 blast_filter.c
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_filter.c,v $ * PRODUCTION Revision 1000.3 2004/06/01 18:07:16 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.45 * PRODUCTION * =========================================================================== *//* $Id: blast_filter.c,v 1000.3 2004/06/01 18:07:16 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_filter.c * All code related to query sequence masking/filtering for BLAST */static char const rcsid[] = "$Id: blast_filter.c,v 1000.3 2004/06/01 18:07:16 gouriano Exp $";#include <algo/blast/core/blast_def.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/blast_filter.h>#include <algo/blast/core/blast_dust.h>#include <algo/blast/core/blast_seg.h>#ifdef CC_FILTER_ALLOWED#include <algo/blast/core/urkpcc.h>#endif/* The following function will replace BlastSetUp_CreateSSeqRange */BlastSeqLoc* BlastSeqLocNew(Int4 from, Int4 to){ BlastSeqLoc* loc = (BlastSeqLoc*) calloc(1, sizeof(BlastSeqLoc)); SSeqRange* di = (SSeqRange*) malloc(sizeof(SSeqRange)); di->left = from; di->right = to; loc->ptr = di; return loc;}BlastSeqLoc* BlastSeqLocFree(BlastSeqLoc* loc){ SSeqRange* dintp; BlastSeqLoc* next_loc; while (loc) { next_loc = loc->next; dintp = (SSeqRange*) loc->ptr; sfree(dintp); sfree(loc); loc = next_loc; } return NULL;}BlastMaskLoc* BlastMaskLocNew(Int4 index, BlastSeqLoc *loc_list){ BlastMaskLoc* retval = (BlastMaskLoc *) calloc(1, sizeof(BlastMaskLoc)); retval->index = index; retval->loc_list = loc_list; return retval;}BlastMaskLoc* BlastMaskLocFree(BlastMaskLoc* mask_loc){ BlastMaskLoc* next_loc; while (mask_loc) { next_loc = mask_loc->next; BlastSeqLocFree(mask_loc->loc_list); sfree(mask_loc); mask_loc = next_loc; } return NULL;}/** Used for qsort, compares two SeqLoc's by starting position. */static int SSeqRangeSortByStartPosition(const void *vp1, const void *vp2){ ListNode* v1 = *((ListNode**) vp1); ListNode* v2 = *((ListNode**) vp2); SSeqRange* loc1 = (SSeqRange*) v1->ptr; SSeqRange* loc2 = (SSeqRange*) v2->ptr; if (loc1->left < loc2->left) return -1; else if (loc1->left > loc2->left) return 1; else return 0;}/* This will go in place of CombineSeqLocs to combine filtered locations */Int2CombineMaskLocations(BlastSeqLoc* mask_loc, BlastSeqLoc* *mask_loc_out, Int4 link_value){ Int2 status=0; /* return value. */ Int4 start, stop; /* USed to merge overlapping SeqLoc's. */ SSeqRange* di = NULL,* di_next = NULL,* di_tmp=NULL; BlastSeqLoc* loc_head=NULL,* last_loc=NULL,* loc_var=NULL; BlastSeqLoc* new_loc = NULL,* new_loc_last = NULL; if (!mask_loc) { *mask_loc_out = NULL; return 0; } /* Put all the SeqLoc's into one big linked list. */ loc_head = last_loc = (BlastSeqLoc*) BlastMemDup(mask_loc, sizeof(BlastSeqLoc)); /* Copy all locations, so loc points at the end of the chain */ while (last_loc->next) { last_loc->next = (BlastSeqLoc*) BlastMemDup(last_loc->next, sizeof(BlastSeqLoc)); last_loc = last_loc->next; } /* Sort them by starting position. */ loc_head = (BlastSeqLoc*) ListNodeSort ((ListNode*) loc_head, SSeqRangeSortByStartPosition); di = (SSeqRange*) loc_head->ptr; start = di->left; stop = di->right; loc_var = loc_head; while (loc_var) { di = loc_var->ptr; if (loc_var->next) di_next = loc_var->next->ptr; if (di_next && ((stop + link_value) > di_next->left)) { stop = MAX(stop, di_next->right); } else { di_tmp = (SSeqRange*) malloc(sizeof(SSeqRange)); di_tmp->left = start; di_tmp->right = stop; if (!new_loc) new_loc_last = ListNodeAddPointer(&new_loc, 0, di_tmp); else new_loc_last = ListNodeAddPointer(&new_loc_last, 0, di_tmp); if (loc_var->next) { start = di_next->left; stop = di_next->right; } } loc_var = loc_var->next; di_next = NULL; } *mask_loc_out = new_loc; /* Free memory allocated for the temporary list of SeqLocs */ while (loc_head) { loc_var = loc_head->next; sfree(loc_head); loc_head = loc_var; } return status;}Int2 BLAST_ComplementMaskLocations(Uint1 program_number, BlastQueryInfo* query_info, BlastMaskLoc* mask_loc, BlastSeqLoc* *complement_mask) { Int4 start_offset, end_offset, filter_start, filter_end; Int4 context, index; BlastSeqLoc* loc,* last_loc = NULL,* start_loc = NULL; SSeqRange* double_int = NULL,* di; Boolean first; /* Specifies beginning of query. */ Boolean last_interval_open=TRUE; /* if TRUE last interval needs to be closed. */ Boolean reverse = FALSE; const Boolean k_is_na = (program_number == blast_type_blastn); if (complement_mask == NULL) return -1; *complement_mask = NULL; for (context = query_info->first_context; context <= query_info->last_context; ++context) { start_offset = query_info->context_offsets[context]; end_offset = query_info->context_offsets[context+1] - 2; /* For blastn: check if this strand is not searched at all */ if (end_offset < start_offset) continue; index = (k_is_na ? context / 2 : context); reverse = (k_is_na && ((context & 1) != 0)); first = TRUE; if (!reverse) { for ( ; mask_loc && mask_loc->index < index; mask_loc = mask_loc->next); } if (!mask_loc || (mask_loc->index > index) || !mask_loc->loc_list) { /* No masks for this context */ double_int = (SSeqRange*) calloc(1, sizeof(SSeqRange)); double_int->left = start_offset; double_int->right = end_offset; if (!last_loc) last_loc = ListNodeAddPointer(complement_mask, 0, double_int); else last_loc = ListNodeAddPointer(&last_loc, 0, double_int); continue; } if (reverse) { BlastSeqLoc* prev_loc = NULL; /* Reverse the order of the locations */ for (start_loc = mask_loc->loc_list; start_loc; start_loc = start_loc->next) { loc = (BlastSeqLoc*) BlastMemDup(start_loc, sizeof(BlastSeqLoc)); loc->next = prev_loc; prev_loc = loc; } /* Save where this list starts, so it can be freed later */ start_loc = loc; } else { loc = mask_loc->loc_list; } for ( ; loc; loc = loc->next) { di = loc->ptr; if (reverse) { filter_start = end_offset - di->right; filter_end = end_offset - di->left; } else { filter_start = start_offset + di->left; filter_end = start_offset + di->right; } /* The canonical "state" at the top of this while loop is that a SSeqRange has been created and one field was filled in on the last iteration. The first time this loop is entered in a call to the funciton this is not true and the following "if" statement moves everything to the canonical state. */ if (first) { last_interval_open = TRUE; first = FALSE; double_int = (SSeqRange*) calloc(1, sizeof(SSeqRange)); if (filter_start > start_offset) { /* beginning of sequence not filtered */ double_int->left = start_offset; } else { /* beginning of sequence filtered */ double_int->left = filter_end + 1; continue; } } double_int->right = filter_start - 1; if (!last_loc) last_loc = ListNodeAddPointer(complement_mask, 0, double_int); else last_loc = ListNodeAddPointer(&last_loc, 0, double_int); if (filter_end >= end_offset) { /* last masked region at end of sequence */ last_interval_open = FALSE; break; } else { double_int = (SSeqRange*) calloc(1, sizeof(SSeqRange)); double_int->left = filter_end + 1; } } if (reverse) { start_loc = ListNodeFree(start_loc); } if (last_interval_open) { /* Need to finish SSeqRange* for last interval. */ double_int->right = end_offset; if (!last_loc) last_loc = ListNodeAddPointer(complement_mask, 0, double_int); else last_loc = ListNodeAddPointer(&last_loc, 0, double_int); last_interval_open = FALSE; } } return 0;}#define BLASTOPTIONS_BUFFER_SIZE 128/** Parses options used for dust. * @param ptr buffer containing instructions. [in] * @param level sets level for dust. [out] * @param window sets window for dust [out] * @param cutoff sets cutoff for dust. [out] * @param linker sets linker for dust. [out]*/static Int2parse_dust_options(const char *ptr, Int2* level, Int2* window, Int2* cutoff, Int2* linker){ char buffer[BLASTOPTIONS_BUFFER_SIZE]; Int4 arg, index, index1, window_pri=-1, linker_pri=-1, level_pri=-1, cutoff_pri=-1; long tmplong; arg = 0; index1 = 0; for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++) { if (*ptr == ' ' || *ptr == NULLB) { buffer[index1] = NULLB; index1 = 0; switch(arg) { case 0: sscanf(buffer, "%ld", &tmplong); level_pri = tmplong; break; case 1: sscanf(buffer, "%ld", &tmplong); window_pri = tmplong; break; case 2: sscanf(buffer, "%ld", &tmplong); cutoff_pri = tmplong; break; case 3: sscanf(buffer, "%ld", &tmplong); linker_pri = tmplong; break; default: break; } arg++; while (*ptr == ' ') ptr++; /* end of the buffer. */ if (*ptr == NULLB) break; } else { buffer[index1] = *ptr; ptr++; index1++; } } *level = level_pri; *window = window_pri; *cutoff = cutoff_pri; *linker = linker_pri; return 0;}/** parses a string to set three seg options. * @param ptr buffer containing instructions [in] * @param window returns "window" for seg algorithm. [out] * @param locut returns "locut" for seg. [out] * @param hicut returns "hicut" for seg. [out]*/static Int2parse_seg_options(const char *ptr, Int4* window, double* locut, double* hicut){ char buffer[BLASTOPTIONS_BUFFER_SIZE]; Int4 arg, index, index1; long tmplong; double tmpdouble; arg = 0; index1 = 0; for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++) { if (*ptr == ' ' || *ptr == NULLB) { buffer[index1] = NULLB; index1 = 0; switch(arg) { case 0: sscanf(buffer, "%ld", &tmplong); *window = tmplong; break; case 1: sscanf(buffer, "%le", &tmpdouble); *locut = tmpdouble; break; case 2: sscanf(buffer, "%le", &tmpdouble); *hicut = tmpdouble; break; default: break; } arg++; while (*ptr == ' ') ptr++; /* end of the buffer. */ if (*ptr == NULLB) break; } else { buffer[index1] = *ptr; ptr++; index1++; } } return 0;}#ifdef CC_FILTER_ALLOWED/** Coiled-coiled algorithm parameters. * @param ptr buffer containing instructions. [in] * @param window returns window for coil-coiled algorithm. [out] * @param cutoff cutoff for coil-coiled algorithm [out] * @param linker returns linker [out]*/static Int2parse_cc_options(const char *ptr, Int4* window, double* cutoff, Int4* linker){ char buffer[BLASTOPTIONS_BUFFER_SIZE]; Int4 arg, index, index1; long tmplong; double tmpdouble; arg = 0; index1 = 0; for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++) { if (*ptr == ' ' || *ptr == NULLB) { buffer[index1] = NULLB; index1 = 0; switch(arg) { case 0: sscanf(buffer, "%ld", &tmplong); *window = tmplong; break; case 1: sscanf(buffer, "%le", &tmpdouble); *cutoff = tmpdouble; break; case 2: sscanf(buffer, "%ld", &tmplong); *linker = tmplong; break; default: break; } arg++; while (*ptr == ' ') ptr++; /* end of the buffer. */ if (*ptr == NULLB) break; } else { buffer[index1] = *ptr; ptr++; index1++; } } return 0;}#endif/** Copies filtering commands for one filtering algorithm from "instructions" to * "buffer". * ";" is a delimiter for the commands for different algorithms, so it stops * copying when a ";" is found. * Example filtering string: "m L; R -d rodents.lib" * @param instructions filtering commands [in] * @param buffer filled with filtering commands for one algorithm. [out]*/static const char *BlastSetUp_load_options_to_buffer(const char *instructions, char* buffer){ Boolean not_started=TRUE; char* buffer_ptr; const char *ptr; Int4 index; ptr = instructions; buffer_ptr = buffer; for (index=0; index<BLASTOPTIONS_BUFFER_SIZE && *ptr != NULLB; index++) { if (*ptr == ';') { /* ";" is a delimiter for different filtering algorithms. */ ptr++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -