📄 blast_setup.c
字号:
} else { char* p = NULL; sbp->read_in_matrix = TRUE; BLAST_ScoreSetAmbigRes(sbp, 'X'); sbp->name = strdup(scoring_options->matrix); /* protein matrices are in all caps by convention */ for (p = sbp->name; *p != NULLB; p++) *p = toupper(*p); } return BLAST_ScoreBlkMatFill(sbp, scoring_options->matrix_path);}Int2 BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info, const BlastScoringOptions* scoring_options, Uint1 program_number, Boolean phi_align, BlastScoreBlk* *sbpp, double scale_factor, Blast_Message* *blast_message){ BlastScoreBlk* sbp; Int2 status=0; /* return value. */ Int4 context; /* loop variable. */ if (sbpp == NULL) return 1; if (program_number == blast_type_blastn) sbp = BlastScoreBlkNew(BLASTNA_SEQ_CODE, query_info->last_context + 1); else sbp = BlastScoreBlkNew(BLASTAA_SEQ_CODE, query_info->last_context + 1); if (!sbp) return 1; *sbpp = sbp; sbp->scale_factor = scale_factor; status = BlastScoreBlkMatrixInit(program_number, scoring_options, sbp); if (status != 0) return status; for (context = query_info->first_context; context <= query_info->last_context; ++context) { Int4 context_offset; Int4 query_length; Uint1 *buffer; /* holds sequence */ /* For each query, check if forward strand is present */ if ((query_length = BLAST_GetQueryLength(query_info, context)) < 0) continue; context_offset = query_info->context_offsets[context]; buffer = &query_blk->sequence[context_offset]; if (!phi_align && (status = BLAST_ScoreBlkFill(sbp, (char *) buffer, query_length, context))) { Blast_MessageWrite(blast_message, BLAST_SEV_ERROR, 2, 1, "Query completely filtered; nothing left to search"); return status; } } /* Fills in block for gapped blast. */ if (phi_align) { PHIScoreBlkFill(sbp, scoring_options, blast_message); } else if (scoring_options->gapped_calculation) { status = BlastScoreBlkGappedFill(sbp, scoring_options, program_number, query_info); if (status) { Blast_MessageWrite(blast_message, BLAST_SEV_ERROR, 2, 1, "Unable to initialize scoring block"); return status; } } /* Get "ideal" values if the calculated Karlin-Altschul params bad. */ if (program_number == blast_type_blastx || program_number == blast_type_tblastx || program_number == blast_type_rpstblastn) { /* Adjust the ungapped Karlin parameters */ sbp->kbp = sbp->kbp_std; Blast_KarlinBlkStandardCalc(sbp, query_info->first_context, query_info->last_context); } if (program_number == blast_type_blastx || program_number == blast_type_tblastx) { /* Adjust the gapped Karlin parameters, if it is a gapped search */ if (scoring_options->gapped_calculation) { sbp->kbp = sbp->kbp_gap_std; Blast_KarlinBlkStandardCalc(sbp, query_info->first_context, query_info->last_context); } } /* Why are there so many Karlin block names?? */ sbp->kbp = sbp->kbp_std; if (scoring_options->gapped_calculation) sbp->kbp_gap = sbp->kbp_gap_std; return 0;}Int2 BLAST_MainSetUp(Uint1 program_number, const QuerySetUpOptions *qsup_options, const BlastScoringOptions *scoring_options, const BlastHitSavingOptions *hit_options, BLAST_SequenceBlk *query_blk, BlastQueryInfo *query_info, double scale_factor, BlastSeqLoc **lookup_segments, BlastMaskLoc **filter_out, BlastScoreBlk **sbpp, Blast_Message **blast_message){ Boolean mask_at_hash = FALSE; /* mask only for making lookup table? */ Int2 status = 0; /* return value */ BlastMaskLoc *filter_maskloc = NULL; /* Local variable for mask locs. */ status = BlastSetUp_GetFilteringLocations(query_blk, query_info, program_number, qsup_options->filter_string, &filter_maskloc, &mask_at_hash, blast_message); if (status) { return status; } if (!mask_at_hash) { status = BlastSetUp_MaskQuery(query_blk, query_info, filter_maskloc, program_number); if (status != 0) { return status; } } /* If there was a lower case mask, its contents have now been moved to * filter_maskloc and are no longer needed in the query block. */ query_blk->lcase_mask = NULL; if (filter_out) *filter_out = filter_maskloc; else filter_maskloc = BlastMaskLocFree(filter_maskloc); if (program_number == blast_type_blastx && scoring_options->is_ooframe) { BLAST_InitDNAPSequence(query_blk, query_info); } BLAST_ComplementMaskLocations(program_number, query_info, filter_maskloc, lookup_segments); status = BlastSetup_GetScoreBlock(query_blk, query_info, scoring_options, program_number, hit_options->phi_align, sbpp, scale_factor, blast_message); if (status > 0) { return status; } return 0;}Int2 BLAST_CalcEffLengths (Uint1 program_number, const BlastScoringOptions* scoring_options, const BlastEffectiveLengthsParameters* eff_len_params, const BlastScoreBlk* sbp, BlastQueryInfo* query_info){ double alpha=0, beta=0; /*alpha and beta for new scoring system */ Int4 length_adjustment = 0; /* length adjustment for current iteration. */ Int4 index; /* loop index. */ Int4 db_num_seqs; /* number of sequences in database. */ Int8 db_length; /* total length of database. */ Blast_KarlinBlk* *kbp_ptr; /* Array of Karlin block pointers */ Int4 query_length; /* length of an individual query sequence */ Int8 effective_search_space = 0; /* Effective search space for a given sequence/strand/frame */ const BlastEffectiveLengthsOptions* eff_len_options = eff_len_params->options; if (sbp == NULL) return 1; /* use overriding value from effective lengths options or the real value from effective lengths parameters. */ if (eff_len_options->db_length > 0) db_length = eff_len_options->db_length; else db_length = eff_len_params->real_db_length; /* If database (subject) length is not available at this stage, do nothing */ if (db_length == 0) return 0; if (program_number == blast_type_tblastn || program_number == blast_type_tblastx) db_length = db_length/3; if (eff_len_options->dbseq_num > 0) db_num_seqs = eff_len_options->dbseq_num; else db_num_seqs = eff_len_params->real_num_seqs; if (program_number != blast_type_blastn) { if (scoring_options->gapped_calculation) { BLAST_GetAlphaBeta(sbp->name,&alpha,&beta,TRUE, scoring_options->gap_open, scoring_options->gap_extend); } } if (scoring_options->gapped_calculation) kbp_ptr = sbp->kbp_gap_std; else kbp_ptr = sbp->kbp_std; for (index = query_info->first_context; index <= query_info->last_context; index++) { Blast_KarlinBlk * kbp; /* statistical parameters for the current context */ kbp = kbp_ptr[index]; if ( (query_length = BLAST_GetQueryLength(query_info, index)) > 0) { /* Use the correct Karlin block. For blastn, two identical Karlin blocks are allocated for each sequence (one per strand), but we only need one of them. */ if (program_number != blast_type_blastn && scoring_options->gapped_calculation) { BLAST_ComputeLengthAdjustment(kbp->K, kbp->logK, alpha/kbp->Lambda, beta, query_length, db_length, db_num_seqs, &length_adjustment); } else { BLAST_ComputeLengthAdjustment(kbp->K, kbp->logK, 1/kbp->H, 0, query_length, db_length, db_num_seqs, &length_adjustment); } /* If overriding search space value is provided in options, do not calculate it. */ if (eff_len_options->searchsp_eff) { effective_search_space = eff_len_options->searchsp_eff; } else { effective_search_space = (query_length - length_adjustment) * (db_length - db_num_seqs*length_adjustment); /* For translated RPS blast, the DB size is left unchanged and the query size is divided by 3 (for conversion to a protein sequence) and multiplied by 6 (for 6 frames) */ if (program_number == blast_type_rpstblastn) effective_search_space *= (Int8)(NUM_FRAMES / CODON_LENGTH); } } query_info->eff_searchsp_array[index] = effective_search_space; query_info->length_adjustments[index] = length_adjustment; } return 0;}Int2 BLAST_GapAlignSetUp(Uint1 program_number, const BlastSeqSrc* seq_src, const BlastScoringOptions* scoring_options, const BlastEffectiveLengthsOptions* eff_len_options, const BlastExtensionOptions* ext_options, const BlastHitSavingOptions* hit_options, BlastQueryInfo* query_info, BlastScoreBlk* sbp, BlastScoringParameters** score_params, BlastExtensionParameters** ext_params, BlastHitSavingParameters** hit_params, BlastEffectiveLengthsParameters** eff_len_params, BlastGapAlignStruct** gap_align){ Int2 status = 0; Uint4 max_subject_length; Int8 total_length; Int4 num_seqs; total_length = BLASTSeqSrcGetTotLen(seq_src); if (total_length > 0) { num_seqs = BLASTSeqSrcGetNumSeqs(seq_src); } else { /* Not a database search; each subject sequence is considered individually */ num_seqs = 1; } /* Initialize the effective length parameters with real values of database length and number of sequences */ BlastEffectiveLengthsParametersNew(eff_len_options, total_length, num_seqs, eff_len_params); if ((status = BLAST_CalcEffLengths(program_number, scoring_options, *eff_len_params, sbp, query_info)) != 0) return status; BlastScoringParametersNew(scoring_options, sbp, score_params); BlastExtensionParametersNew(program_number, ext_options, sbp, query_info, ext_params); BlastHitSavingParametersNew(program_number, hit_options, *ext_params, sbp, query_info, hit_params); /* To initialize the gapped alignment structure, we need to know the maximal subject sequence length */ max_subject_length = BLASTSeqSrcGetMaxSeqLen(seq_src); if ((status = BLAST_GapAlignStructNew(*score_params, *ext_params, max_subject_length, query_info->max_length, sbp, gap_align)) != 0) { return status; } return status;}Int2 BLAST_OneSubjectUpdateParameters(Uint1 program_number, Uint4 subject_length, const BlastScoringOptions* scoring_options, BlastQueryInfo* query_info, BlastScoreBlk* sbp, const BlastExtensionParameters* ext_params, BlastHitSavingParameters* hit_params, BlastInitialWordParameters* word_params, BlastEffectiveLengthsParameters* eff_len_params){ Int2 status = 0; eff_len_params->real_db_length = subject_length; if ((status = BLAST_CalcEffLengths(program_number, scoring_options, eff_len_params, sbp, query_info)) != 0) return status; /* Update cutoff scores in hit saving parameters */ BlastHitSavingParametersUpdate(program_number, ext_params, sbp, query_info, hit_params); if (word_params) { /* Update cutoff scores in initial word parameters */ BlastInitialWordParametersUpdate(program_number, hit_params, ext_params, sbp, query_info, subject_length, word_params); } return status;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -