📄 blast_util.c
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_util.c,v $ * PRODUCTION Revision 1000.4 2004/06/01 18:07:56 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.70 * PRODUCTION * =========================================================================== *//* $Id: blast_util.c,v 1000.4 2004/06/01 18:07:56 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Ilya Dondoshansky * *//** @file blast_util.c * Various BLAST utilities */static char const rcsid[] = "$Id: blast_util.c,v 1000.4 2004/06/01 18:07:56 gouriano Exp $";#include <algo/blast/core/blast_def.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/blast_encoding.h>#include <algo/blast/core/blast_filter.h>Int2BlastSetUp_SeqBlkNew (const Uint1* buffer, Int4 length, Int4 context, BLAST_SequenceBlk* *seq_blk, Boolean buffer_allocated){ /* Check if BLAST_SequenceBlk itself needs to be allocated here or not */ if (*seq_blk == NULL) { *seq_blk = calloc(1, sizeof(BLAST_SequenceBlk)); } if (buffer_allocated) { (*seq_blk)->sequence_start_allocated = TRUE; (*seq_blk)->sequence_start = (Uint1 *) buffer; /* The first byte is a sentinel byte. */ (*seq_blk)->sequence = (*seq_blk)->sequence_start+1; } else { (*seq_blk)->sequence = (Uint1 *) buffer; (*seq_blk)->sequence_start = NULL; } (*seq_blk)->length = length; (*seq_blk)->context = context; return 0;}Int2 BlastSeqBlkNew(BLAST_SequenceBlk** retval){ if ( !retval ) { return -1; } else { *retval = (BLAST_SequenceBlk*) calloc(1, sizeof(BLAST_SequenceBlk)); if ( !*retval ) { return -1; } } return 0;}Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk* seq_blk, const Uint1* sequence, Int4 seqlen){ if ( !seq_blk ) { return -1; } seq_blk->sequence_start_allocated = TRUE; seq_blk->sequence_start = (Uint1*) sequence; seq_blk->sequence = (Uint1*) sequence + 1; seq_blk->length = seqlen; seq_blk->oof_sequence = NULL; return 0;}Int2 BlastSeqBlkSetCompressedSequence(BLAST_SequenceBlk* seq_blk, const Uint1* sequence){ if ( !seq_blk ) { return -1; } seq_blk->sequence_allocated = TRUE; seq_blk->sequence = (Uint1*) sequence; seq_blk->oof_sequence = NULL; return 0;}#if 0/** Create the subject sequence block given an ordinal id in a database */voidMakeBlastSequenceBlk(ReadDBFILEPtr db, BLAST_SequenceBlk** seq_blk, Int4 oid, Uint1 encoding){ Int4 length, buf_len = 0; Uint1* buffer = NULL; if (encoding == BLASTNA_ENCODING) { length = readdb_get_sequence_ex(db, oid, &buffer, &buf_len, TRUE); } else if (encoding == NCBI4NA_ENCODING) { length = readdb_get_sequence_ex(db, oid, &buffer, &buf_len, FALSE); } else { length=readdb_get_sequence(db, oid, &buffer); } BlastSetUp_SeqBlkNew(buffer, length, 0, seq_blk, (encoding != BLASTP_ENCODING)); (*seq_blk)->oid = oid;}#endifInt2 BlastSequenceBlkClean(BLAST_SequenceBlk* seq_blk){ if (!seq_blk) return 1; if (seq_blk->sequence_allocated) sfree(seq_blk->sequence); if (seq_blk->sequence_start_allocated) sfree(seq_blk->sequence_start); if (seq_blk->oof_sequence_allocated) sfree(seq_blk->oof_sequence); return 0;}BLAST_SequenceBlk* BlastSequenceBlkFree(BLAST_SequenceBlk* seq_blk){ if (!seq_blk) return NULL; BlastSequenceBlkClean(seq_blk); if (seq_blk->lcase_mask_allocated) BlastMaskLocFree(seq_blk->lcase_mask); sfree(seq_blk); return NULL;}void BlastSequenceBlkCopy(BLAST_SequenceBlk** copy, BLAST_SequenceBlk* src) { ASSERT(copy); ASSERT(src); if (*copy) memcpy(*copy, src, sizeof(BLAST_SequenceBlk)); else *copy = BlastMemDup(src, sizeof(BLAST_SequenceBlk)); (*copy)->sequence_allocated = FALSE; (*copy)->sequence_start_allocated = FALSE; (*copy)->oof_sequence_allocated = FALSE; (*copy)->lcase_mask_allocated = FALSE;}Int2 BlastProgram2Number(const char *program, Uint1 *number){ *number = blast_type_undefined; if (program == NULL) return 1; if (strcasecmp("blastn", program) == 0) *number = blast_type_blastn; else if (strcasecmp("blastp", program) == 0) *number = blast_type_blastp; else if (strcasecmp("blastx", program) == 0) *number = blast_type_blastx; else if (strcasecmp("tblastn", program) == 0) *number = blast_type_tblastn; else if (strcasecmp("tblastx", program) == 0) *number = blast_type_tblastx; else if (strcasecmp("rpsblast", program) == 0) *number = blast_type_rpsblast; else if (strcasecmp("rpstblastn", program) == 0) *number = blast_type_rpstblastn; return 0;}Int2 BlastNumber2Program(Uint1 number, char* *program){ if (program == NULL) return 1; switch (number) { case blast_type_blastn: *program = strdup("blastn"); break; case blast_type_blastp: *program = strdup("blastp"); break; case blast_type_blastx: *program = strdup("blastx"); break; case blast_type_tblastn: *program = strdup("tblastn"); break; case blast_type_tblastx: *program = strdup("tblastx"); break; case blast_type_rpsblast: *program = strdup("rpsblast"); break; case blast_type_rpstblastn: *program = strdup("rpstblastn"); break; default: *program = strdup("unknown"); break; } return 0;}#define X_STDAA 21/** Translate 3 nucleotides into an amino acid * MUST have 'X' as unknown amino acid * @param codon 3 values in ncbi4na code * @param codes Geneic code string to use (must be in ncbistdaa encoding!) * @return Amino acid in ncbistdaa */static Uint1 CodonToAA (Uint1* codon, const Uint1* codes){ register Uint1 aa = 0, taa; register int i, j, k, index0, index1, index2; static Uint1 mapping[4] = { 8, /* T in ncbi4na */ 2, /* C */ 1, /* A */ 4 }; /* G */ for (i = 0; i < 4; i++) { if (codon[0] & mapping[i]) { index0 = i * 16; for (j = 0; j < 4; j++) { if (codon[1] & mapping[j]) { index1 = index0 + (j * 4); for (k = 0; k < 4; k++) { if (codon[2] & mapping[k]) { index2 = index1 + k; taa = codes[index2]; if (! aa) aa = taa; else { if (taa != aa) { aa = X_STDAA; break; } } } if (aa == X_STDAA) break; } } if (aa == X_STDAA) break; } } if (aa == X_STDAA) break; } return aa;}Int4BLAST_GetTranslation(const Uint1* query_seq, const Uint1* query_seq_rev, Int4 nt_length, Int2 frame, Uint1* prot_seq, const Uint1* genetic_code){ Uint1 codon[CODON_LENGTH]; Int4 index, index_prot; Uint1 residue; Uint1* nucl_seq; nucl_seq = (frame >= 0 ? (Uint1 *)query_seq : (Uint1 *)(query_seq_rev+1)); /* The first character in the protein is the NULLB sentinel. */ prot_seq[0] = NULLB; index_prot = 1; for (index=ABS(frame)-1; index<nt_length-2; index += CODON_LENGTH) { codon[0] = nucl_seq[index]; codon[1] = nucl_seq[index+1]; codon[2] = nucl_seq[index+2]; residue = CodonToAA(codon, genetic_code); if (IS_residue(residue)) { prot_seq[index_prot] = residue; index_prot++; } } prot_seq[index_prot] = NULLB; return index_prot - 1;}/* Translate a compressed nucleotide sequence without ambiguity codes.*/Int4BLAST_TranslateCompressedSequence(Uint1* translation, Int4 length, const Uint1* nt_seq, Int2 frame, Uint1* prot_seq){ int state; Int2 total_remainder; Int4 prot_length; int byte_value, codon=-1; Uint1 last_remainder, last_byte, remainder; Uint1* nt_seq_end,* nt_seq_start;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -