📄 ncbl2_mlib.c
字号:
else { if (acc[0] != '\0') sprintf(descr,"%s ",acc); else descr[0] = '\0'; if (name[0] != '\0' && strcmp(name,"BL_ORD_ID")!=0) sprintf(descr+strlen(descr),"%s ", name); } if (my_db == 0 || m_fd->pref_db < 0) { if (!have_descr) { strncpy(str,descr,cnt-1); have_descr = 1; } else { strncat(str,"\001",cnt-strlen(str)-1); strncat(str,descr,cnt-strlen(str)-1); } strncat(str,title,cnt-strlen(str)-1); if (strlen(str) >= cnt-1) break; } else if (m_fd->pref_db == my_db) { have_descr = 1; strncpy(str,descr,cnt-1); strncat(str,title,cnt-strlen(str)-1); break; } } while (abp); if (!have_descr) { strncpy(str,descr,cnt-1); strncat(str,descr,cnt-strlen(str)-1); } if (have_my_buff) free(my_buff); } str[cnt-1]='\0'; bp = str; while((bp=strchr(bp,'\001'))!=NULL) {*bp++=' ';} if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[libpos],0); m_fd->lpos = lib_cnt; m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];}unsigned int bl2_uint4_cvt(unsigned int val){ unsigned int res;#ifdef IS_BIG_ENDIAN return val;#else /* it better be LITTLE_ENDIAN */ res = ((val&255)*256)+ ((val>>8)&255); res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255); return res;#endif} unsigned int bl2_long4_cvt(long val){ int val4; unsigned int res;#ifdef IS_BIG_ENDIAN val4 = val; return val4;#else /* it better be LITTLE_ENDIAN */ res = ((val&255)*256)+ ((val>>8)&255); res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255); return res;#endif} int64_t bl2_long8_cvt(int64_t val){ int64_t res;#ifdef IS_BIG_ENDIAN return val;#else /* it better be LITTLE_ENDIAN */ res = ((val&255)*256)+ ((val>>8)&255); res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255);#ifdef BIG_LIB64 res = (res<<16) + (((val>>32)&255)*256) + ((val>>40)&255); res = (res<<16) + (((val>>48)&255)*256) + ((val>>56)&255);#else fprintf(stderr,"Cannot use bl2_long8_cvt without 64-bit longs\n"); exit(1);#endif return res;#endif} void src_int4_read(FILE *fd, int *val){#ifdef IS_BIG_ENDIAN fread((char *)val,(size_t)4,(size_t)1,fd);#else unsigned char b[4]; fread((char *)&b[0],(size_t)1,(size_t)4,fd); *val = 0; *val = (int)((int)((int)(b[0]<<8)+(int)b[1]<<8)+(int)b[2]<<8) +(int)b[3];#endif}void src_long4_read(FILE *fd, long *valp){ int val4;#ifdef IS_BIG_ENDIAN fread(&val4,(size_t)4,(size_t)1,fd); *valp = val4;#else unsigned char b[4]; fread((char *)&b[0],(size_t)1,(size_t)4,fd); val4 = 0; val4 = (int)((int)((int)(b[0]<<8)+(int)b[1]<<8)+(int)b[2]<<8) +(int)b[3]; *valp = val4;#endif}void src_uint4_read(FILE *fd, unsigned int *valp){#ifdef IS_BIG_ENDIAN fread(valp,(size_t)4,(size_t)1,fd);#else unsigned char b[4]; fread((char *)&b[0],(size_t)1,(size_t)4,fd); *valp = 0; *valp = (unsigned int)((int)((int)(b[0]<<8)+(int)b[1]<<8)+(int)b[2]<<8) +(int)b[3];#endif}voidsrc_long8_read(FILE *fd, int64_t *val){#ifdef IS_BIG_ENDIAN fread((void *)val,(size_t)8,(size_t)1,fd);#else unsigned char b[8]; fread((char *)&b[0],(size_t)1,(size_t)8,fd); *val = 0; *val = (long)((((((long)((long)(b[0]<<8)+(long)b[1]<<8)+(long)b[2]<<8) +(long)b[3]<<8)+(long)b[4]<<8)+(long)b[5]<<8) +(long)b[6]<<8)+(long)b[7];#endif}void ncbi_long8_read(FILE *fd, int64_t *val){ unsigned char b[8]; fread((char *)&b[0],(size_t)1,(size_t)8,fd); *val = 0; *val = (long)((((((long)((long)(b[7]<<8)+(long)b[6]<<8)+(long)b[5]<<8) +(long)b[4]<<8)+(long)b[3]<<8)+(long)b[2]<<8) +(long)b[1]<<8)+(long)b[0];}void src_char_read(FILE *fd, char *val){ fread(val,(size_t)1,(size_t)1,fd);}void src_fstr_read(FILE *fd, char *val, int slen){ fread(val,(size_t)slen,(size_t)1,fd);}voidnewname(char *nname, char *oname, char *suff, int maxn){ strncpy(nname,oname,maxn-1); strncat(nname,".",1); strncat(nname,suff,maxn-strlen(nname));}/* these asn functions largely duplicate the pssm_asn_subs.c functions, except that these functions assume that the entire ASN.1 entity is already in memory *//* these versions also use the old strategy of jumping over trailing NULLs from within the function - which is dangerous and has been abandoned */#define ASN_SEQ 0x30#define ASN_IS_BOOL 1#define ASN_IS_INT 2#define ASN_IS_STR 26unsigned char *get_asn_int(unsigned char *abp, int *val) { int v_len, v; v = 0; if (*abp++ != ASN_IS_INT) { /* check for int */ fprintf(stderr," int missing\n"); } else { v_len = *abp++; while (v_len-- > 0) { v *= 256; v += *abp++; } } *val = v; return abp;}unsigned char *get_asn_text(unsigned char *abp, char *text, int t_len) { int tch, at_len; text[0] = '\0'; if (*abp++ != ASN_IS_STR) { /* check for str */ fprintf(stderr," str missing\n"); } else { if ((tch = *abp++) > 128) { /* string length is in next bytes */ tch &= 0x7f; /* get number of bytes for len */ at_len = 0; while (tch-- > 0) { at_len = (at_len << 8) + *abp++;} } else { at_len = tch; } if ( at_len < t_len-1) { memcpy(text, abp, at_len); text[at_len] = '\0'; } else { memcpy(text, abp, t_len-1); text[t_len-1] = '\0'; } abp += at_len + 2; } return abp;}/* something to try to skip over stuff we don't want */unsigned char *get_asn_junk(unsigned char *abp) { int seq_cnt = 0; int tmp; char string[256]; while (*abp) { if ( *abp == ASN_SEQ) { abp += 2; seq_cnt++;} else if ( *abp == ASN_IS_BOOL ) {abp = get_asn_int(abp, &tmp)+2;} else if ( *abp == ASN_IS_INT ) {abp = get_asn_int(abp, &tmp)+2;} else if ( *abp == ASN_IS_STR ) {abp = get_asn_text(abp, string, sizeof(string)-1);} } while (seq_cnt-- > 0) abp += 2; return abp;}unsigned char *get_asn_textseq_id(unsigned char *abp, char *name, char *acc){ char release[20], ver_str[10]; int version; int seqcnt = 0; ver_str[0]='\0'; if (*abp == ASN_SEQ) { abp += 2; seqcnt++;} while (*abp) { switch (*abp) { case 0xa0: abp = get_asn_text(abp+2, name, 20); break; case 0xa1: abp = get_asn_text(abp+2, acc, 20); break; case 0xa2: abp = get_asn_text(abp+2, release, sizeof(release)); break; case 0xa3: abp = get_asn_int(abp+2, &version)+2; sprintf(ver_str,".%d",version); break; default: abp += 2; } } while (seqcnt-- > 0) abp += 4; strncat(acc,ver_str,20-strlen(acc)); acc[19]='\0'; return abp; /* skip 2 NULL's */}#define ASN_OBJID_INT 0xa0#define ASN_OBJID_STR 0xa1unsigned char *get_asn_object_id(unsigned char *abp, char *acc, int *iacc){ int seqcnt = 0; *iacc = 0; acc[0] = '\0'; if (*abp == ASN_SEQ) { abp += 2; seqcnt++;} if (*abp == ASN_OBJID_INT) { abp = get_asn_int(abp+2,iacc); } else if (*abp == ASN_OBJID_STR ) { abp = get_asn_text(abp+2, acc, 20); acc[19]='\0'; } else { fprintf(stderr, " object_id not str/int %d",*abp); return abp; } abp += 2; while (seqcnt-- > 0) abp += 2; return abp; /* skip 2 NULL's */}unsigned char *get_asn_dbtag(unsigned char *abp, char *name, char *str, int *id_p) { if (*abp == ASN_SEQ) { abp += 2;} if (*abp == 0xa0) { /* get db */ abp = get_asn_text(abp+2, name, 20); } else { fprintf(stderr," missing dbtag:db %d %d\n",abp[0],abp[1]); abp += 2; } if (*abp == 0xa1) { /* get tag */ abp += 2; abp += 2; /* skip over id */ if (*abp == 2) abp = get_asn_int(abp,id_p)+2; else abp = get_asn_text(abp+2, str, 20); } else { fprintf(stderr," missing dbtag:tag %2x %2x\n",abp[0],abp[1]); abp += 2; } return abp+2; /* skip 2 NULL's */}unsigned char *get_asn_pdb_id(unsigned char *abp, char *acc, char *chain){ int ichain, seq_cnt=0; if (*abp == ASN_SEQ) { abp += 2; seq_cnt++;} while (*abp) { switch (*abp) { case 0: abp += 2; break; case 0xa0: /* mol-id */ abp = get_asn_text(abp+2, acc, 20); break; case 0xa1: abp = get_asn_int(abp+2, &ichain)+2; chain[0] = ichain; chain[1] = '\0'; break; case 0xa2: /* ignore date - scan until NULL's */ while (*abp++) {} abp += 2; /* skip the NULL's */ break; default: abp+=2; } } while (seq_cnt-- > 0) {abp += 4;} return abp;}#define ASN_TYPE_MASK 31unsigned char*get_asn_seqid(unsigned char *abp, int *gi_p, int *db, char *acc, char *name) { int db_type, itmp, seq_cnt=0, iacc; *gi_p = 0; if (*abp != ASN_SEQ) { fprintf(stderr, "seqid - missing SEQ 1: %2x %2x\n",abp[0], abp[1]); return abp; } else { abp += 2; seq_cnt++;} db_type = (*abp & ASN_TYPE_MASK); if (db_type == 11) { /* gi */ abp = get_asn_int(abp+2,gi_p)+2; } while (*abp == ASN_SEQ) {abp += 2; seq_cnt++;} db_type = (*abp & ASN_TYPE_MASK); if (db_type > 17) {db_type = 0;} *db = db_type; switch(db_type) { case 0: abp = get_asn_object_id(abp+2, acc, &iacc); abp += 2; break; case 1: case 2: abp = get_asn_int(abp+2,&itmp)+2; break; case 11: abp = get_asn_int(abp+2,&itmp)+2; break; case 4: case 5: case 6: case 7: case 9: case 12: case 13: case 15: case 16: case 17: abp = get_asn_textseq_id(abp+2,name,acc); break; case 10: abp = get_asn_dbtag(abp+2,name,acc,&itmp); case 14: abp = get_asn_pdb_id(abp+2,acc,name); break; default: abp += 2; } while (seq_cnt-- > 0) { abp += 2;} return abp; /* skip over 2 NULL's */}/* -- from ncbi/api/fastadl.asn --Blast-def-line ::= SEQUENCE { title VisibleString OPTIONAL, -- simple title seqid SEQUENCE OF Seq-id, -- Regular NCBI Seq-Id taxid INTEGER OPTIONAL, -- taxonomy id memberships SEQUENCE OF INTEGER OPTIONAL, -- bit arrays links SEQUENCE OF INTEGER OPTIONAL, -- bit arrays other-info SEQUENCE OF INTEGER OPTIONAL -- for future use (probably genomic sequences)} -- from ncbi/asn/seq_loc_mod.asn ----*** Sequence identifiers ********************************--*Seq-id ::= CHOICE { local Object-id , -- local use gibbsq INTEGER , -- Geninfo backbone seqid gibbmt INTEGER , -- Geninfo backbone moltype giim Giimport-id , -- Geninfo import id genbank Textseq-id , embl Textseq-id , pir Textseq-id , swissprot Textseq-id , patent Patent-seq-id , other Textseq-id , -- catch all general Dbtag , -- for other databases gi INTEGER , -- GenInfo Integrated Database ddbj Textseq-id , -- DDBJ prf Textseq-id , -- PRF SEQDB pdb PDB-seq-id , -- PDB sequence tpg Textseq-id , -- Third Party Annot/Seq Genbank tpe Textseq-id , -- Third Party Annot/Seq EMBL tpd Textseq-id , -- Third Party Annot/Seq DDBJ gpipe Textseq-id -- Internal NCBI genome pipeline processing ID}*/#define ASN_FADL_TITLE 0xa0#define ASN_FADL_SEQID 0xa1#define ASN_FADL_TAXID 0xa2#define ASN_FADL_MEMBERS 0xa3#define ASN_FADL_LINKS 0xa4#define ASN_FADL_OTHER 0xa5unsigned char *parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max, int *gi_p, int *db, char *acc, char *name, char *title, int t_len, int *taxid_p) { unsigned char *abp; char tmp_db[4], tmp_acc[32], tmp_name[32]; int this_db; int seq_cnt = 0; int tmp_gi; acc[0] = name[0] = db[0] = title[0] = '\0'; abp = asn_buff; while ( abp < asn_max && *abp) { if (*abp == ASN_SEQ) { abp += 2; seq_cnt++; } else if (*abp == ASN_FADL_TITLE) { abp = get_asn_text(abp+2, title, t_len); } else if (*abp == ASN_FADL_SEQID ) { abp = get_asn_seqid(abp+2, gi_p, db, acc, name); if (*db > 17) *db = 0; } else if (*abp == ASN_FADL_TAXID ) { abp = get_asn_int(abp+2, taxid_p); abp += 2; } else if (*abp == ASN_FADL_MEMBERS) { abp = get_asn_junk(abp+2); abp += 2; } else if (*abp == ASN_FADL_LINKS ) { abp = get_asn_junk(abp+2); abp += 2; } else if (*abp == ASN_FADL_OTHER ) { abp = get_asn_junk(abp+2); abp += 2; } else { /* fprintf(stderr, " Error - missing ASN.1 %2x:%2x:%2x:%2x\n", abp[-2],abp[-1],abp[0],abp[1]); */ abp += 2; } } while (abp < asn_max && *abp == '\0' ) abp++; if (abp >= asn_max) return NULL; else return abp;}voidparse_pal(char *dname, char *msk_name, int *oid_seqs, int *max_oid, FILE *fd) { char line[MAX_STR]; while (fgets(line,sizeof(line),fd)) { if (line[0] == '#') continue; if (strncmp(line, "DBLIST", 6)==0) { sscanf(line+7,"%s",dname); } else if (strncmp(line, "OIDLIST", 7)==0) { sscanf(line+8,"%s",msk_name); } else if (strncmp(line, "NSEQ", 4)==0) { sscanf(line+5,"%d",oid_seqs); } else if (strncmp(line, "MAXOID", 6)==0) { sscanf(line+7,"%d",max_oid); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -