📄 sequence.c
字号:
/*============================================================================*/void sequence_d_print(FILE *file, sequence_d_t *sqd, int discrete) { int i, j; fprintf(file, "SEQD = {\n\tO = {\n"); for (i = 0; i < sqd->seq_number; i++) { if (sqd->seq_id[i] != -1.0) fprintf(file, "\t(%10.0f)", sqd->seq_id[i]); if (sqd->seq_label[i] != -1) fprintf(file, "\t<%ld>", sqd->seq_label[i]); if (sqd->seq_w[i] != 1) fprintf(file, "\t|%.0f|", sqd->seq_w[i]); fprintf(file,"\t"); if (sqd->seq_len[i] > 0) { if (discrete) fprintf(file, "%3.0f", sqd->seq[i][0]); else { if (sqd->seq[i][0] > 500) fprintf(file, "%8.0f", sqd->seq[i][0]); else fprintf(file, "%8.2f", sqd->seq[i][0]); } for (j = 1; j < sqd->seq_len[i]; j++) { if (discrete) fprintf(file, ", %3.0f", sqd->seq[i][j]); else { if (sqd->seq[i][j] > 500) fprintf(file, ", %8.0f", sqd->seq[i][j]); else fprintf(file, ", %8.2f", sqd->seq[i][j]); } } } fprintf(file, ";\n"); } fprintf(file, "\t};\n};\n\n");} /* sequence_print *//*============================================================================*/void sequence_d_mathematica_print(FILE *file, sequence_d_t *sqd, char *name) { int i; fprintf(file, "%s = {\n", name); for (i = 0; i < sqd->seq_number - 1; i++) vector_d_print(file, sqd->seq[i], sqd->seq_len[i], "{", ",", "},"); /* no comma after last seq. */ vector_d_print(file, sqd->seq[sqd->seq_number - 1], sqd->seq_len[sqd->seq_number - 1], "{", ",", "}"); fprintf(file, "};\n");} /* sequence_d_mathematica_print *//*============================================================================*/void sequence_clean(sequence_t *sq) { /* keep data, only delete references */ m_free(sq->seq); m_free(sq->seq_len); m_free(sq->seq_label); m_free(sq->seq_id); m_free(sq->seq_w); m_free(sq->states); sq->seq_number = 0; sq->total_w = 0.0;} /* sequence_clean *//*============================================================================*/void sequence_d_clean(sequence_d_t *sqd) { /* keep data, only delete references */ m_free(sqd->seq); m_free(sqd->seq_len); m_free(sqd->seq_label); m_free(sqd->seq_id); m_free(sqd->seq_w); sqd->seq_number = 0; sqd->total_w = 0.0;} /* sequence_d_clean *//*============================================================================*/int sequence_free(sequence_t **sq) {# define CUR_PROC "sequence_free" mes_check_ptr(sq, return(-1)); if( !*sq ) return(0); matrix_i_free(&(*sq)->seq, (*sq)->seq_number); /* The allocation of state must be fixed */ /*** Added attribute to the sequence_t if (&(*sq)->states) { matrix_i_free(&(*sq)->states, (*sq)->seq_number); } ***/ m_free((*sq)->seq); m_free((*sq)->seq_len); m_free((*sq)->seq_label); m_free((*sq)->seq_id); m_free((*sq)->seq_w); m_free((*sq)->states); m_free(*sq); return 0;# undef CUR_PROC} /* sequence_free *//*============================================================================*/int sequence_d_free(sequence_d_t **sqd) {# define CUR_PROC "sequence_d_free" mes_check_ptr(sqd, return(-1)); if( !*sqd ) return(0); // sequence_d_print(stdout,*sqd,0); matrix_d_free(&(*sqd)->seq, (*sqd)->seq_number); m_free((*sqd)->seq_len); m_free((*sqd)->seq_label); m_free((*sqd)->seq_id); m_free((*sqd)->seq_w); m_free(*sqd); return 0;# undef CUR_PROC} /* sequence_d_free *//*============================================================================*/sequence_d_t *sequence_d_create_from_sq(const sequence_t *sq) {# define CUR_PROC "sequence_d_create_from_sq" int j, i; sequence_d_t *sqd = NULL; /* target seq. array */ if (!(sqd = sequence_d_calloc(sq->seq_number))) { mes_proc(); goto STOP; } for (j = 0; j < sq->seq_number; j++) { if (!m_calloc(sqd->seq[j], sq->seq_len[j])) { mes_proc(); goto STOP; } for (i = 0; i < sq->seq_len[j]; i++) sqd->seq[j][i] = (double)(sq->seq[j][i]); sqd->seq_len[j] = sq->seq_len[j]; sqd->seq_label[j] = sq->seq_label[j]; sqd->seq_id[j] = sq->seq_id[j]; sqd->seq_w[j] = sq->seq_w[j]; } sqd->seq_number = sq->seq_number; sqd->total_w = sq->total_w; return(sqd);STOP: sequence_d_free(&sqd); return NULL;#undef CUR_PROC} /* sequence_d_create_from_sq */ /*============================================================================*/sequence_t *sequence_create_from_sqd(const sequence_d_t *sqd) {# define CUR_PROC "sequence_create_from_sqd" int j, i; sequence_t *sq = NULL; /* target seq. array */ if (!(sq = sequence_calloc(sqd->seq_number))) { mes_proc(); goto STOP; } for (j = 0; j < sqd->seq_number; j++) { if (!m_calloc(sq->seq[j], sqd->seq_len[j])) { mes_proc(); goto STOP; } for (i = 0; i < sqd->seq_len[j]; i++) { sq->seq[j][i] = m_int(fabs(sqd->seq[j][i])); } sq->seq_len[j] = sqd->seq_len[j]; sq->seq_label[j] = sqd->seq_label[j]; sq->seq_id[j] = sqd->seq_id[j]; sq->seq_w[j] = sqd->seq_w[j]; } sq->seq_number = sqd->seq_number; sq->total_w = sqd->total_w; return(sq);STOP: sequence_free(&sq); return NULL;#undef CUR_PROC} /* sequence_create_from_sqd */ /*============================================================================*/int sequence_max_len(const sequence_t *sqd) { int i, max_len = 0; for (i = 0; i < sqd->seq_number; i++) if (max_len < sqd->seq_len[i]) max_len = sqd->seq_len[i]; return max_len;} /* sequence_max_len *//*============================================================================*/int sequence_d_max_len(const sequence_d_t *sqd) { int i, max_len = 0; for (i = 0; i < sqd->seq_number; i++) if (max_len < sqd->seq_len[i]) max_len = sqd->seq_len[i]; return max_len;} /* sequence_d_max_len *//*============================================================================*/sequence_d_t *sequence_d_mean(const sequence_d_t *sqd) {# define CUR_PROC "sequence_d_mean" int i, j, max_len; sequence_d_t *out_sqd = NULL; max_len = sequence_d_max_len(sqd); if (!(out_sqd = sequence_d_calloc(1))) { mes_proc(); goto STOP; } if (!m_calloc(out_sqd->seq[0], max_len)) { mes_proc(); goto STOP; } out_sqd->seq_len[0] = max_len; for (i = 0; i < sqd->seq_number; i++) for (j = 0; j < sqd->seq_len[i]; j++) out_sqd->seq[0][j] += sqd->seq[i][j]; for (j = 0; j < max_len; j++) out_sqd->seq[0][j] /= sqd->seq_number; return out_sqd;STOP: sequence_d_free(&out_sqd); return NULL;# undef CUR_PROC} /* sequence_d_mean *//*============================================================================*/double **sequence_d_scatter_matrix(const sequence_d_t *sqd, int *dim) {# define CUR_PROC "sequence_d_scatter_matrix" int *count, k,l,i,j; double **W, *mean; *dim = sequence_d_max_len(sqd); if (!(W = matrix_d_alloc(*dim, *dim))) {mes_proc(); goto STOP;} /* Mean over all sequences. Individual counts for each dimension */ if (!m_calloc(mean, *dim)) { mes_proc(); goto STOP; } if (!m_calloc(count, *dim)) { mes_proc(); goto STOP; } for (i = 0; i < sqd->seq_number; i++) { for (l = 0; l < sqd->seq_len[i]; l++) { mean[l] += sqd->seq[i][l]; count[l]++; } } for (l = 0; l < *dim; l++) mean[l] /= count[l]; /* scatter matrix (upper triangle) */ for (j = 0; j < sqd->seq_number; j++) { for (k = 0; k < *dim; k++) { for (l = k; l < *dim; l++) { if (sqd->seq_len[j] > l) W[k][l] += (sqd->seq[j][k] - mean[k])*(sqd->seq[j][l] - mean[l]); } } } /* norm with counts, set lower triangle */ for (k = 0; k < *dim; k++) { for (l = *dim-1; l >= 0; l--) { if (l >= k) W[k][l] /= (double)count[l]; else W[k][l] = W[l][k]; } } return W;STOP: matrix_d_free(&W, *dim); return NULL;# undef CUR_PROC} /* sequence_d_scatter_matrix *//*============================================================================*//* dummy function at the moment */int sequence_d_class(const double *O, int index, double *osum) {#define CUR_PROC "sequence_d_class" return 0; # undef CUR_PROC} /* sequence_d_class *//*============================================================================*//* divide given field of seqs. randomly into two different fields. Also do allocating. train_ratio determines approx. the fraction of seqs. that go into the train_set and test_set resp.*/int sequence_d_partition(sequence_d_t *sqd, sequence_d_t * sqd_train, sequence_d_t *sqd_test, double train_ratio) {#define CUR_PROC "sequence_d_partition" double p; sequence_d_t *sqd_dummy = NULL; int i; long total_seqs, cur_number; total_seqs = sqd->seq_number; if (total_seqs <= 0) { mes_prot("Error: number of seqs. less or equal zero\n"); goto STOP; } /* waste of memory but avoids to many reallocations */ sqd_dummy = sqd_train; for (i = 0; i < 2; i++) { if(!m_calloc(sqd_dummy->seq, total_seqs)) {mes_proc(); goto STOP;} if(!m_calloc(sqd_dummy->seq_len, total_seqs)) {mes_proc(); goto STOP;} if(!m_calloc(sqd_dummy->seq_label, total_seqs)) {mes_proc(); goto STOP;} if(!m_calloc(sqd_dummy->seq_id, total_seqs)) {mes_proc(); goto STOP;} if(!m_calloc(sqd_dummy->seq_w, total_seqs)) {mes_proc(); goto STOP;} sqd_dummy->seq_number = 0; sqd_dummy = sqd_test; } for (i = 0; i < total_seqs; i++) { p = gsl_rng_uniform(RNG); if (p <= train_ratio) sqd_dummy = sqd_train; else sqd_dummy = sqd_test; cur_number = sqd_dummy->seq_number; if(!m_calloc(sqd_dummy->seq[cur_number], sqd->seq_len[i])) {mes_proc(); goto STOP;} /* copy all entries */ sequence_d_copy_all(sqd_dummy, cur_number, sqd, i); sqd_dummy->seq_number++; } /* reallocs*/ sqd_dummy = sqd_train; for (i = 0; i < 2; i++) { if(m_realloc(sqd_dummy->seq, sqd_dummy->seq_number)) {mes_proc(); goto STOP;} if(m_realloc(sqd_dummy->seq_len, sqd_dummy->seq_number)) {mes_proc(); goto STOP;} if(m_realloc(sqd_dummy->seq_label, sqd_dummy->seq_number)) {mes_proc(); goto STOP;} if(m_realloc(sqd_dummy->seq_id, sqd_dummy->seq_number)) {mes_proc(); goto STOP;} if(m_realloc(sqd_dummy->seq_w, sqd_dummy->seq_number)) {mes_proc(); goto STOP;} sqd_dummy = sqd_test; } return 0; STOP: return -1;#undef CUR_PROC}/*============================================================================*/void sequence_d_copy_all(sequence_d_t *target, long t_num, sequence_d_t *source, long s_num) { sequence_d_copy(target->seq[t_num], source->seq[s_num], source->seq_len[s_num]); target->seq_len[t_num] = source->seq_len[s_num]; target->seq_label[t_num] = source->seq_label[s_num]; target->seq_id[t_num] = source->seq_id[s_num]; target->seq_w[t_num] = source->seq_w[s_num];}/*============================================================================*//* Likelihood function in a mixture model: sum_k w^k log( sum_c (alpha_c p(O^k | lambda_c)))*/int sequence_d_mix_like(smodel **smo, int smo_number, sequence_d_t *sqd, double *like) {#define CUR_PROC "sequence_d_mix_like" int i, k, error_seqs = 0; double seq_like = 0.0, log_p; *like = 0.0; for (i = 0; i < sqd->seq_number; i++) { seq_like = 0.0; for (k = 0; k < smo_number; k++) { if (sfoba_logp(smo[k], sqd->seq[i], sqd->seq_len[i], &log_p) != -1) { if (log_p > -100) seq_like += exp(log_p) * smo[k]->prior; } } /* no model fits */ if (seq_like == 0.0) { error_seqs++; *like += (PENALTY_LOGP * sqd->seq_w[i]); } else *like += (log(seq_like) * sqd->seq_w[i]); } return error_seqs;#undef CUR_PROC} /* sequence_d_mix_like */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -