⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sequence.c

📁 一个通用的隐性马尔可夫C代码库 开发环境:C语言 简要说明:这是一个通用的隐性马尔可夫C代码库
💻 C
📖 第 1 页 / 共 3 页
字号:
/*============================================================================*/void sequence_d_print(FILE *file, sequence_d_t *sqd, int discrete) {  int i, j;  fprintf(file, "SEQD = {\n\tO = {\n");  for (i = 0; i < sqd->seq_number; i++) {    if (sqd->seq_id[i] != -1.0)      fprintf(file, "\t(%10.0f)", sqd->seq_id[i]);    if (sqd->seq_label[i] != -1)      fprintf(file, "\t<%ld>", sqd->seq_label[i]);    if (sqd->seq_w[i] != 1)      fprintf(file, "\t|%.0f|", sqd->seq_w[i]);    fprintf(file,"\t");    if (sqd->seq_len[i] > 0) {      if (discrete)	fprintf(file, "%3.0f", sqd->seq[i][0]);      else {	if (sqd->seq[i][0] > 500) 	  fprintf(file, "%8.0f", sqd->seq[i][0]);	else	  fprintf(file, "%8.2f", sqd->seq[i][0]);      }      for (j = 1; j < sqd->seq_len[i]; j++) {	if (discrete)	  fprintf(file, ", %3.0f", sqd->seq[i][j]);	else {	  if (sqd->seq[i][j] > 500) 	    fprintf(file, ", %8.0f", sqd->seq[i][j]);	  else	    fprintf(file, ", %8.2f", sqd->seq[i][j]);	}      }    }    fprintf(file, ";\n");  }  fprintf(file, "\t};\n};\n\n");} /* sequence_print *//*============================================================================*/void sequence_d_mathematica_print(FILE *file, sequence_d_t *sqd, char *name) {  int i;  fprintf(file, "%s = {\n", name);  for (i = 0; i < sqd->seq_number - 1; i++)      vector_d_print(file, sqd->seq[i], sqd->seq_len[i],       "{", ",", "},");  /* no comma after last seq. */  vector_d_print(file, sqd->seq[sqd->seq_number - 1], 		 sqd->seq_len[sqd->seq_number - 1], 		 "{", ",", "}");   fprintf(file, "};\n");} /* sequence_d_mathematica_print *//*============================================================================*/void sequence_clean(sequence_t *sq) {  /* keep data, only delete references */  m_free(sq->seq);   m_free(sq->seq_len);  m_free(sq->seq_label);  m_free(sq->seq_id);  m_free(sq->seq_w);  m_free(sq->states);  sq->seq_number = 0;  sq->total_w = 0.0;} /* sequence_clean *//*============================================================================*/void sequence_d_clean(sequence_d_t *sqd) {  /* keep data, only delete references */  m_free(sqd->seq);  m_free(sqd->seq_len);  m_free(sqd->seq_label);  m_free(sqd->seq_id);  m_free(sqd->seq_w);  sqd->seq_number = 0;  sqd->total_w = 0.0;} /* sequence_d_clean *//*============================================================================*/int sequence_free(sequence_t **sq) {# define CUR_PROC "sequence_free"  mes_check_ptr(sq, return(-1));  if( !*sq ) return(0);  matrix_i_free(&(*sq)->seq, (*sq)->seq_number);  /* The allocation of state must be fixed */  /*** Added attribute to the sequence_t  if (&(*sq)->states) {     matrix_i_free(&(*sq)->states, (*sq)->seq_number);   }  ***/  m_free((*sq)->seq);  m_free((*sq)->seq_len);  m_free((*sq)->seq_label);  m_free((*sq)->seq_id);  m_free((*sq)->seq_w);  m_free((*sq)->states);  m_free(*sq);  return 0;# undef CUR_PROC} /* sequence_free *//*============================================================================*/int sequence_d_free(sequence_d_t **sqd) {# define CUR_PROC "sequence_d_free"  mes_check_ptr(sqd, return(-1));  if( !*sqd ) return(0);    // sequence_d_print(stdout,*sqd,0);		    matrix_d_free(&(*sqd)->seq, (*sqd)->seq_number);  m_free((*sqd)->seq_len);  m_free((*sqd)->seq_label);  m_free((*sqd)->seq_id);  m_free((*sqd)->seq_w);  m_free(*sqd);  return 0;# undef CUR_PROC} /* sequence_d_free *//*============================================================================*/sequence_d_t *sequence_d_create_from_sq(const sequence_t *sq) {# define CUR_PROC "sequence_d_create_from_sq"  int j, i;  sequence_d_t *sqd = NULL; /* target seq. array */  if (!(sqd = sequence_d_calloc(sq->seq_number))) { mes_proc(); goto STOP; }  for (j = 0; j < sq->seq_number; j++) {    if (!m_calloc(sqd->seq[j], sq->seq_len[j])) { mes_proc(); goto STOP; }     for (i = 0; i < sq->seq_len[j]; i++)      sqd->seq[j][i] = (double)(sq->seq[j][i]);    sqd->seq_len[j] = sq->seq_len[j];    sqd->seq_label[j] = sq->seq_label[j];    sqd->seq_id[j] = sq->seq_id[j];    sqd->seq_w[j] = sq->seq_w[j];  }  sqd->seq_number = sq->seq_number;  sqd->total_w = sq->total_w;  return(sqd);STOP:  sequence_d_free(&sqd);  return NULL;#undef CUR_PROC} /* sequence_d_create_from_sq */ /*============================================================================*/sequence_t *sequence_create_from_sqd(const sequence_d_t *sqd) {# define CUR_PROC "sequence_create_from_sqd"  int j, i;  sequence_t *sq = NULL; /* target seq. array */  if (!(sq = sequence_calloc(sqd->seq_number))) { mes_proc(); goto STOP; }  for (j = 0; j < sqd->seq_number; j++) {    if (!m_calloc(sq->seq[j], sqd->seq_len[j])) { mes_proc(); goto STOP; }     for (i = 0; i < sqd->seq_len[j]; i++) {      sq->seq[j][i] = m_int(fabs(sqd->seq[j][i]));    }    sq->seq_len[j] = sqd->seq_len[j];    sq->seq_label[j] = sqd->seq_label[j];    sq->seq_id[j] = sqd->seq_id[j];    sq->seq_w[j] = sqd->seq_w[j];  }  sq->seq_number = sqd->seq_number;  sq->total_w = sqd->total_w;  return(sq);STOP:  sequence_free(&sq);  return NULL;#undef CUR_PROC} /* sequence_create_from_sqd */ /*============================================================================*/int sequence_max_len(const sequence_t *sqd) {  int i, max_len = 0;  for (i = 0; i < sqd->seq_number; i++)    if (max_len < sqd->seq_len[i])      max_len = sqd->seq_len[i];  return max_len;} /* sequence_max_len *//*============================================================================*/int sequence_d_max_len(const sequence_d_t *sqd) {  int i, max_len = 0;  for (i = 0; i < sqd->seq_number; i++)    if (max_len < sqd->seq_len[i])      max_len = sqd->seq_len[i];  return max_len;} /* sequence_d_max_len *//*============================================================================*/sequence_d_t *sequence_d_mean(const sequence_d_t *sqd) {# define CUR_PROC "sequence_d_mean"  int i, j, max_len;  sequence_d_t *out_sqd = NULL;  max_len = sequence_d_max_len(sqd);  if (!(out_sqd = sequence_d_calloc(1))) { mes_proc(); goto STOP; }  if (!m_calloc(out_sqd->seq[0], max_len)) { mes_proc(); goto STOP; }   out_sqd->seq_len[0] = max_len;    for (i = 0; i < sqd->seq_number; i++)     for (j = 0; j < sqd->seq_len[i]; j++)      out_sqd->seq[0][j] += sqd->seq[i][j];  for (j = 0; j < max_len; j++)    out_sqd->seq[0][j] /= sqd->seq_number;  return out_sqd;STOP:  sequence_d_free(&out_sqd);  return NULL;# undef CUR_PROC} /* sequence_d_mean *//*============================================================================*/double **sequence_d_scatter_matrix(const sequence_d_t *sqd, int *dim) {# define CUR_PROC "sequence_d_scatter_matrix"  int *count, k,l,i,j;  double **W, *mean;  *dim = sequence_d_max_len(sqd);  if (!(W = matrix_d_alloc(*dim, *dim))) {mes_proc(); goto STOP;}    /* Mean over all sequences. Individual counts for each dimension */  if (!m_calloc(mean, *dim)) { mes_proc(); goto STOP; }   if (!m_calloc(count, *dim)) { mes_proc(); goto STOP; }   for (i = 0; i < sqd->seq_number; i++) {    for (l = 0; l < sqd->seq_len[i]; l++) {      mean[l] += sqd->seq[i][l];      count[l]++;    }  }  for (l = 0; l < *dim; l++)    mean[l] /= count[l];  /* scatter matrix (upper triangle) */  for (j = 0; j < sqd->seq_number; j++) {    for (k = 0; k < *dim; k++) {      for (l = k; l < *dim; l++) { 	if (sqd->seq_len[j] > l) 	  W[k][l] += (sqd->seq[j][k] - mean[k])*(sqd->seq[j][l] - mean[l]);      }    }  }  /* norm with counts, set lower triangle */  for (k = 0; k < *dim; k++) {    for (l = *dim-1; l >= 0; l--) {       if (l >= k) W[k][l] /= (double)count[l];      else W[k][l] = W[l][k];    }  }  return W;STOP:  matrix_d_free(&W, *dim);  return NULL;# undef CUR_PROC} /* sequence_d_scatter_matrix *//*============================================================================*//* dummy function at the moment */int sequence_d_class(const double *O, int index, double *osum) {#define CUR_PROC "sequence_d_class"    return 0; # undef CUR_PROC} /* sequence_d_class *//*============================================================================*//* divide given field of seqs. randomly into two different fields. Also do    allocating. train_ratio determines approx. the fraction of seqs. that go   into the train_set and test_set resp.*/int sequence_d_partition(sequence_d_t *sqd, sequence_d_t * sqd_train, 			 sequence_d_t *sqd_test, double train_ratio) {#define CUR_PROC "sequence_d_partition"  double p;  sequence_d_t *sqd_dummy = NULL;  int i;  long total_seqs, cur_number;  total_seqs = sqd->seq_number;  if (total_seqs <= 0) {    mes_prot("Error: number of seqs. less or equal zero\n");    goto STOP;  }  /* waste of memory but avoids to many reallocations */  sqd_dummy = sqd_train;  for (i = 0; i < 2; i++) {    if(!m_calloc(sqd_dummy->seq, total_seqs)) {mes_proc(); goto STOP;}    if(!m_calloc(sqd_dummy->seq_len, total_seqs)) {mes_proc(); goto STOP;}    if(!m_calloc(sqd_dummy->seq_label, total_seqs)) {mes_proc(); goto STOP;}    if(!m_calloc(sqd_dummy->seq_id, total_seqs)) {mes_proc(); goto STOP;}    if(!m_calloc(sqd_dummy->seq_w, total_seqs)) {mes_proc(); goto STOP;}    sqd_dummy->seq_number = 0;    sqd_dummy = sqd_test;  }  for (i = 0; i < total_seqs; i++) {    p = gsl_rng_uniform(RNG);    if (p <= train_ratio)      sqd_dummy = sqd_train;    else      sqd_dummy = sqd_test;    cur_number = sqd_dummy->seq_number;    if(!m_calloc(sqd_dummy->seq[cur_number], sqd->seq_len[i]))       {mes_proc(); goto STOP;}    /* copy all entries */    sequence_d_copy_all(sqd_dummy, cur_number, sqd, i);    sqd_dummy->seq_number++;  }    /* reallocs*/  sqd_dummy = sqd_train;  for (i = 0; i < 2; i++) {    if(m_realloc(sqd_dummy->seq, sqd_dummy->seq_number)) {mes_proc(); goto STOP;}    if(m_realloc(sqd_dummy->seq_len, sqd_dummy->seq_number)) {mes_proc(); goto STOP;}    if(m_realloc(sqd_dummy->seq_label, sqd_dummy->seq_number)) {mes_proc(); goto STOP;}    if(m_realloc(sqd_dummy->seq_id, sqd_dummy->seq_number)) {mes_proc(); goto STOP;}    if(m_realloc(sqd_dummy->seq_w, sqd_dummy->seq_number)) {mes_proc(); goto STOP;}    sqd_dummy = sqd_test;  }  return 0; STOP:  return -1;#undef CUR_PROC}/*============================================================================*/void sequence_d_copy_all(sequence_d_t *target, long t_num, 			 sequence_d_t *source, long s_num) {    sequence_d_copy(target->seq[t_num], source->seq[s_num], source->seq_len[s_num]);      target->seq_len[t_num] = source->seq_len[s_num];    target->seq_label[t_num] = source->seq_label[s_num];    target->seq_id[t_num] = source->seq_id[s_num];    target->seq_w[t_num] = source->seq_w[s_num];}/*============================================================================*//* Likelihood function in a mixture model:   sum_k w^k log( sum_c (alpha_c p(O^k | lambda_c)))*/int sequence_d_mix_like(smodel **smo, int  smo_number, sequence_d_t *sqd,			double *like) {#define CUR_PROC "sequence_d_mix_like"  int i, k, error_seqs = 0;  double seq_like = 0.0, log_p;    *like = 0.0;  for (i = 0; i < sqd->seq_number; i++) {    seq_like = 0.0;    for (k = 0; k < smo_number; k++) {      if (sfoba_logp(smo[k], sqd->seq[i], sqd->seq_len[i], &log_p) != -1) {	if (log_p > -100)	  seq_like += exp(log_p) * smo[k]->prior;       }    }    /* no model fits */    if (seq_like == 0.0) {      error_seqs++;      *like += (PENALTY_LOGP * sqd->seq_w[i]);     }    else      *like += (log(seq_like) * sqd->seq_w[i]);  }  return error_seqs;#undef CUR_PROC} /* sequence_d_mix_like */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -