⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readhmm_multialpha.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 4 页
字号:
#ifdef DEBUG_RD    printf("adding path: ");#endif    /* direct path to w, add total path */    for(i = 0; i < length; i++) {      p_el.vertex = (temp_pathp + i)->vertex;#ifdef DEBUG_RD      printf("%d ", p_el.vertex);#endif      p_el.next = (*from_transp) + 1;      memcpy(*from_transp, &p_el, sizeof(struct path_element));      (*from_transp)++;    }    memcpy(*from_transp, &cur_p_el, sizeof(struct path_element));#ifdef DEBUG_RD    printf("%d %d\n", cur_p_el.vertex, w);#endif        (*from_transp)++;  }  xp = *(hmmp->to_silent_trans_array + v);  while(*xp != END) {    add_all_from_paths_multi(*xp, w, hmmp, from_transp, temp_pathp, length + 1);    xp++;  }}/* Go through transmission matrix and get all probabilities that are not 0 * into to_trans_array */void create_to_trans_array_multi(struct hmm_multi_s *hmmp){  int v,w,*xp;  int has_to_trans;  int array_head_size, array_tail_size;  struct path_element **to_trans_array, *to_trans, *temp_path;  array_tail_size = 0;  array_head_size = hmmp->nr_v;    /* estimate how much space we need to store transitions */  array_tail_size = (hmmp->nr_t/hmmp->nr_v + 3 + MAX_GAP_SIZE) * MAX_GAP_SIZE/2 * hmmp->nr_v;#ifdef DEBUG_RD  printf("array_tail_size = %d\n", array_tail_size);#endif  to_trans_array = (struct path_element**)    (malloc_or_die(array_head_size * sizeof(struct path_element*) +		   (array_tail_size + hmmp->nr_v) * sizeof(struct path_element)));  to_trans = (struct path_element*)(to_trans_array + hmmp->nr_v);  hmmp->to_trans_array = to_trans_array;    /* find all paths and add them to to_trans_array */  for(v = 0; v < hmmp->nr_v; v++) /* from-vertex */ {    *to_trans_array = to_trans;    if(silent_vertex_multi(v, hmmp) == YES) {      to_trans->vertex = END;      to_trans->next = NULL;      to_trans++;      to_trans_array++;      continue;    }    for(w = 0; w < hmmp->nr_v; w++) /* to-vertex */ {      if(silent_vertex_multi(w,hmmp) == YES) {	continue;      }      temp_path = (struct path_element*)(malloc_or_die(1000 * sizeof(struct path_element)));      add_all_to_paths_multi(v, w, hmmp, &to_trans, temp_path, 0);      free(temp_path);    }    to_trans->vertex = END;    to_trans->next = NULL;    to_trans++;    to_trans_array++;  }  #ifdef DEBUG_RD  printf("array_head_size, array_tail_size = %d, %d\n", array_head_size, array_tail_size);  dump_to_trans_array(hmmp->nr_v, hmmp->to_trans_array);#endif }void add_all_to_paths_multi(int v, int w, struct hmm_multi_s *hmmp,		  struct path_element **to_transp, struct path_element *temp_pathp, int length){  int i,j;  int *xp;  struct path_element p_el;    if(length > MAX_GAP_SIZE) {    return;  }  if(*(hmmp->transitions + get_mtx_index(v, w, hmmp->nr_v)) != 0.0) {    /* direct path to w, add total path */    for(i = 0; i < length; i++) {      p_el.vertex = (temp_pathp + i)->vertex;      p_el.next = (*to_transp) + 1;      memcpy(*to_transp, &p_el, sizeof(struct path_element));      (*to_transp)++;    }    p_el.vertex = w;    p_el.next = NULL;    memcpy(*to_transp, &p_el, sizeof(struct path_element));    (*to_transp)++;  }    xp = *(hmmp->to_silent_trans_array + v);  while(*xp != END) {    (temp_pathp + length)->vertex = *xp;    (temp_pathp + length)->next = NULL;    add_all_to_paths_multi(*xp, w, hmmp, to_transp, temp_pathp, length + 1);    xp++;  }  }int silent_vertex_multi(int k, struct hmm_multi_s *hmmp){#ifdef DEBUG_RD  printf("startnode = %d\n",hmmp->startnode);  printf("endnode = %d\n",hmmp->endnode);  dump_silent_vertices_multi(hmmp);  dump_emiss_matrix(hmmp->nr_v, hmmp->a_size, hmmp->emissions);#endif  if(*(hmmp->emissions + get_mtx_index(k,0,hmmp->a_size)) == SILENT && k != hmmp->startnode && k != hmmp->endnode) {    return YES;  }  else {    return NO;  }}/* check all probabilities and abort if some prob > 1.0 or < 0.0 */void check_probs_multi(struct hmm_multi_s *hmmp){  int i,j;  double sum;  double diff;  double prob;  /* transition probabilities first */  for(i = 0; i < hmmp->nr_v; i++) {    sum = 0;    for(j = 0; j < hmmp->nr_v; j++) {      prob = *((hmmp->transitions) + (i*hmmp->nr_v) + j);      if(prob > 1.0 || prob < 0.0) {	printf("Illegal probabilities (prob < 0.0 or prob > 1.0)\n");	exit(-1);      }      else {	sum += prob;      }    }    diff = 1.0 - sum;    /* maybe something about auto correcting the probabilities     * will be implemented later */  }  /* then emission probabilities */  for(i = 0; i < hmmp->nr_v; i++) {    sum = 0;    for(j = 0; j < hmmp->a_size; j++) {      prob = *((hmmp->emissions) + (i*hmmp->a_size) + j);      if((prob > 1.0 || prob < 0.0) && prob != SILENT) {	printf("Illegal probabilities (prob < 0.0 or prob > 1.0)\n");	exit(-1);      }      else {	sum += prob;      }    }    diff = 1.0 - sum;    /* maybe something about auto correcting the probabilities     * will be implemented later */  }#ifdef DEBUG_RD  //dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->transitions);  //dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->log_transitions);  //dump_emiss_matrix(hmmp->nr_v, hmmp->a_size, hmmp->emissions);  //dump_emiss_matrix(hmmp->nr_v, hmmp->a_size, hmmp->log_emissions);#endif}int read_prior_files_multi(int nr_priorfiles, struct emission_dirichlet_s *emission_priorsp,			   int a_size, FILE *file){  int i,j,k;  double q_value, alpha_value, alpha_sum, logbeta;  char s[2048], *p;  char ps[2048];  char *file_name;  char *pri;  FILE *priorfile;     /* find \n sign and remove it */  if(fgets(s, 2048, file) != NULL) {    p = s;    while((p = strstr(p, "\n")) != NULL) {      strncpy(p, " ", 1);    }  }    /* read all before first filename */  strtok(s," ");   if((file_name = strtok(NULL, " ")) == NULL) {    /* done */    return 0;  }   for(i = 0; i < nr_priorfiles; i++) {    /* open the priorfile */    if((file_name = strtok(NULL, " ")) == NULL) {      /* done */      return 0;    }        else {      if((priorfile = fopen(file_name,"r")) != NULL) {	printf("Opened priorfile %s\n", file_name);      }      else {	perror(file_name);	return -1;      }    }    /* put name i struct */    strcpy(emission_priorsp->name, file_name);    /* put nr of components in struct */    if(fgets(ps, 2048, priorfile) != NULL) {    }    else {      return -1;    }    while(*ps == '#') {      if(fgets(ps, 2048, priorfile) != NULL) {      }      else {	return -1;      }    }    (emission_priorsp + i)->nr_components = atoi(&ps[0]);    (emission_priorsp + i)->alphabet_size = a_size;            /* allocate memory for arrays and matrix to this prior struct */    (emission_priorsp + i)->q_values = malloc_or_die((emission_priorsp + i)->nr_components *						     sizeof(double));    (emission_priorsp + i)->alpha_sums = malloc_or_die((emission_priorsp + i)->nr_components *						     sizeof(double));    (emission_priorsp + i)->logbeta_values =      malloc_or_die((emission_priorsp + i)->nr_components * sizeof(double));    (emission_priorsp + i)->prior_values = malloc_or_die((emission_priorsp + i)->nr_components *							 a_size * sizeof(double));    for(j = 0; j < (emission_priorsp + i)->nr_components; j++) {      /* put q-value in array, skip empty and comment lines  */      while(1) {	if(fgets(ps, 2048, priorfile) != NULL) {	  if(*ps == '#' || *ps == '\n') {	    	  }	  else {	    break;	  }	}	else {	  printf("Prior file has incorrect format\n");	}      }      q_value = atof(&ps[0]);      *((emission_priorsp + i)->q_values + j) = q_value; #ifdef DEBUG_RDPRI      printf("q_value = %f\n", *(((emission_priorsp + i)->q_values) + j));#endif            /* put alpha-values of this component in matrix */      alpha_sum = 0.0;      k = 0;      if(fgets(ps, 2048, priorfile) != NULL) {      }      else {	return -1;      }      while(*ps == '#') {	if(fgets(ps, 2048, priorfile) != NULL) {	}	else {	  return -1;	}      }            pri = &ps[0];      for(k = 0; k < a_size; k++) {	alpha_value = strtod(pri, &pri);	alpha_sum += alpha_value;	*(((emission_priorsp + i)->prior_values) +	  get_mtx_index(j, k,a_size)) = alpha_value;      }            /* put sum of alphavalues in array */      *(((emission_priorsp + i)->alpha_sums) + j) = alpha_sum;             /* calculate logB(alpha) for this component, store in array*/      logbeta = 0;      for(k = 0; k < a_size; k++) {	logbeta += lgamma(*(((emission_priorsp + i)->prior_values) +			    get_mtx_index(j, k, a_size)));	#ifdef DEBUG_RDPRI	printf("prior_value = %f\n", *(((emission_priorsp + i)->prior_values) +				       get_mtx_index(j, k, a_size)));	printf("lgamma_value = %f\n", lgamma(*(((emission_priorsp + i)->prior_values) +					       get_mtx_index(j, k, a_size))));#endif      }      logbeta = logbeta - lgamma(*(((emission_priorsp + i)->alpha_sums) + j));      *(((emission_priorsp + i)->logbeta_values) + j) = logbeta;    }#ifdef DEBUG_RDPRI    dump_prior_struct(emission_priorsp + i);#endif        /* some cleanup before continuing with next prior file */    fclose(priorfile);    emission_priorsp++;  }  return 0;}int read_trans_prior_files_multi(int nr_priorfiles, void *emission_priorsp,		     struct hmm_multi_s *hmmp, FILE *file){  char s[2048];    /* not implemented yet */  if(fgets(s, 2048, file) != NULL) {  }  return 0;}void create_tot_transitions_multi(struct hmm_multi_s *hmmp){  int v,w;  struct path_element *wp;  double t_res;  double log_t_res, cur_value;  hmmp->tot_transitions = (double*)malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double));  hmmp->max_log_transitions = (double*)malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double));  init_float_mtx(hmmp->max_log_transitions, DEFAULT, hmmp->nr_v * hmmp->nr_v);  for(v = 0; v < hmmp->nr_v; v++) {    wp = *(hmmp->from_trans_array + v);    while(wp->vertex != END) /* w = from-vertex */ {      t_res = 1.0;      w = wp->vertex;      while(wp->next != NULL) {	t_res = t_res * *((hmmp->transitions) +			    get_mtx_index(wp->vertex, (wp + 1)->vertex, hmmp->nr_v));	/* probability of transition from w to v via silent states */	wp++;      }      t_res = t_res * *((hmmp->transitions) +			  get_mtx_index(wp->vertex, v, hmmp->nr_v));      /* tot_transitions */      *(hmmp->tot_transitions + get_mtx_index(w,v,hmmp->nr_v)) += t_res;            /* max_log_transitions */      if(t_res != 0.0) {	log_t_res = log10(t_res);	cur_value = *(hmmp->max_log_transitions + get_mtx_index(w,v,hmmp->nr_v));	if(cur_value == DEFAULT || log_t_res > cur_value) {	  *(hmmp->max_log_transitions + get_mtx_index(w,v,hmmp->nr_v)) = log_t_res;	}      }      wp++;    }  }}void create_tot_trans_arrays_multi(struct hmm_multi_s *hmmp){  int v,w;  struct path_element *p_elp;  int malloc_size;  malloc_size = 0;  for(v = 0; v < hmmp->nr_v; v++) {    for(w = 0; w < hmmp->nr_v; w++) {      if(*(hmmp->tot_transitions + get_mtx_index(v,w,hmmp->nr_v)) != 0.0) {	malloc_size++;      }    }  }    hmmp->tot_to_trans_array = (struct path_element**)malloc_or_die(hmmp->nr_v * sizeof(struct path_element*) +								  (malloc_size + hmmp->nr_v) * sizeof(struct path_element));    hmmp->tot_from_trans_array = (struct path_element**)malloc_or_die(hmmp->nr_v * sizeof(struct path_element*) +								    (malloc_size + hmmp->nr_v) * sizeof(struct path_element));  /* fill in tot_to_trans_array */  p_elp = (struct path_element*)(hmmp->tot_to_trans_array + hmmp->nr_v);  for(v = 0; v < hmmp->nr_v; v++) {    *(hmmp->tot_to_trans_array + v) = p_elp;    for(w = 0; w < hmmp->nr_v; w++) {      if(*(hmmp->tot_transitions + get_mtx_index(v,w,hmmp->nr_v)) != 0.0) {	p_elp->vertex = w;	p_elp->next = NULL;	p_elp++;      }    }    p_elp->vertex = END;    p_elp->next = NULL;    p_elp++;  }    /* fill in tot_from_trans_array */  p_elp = (struct path_element*)(hmmp->tot_from_trans_array + hmmp->nr_v);  for(v = 0; v < hmmp->nr_v; v++) {    *(hmmp->tot_from_trans_array + v) = p_elp;    for(w = 0; w < hmmp->nr_v; w++) {      if(*(hmmp->tot_transitions + get_mtx_index(w,v,hmmp->nr_v)) != 0.0) {	p_elp->vertex = w;	p_elp->next = NULL;	p_elp++;      }    }    p_elp->vertex = END;    p_elp->next = NULL;    p_elp++;  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -