⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pv.c

📁 机器学习作者tom mitchell的书上代码
💻 C
📖 第 1 页 / 共 2 页
字号:
  int size = 0;
  if (pv->write_last_di != di)
    size += PV_WRITE_SIZE_INT (di - pv->write_last_di);
  size += PV_WRITE_SIZE_INT (pi - pv->write_last_pi);
  return size;
}

/* Return the size, as computed by PV_WRITE_SIZE_INT, of the entry for
   "document index" DI and "position index" PI, measured relative to
   the last DI and PI read from PV.  A DI increment is only counted
   when DI differs from the last DI read.
   NOTE(review): when DI changes, the PI increment is still measured
   against PV->READ_LAST_PI, whereas bow_pv_write_next_di_pi() resets
   its last PI to -1 first -- confirm this difference is intended. */
static inline int
bow_pv_read_size_di_pi (bow_pv *pv, int di, int pi)
{
  int size = 0;

  if (pv->read_last_di != di)
    size += PV_WRITE_SIZE_INT (di - pv->read_last_di);
  size += PV_WRITE_SIZE_INT (pi - pv->read_last_pi);
  return size;
}

/* Write "document index" DI and "position index" PI to the in-memory
   segment PV->PVM, creating that segment first if it does not exist
   yet.  Assumes that there is space in the PVM segment to write the
   info, and that DI is not less than the last DI written (asserted).
   The values are stored as increments from the last DI and PI
   written; a DI increment is only written when DI has changed, and
   in that case the last PI is reset to -1 before the PI increment is
   computed.  Returns the sum of the values returned by
   bow_pvm_write_unsigned_int(), i.e. the number of bytes written. */
static inline int
bow_pv_write_next_di_pi (bow_pv *pv, int di, int pi)
{
  int bytes_written = 0;

  if (pv->pvm == NULL)
    pv->pvm = bow_pvm_new (bow_pv_sizeof_first_segment);

  assert (di >= pv->write_last_di);
  if (di != pv->write_last_di)
    {
      /* The final argument flags this integer as a DI increment. */
      bytes_written +=
	bow_pvm_write_unsigned_int (pv->pvm, di - pv->write_last_di, 1);
      pv->write_last_di = di;
      pv->write_last_pi = -1;
    }
  bytes_written +=
    bow_pvm_write_unsigned_int (pv->pvm, pi - pv->write_last_pi, 0);
  pv->write_last_pi = pi;
  return bytes_written;
}

/* Read "document index" DI and "position index" PI from FP.  Assumes
   that FP is already seek'ed to the correct position.  Updates
   PV->READ_LAST_DI and PV->READ_LAST_PI by the increments read, and
   stores the resulting values in *DI and *PI.  Returns the number of
   bytes read. */
static inline int
bow_pv_read_next_di_pi (bow_pv *pv, int *di, int *pi, FILE *fp)
{
  unsigned int incr;
  int bytes_read = 0;
  int is_di;

  bytes_read += bow_pv_read_unsigned_int (&incr, &is_di, fp);
  if (is_di)
    {
      /* A DI increment starts a new document: restart the position
	 counter, then read the PI increment that must follow. */
      pv->read_last_di += incr;
      pv->read_last_pi = -1;
      bytes_read += bow_pv_read_unsigned_int (&incr, &is_di, fp);
      assert (!is_di);
    }
  pv->read_last_pi += incr;
  *di = pv->read_last_di;
  *pi = pv->read_last_pi;
  return bytes_read;
}

/* Read the next "document index" DI and "position index" PI from the
   in-memory segment PV->PVM, which must exist.  Sets *DI and *PI to
   -1 when there is nothing left to read.  Returns the number of bytes
   read, which is zero both at the end of the PV and when the previous
   values are replayed after bow_pv_unnext(). */
int
bow_pvm_read_next_di_pi (bow_pv *pv, int *di, int *pi)
{
  unsigned int incr;
  int bytes_read = 0;
  int is_di;

  assert (pv->pvm);

  /* If the special flag was set by bow_pv_unnext(), then return the
     same values returned last time without reading the next entry,
     and unset the flag. */
  if (pv->read_seek_end < 0)
    {
      *di = pv->read_last_di;
      *pi = pv->read_last_pi;
      pv->read_seek_end = -pv->read_seek_end;
      assert (pv->read_seek_end > 0);
      return 0;
    }

  /* If we are about to read from the same location as we would write,
     then we are at the end of the PV.  Return special DI and PI
     values to indicate that we are at the end. */
  if (pv->pvm->read_end == pv->pvm->write_end)
    {
      *di = *pi = -1;
      return 0;
    }

  bytes_read += bow_pvm_read_unsigned_int (pv->pvm, &incr, &is_di);
  if (is_di)
    {
      pv->read_last_di += incr;
      pv->read_last_pi = -1;
      bytes_read += bow_pvm_read_unsigned_int (pv->pvm, &incr, &is_di);
      assert (!is_di);
    }
  pv->read_last_pi += incr;
  *di = pv->read_last_di;
  *pi = pv->read_last_pi;
  return bytes_read;
}

/* Add "document index" DI and "position index" PI to PV by writing
   them to the in-memory segment PV->PVM, and increment
   PV->WORD_COUNT.  Assumes that both DI and PI are greater than or
   equal to the last DI and PI written, respectively.  FP is not used
   by this function. */
void
bow_pv_add_di_pi (bow_pv *pv, int di, int pi, FILE *fp)
{
  pv->word_count++;
  //if (di != pv->write_last_di) pv->document_count++;

  if (pv->pvm == NULL)
    pv->pvm = bow_pvm_new (bow_pv_sizeof_first_segment);

  /* Make sure that PV->PVM definitely has enough room in this PVM
     segment to write another DI and PI.  Grow the PVM segment if
     necessary. */
  if (pv->pvm->size - pv->pvm->write_end < bow_pv_max_sizeof_di_pi)
    bow_pvm_grow (&(pv->pvm));

  //pv->byte_count += 
  bow_pv_write_next_di_pi (pv, di, pi);
}

/* Read the next "document index" DI and "position index" PI.  Does
   not assume that FP is already seek'ed to the correct position.
   Will jump to a new PV segment on disk if necessary, and continues
   with the in-memory PVM (if any) once the on-disk part has been
   exhausted.  Sets *DI and *PI to -1 at the end of the PV. */
void
bow_pv_next_di_pi (bow_pv *pv, int *di, int *pi, FILE *fp)
{
  int byte_count;

  /* If the special flag was set by bow_pv_unnext(), then return the
     same values returned last time without reading the next entry,
     and unset the flag. */
  if (pv->read_seek_end < 0)
    {
      *di = pv->read_last_di;
      *pi = pv->read_last_pi;
      pv->read_seek_end = -pv->read_seek_end;
      assert (pv->read_seek_end > 0);
      return;
    }

  /* If we are about to read from the location of the tailer of the
     last segment written, then we are at the end of the PV on disk.
     Go look for the next entry in memory in the PVM, if the PVM exists. */
  if (pv->read_seek_end == pv->write_seek_last_tailer)
    {
      if (pv->pvm)
	bow_pvm_read_next_di_pi (pv, di, pi);
      else
	*di = *pi = -1;
      return;
    }

  /* Make sure that there was definitely enough room in this segment
     to have written another DI and PI.  If not, then it was written
     in the next segment, so go there and get set up for reading from
     it.  We know that there really is another segment because
     otherwise the above test would have been true. */
  if (pv->read_segment_bytes_remaining == 0)
    {
      off_t seek_new_segment;

      /* Go to the "tailer" of this segment, and read the seek
         position of the next segment. */
      fseeko (fp, pv->read_seek_end, SEEK_SET);
      bow_fread_off_t (&seek_new_segment, fp);
      fseeko (fp, seek_new_segment, SEEK_SET);
      /* Read the number of bytes in this segment, and remember it. */
      bow_fread_int (&(pv->read_segment_bytes_remaining), fp);
      /* Remember the new position from which to read the next DI and PI */
      pv->read_seek_end = ftello (fp);
#if 0
      /* When would this happen now? */
      /* If this segment has not yet been written to, we are at end of PV */
      if (pv->read_seek_end == pv->write_seek_end)
	goto return_end_of_pv;
#endif
    }

  /* Seek to the correct position, read the DI and PI, decrement our
     count of the number of bytes remaining in this segment, and
     update the seek position for reading the next DI and PI. */
  fseeko (fp, pv->read_seek_end, SEEK_SET);
  byte_count = bow_pv_read_next_di_pi (pv, di, pi, fp);
  pv->read_segment_bytes_remaining -= byte_count;
  pv->read_seek_end += byte_count;
  assert (pv->read_segment_bytes_remaining >= 0);
}

/* Undo the effect of the last call to bow_pv_next_di_pi().  That is,
   make the next call to bow_pv_next_di_pi() return the same DI and PI
   as the last call did.  This function may not be called multiple
   times in a row without calling bow_pv_next_di_pi() in between.
   The request is recorded by negating PV->READ_SEEK_END. */
void
bow_pv_unnext (bow_pv *pv)
{
  /* Make sure that this function wasn't called two times in a row. */
  assert (pv->read_seek_end > 0);
  pv->read_seek_end = -pv->read_seek_end;
}

/* Rewind the read position to the beginning of the PV.  When the PV
   has an on-disk part (PV->SEEK_START != -1), FP is positioned at its
   first segment and that segment's byte count is re-read.  The
   in-memory PVM, if any, is rewound as well. */
void
bow_pv_rewind (bow_pv *pv, FILE *fp)
{
  /* If PV is already rewound, just return immediately.  The first
     entry sits right after the int byte count that starts the first
     segment. */
  if (pv->read_seek_end == pv->seek_start + (off_t) sizeof (int)
      && pv->read_last_di == -1 && pv->read_last_pi == -1)
    return;

  if (pv->seek_start != -1)
    {
      fseeko (fp, pv->seek_start, SEEK_SET);
      bow_fread_int (&(pv->read_segment_bytes_remaining), fp);
      assert (pv->read_segment_bytes_remaining > 0);
      pv->read_seek_end = ftello (fp);
    }
  pv->read_last_di = -1;
  pv->read_last_pi = -1;
  if (pv->pvm)
    bow_pvm_rewind (pv->pvm);
}

/* When non-zero, bow_pv_write() and bow_pv_read() transfer the bow_pv
   structure as one raw block instead of field by field. */
#define FAST_PV_WRITE 1

/* Flush PV to PVFP with bow_pv_flush(), then write the bookkeeping
   fields of PV to FP.  In the fast variant the structure is written
   verbatim except for its last sizeof (void*) bytes (presumably the
   PVM pointer -- confirm against the declaration of bow_pv).  A
   failed write is reported on stderr. */
void
bow_pv_write (bow_pv *pv, FILE *fp, FILE *pvfp)
{
  bow_pv_flush (pv, pvfp);
#if FAST_PV_WRITE
  /* fwrite() returns the number of complete items written; anything
     other than 1 means the record in FP is incomplete. */
  if (fwrite (pv, sizeof (bow_pv) - sizeof (void *), 1, fp) != 1)
    fprintf (stderr, "bow_pv_write: failed to write PV record\n");
#else
  //bow_fwrite_int (pv->byte_count, fp);
  bow_fwrite_int (pv->word_count, fp);
  //bow_fwrite_int (pv->document_count, fp);
  bow_fwrite_off_t (pv->seek_start, fp);
  bow_fwrite_off_t (pv->read_seek_end, fp);
  bow_fwrite_int (pv->read_last_di, fp);
  bow_fwrite_int (pv->read_last_pi, fp);
  //bow_fwrite_int (pv->read_segment_bytes_remaining, fp);
  bow_fwrite_int (pv->write_last_di, fp);
  bow_fwrite_int (pv->write_last_pi, fp);
  bow_fwrite_off_t (pv->write_seek_last_tailer, fp);
#endif
}

/* Read the bookkeeping fields of PV from FP, in the format produced
   by bow_pv_write().  A failed or short read is reported on stderr;
   the contents of PV are then not reliable. */
void
bow_pv_read (bow_pv *pv, FILE *fp)
{
#if FAST_PV_WRITE
  /* fread() returns the number of complete items read; anything
     other than 1 means PV was not fully filled in. */
  if (fread (pv, sizeof (bow_pv) - sizeof (void *), 1, fp) != 1)
    fprintf (stderr, "bow_pv_read: failed to read PV record\n");
#else
  //bow_fread_int (&pv->byte_count, fp);
  bow_fread_int (&pv->word_count, fp);
  //bow_fread_int (&pv->document_count, fp);
  bow_fread_off_t (&pv->seek_start, fp);
  bow_fread_off_t (&pv->read_seek_end, fp);
  bow_fread_int (&pv->read_last_di, fp);
  bow_fread_int (&pv->read_last_pi, fp);
  //bow_fread_int (&pv->read_segment_bytes_remaining, fp);
  bow_fread_int (&pv->write_last_di, fp);
  bow_fread_int (&pv->write_last_pi, fp);
  bow_fread_off_t (&pv->write_seek_last_tailer, fp);
#endif
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -