// dictcore.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "pcdisk.h"
#include "dictcore.h"
// Module state: dictionary descriptor filled in by InitDictCore().
static DICT dic;
// Byte order of the index file relative to this machine
// (SAME_ENDIAN_ORDER or DIFF_ENDIAN_ORDER), set by InitDictCore().
static char IndexByteOrder;

// Forward declarations of the file-local helpers.
static long CompareEntry(unsigned long, char *);
static long BinarySearch(unsigned long, unsigned long, long *, char *);
// CompareVocab() is called from CompareEntry() before its definition, so it
// needs a prototype too; without one the call is an implicit declaration that
// conflicts with the later static definition.
static int CompareVocab(char *, char *);
// Reverse the order of the four low bytes of D (endianness conversion).
// Assumes unsigned int is 32 bits wide.
static unsigned int ArchSwap32(unsigned int D)
{
    unsigned int swapped;

    swapped  = D << 24;                 // byte 0 -> byte 3
    swapped |= (D << 8) & 0x00FF0000;   // byte 1 -> byte 2
    swapped |= (D >> 8) & 0x0000FF00;   // byte 2 -> byte 1
    swapped |= D >> 24;                 // byte 3 -> byte 0
    return swapped;
}
// Exchange the two bytes of a 16-bit value (endianness conversion).
static unsigned short ArchSwap16(unsigned short D)
{
    unsigned short low_to_high = (unsigned short)(D << 8);
    unsigned short high_to_low = (unsigned short)(D >> 8);

    return (unsigned short)(low_to_high | high_to_low);
}
// Open the dictionary data file and its index file, read the index header and
// fill in the module-level dictionary descriptor.
//
// Returns TRUE when the dictionary is open (including when it was already
// open). Returns FALSE when a file cannot be opened, the header cannot be read
// completely, or the byte-order marker in the header is not recognised. On
// failure every handle opened here is closed and both handles are reset to
// DICT_CLOSED, so a later call can retry.
long InitDictCore(void)
{
    if (fpidx > DICT_CLOSED)    // dictionary already open
        return TRUE;

    // First open: dictionary data file.
    fpdic = NU_Open(DICT_FILE, PO_RDONLY, PS_IWRITE | PS_IREAD);
    if (fpdic < DICT_CLOSED)
    {
        fpdic = DICT_CLOSED;
        return FALSE;
    }

    // Dictionary index file.
    fpidx = NU_Open(INDEX_FILE, PO_RDONLY, PS_IWRITE | PS_IREAD);
    if (fpidx < DICT_CLOSED)
    {
        // Do not leave the data file open behind a failed initialisation.
        NU_Close(fpdic);
        fpdic = DICT_CLOSED;
        fpidx = DICT_CLOSED;
        return FALSE;
    }

    // Read the index header; a short read is as useless as no read.
    if (NU_Read(fpidx, &dic.header, sizeof(DICT_HEADER)) != sizeof(DICT_HEADER))
        goto fail;

    // Decide whether the index file's byte order matches this machine.
    if (dic.header.symbol == BYTE_ORDER_SYM)
        IndexByteOrder = SAME_ENDIAN_ORDER;
    else if (dic.header.symbol == ArchSwap32(BYTE_ORDER_SYM))
        IndexByteOrder = DIFF_ENDIAN_ORDER;
    else
        goto fail;

    // The data file's byte order is not checked; it is taken to be the same
    // as the index file's.
    NU_Seek(fpdic, sizeof(DICT_HEADER), SEEK_SET);

    // Fill in the dictionary descriptor. Only the entry count needs swapping;
    // the table offsets must be set for BOTH byte orders (previously they were
    // only set when the byte order matched).
    if (IndexByteOrder == DIFF_ENDIAN_ORDER)
        dic.header.num = ArchSwap32(dic.header.num);
    dic.open_num = 1;
    dic.alpha_tbl = sizeof(DICT_HEADER);
    dic.entry_tbl = dic.alpha_tbl + TOP_INDEX_NUM * sizeof(ALPHABET_IDX);
    return TRUE;

fail:
    NU_Close(fpidx);
    NU_Close(fpdic);
    fpidx = DICT_CLOSED;
    fpdic = DICT_CLOSED;
    return FALSE;
}
// Close the dictionary data and index files and mark both handles as closed.
// Always returns TRUE; calling it when the dictionary is not open is a no-op.
long ExitDictCore(void)
{
    // This function stores DICT_CLOSED in the handles, so the "already
    // closed" test must accept that value as well as anything below it;
    // otherwise a second call would close the handles again.
    if (fpidx <= DICT_CLOSED)
        return TRUE;

    NU_Close(fpidx);
    NU_Close(fpdic);
    fpidx = DICT_CLOSED;
    fpdic = DICT_CLOSED;
    return TRUE;
}
// Look up the entry matching a string.
//
// Parameters:
//   buffer  the string to look for (NUL-terminated); must not be NULL.
//
// Returns:
//   The entry number of the exact match, or, when there is no exact match
//   (fuzzy lookup), the entry number of the position where the search ended,
//   clamped to the last entry. When the block for the first character is
//   empty, the first entry of the next non-empty block is returned.
//   Returns -1 when the dictionary is not open, buffer is NULL, the index
//   cannot be read, or no entry is found.
long SearchEntry(char * buffer)
{
    long blk_id, offset;
    long index;
    char ch;
    ALPHABET_IDX blk_info;

    // InitDictCore() treats fpidx == DICT_CLOSED as "not open", so this guard
    // has to reject that value as well.
    if (fpidx <= DICT_CLOSED || fpdic < DICT_CLOSED)
        return -1;
    if (buffer == NULL)
        return -1;

    // Pick the top-level block from the first character: block 0 for
    // anything below 'a', the last block for anything above 'z', and blocks
    // 1..26 for the letters. The cast keeps tolower() away from negative
    // arguments, which are undefined behaviour.
    ch = (char)tolower((unsigned char)buffer[0]);
    if (ch < 'a')
        blk_id = 0;
    else if (ch > 'z')
        blk_id = TOP_INDEX_NUM - 1;
    else
        blk_id = ch - 'a' + 1;

    // Read that block's descriptor from the index file.
    offset = sizeof(DICT_HEADER) + blk_id * sizeof(ALPHABET_IDX);
    NU_Seek(fpidx, offset, SEEK_SET);
    if (NU_Read(fpidx, &blk_info, sizeof(ALPHABET_IDX)) != sizeof(ALPHABET_IDX))
        return -1;
    if (IndexByteOrder == DIFF_ENDIAN_ORDER)
    {
        blk_info.begin = ArchSwap32(blk_info.begin);
        blk_info.end = ArchSwap32(blk_info.end);
    }

    if (blk_info.begin != 0)
    {
        // The block has entries: binary-search it. BinarySearch() yields a
        // byte offset into the index file, which is converted to an entry
        // number here. Its comparison result is not needed.
        (void)BinarySearch(blk_info.begin, blk_info.end, &index, buffer);
        index = (index - dic.entry_tbl) / sizeof(ENTRY_IDX);
        if ((unsigned long)index >= dic.header.num)
            index = dic.header.num - 1;
        return index;
    }

    // The block is empty: walk the following block descriptors (the file
    // position is already just past the current one) until one has entries.
    while (blk_info.begin == 0 && ++blk_id < TOP_INDEX_NUM)
    {
        if (NU_Read(fpidx, &blk_info, sizeof(ALPHABET_IDX)) != sizeof(ALPHABET_IDX))
            return -1;
        if (IndexByteOrder == DIFF_ENDIAN_ORDER)
            blk_info.begin = ArchSwap32(blk_info.begin);
    }

    if (blk_info.begin == 0)    // no entries in any later block
        index = -1;
    else
        index = (blk_info.begin - dic.entry_tbl) / sizeof(ENTRY_IDX);
    return index;
}
// Prototype:
//   long BinarySearch(unsigned long begin, unsigned long end, long *index, char *buffer)
//
// Parameters:
//   begin   byte offset in the index file of the block's first entry
//   end     byte offset in the index file of the block's last entry
//           (the entry at this offset is itself compared)
//   index   receives the byte offset of the matching entry, or, when there
//           is no exact match, of the first entry that sorts after the key
//           (end + sizeof(ENTRY_IDX) when the key sorts after every entry)
//   buffer  the string to look for
//
// Returns:
//   The result of the last string comparison performed (0 on an exact
//   match). When begin > end no comparison is made, *index is set to begin
//   and -1 is returned.
//
// Binary search over one block of the second-level index table.
static long BinarySearch(unsigned long begin, unsigned long end, long *index, char *buffer)
{
    const unsigned long step = sizeof(ENTRY_IDX);
    unsigned long lo = begin;
    unsigned long hi = end;
    unsigned long mid;
    long result = -1;

    // Invariant: every entry below lo sorts before the key and every entry
    // above hi sorts after it.
    while (lo < hi)
    {
        mid = lo + (hi - lo) / 2;
        mid -= (mid - dic.entry_tbl) % step;    // snap down to an entry boundary
        result = CompareEntry(mid, buffer);
        if (result == 0)
        {
            *index = (long)mid;
            return result;
        }
        if (result > 0)
        {
            lo = mid + step;
        }
        else
        {
            if (mid == begin)   // key sorts before the first entry of the block
            {
                *index = (long)mid;
                return result;
            }
            hi = mid - step;
        }
    }

    if (lo == hi)
    {
        // Exactly one candidate is left undecided: compare against it.
        mid = lo - (lo - dic.entry_tbl) % step;
        result = CompareEntry(mid, buffer);
        if (result > 0)
            *index = (long)(lo + step);
        else
            *index = (long)lo;
    }
    else
    {
        // lo has moved past hi, so by the invariant the entry at lo is already
        // known to be the first one after the key. Comparing the entry at hi
        // again and stepping forward (as was done before) lands one entry
        // too far.
        *index = (long)lo;
    }
    return result;
}
// Prototype:
//   long CompareEntry(unsigned long offset, char *buffer)
//
// Parameters:
//   offset  byte offset in the index file of the entry's second-level index
//   buffer  the string to compare against the entry's name
//
// Returns:
//   The result of CompareVocab(buffer, entry name): 0 when equal, positive
//   when buffer sorts after the entry name, negative when before.
//   Also returns -1 when the entry cannot be read or memory is exhausted.
//
// Compares an entry's name with the given string.
static long CompareEntry(unsigned long offset, char *buffer)
{
    char *name;
    long result;
    unsigned short len[ENTRY_COMPOS_NUM];
    unsigned short name_len;
    ENTRY_IDX index;

    // Fetch the second-level index record of the entry.
    NU_Seek(fpidx, offset, SEEK_SET);
    if (NU_Read(fpidx, &index, sizeof(ENTRY_IDX)) != sizeof(ENTRY_IDX))
        return -1;
    if (IndexByteOrder == DIFF_ENDIAN_ORDER)
        index.begin = ArchSwap32(index.begin);

    // Fetch the component lengths; the first one is the name length.
    NU_Seek(fpdic, index.begin, SEEK_SET);
    if (NU_Read(fpdic, len, sizeof(len)) != sizeof(len))
        return -1;
    name_len = len[0];
    if (IndexByteOrder == DIFF_ENDIAN_ORDER)
        name_len = ArchSwap16(name_len);

    // Always allocate at least one byte so a zero-length name yields an empty
    // string instead of a write in front of the buffer.
    name = malloc(name_len > 0 ? name_len : 1);
    if (name == NULL)
        return -1;
    if (name_len > 0)
    {
        if (NU_Read(fpdic, name, name_len) != name_len)
        {
            free(name);
            return -1;
        }
        // The last stored byte of the name is replaced by the terminator.
        name[name_len - 1] = '\0';
    }
    else
    {
        name[0] = '\0';
    }

    result = CompareVocab(buffer, name);
    free(name);
    return result;
}
// Lower-case one character for comparison purposes. Negative char values
// (bytes >= 0x80 where char is signed) are returned unchanged, because passing
// them to tolower() is undefined behaviour.
static int LowerForCompare(char c)
{
    int value = c;

    return (value < 0) ? value : tolower(value);
}

// Compare two NUL-terminated strings for dictionary ordering.
//
// The strings are ordered case-insensitively first; only when they are equal
// ignoring case does the first case-sensitive difference decide.
//
// Returns 0 when the strings are identical, a negative value when str1 sorts
// before str2 and a positive value when it sorts after.
static int CompareVocab(char *str1, char *str2)
{
    int folded_diff;
    int exact_diff = 0;     // first case-sensitive difference seen so far

    while ((*str1 != '\0') && (*str2 != '\0'))
    {
        if (exact_diff == 0)
            exact_diff = *str1 - *str2;
        folded_diff = LowerForCompare(*str1) - LowerForCompare(*str2);
        if (folded_diff != 0)   // a case-insensitive difference always wins
            return folded_diff;
        str1++;
        str2++;
    }

    // At least one string has ended; a length difference shows up here.
    folded_diff = LowerForCompare(*str1) - LowerForCompare(*str2);
    if (folded_diff != 0)
        return folded_diff;
    if (exact_diff == 0)
        exact_diff = *str1 - *str2;
    return exact_diff;
}
// Parameters:
//   list    caller-provided array that receives the entry list; must not be
//           NULL and must have room for `length` elements
//   index   entry number of the first entry of the list
//   length  maximum number of entries to fetch
//
// Returns:
//   The number of entries actually stored in list, or -1 when a parameter is
//   invalid, the dictionary is not open, or fetching fails.
//
// Fetches up to `length` consecutive entries starting at `index`. For each
// entry the list stores its name (list[i].phrase, allocated with malloc();
// the caller frees it) and its entry number (list[i].index). On failure all
// names allocated by this call are freed again and their pointers set to NULL.
long GetEntryList(ENTRY *list, long index, unsigned long length)
{
    unsigned long total_len;
    unsigned long i, j;
    unsigned long offset;
    unsigned short len_info[ENTRY_COMPOS_NUM];
    unsigned short name_len;
    ENTRY_IDX entry;

    // InitDictCore() treats fpidx == DICT_CLOSED as "not open", so this guard
    // has to reject that value as well.
    if (fpidx <= DICT_CLOSED || fpdic < DICT_CLOSED)
        return -1;
    if (list == NULL)
        return -1;
    if (index < 0 || (unsigned long)index >= dic.header.num)
        return -1;

    for (i = 0; i < length && (unsigned long)index < dic.header.num; i++, index++)
    {
        // Lets the failure path free slot i unconditionally.
        list[i].phrase = NULL;

        // Locate the entry in the data file through its index record.
        offset = dic.entry_tbl + index * sizeof(ENTRY_IDX);
        NU_Seek(fpidx, offset, SEEK_SET);
        if (NU_Read(fpidx, &entry, sizeof(ENTRY_IDX)) != sizeof(ENTRY_IDX))
            goto fail;
        if (IndexByteOrder == DIFF_ENDIAN_ORDER)
            entry.begin = ArchSwap32(entry.begin);
        NU_Seek(fpdic, entry.begin, SEEK_SET);

        // Read the length of every component of the entry.
        if (NU_Read(fpdic, len_info, sizeof(len_info)) != sizeof(len_info))
            goto fail;
        total_len = 0;
        for (j = 0; j < ENTRY_COMPOS_NUM; j++)
        {
            if (IndexByteOrder == DIFF_ENDIAN_ORDER)
                len_info[j] = ArchSwap16(len_info[j]);
            total_len += len_info[j];
        }
        name_len = len_info[0];

        // Read only the name (the first component). At least one byte is
        // allocated so a zero-length name becomes an empty string rather
        // than a write in front of the buffer.
        list[i].phrase = malloc(name_len > 0 ? name_len : 1);
        if (list[i].phrase == NULL)
            goto fail;
        if (name_len > 0)
        {
            if (NU_Read(fpdic, list[i].phrase, name_len) != name_len)
                goto fail;
            // The last stored byte of the name is replaced by the terminator.
            list[i].phrase[name_len - 1] = '\0';
        }
        else
        {
            list[i].phrase[0] = '\0';
        }
        list[i].index = index;

        // Skip the remaining components of the entry.
        NU_Seek(fpdic, total_len - name_len, SEEK_CUR);
    }
    return (long)i;

fail:
    // Slots 0..i were touched by this call; release whatever they hold.
    for (j = 0; j <= i; j++)
    {
        free(list[j].phrase);
        list[j].phrase = NULL;
    }
    return -1;
}
// Prototype:
//   char *GetEntryInfo(long index, unsigned long mode)
//
// Parameters:
//   index  entry number of the entry to fetch
//   mode   which components to return; bit i of mode selects component i.
//          Per the original documentation the legal values are
//            WORD_PHRASE   entry name
//            PRONUNCE      pronunciation
//            PARAPHRASE    meaning
//            ENTRY_ALL     everything
//          and any combination of them, e.g. WORD_PHRASE | PARAPHRASE.
//
// Returns:
//   A malloc()ed, NUL-terminated buffer holding the selected components
//   concatenated in their stored order (name, pronunciation, meaning); the
//   caller frees it. The original documentation states that the parts are
//   separated by a newline (0x0A) - presumably stored in the data itself,
//   since this function adds no separators. Returns NULL when the dictionary
//   is not open, index is out of range, the selection is empty, memory is
//   exhausted, or reading fails.
//
// Fetches the content of an entry by its entry number.
char *GetEntryInfo(long index, unsigned long mode)
{
    char *buf;
    unsigned short len[ENTRY_COMPOS_NUM];
    unsigned short cur_len;
    unsigned long offset;
    unsigned long total_len = 0;    // wide enough for the sum of all lengths
    unsigned long filled = 0;
    int i;
    ENTRY_IDX entry;

    // InitDictCore() treats fpidx == DICT_CLOSED as "not open", so this guard
    // has to reject that value as well.
    if (fpidx <= DICT_CLOSED || fpdic < DICT_CLOSED)
        return NULL;
    if (index < 0 || (unsigned long)index >= dic.header.num)
        return NULL;

    // Locate the entry in the data file through its index record.
    offset = dic.entry_tbl + index * sizeof(ENTRY_IDX);
    NU_Seek(fpidx, offset, SEEK_SET);
    if (NU_Read(fpidx, &entry, sizeof(ENTRY_IDX)) != sizeof(ENTRY_IDX))
        return NULL;
    if (IndexByteOrder == DIFF_ENDIAN_ORDER)
        entry.begin = ArchSwap32(entry.begin);

    // Read the component lengths and add up the selected ones.
    NU_Seek(fpdic, entry.begin, SEEK_SET);
    if (NU_Read(fpdic, len, sizeof(len)) != sizeof(len))
        return NULL;
    for (i = 0; i < ENTRY_COMPOS_NUM; i++)
    {
        if (IndexByteOrder == DIFF_ENDIAN_ORDER)
            len[i] = ArchSwap16(len[i]);
        if ((mode >> i) & 1)
            total_len += len[i];
    }

    // Nothing selected (or every selected component is empty): there is no
    // byte to put the terminator in, so report "no content".
    if (total_len == 0)
        return NULL;

    buf = malloc(total_len);
    if (buf == NULL)
        return NULL;

    // Walk the components in stored order, reading the selected ones and
    // skipping the others.
    for (i = 0; i < ENTRY_COMPOS_NUM; i++)
    {
        cur_len = len[i];
        if (((mode >> i) & 1) && cur_len != 0)
        {
            if (NU_Read(fpdic, &buf[filled], cur_len) != cur_len)
            {
                free(buf);
                return NULL;
            }
            filled += cur_len;
            if (filled == total_len)    // everything requested has been read
                break;
        }
        else
        {
            NU_Seek(fpdic, cur_len, SEEK_CUR);
        }
    }

    // The last stored byte is replaced by the terminator.
    buf[filled - 1] = '\0';
    return buf;
}
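/*
 * Residue from the web code viewer this file was copied from (not part of
 * the program) - keyboard shortcut help:
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Switch theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */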