📄 gvctext.c
字号:
release_mutex();
return;
}
}
}
fclose(infile);
info_wait(IDS_NOWAIT);
gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
}
/***************************************************/
/* text extraction and searching based on pstotext */
/* This code assumes that pstotext has produced an output file
* named psfile.text_name.
* This file contains one line per word in the following format
* llx lly urx ury word
* A new line is identified by a blank line
* A new page is identified by a formfeed/newline
*
*/
/*
 * text_grab_word - locate the word on a pstotext output line.
 *
 * line has the form "llx lly urx ury word".  Leading blanks/tabs and
 * the four coordinate fields (with the blanks/tabs that follow each)
 * are stepped over.  The remainder of the line is then cut at the first
 * carriage return or line feed by overwriting it with '\0'; the buffer
 * passed in is therefore modified.
 *
 * Returns a pointer into line at the start of the word (an empty string
 * if the line has fewer than five fields).
 */
char *
text_grab_word(char *line)
{
    char *cursor = line;
    char *scan;
    int field;

    /* leading white space */
    while ((*cursor == ' ') || (*cursor == '\t'))
        cursor++;

    /* llx, lly, urx, ury: each a run of non-blanks then a run of blanks */
    for (field = 0; field < 4; field++) {
        while ((*cursor != '\0') && (*cursor != ' ') && (*cursor != '\t'))
            cursor++;
        while ((*cursor == ' ') || (*cursor == '\t'))
            cursor++;
    }

    /* remove trailing newline: terminate at the first CR or LF */
    for (scan = cursor; *scan != '\0'; scan++) {
        if ((*scan == '\r') || (*scan == '\n')) {
            *scan = '\0';
            break;
        }
    }
    return cursor;
}
/*
 * text_extract_slow - copy the words of a pstotext file to outfile.
 *
 * infile is read line by line.  While the current page is wanted,
 * lines that start with CR, LF or formfeed are copied unchanged and
 * every other line is reduced to its word followed by one space.
 * A line starting with a formfeed advances the page counter.
 *
 * all is TRUE to extract every page; otherwise a page is extracted only
 * when its entry in psfile.page_list.select is non-zero.
 */
void
text_extract_slow(FILE *outfile, FILE *infile, BOOL all)
{
    char buf[2048];
    int pageno = 0;
    /* pages are in correct order - no reversal needed */
    BOOL wanted = (all || psfile.page_list.select[pageno]);

    while (fgets(buf, sizeof(buf)-1, infile) != NULL) {
        /* remember the first character: text_grab_word edits buf */
        char first = buf[0];

        if (wanted) {
            if ((first != '\r') && (first != '\n') && (first != '\f')) {
                fputs(text_grab_word(buf), outfile);
                fputc(' ', outfile);
            }
            else {
                fputs(buf, outfile);
            }
        }
        if (first == '\f') {
            /* start of next page: re-evaluate whether it is selected */
            pageno++;
            wanted = (all || psfile.page_list.select[pageno]);
        }
    }
}
/* extract text from a range of pages */
void
gsview_text_extract_slow()
{
FILE *f;
FILE *infile;
static char output[MAXSTR];
if (!get_filename(output, TRUE, FILTER_TXT, 0, IDS_TOPICTEXT))
return;
if ((f = fopen(output, "wb")) == (FILE *)NULL)
return;
if (psfile.text_name[0] == '\0')
return;
/* Must read in binary mode, since TeX can use ^Z as a character code. */
if ((infile = fopen(psfile.text_name, "rb")) == (FILE *)NULL) {
message_box(TEXT("pstotext text extraction file is missing"), 0);
return;
}
info_wait(IDS_WAITWRITE);
if (psfile.dsc == (CDSC *)NULL) {
text_extract_slow(f, infile, TRUE); /* all pages */
}
else {
if (psfile.dsc->page_count != 0) {
text_extract_slow(f, infile, FALSE); /* selected pages */
}
else {
text_extract_slow(f, infile, TRUE); /* all pages */
}
}
fclose(f);
fclose(infile);
info_wait(IDS_NOWAIT);
return;
}
/*
 * wildmatch - compare pattern string w with string s.
 *
 * Characters are compared after passing both through tolower_latin1(),
 * so the comparison is case insensitive.
 * w may contain the wildcards '*' and '?' in any position; a run of
 * consecutive '*' is handled the same as a single '*'.
 *
 * Returns TRUE if the pattern ends in '*' once that '*' is reached, or
 * if both strings are exhausted together (trailing '*' in w ignored).
 * Returns FALSE otherwise.
 */
BOOL
wildmatch(const char *w, const char *s)
{
/* lastw initially points at an empty string, so *lastw stays '\0' */
/* until a '*' has actually been seen in w */
const char *lastw = ""; /* location of last possible '*' */
const char *lasts = s; /* location of character after the last matched by '*' */
BOOL nomatch;
while (*s && *w) {
nomatch = FALSE;
if (*w == '*') {
lastw = w;
w++; /* look at character after '*' */
while (*w && (*w == '*'))
w++; /* '**' is same as '*' */
if (*w == '\0')
return TRUE; /* matches all until end of word */
/* advance s until it reaches a character equal to the pattern */
/* character that follows the '*' */
while (*s) {
if (*w == '?') {
break; /* '*?' is same as '?' */
}
else if (tolower_latin1(*w) == tolower_latin1(*s)) {
lasts = s; /* remember location in case we need to extend wildcard match */
break; /* break loop when match found */
}
s++;
}
if (*s == '\0')
break; /* end of s without matching next in w */
}
else if (*w != '?') { /* '?' always matches */
if (tolower_latin1(*w) != tolower_latin1(*s))
nomatch = TRUE;
}
/* on a mismatch, backtrack only if a '*' has been seen and the */
/* remembered position in s is not the end of the string */
if (nomatch && *lastw && *lastw=='*' && *lasts) {
/* try extending the length matched by the '*' wildcard */
w = lastw;
s = lasts + 1;
}
else if (nomatch) {
break;
}
else {
/* current characters agree: step both strings */
w++;
s++;
}
}
/* skip over trailing '*' */
while (*w && (*w == '*'))
w++; /* '**' is same as '*' */
if ((*s == '\0') && (*w == '\0'))
return TRUE; /* at end of both strings, so matched */
return FALSE;
}
#define MAX_FIND_WORD 10
/*
 * text_find_slow - search a pstotext file for a sequence of words.
 *
 * infile is the text file from pstotext and is assumed to have been
 *   positioned by fseek.
 * str is the string to match.  It is split at spaces into at most
 *   MAX_FIND_WORD words (extra words are ignored); each word is a
 *   wildmatch() pattern that must match consecutive words in the file.
 *   Blank lines and formfeed lines between the words are skipped.
 * all is TRUE if all pages are to be searched; otherwise only pages
 *   with a non-zero entry in psfile.page_list.select are searched.
 *
 * psfile.text_page is advanced for each formfeed line read by the
 * outer loop.  Whenever the first word matches, psfile.text_offset is
 * set to the file position just after that line and the llx/lly/urx/ury
 * members of psfile.text_bbox are read from it; this happens even if
 * the remaining words then fail to match.
 *
 * Returns TRUE if match found, FALSE at end of file or if str contains
 * no words.
 */
BOOL
text_find_slow(FILE *infile, char *str, BOOL all)
{
    char line[MAXSTR];
    char *lineword;
    char sbuf[MAXSTR];
    const char *strword;
    BOOL thispage;
    char *words[MAX_FIND_WORD+1];
    char *tok;
    int nwords;
    int i;

    /* break up search string into words */
    /* bounded copy: strtok modifies its argument so work on a copy */
    strncpy(sbuf, str, sizeof(sbuf)-1);
    sbuf[sizeof(sbuf)-1] = '\0';
    nwords = 0;
    tok = strtok(sbuf, " ");
    while ((tok != NULL) && (nwords < MAX_FIND_WORD)) {
        words[nwords++] = tok;
        tok = strtok(NULL, " ");
    }
    /* the matching loop below relies on a NULL terminator */
    words[nwords] = NULL;
    if (nwords == 0)
        return FALSE;	/* nothing to search for */

    /* point at first word */
    i = 0;
    strword = words[i];
    thispage = (all || psfile.page_list.select[psfile.text_page]);
    while (fgets(line, sizeof(line)-1, infile)) {
        if (thispage && (*line != '\n') && (*line != '\f')) {
            /* get start of word */
            lineword = text_grab_word(line);
            if (wildmatch(strword, lineword)) {
                /* matched first word */
                psfile.text_offset = ftell(infile); /* remember location */
                /* remember bounding box of word */
                sscanf(line, "%d %d %d %d",
                    &psfile.text_bbox.llx, &psfile.text_bbox.lly,
                    &psfile.text_bbox.urx, &psfile.text_bbox.ury);
                /* now check remaining words */
                strword = words[++i];
                while (strword) {
                    if (!fgets(line, sizeof(line)-1, infile))
                        return FALSE; /* match failed at EOF */
                    if ((*line != '\n') && (*line != '\f')) {
                        lineword = text_grab_word(line);
                        if (!wildmatch(strword, lineword)) {
                            /* partial match failed */
                            /* restart search from next word */
                            fseek(infile, psfile.text_offset, SEEK_SET);
                            i = 0;
                            strword = words[i];
                            break;
                        }
                        strword = words[++i];
                        /* extend bbox ?? */
                    }
                }
                /* all words matched */
                if (!strword)
                    return TRUE;
            }
        }
        if (*line == '\f') {
            psfile.text_page++;
            thispage = (all || psfile.page_list.select[psfile.text_page]);
        }
    }
    return FALSE; /* reached EOF without match */
}
/*
 * gsview_text_findnext_slow - find the next occurrence of szFindText.
 *
 * The search string is wrapped in '*' wildcards (unless it already
 * starts or ends with '*' or '?') and handed to text_find_slow(),
 * searching the pstotext file psfile.text_name from psfile.text_offset
 * and restricted to the selected pages.
 *
 * If found and the matching page is the one currently displayed, the
 * find highlight is enabled and the view scrolled to it; otherwise a
 * move to the matching page is queued in pending.  If nothing is found,
 * or there is nothing to search, IDS_TEXTNOTFIND is reported.
 *
 * The pattern is built with bounded copies so that a search string
 * close to MAXSTR characters long cannot overrun find_text once the
 * wildcards are added; such a string is truncated instead.
 */
void
gsview_text_findnext_slow()
{
    FILE *infile;
    char find_text[MAXSTR];
    size_t len;

    if (not_dsc())
        return;
    if (strlen(szFindText)==0) {
        gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
        return;
    }
    if (psfile.text_name[0] == '\0') {
        gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
        return;
    }
    if ((infile = fopen(psfile.text_name, "rb")) == (FILE *)NULL) {
        message_box(TEXT("pstotext text extraction file is missing"), 0);
        return;
    }

    /* add wildcard to begin and end */
    len = 0;
    if ((szFindText[0] != '*') && (szFindText[0] != '?'))
        find_text[len++] = '*';
    /* keep two bytes free: one for a trailing '*' and one for the NUL */
    strncpy(find_text + len, szFindText, sizeof(find_text) - len - 2);
    find_text[sizeof(find_text) - 2] = '\0';
    len = strlen(find_text);
    if ( (find_text[len-1] != '*') && (find_text[len-1] != '?') ) {
        find_text[len++] = '*';
        find_text[len] = '\0';
    }

    info_wait(IDS_WAITSEARCH);
    fseek(infile, psfile.text_offset, SEEK_SET);
    if (text_find_slow(infile, find_text, FALSE)) {
        /* found it */
        fclose(infile);
        info_wait(IDS_NOWAIT);
        /* signal that BBOX is valid and should be highlighted */
        if ((psfile.pagenum == psfile.text_page+1)
            && (gsdll.state == GS_PAGE)) {
            /* on correct page */
            display.show_find = TRUE;
            scroll_to_find();
            post_img_message(WM_GSSYNC, 0); /* redraw */
        }
        else {
            /* move to correct page */
            request_mutex();
            psfile.text_bbox.valid = TRUE;
            pending.pagenum = psfile.text_page+1;
            history_add(pending.pagenum);
            pending.now = TRUE;
            release_mutex();
            /* scroll_to_find occurs after page is displayed */
        }
        return;
    }
    fclose(infile);
    info_wait(IDS_NOWAIT);
    gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
}
/* mouse text selection */
/* The index below is built for one page by make_text_index() and */
/* released by free_text_index(). */
/* Index entries are allocated TEXT_INDEX_CHUNK at a time; word storage */
/* is allocated in steps of TEXT_INDEX_CHUNK * 8 bytes. */
#define TEXT_INDEX_CHUNK 1000
/* array of index entries, one per word (bounding box, line number and */
/* offset of the word within text_words) */
TEXTINDEX *text_index;
unsigned int text_index_count; /* number of words in index */
unsigned int text_index_size; /* number of words storage allocated */
/* buffer holding the NUL terminated words, one after another */
char *text_words;
unsigned int text_words_count; /* location for next word */
unsigned int text_words_size; /* number of bytes allocated for text_words */
/*
 * free_text_index - release the word index and the word storage.
 *
 * Both buffers are freed and their pointers, counts and sizes are
 * reset to zero, so the function may be called repeatedly.
 */
void
free_text_index(void)
{
    /* free(NULL) does nothing, so no test is needed before freeing */
    free(text_words);
    free(text_index);

    text_words = NULL;
    text_words_size = 0;
    text_words_count = 0;

    text_index = NULL;
    text_index_size = 0;
    text_index_count = 0;
}
BOOL
make_text_index(void)
{
FILE *f;
int thispage;
int thisline;
char buf[2048];
TEXTINDEX *text;
int page = psfile.pagenum;
free_text_index();
if (psfile.text_name[0] == '\0')
return FALSE;
text_words_size = TEXT_INDEX_CHUNK * 8;
text_words = (char *)malloc(text_words_size);
if (text_words == (char *)NULL) {
free_text_index();
return FALSE;
}
text_words_count = 0;
text_index_size = TEXT_INDEX_CHUNK;
text_index = (TEXTINDEX *)malloc(text_index_size * sizeof(TEXTINDEX));
if (text_index == (TEXTINDEX *)NULL) {
free_text_index();
return FALSE;
}
text_index_count = 0;
/* find page */
f = fopen(psfile.text_name, "rb");
if (f == (FILE *)NULL) {
free_text_index();
return FALSE;
}
thispage = 1;
while ((thispage != page) && fgets(buf, sizeof(buf), f))
if (*buf == '\f')
thispage++;
if (thispage != page) {
free_text_index();
fclose(f);
return FALSE;
}
thisline = 0;
while ((thispage == page) && fgets(buf, sizeof(buf), f)) {
if (*buf == '\f') {
thispage++;
}
else if (*buf == '\n') {
thisline++;
}
else {
if (text_index_count >= text_index_size) {
text_index_size += TEXT_INDEX_CHUNK;
text_index = (TEXTINDEX *)realloc(text_index,
text_index_size * sizeof(TEXTINDEX));
if (text_index == (TEXTINDEX *)NULL) {
free_text_index();
fclose(f);
return FALSE;
}
}
text = &text_index[text_index_count];
if (text_words_count + strlen(buf) + 1 > text_words_size) {
text_words_size += TEXT_INDEX_CHUNK * 8;
text_words = (char *)realloc(text_words, text_words_size);
if (text_words == (char *)NULL) {
free_text_index();
fclose(f);
return FALSE;
}
}
text->word = text_words_count;
sscanf(buf, "%d %d %d %d %s",
&text->bbox.llx, &text->bbox.lly,
&text->bbox.urx, &text->bbox.ury,
text_words + text->word);
text->line = thisline;
text_words_count += strlen(text_words + text_words_count) + 1;
text_index_count++;
}
}
fclose(f);
return TRUE;
}
/*
 * in_word - test whether a point lies inside a bounding box.
 *
 * Returns non-zero if x is within [llx, urx] and y is within
 * [lly, ury] of bbox (edges included), otherwise zero.
 */
BOOL
in_word(PSBBOX *bbox, int x, int y)
{
    int inside_x = (x >= bbox->llx) && (x <= bbox->urx);
    int inside_y = (y >= bbox->lly) && (y <= bbox->ury);

    return (inside_x && inside_y);
}
/*
 * word_find - search the text index for a word containing x,y.
 *
 * Entries are examined in index order and the first one whose bounding
 * box contains the point wins.
 *
 * Returns the position of that entry in text_index, or -1 if no word
 * contains the point.
 */
int
word_find(int x, int y)
{
    unsigned int idx = 0;

    while (idx < text_index_count) {
        if (in_word(&text_index[idx].bbox, x, y))
            return idx;
        idx++;
    }
    return -1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -