⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gvctext.c

📁 GSview 4.6 PostScript previewer。Ghostscript在MS-Windows, OS/2 and Unix下的图形化接口
💻 C
📖 第 1 页 / 共 2 页
字号:
		release_mutex();
		return;
	    }
	}
    }
    fclose(infile);
    info_wait(IDS_NOWAIT);
    gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
}


/***************************************************/
/* text extraction and searching based on pstotext */

/* This code assumes that pstotext has produced an output file
 * named psfile.text_name.
 * This file contains one line per word in the following format
 *   llx lly urx ury word 
 * A new line is identified by a blank line 
 * A new page is identified by a formfeed/newline
 *
 */

/* Locate the word on one line of pstotext output ("llx lly urx ury word").
 * Leading blanks and the four coordinate fields are stepped over, then
 * the line is cut (in place) at the first CR or LF.
 * Returns a pointer into line at the start of the word; this points at
 * the terminating NUL if the line has fewer than five fields.
 */
char *
text_grab_word(char *line)
{
    char *word = line;
    char *end;
    int field;

    /* blanks before the first coordinate */
    while ((*word == ' ') || (*word == '\t'))
	word++;

    /* llx, lly, urx, ury - each followed by a run of blanks */
    for (field = 0; field < 4; field++) {
	while ((*word != '\0') && (*word != ' ') && (*word != '\t'))
	    word++;
	while ((*word == ' ') || (*word == '\t'))
	    word++;
    }

    /* drop the end-of-line characters */
    for (end = word; *end != '\0'; end++) {
	if ((*end == '\r') || (*end == '\n')) {
	    *end = '\0';
	    break;
	}
    }
    return word;
}

/* Copy the words of the wanted pages from infile (pstotext output)
 * to outfile.
 * Blank lines and formfeed lines are copied unchanged; every other line
 * is reduced to its word followed by a single space.
 * If all is TRUE every page is copied, otherwise only pages flagged in
 * psfile.page_list.select[].
 */
void
text_extract_slow(FILE *outfile, FILE *infile, BOOL all)
{
    char buf[2048];
    int pageno = 0;
    BOOL wanted;
    char first;

    /* pages are in correct order - no reversal needed */
    wanted = (all || psfile.page_list.select[pageno]);
    while (fgets(buf, sizeof(buf)-1, infile)) {
	/* text_grab_word may shorten buf, so note the first character now */
	first = *buf;
	if (wanted) {
	    switch (first) {
		case '\r':
		case '\n':
		case '\f':
		    /* line or page break */
		    fputs(buf, outfile);
		    break;
		default:
		    fputs(text_grab_word(buf), outfile);
		    fputc(' ', outfile);
		    break;
	    }
	}
	if (first == '\f') {
	    /* formfeed starts the next page */
	    pageno++;
	    wanted = (all || psfile.page_list.select[pageno]);
	}
    }
}

/* extract text from a range of pages */
void
gsview_text_extract_slow()
{
    FILE *f;
    FILE *infile;
    static char output[MAXSTR];

    if (!get_filename(output, TRUE, FILTER_TXT, 0, IDS_TOPICTEXT))
	return;

    if ((f = fopen(output, "wb")) == (FILE *)NULL)
	return;

    if (psfile.text_name[0] == '\0')
	return;

    /* Must read in binary mode, since TeX can use ^Z as a character code. */
    if ((infile = fopen(psfile.text_name, "rb")) == (FILE *)NULL) {
	message_box(TEXT("pstotext text extraction file is missing"), 0);
	return;
    }

    info_wait(IDS_WAITWRITE);

    if (psfile.dsc == (CDSC *)NULL) {
	text_extract_slow(f, infile, TRUE);	/* all pages */
    }
    else {
	if (psfile.dsc->page_count != 0) {
	    text_extract_slow(f, infile, FALSE);	/* selected pages */
	}
	else {
	    text_extract_slow(f, infile, TRUE);	/* all pages */
	}
    }

    fclose(f);
    fclose(infile);

    info_wait(IDS_NOWAIT);
    return;
}

/* compare string w with string s */
/* case insensitive */
/* w may contain wildcards * and ? in any position */
/* return TRUE if match */
BOOL
wildmatch(const char *w, const char *s)
{
const char *lastw = "";	/* location of last possible '*' */
const char *lasts = s;	/* location of character after the last matched by '*' */
BOOL nomatch;
    while (*s && *w) {
	nomatch = FALSE;
	if (*w == '*') {
	    lastw = w;
	    w++;		/* look at character after '*' */
	    while (*w && (*w == '*'))
		w++;		/* '**' is same as '*' */
	    if (*w == '\0')
		return TRUE;	/* matches all until end of word */
	    while (*s) {
		if (*w == '?') {
		    break;	/* '*?' is same as '?' */
		}
		else if (tolower_latin1(*w) == tolower_latin1(*s)) {
		    lasts = s;	/* remember location in case we need to extend wildcard match */
		    break;	/* break loop when match found */
		}
		s++;
	    }
	    if (*s == '\0')
		break;		/* end of s without matching next in w */
	}
	else if (*w != '?') {  /* '?' always matches */
	    if (tolower_latin1(*w) != tolower_latin1(*s))
		nomatch = TRUE;
	}
	if (nomatch && *lastw && *lastw=='*' && *lasts) {
	    /* try extending the length matched by the '*' wildcard */
	    w = lastw;
	    s = lasts + 1;
	}
	else if (nomatch) {
	    break;
	}
	else {
	    w++;
	    s++;
	}
    }

    /* skip over trailing '*' */
    while (*w && (*w == '*'))
	w++;		/* '**' is same as '*' */

    if ((*s == '\0') && (*w == '\0'))
	return TRUE;		/* at end of both strings, so matched */

    return FALSE;
}

#define MAX_FIND_WORD 10

/* returns TRUE if match found */
/* infile is text file from pstotext */
/*   is assumed to have been positioned by fseek */
/* str is string to match, as blank separated words, each of which */
/*   may contain wildcards; at most MAX_FIND_WORD words are used */
/*   and text beyond MAXSTR-1 characters is ignored */
/* all is TRUE if all pages to be searched */
/* On a match psfile.text_offset is the file position just after the */
/* first matched word, psfile.text_bbox holds the bounding box of that */
/* word and psfile.text_page is the page on which it was found. */
BOOL
text_find_slow(FILE *infile, char *str, BOOL all)
{
char line[MAXSTR];
char *lineword;
char sbuf[MAXSTR];
const char *strword;
char *tok;
BOOL thispage;
char *words[MAX_FIND_WORD+1];
int i;
    /* break up search string into words */
    /* strtok modifies its argument, so work on a bounded copy */
    strncpy(sbuf, str, sizeof(sbuf)-1);
    sbuf[sizeof(sbuf)-1] = '\0';
    i = 0;
    for (tok = strtok(sbuf, " "); (tok != NULL) && (i < MAX_FIND_WORD);
	 tok = strtok(NULL, " "))
	words[i++] = tok;
    words[i] = NULL;	/* list is always NULL terminated */
    if (i == 0)
	return FALSE;	/* nothing to search for */

    /* point at first word */
    i = 0;
    strword = words[i];

    thispage = (all || psfile.page_list.select[psfile.text_page]);
    while (fgets(line, sizeof(line)-1, infile)) {
	/* file is read in binary mode, so a blank line may start with CR */
	if (thispage && (*line != '\n') && (*line != '\r') 
	    && (*line != '\f')) {
	    /* get start of word */
	    lineword = text_grab_word(line);
	    if (wildmatch(strword, lineword)) {
		/* matched first word */
		psfile.text_offset = ftell(infile); /* remember location */
		/* remember bounding box of word */
		sscanf(line, "%d %d %d %d",
		   &psfile.text_bbox.llx, &psfile.text_bbox.lly, 
		   &psfile.text_bbox.urx, &psfile.text_bbox.ury);
		/* now check remaining words */
		strword = words[++i];
		while (strword) {
		    if (!fgets(line, sizeof(line)-1, infile))
			return FALSE;	/* match failed at EOF */
		    /* line and page breaks between words are ignored */
		    if ((*line != '\n') && (*line != '\r') 
			&& (*line != '\f')) {
			lineword = text_grab_word(line);
			if (!wildmatch(strword, lineword)) {
			    /* partial match failed */
			    /* restart search from next word */
			    fseek(infile, psfile.text_offset, SEEK_SET);
			    i = 0;
			    strword = words[i];
			    break;
			}
			strword = words[++i];
			/* extend bbox ?? */
		    }
		}
		/* all words matched */
		if (!strword)
		    return TRUE;
	    }
	}
	if (*line == '\f') {
	    psfile.text_page++;
	    thispage = (all || psfile.page_list.select[psfile.text_page]);
	}
    }
    return FALSE;	/* reached EOF without match */
}

/* Find the next occurrence of szFindText in the pstotext file, */
/* starting from psfile.text_offset and searching selected pages only. */
/* The search text has '*' added at each end unless it already begins */
/* or ends with a wildcard. */
/* If found on the displayed page the match is highlighted, otherwise */
/* a move to the page containing the match is requested. */
/* If not found, or the search text is empty or too long to have the */
/* wildcards added, IDS_TEXTNOTFIND is reported. */
void
gsview_text_findnext_slow()
{
FILE *infile;
char find_text[MAXSTR];
size_t len;
	if (not_dsc())
	    return;
	len = strlen(szFindText);
	/* find_text must hold '*' + text + '*' + terminator */
	if ((len == 0) || (len + 3 > sizeof(find_text))) {
	    gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
	    return;
	}

	if (psfile.text_name[0] == '\0') {
	    gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
	    return;
	}

	if ((infile = fopen(psfile.text_name, "rb")) == (FILE *)NULL) {
	    message_box(TEXT("pstotext text extraction file is missing"), 0);
	    return;
	}
	
	/* add wildcard to begin and end */
	if ((szFindText[0] == '*') || (szFindText[0] == '?'))
	    strcpy(find_text, szFindText);
	else {
	    strcpy(find_text, "*");
	    strcat(find_text, szFindText);
	}
	len = strlen(find_text);	/* never zero here */
	if ( (find_text[len-1] != '*') && (find_text[len-1] != '?') )
	    strcat(find_text, "*");

	info_wait(IDS_WAITSEARCH);
	fseek(infile, psfile.text_offset, SEEK_SET);
	if (text_find_slow(infile, find_text, FALSE)) {
	    /* found it */
	    fclose(infile);
	    info_wait(IDS_NOWAIT);
	    /* signal that BBOX is valid and should be highlighted */
	    if ((psfile.pagenum == psfile.text_page+1) 
		&& (gsdll.state == GS_PAGE)) {
		/* on correct page */
		display.show_find = TRUE;
		scroll_to_find();
	        post_img_message(WM_GSSYNC, 0); /* redraw */
	    }
	    else {
		/* move to correct page */
		request_mutex();
		psfile.text_bbox.valid = TRUE;
		pending.pagenum = psfile.text_page+1;
		history_add(pending.pagenum);
		pending.now = TRUE;
		release_mutex();
		/* scroll_to_find occurs after page is displayed */
	    }
	    return;
	}

	fclose(infile);
        info_wait(IDS_NOWAIT);
	gserror(IDS_TEXTNOTFIND, NULL, MB_ICONEXCLAMATION, 0);
}


/* mouse text selection */


/* The index and the word storage are grown by make_text_index in steps */
/* of TEXT_INDEX_CHUNK entries and TEXT_INDEX_CHUNK*8 bytes respectively, */
/* and released by free_text_index. */
#define TEXT_INDEX_CHUNK 1000
TEXTINDEX *text_index;		/* one entry per word of the indexed page */
unsigned int text_index_count;	/* number of words in index */
unsigned int text_index_size;	/* number of entries allocated in text_index */
char *text_words;		/* NUL terminated words stored back to back; */
				/* TEXTINDEX.word is an offset into this */
unsigned int text_words_count;	/* offset in text_words for next word */
unsigned int text_words_size;	/* number of bytes allocated for text_words */

/* Release the word index and the word storage, leaving both empty. */
/* Safe to call when nothing has been allocated. */
void
free_text_index(void)
{
    /* free() accepts a null pointer, so no test is needed */
    free(text_index);
    free(text_words);

    text_index = NULL;
    text_index_size = 0;
    text_index_count = 0;

    text_words = NULL;
    text_words_size = 0;
    text_words_count = 0;
}

BOOL 
make_text_index(void)
{
FILE *f;
int thispage;
int thisline;
char buf[2048];
TEXTINDEX *text;
int page = psfile.pagenum;

    free_text_index();

    if (psfile.text_name[0] == '\0')
	return FALSE;
    
    text_words_size = TEXT_INDEX_CHUNK * 8;
    text_words = (char *)malloc(text_words_size);
    if (text_words == (char *)NULL) {
	free_text_index();
	return FALSE;
    }
    text_words_count = 0;

    text_index_size = TEXT_INDEX_CHUNK;
    text_index = (TEXTINDEX *)malloc(text_index_size * sizeof(TEXTINDEX));
    if (text_index == (TEXTINDEX *)NULL) {
	free_text_index();
	return FALSE;
    }
    text_index_count = 0;

    /* find page */
    f = fopen(psfile.text_name, "rb");
    if (f == (FILE *)NULL) {
	free_text_index();
	return FALSE;
    }
    thispage = 1;
    while ((thispage != page) && fgets(buf, sizeof(buf), f))
	if (*buf == '\f') 
	    thispage++;
    if (thispage != page) {
	free_text_index();
	fclose(f);
	return FALSE;
    }
    thisline = 0;
    while ((thispage == page) && fgets(buf, sizeof(buf), f))  {
	if (*buf == '\f') {
	    thispage++;
	}
	else if (*buf == '\n') {
	    thisline++;
	}
	else {
	    if (text_index_count >= text_index_size) {
		text_index_size += TEXT_INDEX_CHUNK;
		text_index = (TEXTINDEX *)realloc(text_index, 
			text_index_size * sizeof(TEXTINDEX));
		if (text_index == (TEXTINDEX *)NULL) {
		    free_text_index();
		    fclose(f);
		    return FALSE;
		}
	    }
	    text = &text_index[text_index_count];
	    if (text_words_count + strlen(buf) + 1 > text_words_size) {
		text_words_size += TEXT_INDEX_CHUNK * 8;
		text_words = (char *)realloc(text_words, text_words_size);
		if (text_words == (char *)NULL) {
		    free_text_index();
		    fclose(f);
		    return FALSE;
		}
	    }
	    text->word = text_words_count;
	    sscanf(buf, "%d %d %d %d %s",
		       &text->bbox.llx, &text->bbox.lly,
		       &text->bbox.urx, &text->bbox.ury,
		       text_words + text->word);
	    text->line = thisline;
	    text_words_count += strlen(text_words + text_words_count) + 1;
	    text_index_count++;
	}
    }
    fclose(f);
    return TRUE;
}

/* Test whether the point x,y lies inside bbox. */
/* Points on the edge of the box count as inside. */
/* Returns non-zero if inside, zero if not. */
BOOL
in_word(PSBBOX *bbox, int x, int y)
{
    const int outside_x = (x < bbox->llx) || (bbox->urx < x);
    const int outside_y = (y < bbox->lly) || (bbox->ury < y);

    return !(outside_x || outside_y);
}

/* search the text index for a word whose bounding box contains x,y */
/* if found, return the index of the first such word in text_index */
/* if no match, return -1 */
int
word_find(int x, int y)
{
unsigned int idx = 0;
    while (idx < text_index_count) {
	if (in_word(&text_index[idx].bbox, x, y))
	    return idx;
	idx++;
    }
    return -1;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -