⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pdf2index

📁 jboss规则引擎
💻
字号:
#!/usr/bin/perl -- # -*- Perl -*-# this needs some cleanup...my $PSTOTEXT = "pstotext";my $pdf = shift @ARGV;my $index = "";my $inindex = 0;open (F, "$PSTOTEXT $pdf |");while (<F>) {    if (/^<\/index/) {	$index .= $_;	$inindex = 0;    }    $inindex = 1 if /^<index/;    if ($inindex) {	$index .= $_ if /^\s*</;    }}my $cindex = "";while ($index =~ /^(.*?)((<phrase role=\"pageno\">.*?<\/phrase>\s*)+)/s) {    $cindex .= $1;    $_ = $2;    $index = $'; # '    my @pages = m/<phrase role=\"pageno\">.*?<\/phrase>\s*/sg;    # Expand ranges    if ($#pages >= 0) {	my @mpages = ();	foreach my $page (@pages) {	    my $pageno = &pageno($page);	    if ($pageno =~ /^([0-9]+)[^0-9]([0-9]+)$/) { # funky -		for (my $count = $1; $count <= $2; $count++) {		    push (@mpages, "<phrase role=\"$pageno\">$count</phrase>");		}	    } else {		push (@mpages, $page);	    }	}	@pages = sort rangesort @mpages;    }    # Remove duplicates...    if ($#pages > 0) {	my @mpages = ();	my $current = "";	foreach my $page (@pages) {	    my $pageno = &pageno($page);	    if ($pageno ne $current) {		push (@mpages, $page);		$current = $pageno;	    }	}	@pages = @mpages;    }    # Collapse ranges...    if ($#pages > 1) {	my @cpages = ();	while (@pages) {	    my $count = 0;	    my $len = &rangelen($count, @pages);	    if ($len <= 2) {		my $page = shift @pages;		push (@cpages, $page);	    } else {		my $fpage = shift @pages;		my $lpage = "";		while ($len > 1) {		    $lpage = shift @pages;		    $len--;		}		my $fpno = &pageno($fpage);		my $lpno = &pageno($lpage);		$fpage =~ s/>$fpno</>${fpno}-$lpno</s;		push (@cpages, $fpage);	    }	}	@pages = @cpages;    }    my $page = shift @pages;    $page =~ s/\s*$//s;    $cindex .= $page;    while (@pages) {	$page = shift @pages;	$page =~ s/\s*$//s;	$cindex .= ", $page";    }}$cindex .= $index;print "$cindex\n";sub pageno {    my $page = shift;    $page =~ s/^<phrase.*?>//;    $page =~ s/^<link.*?>//;    return $1 if $page =~ /^([^<>]+)/;    return "?";}sub rangesort {    my $apno = &pageno($a);    my $bpno = &pageno($b);    # Make sure roman pages come before arabic ones, otherwise sort them in order    return -1 if ($apno !~ /^\d+/ && $bpno =~ /^\d+/);    return  1 if ($apno =~ /^\d+/ && $bpno !~ /^\d+/);    return $apno <=> $bpno;}sub rangelen {    my $count = shift;    my @pages = @_;    my $len = 1;    my $inrange = 1;    my $current = &pageno($pages[$count]);    while ($count < $#pages && $inrange) {	$count++;	my $next = &pageno($pages[$count]);	if ($current + 1 eq $next) {	    $current = $next;	    $inrange = 1;	    $len++;	} else {	    $inrange = 0;	}    }    return $len;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -