⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 charconv.tcl

📁 harvest是一个下载html网页得机器人
💻 TCL
字号:
#!/bin/sh# the next line restats using tclsh \exec tclsh "$0" "$@"## $Id: charconv.tcl,v 1.3 2003/06/02 22:17:20 adam Exp $proc usage {} {    puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }    exit 1}proc ins_trie {from to} {    global trie    if {![info exists trie(no)]} {        set trie(no) 1        set trie(size) 0    }    incr trie(size)    ins_trie_r [split $from] $to 0}proc split_trie {this} {    global trie    set trie($this,type) d    foreach e $trie($this,content) {        set from [lindex $e 0]        set to [lindex $e 1]                set ch [lindex $from 0]        set rest [lrange $from 1 end]                if {[llength $rest]} {            if {![info exist trie($this,ptr,$ch)]} {                set trie($this,ptr,$ch) $trie(no)                incr trie(no)            }            ins_trie_r $rest $to $trie($this,ptr,$ch)        } else {            set trie($this,to,$ch) $to        }    }    set trie($this,content) missing}proc ins_trie_r {from to this} {    global trie    if {![info exist trie($this,type)]} {        set trie($this,type) f    }    if {$trie($this,type) == "f"} {        lappend trie($this,content) [list $from $to]                # split ?        if {[llength $trie($this,content)] > $trie(split)} {            split_trie $this            return [ins_trie_r $from $to $this]        }    } else {        set ch [lindex $from 0]        set rest [lrange $from 1 end]        if {[llength $rest]} {            if {![info exist trie($this,ptr,$ch)]} {                set trie($this,ptr,$ch) $trie(no)                incr trie(no)            }            ins_trie_r $rest $to $trie($this,ptr,$ch)        } else {            set trie($this,to,$ch) $to        }    }}proc dump_trie {ofile} {    global trie    set f [open $ofile w]    puts $f "/* TRIE: size $trie(size) */"    puts $f "\#include <string.h>"    puts $f {        struct yaz_iconv_trie_flat {            char *from;            int to;        };        struct yaz_iconv_trie_dir {            struct yaz_iconv_trie *ptr;            int to;        };                struct yaz_iconv_trie {            struct yaz_iconv_trie_flat *flat;            struct yaz_iconv_trie_dir *dir;        };    }    set this $trie(no)    while { [incr this -1] >= 0 } {        puts $f "/* PAGE $this */"        if {$trie($this,type) == "f"} {            puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"            foreach m $trie($this,content) {                puts -nonewline $f "  \{\""                foreach d [lindex $m 0] {                    puts -nonewline $f "\\x$d"                }                puts -nonewline $f "\", 0x[lindex $m 1]"                puts $f "\},"            }            puts $f "  \{0, 0\}"            puts $f "\};"            puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"            puts $f "  $trie(prefix)page${this}_flat, 0"            puts $f "\};"        } else {            puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"            for {set i 0} {$i < 256} {incr i} {                puts -nonewline $f "  \{"                set ch [format %02X $i]                set null 1                if {[info exist trie($this,ptr,$ch)]} {                    puts -nonewline $f "&$trie(prefix)page$trie($this,ptr,$ch), "                    set null 0                } else {                    puts -nonewline $f "0, "                }                if {[info exist trie($this,to,$ch)]} {                    puts -nonewline $f "0x$trie($this,to,$ch)\}"                    set null 0                } else {                    puts -nonewline $f "0\}"                }                if {!$null} {                    puts -nonewline $f " /* $ch */"                }                if {$i < 255} {                    puts $f ","                } else {                    puts $f ""                }            }            puts $f "\};"            puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"            puts $f "  0, $trie(prefix)page${this}_dir"            puts $f "\};"        }    }    puts $f {        static unsigned long lookup(struct yaz_iconv_trie *t, unsigned char *inp,                                    size_t inbytesleft, size_t *no_read)        {            if (!t || inbytesleft < 1)            return 0;            if (t->dir)            {                size_t ch = inp[0] & 0xff;                unsigned long code =                lookup(t->dir[ch].ptr, inp+1, inbytesleft-1, no_read);                if (code)                {                    (*no_read)++;                    return code;                }                if (t->dir[ch].to)                {                    code = t->dir[ch].to;                    *no_read = 1;                    return code;                }            }            else            {                struct yaz_iconv_trie_flat *flat = t->flat;                while (flat->from)                {                    size_t len = strlen(flat->from);                    if (len <= inbytesleft)                    {                        if (memcmp(flat->from, inp, len) == 0)                        {                            *no_read = len;                            return flat->to;                        }                    }                    flat++;                }            }            return 0;        }            }    puts $f "unsigned long yaz_$trie(prefix)_conv            (unsigned char *inp, size_t inbytesleft, size_t *no_read)        {            unsigned long code;                        code = lookup(&$trie(prefix)page0, inp, inbytesleft, no_read);            if (!code)            {                *no_read = 1;                code = *inp;            }            return code;        }    "    close $f}proc readfile {fname} {    set lineno 0    set f [open $fname r]    while {1} {        incr lineno        set cnt [gets $f line]        if {$cnt < 0} {            break        }        set hex {}        set uni {}        regexp {<character hex="([^\"]*)".*<unientity>([0-9A-Z]*)</unientity>} $line s hex uni        # puts "$lineno hex=$hex uni=$uni $line"        if {[string length $uni]} {            ins_trie $hex $uni        }    }    close $f}set verbose 0set ifile {}set ofile out.cset trie(split) 40set trie(prefix) {}# Parse command lineset l [llength $argv]set i 0while {$i < $l} {    set arg [lindex $argv $i]    switch -glob -- $arg {        -v {            incr verbose        }        -s {            if {[string length $arg]} {                set arg [lindex $argv [incr i]]            }            set trie(split) $arg        }        -p {            if {[string length $arg]} {                set arg [lindex $argv [incr i]]            }            set trie(prefix) $arg        }	-o {            if {[string length $arg]} {                set arg [lindex $argv [incr i]]            }            set ofile $arg	}	        default {	    lappend ifiles $arg        }    }    incr i}if {![info exists ifiles]} {    puts "charconv.tcl: missing input file(s)"    usage}foreach ifile $ifiles {    readfile $ifile}dump_trie $ofile

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -