📄 webautomation.mht
字号:
color=3D#00ffff>]</FONT>,
<FONT color=3D#00ffff>[</FONT> img, <FONT color=3D#00cd00>src</FONT> =
=3D> <FONT color=3D#00cd00>"images/big.gif"</FONT>,
<FONT color=3D#00cd00>lowsrc</FONT> =3D> <FONT =
color=3D#00cd00>"images/big-lowres.gif"</FONT> <FONT =
color=3D#00ffff>]</FONT>
<FONT color=3D#00ffff>]</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>if</FONT> <FONT =
color=3D#00ffff>(</FONT>$elt_type <FONT color=3D#98fb98>eq</FONT> <FONT =
color=3D#00cd00>'a'</FONT> && $attr_name <FONT =
color=3D#98fb98>eq</FONT> <FONT color=3D#00cd00>'href'</FONT><FONT =
color=3D#00ffff>)</FONT> <FONT color=3D#00ffff>{</FONT>
<FONT color=3D#ff7f50>print</FONT> <FONT color=3D#00cd00>"ANCHOR: =
$attr_value\n"</FONT><FONT color=3D#43cd80> </FONT>
<FONT color=3D#ffa500>if</FONT> $attr_value->scheme =3D~ =
/http|ftp/<FONT color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#ffa500>if</FONT> <FONT color=3D#00ffff>(</FONT>$elt_type =
<FONT color=3D#98fb98>eq</FONT> <FONT color=3D#00cd00>'img'</FONT> =
&& $attr_name <FONT color=3D#98fb98>eq</FONT> <FONT =
color=3D#00cd00>'src'</FONT><FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT>
<FONT color=3D#ff7f50>print</FONT> <FONT color=3D#00cd00>"IMAGE: =
$attr_value\n"</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#bebebe>#-----------------------------
# <FONT size=3D-1><A =
href=3D"http://pleac.sourceforge.net/include/perl/ch20/xurl">download =
the following standalone program</A></FONT>
#!/usr/bin/perl -w
# xurl - extract unique, sorted list of links from URL
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::LinkExtor</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>LWP::Simple</FONT><FONT color=3D#00ffff>;</FONT>
$base_url =3D <FONT color=3D#ff7f50>shift</FONT><FONT =
color=3D#00ffff>;</FONT>
$parser =3D HTML::LinkExtor->new<FONT color=3D#00ffff>(</FONT><FONT =
color=3D#ff7f50>undef</FONT>, $base_url<FONT color=3D#00ffff>);</FONT>
$parser->parse<FONT color=3D#00ffff>(</FONT>get<FONT =
color=3D#00ffff>(</FONT>$base_url<FONT =
color=3D#00ffff>))</FONT>-><FONT color=3D#98fb98>eof</FONT><FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#cdad00>@links</FONT> =3D $parser->links<FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>foreach</FONT> <FONT =
color=3D#9ac0cd>$linkarray</FONT> <FONT color=3D#00ffff>(</FONT><FONT =
color=3D#cdad00>@links</FONT><FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT>
<FONT color=3D#ffa500>my</FONT> <FONT =
color=3D#cdad00>@element</FONT> =3D @$linkarray<FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>my</FONT> <FONT =
color=3D#9ac0cd>$elt_type</FONT> =3D <FONT color=3D#ff7f50>shift</FONT> =
<FONT color=3D#cdad00>@element</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>while</FONT> <FONT =
color=3D#00ffff>(</FONT><FONT color=3D#cdad00>@element</FONT><FONT =
color=3D#00ffff>)</FONT> <FONT color=3D#00ffff>{</FONT>
<FONT color=3D#ffa500>my</FONT> <FONT =
color=3D#00ffff>(</FONT><FONT color=3D#9ac0cd>$attr_name</FONT> , <FONT =
color=3D#9ac0cd>$attr_value</FONT><FONT color=3D#00ffff>)</FONT> =3D =
<FONT color=3D#ff7f50>splice</FONT><FONT color=3D#00ffff>(</FONT><FONT =
color=3D#cdad00>@element</FONT>, <FONT color=3D#cdcd00>0</FONT>, <FONT =
color=3D#cdcd00>2</FONT><FONT color=3D#00ffff>);</FONT>
<FONT color=3D#cdcd00>$seen</FONT><FONT =
color=3D#00ffff>{</FONT>$attr_value<FONT color=3D#00ffff>}</FONT>++<FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#ffa500>for</FONT> <FONT color=3D#00ffff>(</FONT><FONT =
color=3D#ff7f50>sort</FONT> <FONT color=3D#ff7f50>keys</FONT> <FONT =
color=3D#cdcd00>%seen</FONT><FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT> <FONT color=3D#ff7f50>print</FONT> $_, <FONT =
color=3D#00cd00>"\n"</FONT> <FONT color=3D#00ffff>}</FONT>
<FONT color=3D#bebebe>#-----------------------------
#% xurl http://www.perl.com/CPAN
#ftp://ftp</FONT><FONT color=3D#cdad00>@ftp</FONT><FONT =
color=3D#bebebe>.perl.com/CPAN/CPAN.html
#
#http://language.perl.com/misc/CPAN.cgi
#
#http://language.perl.com/misc/cpan_module
#
#http://language.perl.com/misc/getcpan
#
#http://www.perl.com/index.html
#
#http://www.perl.com/gifs/lcb.xbm
#-----------------------------
</FONT><URL:http://www.perl.com>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#cdad00>@URLs</FONT> =3D <FONT =
color=3D#00ffff>(</FONT>$message =3D~ /<URL:<FONT =
color=3D#00ffff>(</FONT>.*?<FONT color=3D#00ffff>)</FONT>>/g<FONT =
color=3D#00ffff>);</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT></PRE></FONT></TD></TR></TBODY></TABLE></DIV>
<DIV class=3DSECT2>
<H2 class=3DSECT2><A name=3DAEN1066>Converting ASCII to HTML</A></H2>
<TABLE width=3D"100%" bgColor=3D#2f4f4f border=3D0>
<TBODY>
<TR>
<TD><PRE class=3DSCREEN><FONT color=3D#f5deb3 size=3D+1><FONT =
color=3D#bebebe>#-----------------------------
# <FONT size=3D-1><A =
href=3D"http://pleac.sourceforge.net/include/perl/ch20/text2html">downloa=
d the following standalone program</A></FONT>
#!/usr/bin/perl -w -p00
# text2html - trivial html encoding of normal text
# -p means apply this script to each record.
# -00 means that a record is now a paragraph
</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::Entities</FONT><FONT color=3D#00ffff>;</FONT>
$_ =3D encode_entities<FONT color=3D#00ffff>(</FONT>$_, <FONT =
color=3D#00cd00>"\200-\377"</FONT><FONT color=3D#00ffff>);</FONT>
<FONT color=3D#ffa500>if</FONT> <FONT color=3D#00ffff>(</FONT>/^\s/<FONT =
color=3D#00ffff>)</FONT> <FONT color=3D#00ffff>{</FONT>
<FONT color=3D#bebebe># Paragraphs beginning with whitespace are =
wrapped in <PRE></FONT><FONT color=3D#43cd80> </FONT><FONT =
color=3D#bebebe>
</FONT> <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{(</FONT>.*<FONT color=3D#00ffff>)</FONT>$<FONT =
color=3D#00ffff>}</FONT> <FONT =
color=3D#00ffff>{</FONT><PRE>\n$<FONT =
color=3D#cdcd00>1</FONT></PRE>\n<FONT =
color=3D#00ffff>}</FONT><FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>;</FONT> <FONT color=3D#bebebe># indented =
verbatim
</FONT><FONT color=3D#00ffff>}</FONT> <FONT color=3D#ffa500>else</FONT> =
<FONT color=3D#00ffff>{</FONT>
<FONT color=3D#ff7f50>s</FONT><FONT color=3D#00ffff>{</FONT>^<FONT =
color=3D#00ffff>(</FONT>>.*<FONT color=3D#00ffff>)}</FONT> =
<FONT color=3D#00ffff>{</FONT>$<FONT =
color=3D#cdcd00>1</FONT><BR><FONT color=3D#00ffff>}</FONT>gm<FONT =
color=3D#00ffff>;</FONT> <FONT color=3D#bebebe># =
quoted text
</FONT> <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT><URL:<FONT color=3D#00ffff>(</FONT>.*?<FONT =
color=3D#00ffff>)</FONT>><FONT color=3D#00ffff>}</FONT> <FONT =
color=3D#00ffff>{</FONT><A HREF=3D<FONT =
color=3D#00cd00>"$1"</FONT>>$<FONT =
color=3D#cdcd00>1</FONT></A><FONT color=3D#00ffff>}</FONT>gs =
<FONT color=3D#bebebe># embedded URL (good)
</FONT> ||
<FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{(</FONT>http:\S+<FONT color=3D#00ffff>)}</FONT> <FONT =
color=3D#00ffff>{</FONT><A HREF=3D<FONT =
color=3D#00cd00>"$1"</FONT>>$<FONT =
color=3D#cdcd00>1</FONT></A><FONT color=3D#00ffff>}</FONT>gs<FONT =
color=3D#00ffff>;</FONT> <FONT color=3D#bebebe># guessed URL =
(bad)
</FONT> <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT>\*<FONT color=3D#00ffff>(</FONT>\S+<FONT =
color=3D#00ffff>)</FONT>\*<FONT color=3D#00ffff>}</FONT> <FONT =
color=3D#00ffff>{</FONT><STRONG>$<FONT =
color=3D#cdcd00>1</FONT></STRONG><FONT =
color=3D#00ffff>}</FONT>g<FONT color=3D#00ffff>;</FONT> <FONT =
color=3D#bebebe># this is *bold* here
</FONT> <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT>\b_<FONT color=3D#00ffff>(</FONT>\S+<FONT =
color=3D#00ffff>)</FONT>\_\b<FONT color=3D#00ffff>}</FONT> <FONT =
color=3D#00ffff>{</FONT><EM>$<FONT =
color=3D#cdcd00>1</FONT></EM><FONT color=3D#00ffff>}</FONT>g<FONT =
color=3D#00ffff>;</FONT> <FONT color=3D#bebebe># this is =
_italics_ here
</FONT> <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT>^<FONT color=3D#00ffff>}</FONT> <FONT =
color=3D#00ffff>{</FONT><P>\n<FONT color=3D#00ffff>};</FONT> =
<FONT color=3D#bebebe># add paragraph tag
</FONT><FONT color=3D#00ffff>}</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>BEGIN</FONT> <FONT color=3D#00ffff>{</FONT>
<FONT color=3D#ff7f50>print</FONT> <FONT =
color=3D#00cd00>"<TABLE>"</FONT><FONT color=3D#00ffff>;</FONT>
$_ =3D encode_entities<FONT color=3D#00ffff>(</FONT><FONT =
color=3D#ff7f50>scalar</FONT> <><FONT color=3D#00ffff>);</FONT>
<FONT color=3D#ff7f50>s</FONT>/\n\s+/ /g<FONT =
color=3D#00ffff>;</FONT> <FONT color=3D#bebebe># continuation lines
</FONT> <FONT color=3D#ffa500>while</FONT> <FONT =
color=3D#00ffff>(</FONT> /^<FONT color=3D#00ffff>(</FONT>\S+?:<FONT =
color=3D#00ffff>)</FONT>\s*<FONT color=3D#00ffff>(</FONT>.*<FONT =
color=3D#00ffff>)</FONT>$/gm <FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT> <FONT color=3D#bebebe># parse =
heading
</FONT> <FONT color=3D#ff7f50>print</FONT> <FONT =
color=3D#00cd00>"<TR><TH =
ALIGN=3D'LEFT'>$1</TH><TD>$2</TD></TR>\n"</FON=
T><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#ff7f50>print</FONT> <FONT =
color=3D#00cd00>"</TABLE><HR>"</FONT><FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT></PRE></FONT></TD></TR></TBODY></TABLE></DIV>
<DIV class=3DSECT2>
<H2 class=3DSECT2><A name=3DAEN1069>Converting HTML to ASCII</A></H2>
<TABLE width=3D"100%" bgColor=3D#2f4f4f border=3D0>
<TBODY>
<TR>
<TD><PRE class=3DSCREEN><FONT color=3D#f5deb3 size=3D+1><FONT =
color=3D#bebebe>#-----------------------------
</FONT>$ascii =3D <FONT color=3D#00cd00>`lynx -dump =
$filename`</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::FormatText</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::Parse</FONT><FONT color=3D#00ffff>;</FONT>
$html =3D parse_htmlfile<FONT color=3D#00ffff>(</FONT>$filename<FONT =
color=3D#00ffff>);</FONT>
$formatter =3D HTML::FormatText->new<FONT =
color=3D#00ffff>(</FONT><FONT color=3D#00cd00>leftmargin</FONT> =3D> =
<FONT color=3D#cdcd00>0</FONT>, <FONT color=3D#00cd00>rightmargin</FONT> =
=3D> <FONT color=3D#cdcd00>50</FONT><FONT color=3D#00ffff>);</FONT>
$ascii =3D $formatter-><FONT color=3D#ff7f50>format</FONT><FONT =
color=3D#00ffff>(</FONT>$html<FONT color=3D#00ffff>);</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::TreeBuilder</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::FormatText</FONT><FONT color=3D#00ffff>;</FONT>
$html =3D HTML::TreeBuilder->new<FONT color=3D#00ffff>();</FONT>
$html->parse<FONT color=3D#00ffff>(</FONT>$document<FONT =
color=3D#00ffff>);</FONT>
$formatter =3D HTML::FormatText->new<FONT =
color=3D#00ffff>(</FONT><FONT color=3D#00cd00>leftmargin</FONT> =3D> =
<FONT color=3D#cdcd00>0</FONT>, <FONT color=3D#00cd00>rightmargin</FONT> =
=3D> <FONT color=3D#cdcd00>50</FONT><FONT color=3D#00ffff>);</FONT>
$ascii =3D $formatter-><FONT color=3D#ff7f50>format</FONT><FONT =
color=3D#00ffff>(</FONT>$html<FONT color=3D#00ffff>);</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT></PRE></FONT></TD></TR></TBODY></TABLE></DIV>
<DIV class=3DSECT2>
<H2 class=3DSECT2><A name=3DAEN1072>Extracting or Removing HTML =
Tags</A></H2>
<TABLE width=3D"100%" bgColor=3D#2f4f4f border=3D0>
<TBODY>
<TR>
<TD><PRE class=3DSCREEN><FONT color=3D#f5deb3 size=3D+1><FONT =
color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#00ffff>(</FONT>$plain_text =3D $html_text<FONT =
color=3D#00ffff>)</FONT> =3D~ <FONT color=3D#ff7f50>s</FONT>/<<FONT =
color=3D#00ffff>[</FONT>^><FONT =
color=3D#00ffff>]</FONT>*>//gs<FONT color=3D#00ffff>;</FONT> =
<FONT color=3D#bebebe>#WRONG
#-----------------------------
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::Parse</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::FormatText</FONT><FONT color=3D#00ffff>;</FONT>
$plain_text =3D HTML::FormatText->new-><FONT =
color=3D#ff7f50>format</FONT><FONT =
color=3D#00ffff>(</FONT>parse_html<FONT =
color=3D#00ffff>(</FONT>$html_text<FONT color=3D#00ffff>));</FONT>
<FONT color=3D#bebebe>#-----------------------------
#% perl -pe 's/<[^>]*>//g' file
#-----------------------------
#<IMG SRC =3D "foo.gif"
# ALT =3D "Flurp!">
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -