⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 webautomation.mht

📁 Perl 编程技巧大全。适合初学者阅读。
💻 MHT
📖 第 1 页 / 共 5 页
字号:
color=3D#00ffff>]</FONT>,
  <FONT color=3D#00ffff>[</FONT> img, <FONT color=3D#00cd00>src</FONT>   =
 =3D&gt; <FONT color=3D#00cd00>"images/big.gif"</FONT>,
         <FONT color=3D#00cd00>lowsrc</FONT> =3D&gt; <FONT =
color=3D#00cd00>"images/big-lowres.gif"</FONT> <FONT =
color=3D#00ffff>]</FONT>
<FONT color=3D#00ffff>]</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>if</FONT> <FONT =
color=3D#00ffff>(</FONT>$elt_type <FONT color=3D#98fb98>eq</FONT> <FONT =
color=3D#00cd00>'a'</FONT> &amp;&amp; $attr_name <FONT =
color=3D#98fb98>eq</FONT> <FONT color=3D#00cd00>'href'</FONT><FONT =
color=3D#00ffff>)</FONT> <FONT color=3D#00ffff>{</FONT>
    <FONT color=3D#ff7f50>print</FONT> <FONT color=3D#00cd00>"ANCHOR: =
$attr_value\n"</FONT><FONT color=3D#43cd80> </FONT>
        <FONT color=3D#ffa500>if</FONT> $attr_value-&gt;scheme =3D~ =
/http|ftp/<FONT color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#ffa500>if</FONT> <FONT color=3D#00ffff>(</FONT>$elt_type =
<FONT color=3D#98fb98>eq</FONT> <FONT color=3D#00cd00>'img'</FONT> =
&amp;&amp; $attr_name <FONT color=3D#98fb98>eq</FONT> <FONT =
color=3D#00cd00>'src'</FONT><FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT>
    <FONT color=3D#ff7f50>print</FONT> <FONT color=3D#00cd00>"IMAGE:  =
$attr_value\n"</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#bebebe>#-----------------------------
# <FONT size=3D-1><A =
href=3D"http://pleac.sourceforge.net/include/perl/ch20/xurl">download =
the following standalone program</A></FONT>
#!/usr/bin/perl -w
# xurl - extract unique, sorted list of links from URL
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::LinkExtor</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>LWP::Simple</FONT><FONT color=3D#00ffff>;</FONT>

$base_url =3D <FONT color=3D#ff7f50>shift</FONT><FONT =
color=3D#00ffff>;</FONT>
$parser =3D HTML::LinkExtor-&gt;new<FONT color=3D#00ffff>(</FONT><FONT =
color=3D#ff7f50>undef</FONT>, $base_url<FONT color=3D#00ffff>);</FONT>
$parser-&gt;parse<FONT color=3D#00ffff>(</FONT>get<FONT =
color=3D#00ffff>(</FONT>$base_url<FONT =
color=3D#00ffff>))</FONT>-&gt;<FONT color=3D#98fb98>eof</FONT><FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#cdad00>@links</FONT> =3D $parser-&gt;links<FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>foreach</FONT> <FONT =
color=3D#9ac0cd>$linkarray</FONT> <FONT color=3D#00ffff>(</FONT><FONT =
color=3D#cdad00>@links</FONT><FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT>
    <FONT color=3D#ffa500>my</FONT> <FONT =
color=3D#cdad00>@element</FONT>  =3D @$linkarray<FONT =
color=3D#00ffff>;</FONT>
    <FONT color=3D#ffa500>my</FONT> <FONT =
color=3D#9ac0cd>$elt_type</FONT> =3D <FONT color=3D#ff7f50>shift</FONT> =
<FONT color=3D#cdad00>@element</FONT><FONT color=3D#00ffff>;</FONT>
    <FONT color=3D#ffa500>while</FONT> <FONT =
color=3D#00ffff>(</FONT><FONT color=3D#cdad00>@element</FONT><FONT =
color=3D#00ffff>)</FONT> <FONT color=3D#00ffff>{</FONT>
        <FONT color=3D#ffa500>my</FONT> <FONT =
color=3D#00ffff>(</FONT><FONT color=3D#9ac0cd>$attr_name</FONT> , <FONT =
color=3D#9ac0cd>$attr_value</FONT><FONT color=3D#00ffff>)</FONT> =3D =
<FONT color=3D#ff7f50>splice</FONT><FONT color=3D#00ffff>(</FONT><FONT =
color=3D#cdad00>@element</FONT>, <FONT color=3D#cdcd00>0</FONT>, <FONT =
color=3D#cdcd00>2</FONT><FONT color=3D#00ffff>);</FONT>
        <FONT color=3D#cdcd00>$seen</FONT><FONT =
color=3D#00ffff>{</FONT>$attr_value<FONT color=3D#00ffff>}</FONT>++<FONT =
color=3D#00ffff>;</FONT>
    <FONT color=3D#00ffff>}</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#ffa500>for</FONT> <FONT color=3D#00ffff>(</FONT><FONT =
color=3D#ff7f50>sort</FONT> <FONT color=3D#ff7f50>keys</FONT> <FONT =
color=3D#cdcd00>%seen</FONT><FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT> <FONT color=3D#ff7f50>print</FONT> $_, <FONT =
color=3D#00cd00>"\n"</FONT> <FONT color=3D#00ffff>}</FONT>

<FONT color=3D#bebebe>#-----------------------------
#% xurl http://www.perl.com/CPAN
#ftp://ftp</FONT><FONT color=3D#cdad00>@ftp</FONT><FONT =
color=3D#bebebe>.perl.com/CPAN/CPAN.html
#
#http://language.perl.com/misc/CPAN.cgi
#
#http://language.perl.com/misc/cpan_module
#
#http://language.perl.com/misc/getcpan
#
#http://www.perl.com/index.html
#
#http://www.perl.com/gifs/lcb.xbm
#-----------------------------
</FONT>&lt;URL:http://www.perl.com&gt;
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#cdad00>@URLs</FONT> =3D <FONT =
color=3D#00ffff>(</FONT>$message =3D~ /&lt;URL:<FONT =
color=3D#00ffff>(</FONT>.*?<FONT color=3D#00ffff>)</FONT>&gt;/g<FONT =
color=3D#00ffff>);</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT></PRE></FONT></TD></TR></TBODY></TABLE></DIV>
<DIV class=3DSECT2>
<H2 class=3DSECT2><A name=3DAEN1066>Converting ASCII to HTML</A></H2>
<TABLE width=3D"100%" bgColor=3D#2f4f4f border=3D0>
  <TBODY>
  <TR>
    <TD><PRE class=3DSCREEN><FONT color=3D#f5deb3 size=3D+1><FONT =
color=3D#bebebe>#-----------------------------
# <FONT size=3D-1><A =
href=3D"http://pleac.sourceforge.net/include/perl/ch20/text2html">downloa=
d the following standalone program</A></FONT>
#!/usr/bin/perl -w -p00
# text2html - trivial html encoding of normal text
# -p means apply this script to each record.
# -00 means that a record is now a paragraph
</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::Entities</FONT><FONT color=3D#00ffff>;</FONT>
$_ =3D encode_entities<FONT color=3D#00ffff>(</FONT>$_, <FONT =
color=3D#00cd00>"\200-\377"</FONT><FONT color=3D#00ffff>);</FONT>

<FONT color=3D#ffa500>if</FONT> <FONT color=3D#00ffff>(</FONT>/^\s/<FONT =
color=3D#00ffff>)</FONT> <FONT color=3D#00ffff>{</FONT>
    <FONT color=3D#bebebe># Paragraphs beginning with whitespace are =
wrapped in &lt;PRE&gt;</FONT><FONT color=3D#43cd80> </FONT><FONT =
color=3D#bebebe>
</FONT>    <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{(</FONT>.*<FONT color=3D#00ffff>)</FONT>$<FONT =
color=3D#00ffff>}</FONT>        <FONT =
color=3D#00ffff>{</FONT>&lt;PRE&gt;\n$<FONT =
color=3D#cdcd00>1</FONT>&lt;/PRE&gt;\n<FONT =
color=3D#00ffff>}</FONT><FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>;</FONT>           <FONT color=3D#bebebe># indented =
verbatim
</FONT><FONT color=3D#00ffff>}</FONT> <FONT color=3D#ffa500>else</FONT> =
<FONT color=3D#00ffff>{</FONT>
    <FONT color=3D#ff7f50>s</FONT><FONT color=3D#00ffff>{</FONT>^<FONT =
color=3D#00ffff>(</FONT>&gt;.*<FONT color=3D#00ffff>)}</FONT>       =
<FONT color=3D#00ffff>{</FONT>$<FONT =
color=3D#cdcd00>1</FONT>&lt;BR&gt;<FONT color=3D#00ffff>}</FONT>gm<FONT =
color=3D#00ffff>;</FONT>                    <FONT color=3D#bebebe># =
quoted text
</FONT>    <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT>&lt;URL:<FONT color=3D#00ffff>(</FONT>.*?<FONT =
color=3D#00ffff>)</FONT>&gt;<FONT color=3D#00ffff>}</FONT>    <FONT =
color=3D#00ffff>{</FONT>&lt;A HREF=3D<FONT =
color=3D#00cd00>"$1"</FONT>&gt;$<FONT =
color=3D#cdcd00>1</FONT>&lt;/A&gt;<FONT color=3D#00ffff>}</FONT>gs       =
  <FONT color=3D#bebebe># embedded URL  (good)
</FONT>                    ||
    <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{(</FONT>http:\S+<FONT color=3D#00ffff>)}</FONT>   <FONT =
color=3D#00ffff>{</FONT>&lt;A HREF=3D<FONT =
color=3D#00cd00>"$1"</FONT>&gt;$<FONT =
color=3D#cdcd00>1</FONT>&lt;/A&gt;<FONT color=3D#00ffff>}</FONT>gs<FONT =
color=3D#00ffff>;</FONT>        <FONT color=3D#bebebe># guessed URL   =
(bad)
</FONT>    <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT>\*<FONT color=3D#00ffff>(</FONT>\S+<FONT =
color=3D#00ffff>)</FONT>\*<FONT color=3D#00ffff>}</FONT>    <FONT =
color=3D#00ffff>{</FONT>&lt;STRONG&gt;$<FONT =
color=3D#cdcd00>1</FONT>&lt;/STRONG&gt;<FONT =
color=3D#00ffff>}</FONT>g<FONT color=3D#00ffff>;</FONT>         <FONT =
color=3D#bebebe># this is *bold* here
</FONT>    <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT>\b_<FONT color=3D#00ffff>(</FONT>\S+<FONT =
color=3D#00ffff>)</FONT>\_\b<FONT color=3D#00ffff>}</FONT> <FONT =
color=3D#00ffff>{</FONT>&lt;EM&gt;$<FONT =
color=3D#cdcd00>1</FONT>&lt;/EM&gt;<FONT color=3D#00ffff>}</FONT>g<FONT =
color=3D#00ffff>;</FONT>                 <FONT color=3D#bebebe># this is =
_italics_ here
</FONT>    <FONT color=3D#ff7f50>s</FONT><FONT =
color=3D#00ffff>{</FONT>^<FONT color=3D#00ffff>}</FONT>            <FONT =
color=3D#00ffff>{</FONT>&lt;P&gt;\n<FONT color=3D#00ffff>};</FONT>       =
                 <FONT color=3D#bebebe># add paragraph tag
</FONT><FONT color=3D#00ffff>}</FONT>

<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>BEGIN</FONT> <FONT color=3D#00ffff>{</FONT>
    <FONT color=3D#ff7f50>print</FONT> <FONT =
color=3D#00cd00>"&lt;TABLE&gt;"</FONT><FONT color=3D#00ffff>;</FONT>
    $_ =3D encode_entities<FONT color=3D#00ffff>(</FONT><FONT =
color=3D#ff7f50>scalar</FONT> &lt;&gt;<FONT color=3D#00ffff>);</FONT>
    <FONT color=3D#ff7f50>s</FONT>/\n\s+/ /g<FONT =
color=3D#00ffff>;</FONT>  <FONT color=3D#bebebe># continuation lines
</FONT>    <FONT color=3D#ffa500>while</FONT> <FONT =
color=3D#00ffff>(</FONT> /^<FONT color=3D#00ffff>(</FONT>\S+?:<FONT =
color=3D#00ffff>)</FONT>\s*<FONT color=3D#00ffff>(</FONT>.*<FONT =
color=3D#00ffff>)</FONT>$/gm <FONT color=3D#00ffff>)</FONT> <FONT =
color=3D#00ffff>{</FONT>                <FONT color=3D#bebebe># parse =
heading
</FONT>        <FONT color=3D#ff7f50>print</FONT> <FONT =
color=3D#00cd00>"&lt;TR&gt;&lt;TH =
ALIGN=3D'LEFT'&gt;$1&lt;/TH&gt;&lt;TD&gt;$2&lt;/TD&gt;&lt;/TR&gt;\n"</FON=
T><FONT color=3D#00ffff>;</FONT>
    <FONT color=3D#00ffff>}</FONT>
    <FONT color=3D#ff7f50>print</FONT> <FONT =
color=3D#00cd00>"&lt;/TABLE&gt;&lt;HR&gt;"</FONT><FONT =
color=3D#00ffff>;</FONT>
<FONT color=3D#00ffff>}</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT></PRE></FONT></TD></TR></TBODY></TABLE></DIV>
<DIV class=3DSECT2>
<H2 class=3DSECT2><A name=3DAEN1069>Converting HTML to ASCII</A></H2>
<TABLE width=3D"100%" bgColor=3D#2f4f4f border=3D0>
  <TBODY>
  <TR>
    <TD><PRE class=3DSCREEN><FONT color=3D#f5deb3 size=3D+1><FONT =
color=3D#bebebe>#-----------------------------
</FONT>$ascii =3D <FONT color=3D#00cd00>`lynx -dump =
$filename`</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::FormatText</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::Parse</FONT><FONT color=3D#00ffff>;</FONT>

$html =3D parse_htmlfile<FONT color=3D#00ffff>(</FONT>$filename<FONT =
color=3D#00ffff>);</FONT>
$formatter =3D HTML::FormatText-&gt;new<FONT =
color=3D#00ffff>(</FONT><FONT color=3D#00cd00>leftmargin</FONT> =3D&gt; =
<FONT color=3D#cdcd00>0</FONT>, <FONT color=3D#00cd00>rightmargin</FONT> =
=3D&gt; <FONT color=3D#cdcd00>50</FONT><FONT color=3D#00ffff>);</FONT>
$ascii =3D $formatter-&gt;<FONT color=3D#ff7f50>format</FONT><FONT =
color=3D#00ffff>(</FONT>$html<FONT color=3D#00ffff>);</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::TreeBuilder</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::FormatText</FONT><FONT color=3D#00ffff>;</FONT>

$html =3D HTML::TreeBuilder-&gt;new<FONT color=3D#00ffff>();</FONT>
$html-&gt;parse<FONT color=3D#00ffff>(</FONT>$document<FONT =
color=3D#00ffff>);</FONT>

$formatter =3D HTML::FormatText-&gt;new<FONT =
color=3D#00ffff>(</FONT><FONT color=3D#00cd00>leftmargin</FONT> =3D&gt; =
<FONT color=3D#cdcd00>0</FONT>, <FONT color=3D#00cd00>rightmargin</FONT> =
=3D&gt; <FONT color=3D#cdcd00>50</FONT><FONT color=3D#00ffff>);</FONT>

$ascii =3D $formatter-&gt;<FONT color=3D#ff7f50>format</FONT><FONT =
color=3D#00ffff>(</FONT>$html<FONT color=3D#00ffff>);</FONT>
<FONT color=3D#bebebe>#-----------------------------
</FONT></PRE></FONT></TD></TR></TBODY></TABLE></DIV>
<DIV class=3DSECT2>
<H2 class=3DSECT2><A name=3DAEN1072>Extracting or Removing HTML =
Tags</A></H2>
<TABLE width=3D"100%" bgColor=3D#2f4f4f border=3D0>
  <TBODY>
  <TR>
    <TD><PRE class=3DSCREEN><FONT color=3D#f5deb3 size=3D+1><FONT =
color=3D#bebebe>#-----------------------------
</FONT><FONT color=3D#00ffff>(</FONT>$plain_text =3D $html_text<FONT =
color=3D#00ffff>)</FONT> =3D~ <FONT color=3D#ff7f50>s</FONT>/&lt;<FONT =
color=3D#00ffff>[</FONT>^&gt;<FONT =
color=3D#00ffff>]</FONT>*&gt;//gs<FONT color=3D#00ffff>;</FONT>     =
<FONT color=3D#bebebe>#WRONG
#-----------------------------
</FONT><FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::Parse</FONT><FONT color=3D#00ffff>;</FONT>
<FONT color=3D#ffa500>use</FONT> <FONT =
color=3D#b2dfee>HTML::FormatText</FONT><FONT color=3D#00ffff>;</FONT>
$plain_text =3D HTML::FormatText-&gt;new-&gt;<FONT =
color=3D#ff7f50>format</FONT><FONT =
color=3D#00ffff>(</FONT>parse_html<FONT =
color=3D#00ffff>(</FONT>$html_text<FONT color=3D#00ffff>));</FONT>
<FONT color=3D#bebebe>#-----------------------------
#% perl -pe 's/&lt;[^&gt;]*&gt;//g' file
#-----------------------------
#&lt;IMG SRC =3D "foo.gif"
#     ALT =3D "Flurp!"&gt;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -