lw2.pm

From the collection: Ubuntu packages of security software. Quite good source code.
=item B<cookie_write>

Params: $jar, \%request, $override

Return: nothing

Goes through the given $jar and sets the Cookie header in %req pending
the correct domain and path.  If $override is true, then the secure,
domain and path restrictions of the cookies are ignored and all cookies
are essentially included.

Notice: cookie expiration is currently not implemented.  URL restriction
comparison is also case-insensitive.

=cut

sub cookie_write {
    my ( $jarref, $hin, $override ) = @_;
    my ( $name, $out ) = ( '', '' );

    return if ( !( defined $jarref && ref($jarref) ) );
    return if ( !( defined $hin    && ref($hin) ) );

    $override = $override || 0;
    $$hin{'whisker'}->{'ssl'} = $$hin{'whisker'}->{'ssl'} || 0;

    foreach $name ( keys %$jarref ) {
        next if ( $name eq '' );
        if ($override) {
            $out .= "$name=$$jarref{$name}->[0];";
            next;
        }

        # skip secure cookies on non-SSL requests
        next if ( $$hin{'whisker'}->{'ssl'} == 0 && $$jarref{$name}->[4] > 0 );
        if ( $$hin{'whisker'}->{'host'} =~ /$$jarref{$name}->[1]$/i
            && $$hin{'whisker'}->{'uri'} =~ /^$$jarref{$name}->[2]/ )
        {
            $out .= "$name=$$jarref{$name}->[0];";
        }
    }

    if ( $out ne '' ) { $$hin{'Cookie'} = $out; }
}

########################################################################

=item B<cookie_get>

Params: $jar, $name

Return: @elements

Fetch the named cookie from the $jar, and return the components.  The
returned items will be an array in the following order:

	value, domain, path, expire, secure

value  = cookie value, should always be a non-empty string
domain = domain root for cookie, can be undefined
path   = URL path for cookie, should always be a non-empty string
expire = undefined (deprecated, but exists for backwards-compatibility)
secure = whether or not the cookie is limited to HTTPS; value is 0 or 1

=cut

sub cookie_get {
    my ( $jarref, $name ) = @_;
    return undef if ( !( defined $jarref && ref($jarref) ) );
    if ( defined $$jarref{$name} ) {
        return @{ $$jarref{$name} };
    }
    return undef;
}

########################################################################

=item B<cookie_get_names>

Params: $jar

Return: @names

Fetch all the cookie names from the jar, which you can then cookie_get()
individually.

=cut

sub cookie_get_names {
    my ( $jarref, $name ) = @_;
    return undef if ( !( defined $jarref && ref($jarref) ) );
    return keys %$jarref;
}

########################################################################
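=pod

A minimal usage sketch of the jar functions above (assumptions: lw2.pm
is loaded as the LW2 package, as in LibWhisker, and the jar is populated
with cookie_set(), which is defined further below):

    require 'lw2.pm';

    my %jar;
    # order: jar, name, value, domain, path, expire (ignored), secure
    LW2::cookie_set( \%jar, 'sid', 'abc123', '.example.com', '/', undef, 0 );

    my %req;
    $req{whisker}->{host} = 'www.example.com';
    $req{whisker}->{uri}  = '/index.html';
    $req{whisker}->{ssl}  = 0;

    # copies the matching cookies into the request's Cookie header
    LW2::cookie_write( \%jar, \%req, 0 );
    # $req{Cookie} is now 'sid=abc123;'

    my @parts = LW2::cookie_get( \%jar, 'sid' );
    # @parts = ( 'abc123', '.example.com', '/', undef, 0 )

=cut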
=item B<cookie_get_valid_names>

Params: $jar, $domain, $url, $ssl

Return: @names

Fetch all the cookie names from the jar which are valid for the given
$domain, $url, and $ssl values.  $domain should be a string scalar of the
target host domain ('www.example.com', etc.).  $url should be the absolute
URL for the page ('/index.html', '/cgi-bin/foo.cgi', etc.).  $ssl should
be 0 for non-secure cookies, or 1 for all (secure and normal) cookies.
The return value is an array of names compatible with cookie_get().

=cut

sub cookie_get_valid_names {
    my ( $jarref, $domain, $url, $ssl ) = @_;
    return () if ( !( defined $jarref && ref($jarref) ) );
    return () if ( !defined $domain || $domain eq '' );
    return () if ( !defined $url || $url eq '' );
    $ssl ||= 0;

    my ( @r, $name );
    foreach $name ( keys %$jarref ) {
        next if ( $name eq '' );
        next if ( $$jarref{$name}->[4] > 0 && $ssl == 0 );
        if ( $domain =~ /$$jarref{$name}->[1]$/i
            && $url =~ /^$$jarref{$name}->[2]/i ) {
            push @r, $name;
        }
    }
    return @r;
}

########################################################################

=item B<cookie_set>

Params: $jar, $name, $value, $domain, $path, $expire, $secure

Return: nothing

Set the named cookie with the provided values into the %jar.  $name is
required to be a non-empty string.  $value is required, and will delete
the named cookie from the $jar if it is an empty string.  $domain and
$path can be strings or undefined.  $expire is ignored (but exists
for backwards-compatibility).  $secure should be the numeric value of
0 or 1.

=cut

sub cookie_set {
    my ( $jarref, $name, $value, $domain, $path, $expire, $secure ) = @_;
    my @construct;
    return if ( !( defined $jarref && ref($jarref) ) );
    return if ( $name eq '' );
    if ( !defined $value || $value eq '' ) {
        delete $$jarref{$name};
        return;
    }
    $path   = $path   || '/';
    $secure = $secure || 0;
    @construct = ( $value, $domain, $path, undef, $secure );
    $$jarref{$name} = \@construct;
}

########################################################################
########################################################################

# cluster global variables
%_crawl_config = (
    'save_cookies'         => 0,
    'reuse_cookies'        => 1,
    'save_offsites'        => 0,
    'save_non_http'        => 0,
    'follow_moves'         => 1,
    'url_limit'            => 1000,
    'use_params'           => 0,
    'params_double_record' => 0,
    'skip_ext'             => {
        gif => 1,
        jpg => 1,
        png => 1,
        gz  => 1,
        swf => 1,
        pdf => 1,
        zip => 1,
        wav => 1,
        mp3 => 1,
        asf => 1,
        tgz => 1
    },
    'save_skipped'    => 0,
    'save_referrers'  => 0,
    'use_referrers'   => 1,
    'do_head'         => 0,
    'callback'        => 0,
    'netloc_bug'      => 1,
    'normalize_uri'   => 1,
    'source_callback' => 0
);

%_crawl_linktags = (
    'a'          => 'href',
    'applet'     => [qw(codebase archive code)],
    'area'       => 'href',
    'base'       => 'href',
    'bgsound'    => 'src',
    'blockquote' => 'cite',
    'body'       => 'background',
    'del'        => 'cite',
    'embed'      => [qw(src pluginspage)],
    'form'       => 'action',
    'frame'      => [qw(src longdesc)],
    'iframe'     => [qw(src longdesc)],
    'ilayer'     => 'background',
    'img'        => [qw(src lowsrc longdesc usemap)],
    'input'      => [qw(src usemap)],
    'ins'        => 'cite',
    'isindex'    => 'action',
    'head'       => 'profile',
    'layer'      => [qw(background src)],
    'link'       => 'href',

    #	 'meta'    => 'http-equiv',
    'object' => [qw(codebase data archive usemap)],
    'q'      => 'cite',
    'script' => 'src',
    'table'  => 'background',
    'td'     => 'background',
    'th'     => 'background',
    'xmp'    => 'href',
);

#####################################################
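=pod

The defaults in %_crawl_config above are copied into each crawl object's
{config} hash by the reset closure in crawl_new() below, so a single
crawl can be tuned without touching the globals.  A small sketch,
assuming $crawler was returned by crawl_new():

    $crawler->{config}->{url_limit}       = 100;  # cap the fetch queue
    $crawler->{config}->{save_cookies}    = 1;    # collect Set-Cookie values
    $crawler->{config}->{skip_ext}->{css} = 1;    # also skip .css links

=cut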
=item B<crawl_new>

Params: $START, $MAX_DEPTH, \%request_hash [, \%tracking_hash ]

Return: $crawl_object

The crawl_new() function initializes a crawl object (hash) to the default
values, and then returns it for later use by crawl().  $START is the starting
URL (in the form of 'http://www.host.com/url'), and MAX_DEPTH is the maximum
number of levels to crawl (the START URL counts as 1, so a value of 2 will
crawl the START URL and all URLs found on that page).  The request_hash
is a standard initialized request hash to be used for requests; you should
set any authentication information or headers in this hash in order for
the crawler to use them.  The optional tracking_hash lets you supply a
hash for use in tracking URL results (otherwise crawl_new() will allocate
a new anon hash).

=cut

sub crawl_new {
    my ( $start, $depth, $reqref, $trackref ) = @_;
    my %X;

    return undef if ( !defined $start  || !defined $depth );
    return undef if ( !defined $reqref || !ref($reqref) );
    $trackref = {} if ( !defined $trackref || !ref($trackref) );

    $X{track}   = $trackref;
    $X{request} = $reqref;
    $X{depth}   = $depth || 2;
    $X{start}   = $start;
    $X{magic}   = 7340;

    $X{reset} = sub {
        $X{errors}      = [];    # all errors encountered
        $X{urls}        = [];    # temp; used to hold all URLs on page
        $X{server_tags} = {};    # all server tags found
        $X{referrers}   = {};    # who refers to what URLs
        $X{offsites}    = {};    # all URLs that point offsite
        $X{response}    = {};    # temp; the response hash
        $X{non_http}    = {};    # all non_http URLs found
        $X{cookies}     = {};    # all cookies found
        $X{forms}       = {};    # all forms found
        $X{jar}         = {};    # temp; cookie jar
        $X{url_queue}   = [];    # temp; URLs to still fetch
        $X{config} = {};
        %{ $X{config} } = %_crawl_config;
        %{ $X{track} } = ();
        $X{parsed_page_count} = 0;
    };

    $X{crawl} = sub { crawl( \%X, @_ ) };
    $X{reset}->();

    return \%X;
}

#####################################################
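=pod

A sketch of constructing a crawl object.  The request hash here is a
plain hash carrying one extra header; that is enough for illustration
because crawl() runs http_fixup_request() on it before use (the
User-Agent value is just an example):

    my %request = ( 'User-Agent' => 'example-crawler/0.1' );
    my %track;

    my $crawler = LW2::crawl_new( 'http://www.example.com/', 2,
                                  \%request, \%track )
      or die 'crawl_new failed';

=cut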
=item B<crawl>

Params: $crawl_object [, $START, $MAX_DEPTH ]

Return: $count [ undef on error ]

The heart of the crawl package.  Will perform an HTTP crawl on the
specified HOST, starting at START URI, proceeding up to MAX_DEPTH.
Crawl_object needs to be the variable returned by crawl_new().  You can
also indirectly call crawl() via the crawl_object itself:

	$crawl_object->{crawl}->($START,$MAX_DEPTH)

Returns the number of URLs actually crawled (not including those skipped).

=cut

{    # START OF CRAWL CONTAINER

    sub crawl {
        my ( $C, $START, $MAX_DEPTH ) = @_;
        return undef if ( !defined $C || !ref($C) || $C->{magic} != 7340 );

        # shortcuts, to reduce dereferences and typing
        my $CONFIG = $C->{config};
        my $TRACK  = $C->{track};
        my $URLS   = $C->{urls};
        my $RESP   = $C->{response};
        my $REQ    = $C->{request};
        my $Q      = $C->{url_queue};

        $START ||= $C->{start};
        $C->{depth} = $MAX_DEPTH || $C->{depth};

        my ( $COUNT, $T, @ST ) = ( 0, '' );

        # ST[] = [ 0.HOST, 1.PORT, 2.URL, 3.DEPTH, 4.CWD, 5.REF ]
        my @v = uri_split($START);
        my $error = undef;
        $error = 'Start protocol not http or https'
          if ( $v[1] ne 'http' && $v[1] ne 'https' );
        $error = 'Bad start host' if ( !defined $v[2] || $v[2] eq '' );
        push( @{ $C->{errors} }, $error ) && return undef if ( defined $error );

        @ST = ( $v[2], $v[3], $v[0], 1, '', '' );

        $REQ->{whisker}->{ssl}  = 1 if ( $v[1] eq 'https' );
        $REQ->{whisker}->{host} = $ST[0];
        $REQ->{whisker}->{port} = $ST[1];
        $REQ->{whisker}->{lowercase_incoming_headers} = 1;
        $REQ->{whisker}->{ignore_duplicate_headers}   = 0;
        delete $REQ->{whisker}->{parameters};
        http_fixup_request($REQ);

        push @$Q, \@ST;

        while (@$Q) {
            @ST = @{ shift @$Q };
            next if ( defined $TRACK->{ $ST[2] } && $TRACK->{ $ST[2] } ne '?' );

            if ( $ST[3] > $C->{depth} ) {
                $TRACK->{ $ST[2] } = '?' if ( $CONFIG->{save_skipped} > 0 );
                next;
            }

            $ST[4] = uri_get_dir( $ST[2] );
            $REQ->{whisker}->{uri} = $ST[2];

            if ( $ST[5] ne '' && $CONFIG->{use_referrers} > 0 ) {
                $REQ->{Referer} = $ST[5];    # HTTP spells this header 'Referer'
            }

            my $result = _crawl_do_request( $REQ, $RESP, $C );
            if ( $result == 1 || $result == 2 ) {
                push @{ $C->{errors} }, "$ST[2]: $RESP->{whisker}->{error}";
                next;
            }

            $COUNT++;
            $TRACK->{ $ST[2] } = $RESP->{whisker}->{code}
              if ( $result == 0 || $result == 4 );
            $TRACK->{ $ST[2] } = '?'
              if ( ( $result == 3 || $result == 5 )
                && $CONFIG->{save_skipped} > 0 );

            if ( defined $RESP->{server} && !ref( $RESP->{server} ) ) {
                $C->{server_tags}->{ $RESP->{server} }++;
            }

            if ( defined $RESP->{'set-cookie'} ) {
                if ( $CONFIG->{save_cookies} > 0 ) {
                    if ( ref( $RESP->{'set-cookie'} ) ) {
                        $C->{cookies}->{$_}++
                          foreach ( @{ $RESP->{'set-cookie'} } );
                    }
                    else {
                        $C->{cookies}->{ $RESP->{'set-cookie'} }++;
                    }
                }
                cookie_read( $C->{jar}, $RESP )
                  if ( $CONFIG->{reuse_cookies} > 0 );
            }

            next if ( $result == 4 || $result == 5 );
            next if ( scalar @$Q > $CONFIG->{url_limit} );

            if ( $result == 0 ) {    # page should be parsed
                if ( $CONFIG->{source_callback} != 0
                    && ref( $CONFIG->{source_callback} ) eq 'CODE' )
                {
                    &{ $CONFIG->{source_callback} }($C);
                }
                html_find_tags( \$RESP->{whisker}->{data},
                    \&_crawl_extract_links_test, 0, $C, \%_crawl_linktags );
                $C->{parsed_page_count}++;
            }

            push @$URLS, $RESP->{location} if ( $result == 3 );

            foreach $T (@$URLS) {

# [listing truncated: page 1 of 5 of lw2.pm]
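=pod

A sketch of driving the crawler and reading the results back out,
continuing the hypothetical $crawler and %track from the crawl_new()
sketch above.  Per the code above, %track maps each crawled URL to its
HTTP response code (or '?' for skipped URLs when save_skipped is set),
and {server_tags} counts the Server banners seen:

    my $count = $crawler->{crawl}->( 'http://www.example.com/', 2 );

    if ( !defined $count ) {
        print "error: $_\n" foreach @{ $crawler->{errors} };
    }
    else {
        print "crawled $count URLs\n";
        print "$track{$_}\t$_\n" foreach sort keys %track;
        print 'servers seen: ',
          join( ', ', keys %{ $crawler->{server_tags} } ), "\n";
    }

=cut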
