⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mapping.xml

📁 网络爬虫源码
💻 XML
字号:
<?xml version="1.0"?>

<!-- DO NOT EDIT THIS FILE -->

<mapping>

 <!--
   WebRobot: binds net.matuschek.spider.WebRobot to the XML name "Robot".
   Each <field> names a property of the class; its <bind-xml> child gives
   the XML name and the node type used for that property.
 -->
 <class name="net.matuschek.spider.WebRobot">
   <map-to xml="Robot"/>

   <!-- Agent name -->
   <field name="agentName" type="string">
     <bind-xml name="AgentName" node="element"/>
   </field>

   <!-- Start referer -->
   <field name="startReferer" type="string">
     <bind-xml name="StartReferer" node="element"/>
   </field>

   <!-- Ignore robots.txt? -->
   <field name="ignoreRobotsTxt" type="boolean">
     <bind-xml name="IgnoreRobotsTxt" node="element"/>
   </field>

   <!-- Time to sleep after every request -->
   <field name="sleepTime" type="integer">
     <bind-xml name="SleepTime" node="element"/>
   </field>

   <!-- Maximal search depth -->
   <field name="maxDepth" type="integer">
     <bind-xml name="MaxDepth" node="element"/>
   </field>

   <!-- Walk to other hosts? -->
   <field name="walkToOtherHosts" type="boolean">
     <bind-xml name="WalkToOtherHosts" node="element"/>
   </field>

   <!-- Walk the whole start host? -->
   <field name="allowWholeHost" type="boolean">
     <bind-xml name="AllowWholeHost" node="element"/>
   </field>

   <!-- Walk to all hosts in the domain of the start host? -->
   <field name="allowWholeDomain" type="boolean">
     <bind-xml name="AllowWholeDomain" node="element"/>
   </field>

   <!-- Allow the host both with and without a leading "www." -->
   <field name="flexibleHostCheck" type="boolean">
     <bind-xml name="FlexibleHostCheck" node="element"/>
   </field>

   <!-- Proxy URL in the format host:port -->
   <field name="proxy" type="string">
     <bind-xml name="Proxy" node="element"/>
   </field>

   <!-- Maximal usable bandwidth -->
   <field name="bandwidth" type="integer">
     <bind-xml name="Bandwidth" node="element"/>
   </field>

   <!-- Maximum document age -->
   <field name="maxDocumentAge" type="long">
     <bind-xml name="MaxDocumentAge" node="element"/>
   </field>

   <!-- Enable cookies -->
   <field name="enableCookies" type="boolean">
     <bind-xml name="EnableCookies" node="element"/>
   </field>

   <!-- Allow caching -->
   <field name="allowCaching" type="boolean">
     <bind-xml name="AllowCaching" node="element"/>
   </field>

   <!-- URLs that can be visited more than once.
        The node type is set with the "node" attribute, exactly as for the
        scalar settings above ("mode" is not a bind-xml attribute). -->
   <field name="visitMany" type="string" collection="vector">
     <bind-xml name="VisitMany" node="element"/>
   </field>

   <!-- URLs that are allowed even if WalkToOtherHosts is false.
        Bound with node="element", like VisitMany. -->
   <field name="allowedURLs" type="string" collection="vector">
     <bind-xml name="AllowedURL" node="element"/>
   </field>

   <!-- Form handlers: a vector of FormHandler objects; the node type is
        left to the binding framework's default. -->
   <field name="formHandlers"
          type="net.matuschek.html.FormHandler"
          collection="vector">
     <bind-xml name="FormHandler"/>
   </field>

 </class>


 <!--
   FormHandler: binds net.matuschek.html.FormHandler to the XML name
   "FormHandler".
 -->
 <class name="net.matuschek.html.FormHandler">
   <map-to xml="FormHandler"/>

   <!-- URL of the form, carried as the "url" attribute -->
   <field name="url" type="string">
     <bind-xml name="url" node="attribute"/>
   </field>

   <!-- Default values: a vector of FormField objects bound to "FormField" -->
   <field name="defaults" type="net.matuschek.html.FormField" collection="vector">
     <bind-xml name="FormField"/>
   </field>

 </class>


 <!--
   FormField: binds net.matuschek.html.FormField to the XML name
   "FormField". Both properties are carried as attributes.
 -->
 <class name="net.matuschek.html.FormField">
   <map-to xml="FormField"/>

   <!-- Name of the form field (property "fieldname", XML attribute "name") -->
   <field name="fieldname" type="string">
     <bind-xml name="name" node="attribute"/>
   </field>

   <!-- Value of the form field -->
   <field name="value" type="string">
     <bind-xml name="value" node="attribute"/>
   </field>
 </class>



 <!--
   RegExpURLCheck: binds net.matuschek.spider.RegExpURLCheck to the XML
   name "RegExpURLCheck".
 -->
 <class name="net.matuschek.spider.RegExpURLCheck">
   <map-to xml="RegExpURLCheck"/>

   <!-- Default result (property "defaultResult", XML attribute "default") -->
   <field name="defaultResult" type="boolean">
     <bind-xml name="default" node="attribute"/>
   </field>

   <!-- Rules: a vector of RegExpRule objects bound to "RegExpRule" -->
   <field name="rules" type="net.matuschek.spider.RegExpRule" collection="vector">
     <bind-xml name="RegExpRule"/>
   </field>
 </class>
 

 <!--
   RegExpRule: binds net.matuschek.spider.RegExpRule to the XML name
   "RegExpRule". Both properties are carried as attributes.
 -->
 <class name="net.matuschek.spider.RegExpRule">
   <map-to xml="RegExpRule"/>

   <!-- Allow or deny -->
   <field name="allow" type="boolean">
     <bind-xml name="allow" node="attribute"/>
   </field>

   <!-- Pattern of the rule -->
   <field name="pattern" type="string">
     <bind-xml name="pattern" node="attribute"/>
   </field>

 </class>
 
 <!--
   DownloadRuleSet: binds net.matuschek.http.DownloadRuleSet to the XML
   name "DownloadRuleSet".
 -->
 <class name="net.matuschek.http.DownloadRuleSet">
   <map-to xml="DownloadRuleSet"/>

   <!-- Default, carried as the "default" attribute -->
   <field name="default" type="boolean">
     <bind-xml name="default" node="attribute"/>
   </field>

   <!-- Rules: a vector of DownloadRule objects bound to "DownloadRule" -->
   <field name="downloadRules" type="net.matuschek.http.DownloadRule" collection="vector">
     <bind-xml name="DownloadRule"/>
   </field>
 </class>
 
 <!--
   DownloadRule: binds net.matuschek.http.DownloadRule to the XML name
   "DownloadRule". All four properties are carried as attributes.
 -->
 <class name="net.matuschek.http.DownloadRule">
   <map-to xml="DownloadRule"/>

   <!-- Allow or deny -->
   <field name="allow" type="boolean">
     <bind-xml name="allow" node="attribute"/>
   </field>

   <!-- Minimum size -->
   <field name="minSize" type="integer">
     <bind-xml name="minSize" node="attribute"/>
   </field>

   <!-- Maximum size -->
   <field name="maxSize" type="integer">
     <bind-xml name="maxSize" node="attribute"/>
   </field>

   <!-- MIME type -->
   <field name="mimeType" type="string">
     <bind-xml name="mimeType" node="attribute"/>
   </field>
 </class>
 

 <!--
   JoBoBase: binds net.matuschek.jobo.JoBoBase to the XML name "JoBo".
   It references the WebRobot, RegExpURLCheck and DownloadRuleSet classes
   and adds two boolean settings of its own.
 -->
 <class name="net.matuschek.jobo.JoBoBase">
   <map-to xml="JoBo"/>

   <!-- The robot (a WebRobot), bound to "Robot" -->
   <field name="robot" type="net.matuschek.spider.WebRobot">
     <bind-xml name="Robot"/>
   </field>

   <!-- URL check (a RegExpURLCheck), bound to "URLCheck" -->
   <field name="URLCheck" type="net.matuschek.spider.RegExpURLCheck">
     <bind-xml name="URLCheck"/>
   </field>

   <!-- Download rule set, bound to "DownloadRuleSet" -->
   <field name="DownloadRuleSet" type="net.matuschek.http.DownloadRuleSet">
     <bind-xml name="DownloadRuleSet"/>
   </field>

   <!-- Localize links? -->
   <field name="localizeLinks" type="boolean">
     <bind-xml name="LocalizeLinks" node="element"/>
   </field>

   <!-- Store CGI? -->
   <field name="storeCGI" type="boolean">
     <bind-xml name="StoreCGI" node="element"/>
   </field>

 </class>


</mapping>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -