⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 13424391616.shtml.html

📁 一个java+Mysql写的主题爬虫,可以通过给定初始的url爬取到相关的网页
💻 HTML
📖 第 1 页 / 共 5 页
字号:
<div class="HSpace_10"></div>
<div class="HSpace_2"></div>
<script language="JavaScript">DisplayAd();</script>
<!-- footer begin -->
<div class="blkContentFooter">
	<p><a href="http://corp.sina.com.cn/chn/" target="_blank">新浪简介</a>┊<a href="http://corp.sina.com.cn/eng/" target="_blank">About Sina</a>┊<a href="http://ads.sina.com.cn/" target="_blank">广告服务</a>┊<a href="http://www.sina.com.cn/contactus.html" target="_blank">联系我们</a>┊<a href="http://corp.sina.com.cn/chn/sina_job.html" target="_blank">招聘信息</a>┊<a href="http://www.sina.com.cn/intro/lawfirm.shtml" target="_blank">网站律师</a>┊<a href="http://english.sina.com/" target="_blank">SINA English</a>┊<a href="http://members.sina.com.cn/apply/" target="_blank">会员注册</a>┊<a href="http://tech.sina.com.cn/focus/sinahelp.shtml" target="_blank">产品答疑</a>┊Copyright &copy; 1996-2009 SINA Corporation, All Rights Reserved</p>
	<p>新浪公司 <a href="http://www.sina.com.cn/intro/copyright.shtml" target="_blank">版权所有</a></p>
</div>
<!-- footer end -->
<!-- Start  Wrating  --><script language="javascript">
/*
 * Wrating page-view beacon.
 *
 * Builds a 1x1 image request against wrUrl carrying a visitor id cookie
 * ("vjuids"), a last-visit timestamp cookie ("vjlast"), basic client
 * capabilities and the account code mapped from the current host name,
 * then emits the <img> tag with document.write().
 *
 * Kept in ES5 syntax: this is an inline script on a legacy page.
 */
var wrUrl = "//sina.wrating.com/";
var wrDomain = "sina.com.cn";
var wratingDefaultAcc = "860010-0323010000";

/* Host name -> account code. Hosts not listed fall back to wratingDefaultAcc. */
var wratingAccArray = {
	"torch.2008.sina.com.cn": "860010-0308070000",
	"video.sina.com.cn": "860010-0309010000",
	"cctv.sina.com.cn": "860010-0309020000",
	"chat.sina.com.cn": "860010-0311010000",
	"ent.sina.com.cn": "860010-0312010000",
	"tech.sina.com.cn": "860010-0313010000",
	"mobile.sina.com.cn": "860010-0313020000",
	"house.sina.com.cn": "860010-0315010000",
	"bj.house.sina.com.cn": "860010-0315020000",
	"auto.sina.com.cn": "860010-0316010000",
	"eladies.sina.com.cn": "860010-0317010000",
	"bj.sina.com.cn": "860010-0317020000",
	"woman.sina.com.cn": "860010-0317010000",
	"women.sina.com.cn": "860010-0317010000",
	"lady.sina.com.cn": "860010-0317010000",
	"man.eladies.sina.com.cn": "860010-0317030000",
	"games.sina.com.cn": "860010-0318010000",
	"game.sina.com.cn": "860010-0318010000",
	"edu.sina.com.cn": "860010-0307010000",
	"baby.sina.com.cn": "860010-0320010000",
	"kid.sina.com.cn": "860010-0320020000",
	"astro.sina.com.cn": "860010-0321020000",
	"news.sina.com.cn": "860010-0310010000",
	"weather.news.sina.com.cn": "860010-0310020000",
	"mil.news.sina.com.cn": "860010-0310030000",
	"www.sina.com.cn": "860010-0322010000",
	"home.sina.com.cn": "860010-0322010000",
	"sports.sina.com.cn": "860010-0308010000",
	"shidefc.sina.com.cn": "860010-0308020000",
	"weiqi.sina.com.cn": "860010-0308030000",
	"f1.sina.com.cn": "860010-0308040000",
	"golf.sina.com.cn": "860010-0308050000",
	"2002.sina.com.cn": "860010-0308060000",
	"2004.sina.com.cn": "860010-0308060000",
	"2006.sina.com.cn": "860010-0308060000",
	"2008.sina.com.cn": "860010-0308070000",
	"yayun2002.sina.com.cn": "860010-0308060000",
	"yayun2006.sina.com.cn": "860010-0308060000",
	"inter.sina.com.cn": "860010-0308080000",
	"chelsea.sina.com.cn": "860010-0308090000",
	"book.sina.com.cn": "860010-0319010000",
	"cul.book.sina.com.cn": "860010-0319020000",
	"comic.book.sina.com.cn": "860010-0319030000",
	"finance.sina.com.cn": "860010-0314010000",
	"money.sina.com.cn": "860010-0314020000",
	/* This key was previously split by a raw line break inside the string
	   literal, which made the whole script fail to parse. */
	"yue.sina.com.cn": "860010-0324010000",
	"www.sina.com": "860010-0322010000"
};

/**
 * Collects client data, maintains the "vjuids" / "vjlast" cookies and writes
 * the tracking <img> into the document. Does nothing on file: URLs.
 */
function vjTrack() {
	var STAMP_MAX_AGE_SECONDS = 1800;
	var COOKIE_EXPIRES = "expires=Fri, 1 Jan 2038 00:00:00 GMT;";

	if (document.location.protocol == "file:") {
		return;
	}

	var cookiesOn = navigator.cookieEnabled ? "1" : "0";
	var javaOn = navigator.javaEnabled() ? "1" : "0";
	var visitorCookieCreated = "0"; // "1" once a new vjuids cookie was stored
	var stampExpired = "0";         // "1" when vjlast is older than the max age
	var lastStamp = 0;
	var visitorId;

	if (cookiesOn == "1") {
		if (document.cookie.indexOf("vjuids=") < 0) {
			visitorId = vjVisitorID();
			document.cookie = "vjuids=" + escape(visitorId) + ";" + COOKIE_EXPIRES + ";domain=" + wrDomain + ";path=/;";
			// Re-read to find out whether the browser actually kept the cookie.
			if (document.cookie.indexOf("vjuids=") < 0) {
				cookiesOn = "0";
			} else {
				visitorCookieCreated = "1";
			}
		} else {
			visitorId = vjGetCookie("vjuids");
		}
	}

	var referrer = document.referrer;
	if (!referrer) {
		referrer = "";
	}

	var flashVersion = vjFlash();

	// Stays undefined (and is sent as such) when neither source is available.
	var screenInfo;
	if (self.screen) {
		screenInfo = screen.width + "x" + screen.height + "x" + screen.colorDepth;
	} else if (self.java) {
		var toolkit = java.awt.Toolkit.getDefaultToolkit();
		var screenSize = toolkit.getScreenSize();
		screenInfo = screenSize.width + "x" + screenSize.height + "x0";
	}

	var language;
	if (navigator.language) {
		language = navigator.language.toLowerCase();
	} else if (navigator.browserLanguage) {
		language = navigator.browserLanguage.toLowerCase();
	} else {
		language = "-";
	}

	var tValue = ""; // the "t" parameter is always sent empty
	var now = new Date();
	var tzOffsetHours = now.getTimezoneOffset() / -60;
	var extra = "&s=" + screenInfo + "&l=" + language + "&z=" + tzOffsetHours + "&j=" + javaOn + "&f=" + flashVersion;

	if (cookiesOn == "1") {
		if (document.cookie.indexOf("vjlast=") < 0) {
			lastStamp = 0;
		} else {
			// Explicit radix so the stored decimal stamp is never read as octal/hex.
			lastStamp = parseInt(vjGetCookie("vjlast"), 10);
		}
	}

	if ((now.getTime() / 1000) - lastStamp > STAMP_MAX_AGE_SECONDS) {
		stampExpired = "1";
		document.cookie = "vjlast=" + Math.round(now.getTime() / 1000) + ";" + COOKIE_EXPIRES + ";domain=" + wrDomain + ";path=/;";
	}

	if (referrer != "") {
		extra = extra + "&r=" + escape(referrer);
	}
	if (stampExpired != "0") {
		extra = extra + "&n=" + lastStamp;
	}
	if (visitorCookieCreated != "0") {
		extra = extra + "&u=" + visitorCookieCreated;
	}

	var account = vjGetAcc();
	var domainFlag = vjGetDomain();
	var beaconUrl = wrUrl + "a.gif?a=" + now.getTime().toString(16) +
		"&t=" + escape(tValue) +
		"&i=" + escape(visitorId) +
		"&b=" + escape(document.location) +
		"&c=" + account + extra +
		"&ck=" + domainFlag;

	document.write('<img src="' + beaconUrl + '" width="1" height="1" />');
}

/**
 * Returns the account code for the current host (third "/"-separated part of
 * the lower-cased URL), or wratingDefaultAcc when the host is not mapped.
 */
function vjGetAcc() {
	var url = document.location.toString().toLowerCase();
	var host = url.split("/")[2];
	var account = wratingAccArray[host];
	if (typeof account == "undefined") {
		account = wratingDefaultAcc;
	}
	return account;
}

/**
 * Returns the Flash version string taken from the plugin description, or
 * "<major>.0" found by probing ActiveX controls 10 down to 2, or "-" when
 * nothing is detected.
 */
function vjFlash() {
	var version = "-";
	var nav = navigator;
	var ii;

	if (nav.plugins && nav.plugins.length) {
		for (ii = 0; ii < nav.plugins.length; ii++) {
			if (nav.plugins[ii].name.indexOf("Shockwave Flash") != -1) {
				version = nav.plugins[ii].description.split("Shockwave Flash ")[1];
				break;
			}
		}
	} else if (window.ActiveXObject) {
		for (ii = 10; ii >= 2; ii--) {
			try {
				// Direct construction; no eval() is needed to build the ProgID.
				var control = new ActiveXObject("ShockwaveFlash.ShockwaveFlash." + ii);
				if (control) {
					version = ii + ".0";
					break;
				}
			} catch (e) {
				// Deliberate best-effort probe: this version is not installed,
				// keep trying lower ones.
			}
		}
	}
	return version;
}

/**
 * Hashes a string by folding its character codes from last to first with
 * h = (h << 5) + h + code. Returns 0 for an empty or missing input.
 */
function vjHash(text) {
	if (!text || text == "") {
		return 0;
	}
	var hash = 0;
	for (var pos = text.length - 1; pos >= 0; pos--) {
		hash = (hash << 5) + hash + text.charCodeAt(pos);
	}
	return hash;
}

/**
 * Builds a visitor id: hex hash of location + cookie + referrer, the current
 * time in hex, and a hex random number, joined with dots.
 */
function vjVisitorID() {
	var hashHex = vjHash(document.location + document.cookie + document.referrer).toString(16);
	var now = new Date();
	return hashHex + "." + now.getTime().toString(16) + "." + Math.random().toString(16);
}

/**
 * Returns the unescaped cookie text from offset `start` up to the next ";"
 * (or the end of document.cookie).
 */
function vjGetCookieVal(start) {
	var end = document.cookie.indexOf(";", start);
	if (end == -1) {
		end = document.cookie.length;
	}
	return unescape(document.cookie.substring(start, end));
}

/**
 * Returns the value of the cookie called `name`, or null when no cookie entry
 * starting with "name=" is found. Entries are located by scanning for spaces.
 */
function vjGetCookie(name) {
	var prefix = name + "=";
	var prefixLen = prefix.length;
	var total = document.cookie.length;
	var pos = 0;
	while (pos < total) {
		var valueStart = pos + prefixLen;
		if (document.cookie.substring(pos, valueStart) == prefix) {
			return vjGetCookieVal(valueStart);
		}
		pos = document.cookie.indexOf(" ", pos) + 1;
		if (pos == 0) {
			break;
		}
	}
	return null;
}

/**
 * Returns 1 when the page is framed and both this page's and the parent's URL
 * match /sina.com/i, or when reading the parent location throws; otherwise 0.
 */
function vjGetDomain() {
	var flag = 0;
	try {
		if (window.self.parent != self) {
			var pattern = /sina.com/i;
			var ownUrl = document.location.toString().toLowerCase();
			var parentUrl = parent.location.toString().toLowerCase();
			if (pattern.test(ownUrl) && pattern.test(parentUrl)) {
				flag = 1;
			}
		}
	} catch (e) {
		flag = 1;
	}
	return flag;
}

vjTrack();
</script><!-- End Wrating--><!-- START Nielsen//NetRatings SiteCensus V5.2 --><!-- COPYRIGHT 2006 Nielsen//NetRatings --><script type="text/javascript">
	var _rsCI="cn-sina2006";
	var _rsCG="0";
	var _rsDN="//secure-cn.imrworldwide.com/";
	var _rsCC=0;
	var _rsSE=1;
	var _rsSM=0.01;
	var _rsSS=1500;
</script><script type="text/javascript" src="//secure-cn.imrworldwide.com/v52.js"></script><noscript>	<img src="//secure-cn.imrworldwide.com/cgi-bin/m?ci=cn-sina2006&amp;cg=0" alt=""/></noscript><!-- END Nielsen//NetRatings SiteCensus V5.2 --></body>
<!-- 顶部导航 统一登录 begin -->
<script type="text/javascript">
// Pick the top-navigation variant from the "SE" cookie: a non-empty value
// prints the welcome header (presumably the signed-in state; confirm against
// the unipro scripts), an empty value prints the standard header.
if (getCookie("SE") != "") {
	print_stand_unipro_welcome();
	GetObj("content_mainNav").className = "hd_nav hd_nav_log";
} else {
	print_stand_unipro_head();
	GetObj("content_mainNav").className = "hd_nav";
}

// The quit button's click result is whatever getUniproUrl_welcome() returns.
GetObj("content_mainNav_userQuitBtn").onclick = function () {
	return getUniproUrl_welcome();
};
</script>
<!-- 顶部导航 统一登录 end -->
<script type="text/javascript" language="javascript" src="http://i3.sinaimg.cn/cha/news/yq.js"></script>
<!-- 071127 ws begin -->
<script language="javascript">var iask_keywords_bid="artibody";var iask_keywords_lid="hotwords_link";var iask_keywords_fid="hotwords";var iask_keywords_min=24;var iask_keywords_len=72;</script>
<script type="text/javascript" src="http://keyword.sina.com.cn/js/iaskkeywords.js"></script>
<!-- 071127 ws end -->
<!--页面最底部包含读取留言数接口文件-->
<script id="COUNTER_FORIE" type="text/javascript"></script>
<script type="text/javascript" src="http://news.sina.com.cn/comment/cmnt_counter.js"></script>
<!--页面最底部包含计数器接口文件-->
<!----------------------------------------------------------------------------->
      <!--首先必须指定要读取的留言频道、新闻和是否专题模式,需设置-->
      <script type="text/javascript">
		// Comment-feed settings; the surrounding page notes say these must be
		// set before the comment data interface script is included.
		var cmnt_channel	= "ty";		// Channel name, e.g. gn, gj, kj
		var cmnt_newsid		= "6-12-4391616"; 	// News ID, e.g. 1-1-xxx, download-yyy; 28-3-1502969
		var cmnt_group		= 0; 			// Special-topic mode flag; 1 means special topic
      </script>
      <!--设置结束-->
      <!----------------------------------------------------------------------------->
      <!--然后包含评论V4数据接口文件,该部分不需要改动保持原样即可-->
      <script id="COMMENT_DATAISLAND_FORIE" type="text/javascript" src=""></script>
      <!--包含接口结束-->
<!----------------------------------------------------------------------------->
          <!--现在已经读取到了JS格式的数据,以下为格式化并输出评论数据的代码-->
			<script type="text/javascript">
			/**
			 * Renders the first ten entries of the global CmsgList into the
			 * "comment" element and updates the comment-count links in
			 * "comment_t_show1" / "comment_t_show2" from the global Count.
			 *
			 * Each comment is cut at the first "&lt;br&gt;" marker and limited to
			 * 94 characters (an ellipsis is appended when shortened). A user name
			 * of " " is replaced, on the CmsgList entry itself, by the default
			 * name.
			 *
			 * Does nothing when CmsgList is missing or null. A missing Count or a
			 * missing target element is skipped instead of throwing, so one absent
			 * data script or container no longer aborts the rest of the page.
			 *
			 * NOTE(review): comment fields are written through innerHTML; the
			 * "&lt;br&gt;" marker suggests the feed is already HTML-escaped, but
			 * confirm that before trusting it.
			 */
			function cmnt_print()
			{
				var MAX_ITEMS = 10;
				var MAX_CHARS = 94;
				var BREAK_MARK = "&lt;br&gt;";
				var commentLink = '<a href="http://comment4.news.sina.com.cn/comment/comment4.html?channel=ty&newsid=6-12-4391616">';

				// Writes html into the element with the given id, if it exists.
				function setHtml(id, html) {
					var node = document.getElementById(id);
					if ( node ) {
						node.innerHTML = html;
					}
				}

				// typeof guard: a bare reference would throw if the data script
				// that defines CmsgList never loaded.
				if ( typeof CmsgList === "undefined" || CmsgList == null ) {
					return;
				}

				// CmsgList may hold up to 40 entries; only the first ten are shown.
				var shown = CmsgList.length > MAX_ITEMS ? MAX_ITEMS : CmsgList.length;
				var pieces = [];

				for ( var i = 0; i < shown; i++ ) {
					var item = CmsgList[i];

					// Default poster name (stored back on the entry, as before).
					if ( item.m_user === " " ) {
						item.m_user = "新浪网友";
					}

					var content = item.m_content == null ? "" : String(item.m_content);
					var cut = content.indexOf(BREAK_MARK);
					var text = cut === -1 ? content : content.substring(0, cut);
					if ( text.length > MAX_CHARS ) {
						text = text.substring(0, MAX_CHARS) + "…";
					}

					pieces.push("<div class='t_info'>" + item.m_user + " " + item.m_datetime + "</div><div class='t_txt'>" + text + "</div>");
				}

				setHtml("comment", pieces.join(""));
				setHtml("comment_t_show1", commentLink + '欢迎发表评论</a>');

				if ( typeof Count === "undefined" || Count == null ) {
					return;
				}

				// Same outcome as the former loose checks (!= "" and != 0), plus
				// protection against a null/undefined count.
				var total = Count.c_count;
				if ( total != null && total !== "" && Number(total) !== 0 ) {
					var countLink = commentLink + '已有<font class="f_c00">' + total + '</font>位网友发表评论</a>';
					setHtml("comment_t_show1", countLink);
					setHtml("comment_t_show2", '<div class="bPCB_t1" style="_float:right;_padding-right:9px;">' + countLink + '</div>');
				}
			}
			//comment_show("comment");
						////////////////////////////////////////////////////////////////////////////////
			</script>
			<script type="text/javascript" src="http://news.sina.com.cn/comment/cmnt_xml.js"></script> 
<script type="text/javascript" src="http://news.sina.com.cn/comment/cmnt_embed.js"></script>
<!-- google begin -->
<!-- Google 广告 begin --><script language="javascript" type="text/javascript"><!--//--><![CDATA[//><!--function google_ad_request_done(google_ads) {	/*	 * 此函数为必需函数,用于展示	 * JavaScript 请求返回的	 * 广告。您需要修改 document.write	 * 命令,使其写出的 HTML 符合	 * 所需广告布局。	 */	var s = '';	var i;	/*	 * 验证存在可以展示的广告。	 */	if (google_ads.length == 0) {	  return;	}/*	 * 如果返回图片或 Flash 广告,则展示该广告。	 * 否则,创建一个包含所有广告的字符串,	 * 然后使用 document.write() 命令写出该字符串。	 */if (google_ads[0].type == "image") {	  s += '<a href="' + google_ads[0].url +			  '" target="_top" title="go to ' + google_ads[0].visible_url +			  '"><img border="0" src="' + google_ads[0].image_url +			  '"width="' + google_ads[0].image_width +			  '"height="' + google_ads[0].image_height + '"></a>';	} else if (google_ads[0].type == "flash") {	  s += '<object classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000"' +			  ' codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,0,0"' +			  ' width="' + google_ad.image_width +			  '" height="' + google_ad.image_height + '">' +			  '<param name="movie" value="' + google_ad.image_url + '">' +			  '<param name="quality" value="high">' +			  '<param name="AllowScriptAccess" value="never">' +			  '<embed src="' + google_ad.image_url +			  '" width="' + google_ad.image_width +			  '" height="' + google_ad.image_height + 			  '" type="application/x-shockwave-flash"' + 			  ' allowScriptaccess="never" ' + 			  ' pluginspage="http://www.macromedia.com/go/getflashplayer"></embed></object>';	}else if (google_ads[0].type="text") {		s += '<div class="MTitle_01"><h2 class="title"><a href="https://adwords.google.com/select/Login?hl=zh_CN" style="text-decoration:none;font-size:14px;color:000000;font-weight:bold">Google提供的广告</a></h2></div>';	   if (google_ads.leng

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -