抓取chinaren.com校友录留言的PHP小程序

类别:软件工程 点击:0 评论:0 推荐:

<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<meta http-equiv="pragma" content="no-cache">
<title>提取留言</title>
<style>
.head { color: red; font-weight: bold; }
body { font-size: 9pt; background-color: #cccccc; }
</style>
</head>
<body>
<?php
// Raise the execution time limit to 600 seconds: the script below makes
// over a hundred getMessage() calls, each of which fetches a remote page.
set_time_limit(600);

/**
 * Download one guestbook page and echo every message found in it.
 *
 * The page is scanned with three regular expressions (timestamps, names
 * and message scripts). Results are paired purely by position: the i-th
 * name and the i-th timestamp are printed with the i-th message. When the
 * page yields fewer names or timestamps than messages, the missing parts
 * are printed as empty strings.
 *
 * NOTE(review): the extracted text comes from a remote page and is echoed
 * without HTML escaping; confirm that this is acceptable for the deployment.
 *
 * @param string $url     Location of the page to read (anything the PHP
 *                        stream wrappers can open).
 * @param bool   $history When false, names are taken from "&cid=...&msg="
 *                        fragments; otherwise from class="cr5" link markup.
 * @return void Output is written directly with echo. Nothing is printed
 *              when the page cannot be read.
 */
function getMessage($url,$history=false)
{
 // A message is a line containing a "<script>do...('" call.
 $match_msg  = "/<script>do.*\('[^\n]*/";
 // Timestamps look like "YYYY-MM-DD HH:MM:SS" (\040 is a space).
 $match_date = "/\d{4}-\d{2}-\d{2}\040\d{2}:\d{2}:\d{2}/";

 if($history==false){
  $match_names   = "/&cid=.{2,6}&msg=/";
  $replace_names = "/&cid=|&msg=/";
 }else{
  $match_names   = "/class\=\"cr5\"[^\n]*/";
  $replace_names = "/class\=\"cr5\" target\=\"_blank\">|<\/a>/";
 }

 // Strips the script wrapper, quoting and trailing "&nbsp..." from a message.
 $replace_msg = "/<script>|<\/script>|'\d*\'|doFlatTxt\('|doStr\('|&nbsp[^\n]*|\'\)|\\\\/";

 // Read the whole page at once. A failed fetch returns false (PHP itself
 // reports the warning); skip the page instead of working on a bad handle.
 $contents = file_get_contents($url);
 if($contents === false){
  return;
 }

 preg_match_all($match_date,$contents,$times);
 preg_match_all($match_names,$contents,$names);
 preg_match_all($match_msg,$contents,$messages);

 $total = count($messages[0]);
 for($i=0;$i<$total;$i++)
 {
  // Names and timestamps may be fewer than messages; fall back to "".
  $name = isset($names[0][$i]) ? preg_replace($replace_names,"",$names[0][$i]) : "";
  $time = isset($times[0][$i]) ? $times[0][$i] : "";

  echo "<p><b>".$name."</b>(";
  echo $time."):<br>\n";
  echo preg_replace($replace_msg,"",$messages[0][$i])."\n\n";
 }
}

// Remember when the run started so the elapsed time can be reported at the end.
$startedAt=time();

// Shared pieces of every page address requested below.
$classBase="http://alumni.chinaren.com/class/";
$classId="2815032345960598103";

// Section 1: the class index page.
echo "<p class=\"head\">最新留言:</p>\n";
getMessage($classBase."class_index.jsp?classuuid=".$classId);

// Section 2: guestbook pages 1 through 7.
echo "<p class=\"head\">更多留言:</p>\n";
foreach(range(1,7) as $page)
{
 getMessage($classBase."class_leaveword.jsp?classuuid=".$classId."&p=".$page);
}

// Section 3: history pages 0 through 99, parsed in history mode.
echo "<p class=\"head\">历史留言:</p>\n";
$page=0;
while($page<100)
{
 getMessage($classBase."class_leaveword2.jsp?p=".$page."&classuuid=".$classId."&msgtype=1&type=3",true);
 $page++;
}

// Footer: total running time in seconds.
echo "\n<br><center><b>执行本程序用的时间是<font color=red>";
$elapsed=time()-$startedAt;
echo $elapsed;
echo "</font>秒钟</b></center>";
?>
</body>
</html>

本文地址:http://com.8s8s.com/it/it31700.htm