Java：使用java.net的方法得到网站页面内的文章并生成文件的方法

热点排行

使用java.net的方法得到网站页面内的文章并生成文件的方法

类别：Java 点击：0 评论：0 推荐：

国庆闲暇时间，写了一个抓取 CSDN 文章的工具。采用了一些简单的算法，希望 CSDN 不要见怪。
本来想实现图片自动上传，但是没有时间，连文章的文档说明也没有仔细写。：）
开发工具：Eclipse 3.0
工作平台：Windows XP

/************************************************
* csdn文章采集工具
* csdn文章采集工具
* CreateData: 2004-10-3 19:59:54
* Description:
* Copyright: Copyright (c) 2004
* Company: 秋水工作室
* @author 王凯
* @version 1.0
***********************************************/
import java.net.*;
import java.sql.*;
import java.io.*;

/**
 * Small scraping utility: downloads a page, cuts fragments out of it with plain
 * string markers, and writes the result to an HTML file (optionally to a database).
 *
 * <p>NOTE(review): this class was recovered from a published listing in which every
 * HTML-like string literal had been stripped. Places where a literal could not be
 * recovered are marked with NOTE(review) comments and must be confirmed before use.
 */
public class OpenUrl
{
    /** Prefix of the string returned by {@link #getContent(String)} on failure. */
    private static final String OPEN_ERROR_PREFIX = "error open url";

    /** Directory prefix under which {@link #GetNews(String, String, String)} stores pages. */
    private static final String OUTPUT_ROOT = "c:\\csnd\\";

    /** Characters that Windows does not allow in a file name. */
    private static final String ILLEGAL_FILE_NAME_CHARS = "\\/:*?\"<>|";

    /**
     * Tag names whose opening and closing tags {@link #skipp(String)} removes.
     *
     * <p>NOTE(review): the original tag literals were lost in the published listing.
     * "a" and "font" are placeholders chosen only because the surviving algorithm
     * (drop the opening tag, drop the closing tag, keep the inner text) fits inline
     * tags - confirm against the original source.
     */
    private static final String[] FILTERED_TAGS = {"a", "font"};

    /**
     * Downloads the document behind a URL as text.
     *
     * <p>Lines are joined with {@code "\r\n"}; a trailing {@code "\r\n"} follows the
     * last line. The stream is decoded with the platform default charset.
     *
     * @param strUrl address to open
     * @return the document text, or {@code "error open url" + strUrl} if the URL is
     *         malformed or cannot be read; this method never throws
     */
    public String getContent(String strUrl)
    {
        BufferedReader br = null;
        try {
            URL url = new URL(strUrl);
            br = new BufferedReader(new InputStreamReader(url.openStream()));
            StringBuffer sb = new StringBuffer();
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append("\r\n");
            }
            return sb.toString();
        } catch (Exception e) {
            // Callers rely on the "never throws" contract, so every failure is
            // folded into the error string.
            return OPEN_ERROR_PREFIX + strUrl;
        } finally {
            // Close on every path; previously a failing read leaked the stream.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close fails after reading.
                }
            }
        }
    }

    /**
     * Downloads one article, extracts title, keywords and content, and saves them
     * as an HTML page under {@code c:\csnd\<addname>}.
     *
     * @param Path    URL of the article
     * @param addname sub-directory (relative to the output root) for the page
     * @param names   article file name taken from the URL; used as the output file
     *                name when no usable title was extracted
     * @return the generated HTML document
     */
    public static String GetNews(String Path,String addname,String names){
        OpenUrl ou = new OpenUrl();
        String htmlbody = ou.getContent(Path);

        // NOTE(review): the start/end markers below are empty because the published
        // listing lost them; with empty markers GetSkip always returns "". Restore
        // the real markers of the target page before relying on this method.
        String title = GetSkip(htmlbody,"","");
        String aboutkey = GetSkip(htmlbody,"","");
        String content = GetSkip(htmlbody,"","");

        System.out.println("title="+title);

        // The old code called content.replaceAll("'","''") and discarded the result.
        // That SQL escaping must not reach the HTML file, and addnew() now binds
        // parameters, so the statement was removed instead of "fixed".
        String body = "<html><head><title>"+title+"</title></head>"
            + "<body><csdntitle>标题:"+title+"</csdntitle> "
            + "<csdnaboutkey>"+aboutkey+"</csdnaboutkey>"
            + "<csdnbody>"+content+"</csdnbody>" +"</body></html>";
        OpenUrl.scwj(OUTPUT_ROOT+addname, toFileName(title, names)+".htm", body);
        return body;
    }

    /**
     * Builds a file name that is legal on Windows from an article title.
     * Falls back to {@code fallback}, then to "untitled", when nothing usable remains.
     */
    private static String toFileName(String title, String fallback){
        String source = title;
        if (source == null || source.trim().length() == 0) {
            source = fallback;
        }
        if (source == null) {
            source = "";
        }
        StringBuffer safe = new StringBuffer(source.length());
        for (int i = 0; i < source.length(); i++) {
            char c = source.charAt(i);
            boolean illegal = c < ' ' || ILLEGAL_FILE_NAME_CHARS.indexOf(c) >= 0;
            safe.append(illegal ? '_' : c);
        }
        String name = safe.toString().trim();
        return name.length() == 0 ? "untitled" : name;
    }

    /**
     * Removes the opening and closing tags listed in {@code FILTERED_TAGS} from a
     * fragment while keeping the text between them. Not called by the rest of the
     * class (the only call site is commented out in the original listing).
     *
     * <p>Opening tags are matched by prefix, so a tag name of {@code a} also matches
     * any tag whose name merely starts with "a".
     *
     * @param body HTML fragment to filter
     * @return the fragment without the filtered tags
     */
    public static String skipp(String body){
        System.out.println("skipi="+body.indexOf("<"+FILTERED_TAGS[0]));
        for (int t = 0; t < FILTERED_TAGS.length; t++) {
            body = removeOpeningTags(body, "<"+FILTERED_TAGS[t]);
        }
        for (int t = 0; t < FILTERED_TAGS.length; t++) {
            // Strings are immutable: the result has to be assigned back.
            body = body.replaceAll("</"+FILTERED_TAGS[t]+">", "");
        }
        return body;
    }

    /** Cuts every "marker ... &gt;" span out of body, stopping at an unterminated tag. */
    private static String removeOpeningTags(String body, String marker){
        int start;
        while ((start = body.indexOf(marker)) >= 0) {
            int close = body.indexOf(">", start);
            if (close < 0) {
                // Unterminated tag: nothing can be removed, and looping again would
                // find the same marker forever.
                break;
            }
            body = body.substring(0, start) + body.substring(close + 1);
        }
        return body;
    }

    /**
     * Returns the text between the first occurrence of {@code spath} and the first
     * occurrence of {@code ePath} that follows it.
     *
     * @param body  text to search
     * @param spath start marker (excluded from the result)
     * @param ePath end marker (excluded from the result)
     * @return the enclosed text, or {@code ""} if either marker is missing or any
     *         argument is {@code null}
     */
    public static String GetSkip(String body,String spath,String ePath){
        if (body == null || spath == null || ePath == null) {
            return "";
        }
        int from = body.indexOf(spath);
        if (from < 0) {
            return "";
        }
        from += spath.length();
        int to = body.indexOf(ePath, from);
        return to < 0 ? "" : body.substring(from, to);
    }

    /**
     * Usage example: walks the article list page and saves every article whose link
     * has the form {@code article/<dir>/<file>" target=}.
     */
    public static void test2(){
        final String marker = "article/";
        OpenUrl ou = new OpenUrl();
        String htmlbody = ou.getContent("http://dev.csdn.net/articlelist.aspx?c=6");
        int start;
        while ((start = htmlbody.indexOf(marker)) >= 0) {
            htmlbody = htmlbody.substring(start + marker.length());
            int end = htmlbody.indexOf("\" target=");
            if (end < 0) {
                // No terminated link left; substring(0, -1) used to throw here.
                break;
            }
            String names = htmlbody.substring(0, end);
            String path = "http://dev.csdn.net/article/"+names;
            System.out.println(path);

            String addname = "";
            int slash = names.indexOf("/");
            if (slash >= 0) {
                // Whole directory part; the old substring(0,1) kept only its first
                // character, which looks like a typo for substring(0,i).
                addname = names.substring(0, slash);
                names = names.substring(slash + 1);
            }
            OpenUrl.GetNews(path, addname, names);
        }
    }

    /**
     * Reference implementation that stores an article in the {@code develop} table
     * of the ODBC data source {@code csdn}.
     *
     * @param title    article title
     * @param aboutkey article keywords
     * @param pathurl  URL the article was fetched from
     * @param body     article content
     * @return {@code true} if a row was inserted; {@code false} if the driver is
     *         missing or the insert failed
     */
    public static boolean addnew(String title,String aboutkey,String pathurl,String body){
        try {
            Class.forName("sun.jdbc.odbc.JdbcOdbcDriver");
        } catch (ClassNotFoundException e) {
            System.out.print ("驱动程序不存在");
            return false;
        }

        Connection conn = null;
        PreparedStatement stmt = null;
        try {
            conn = DriverManager.getConnection("jdbc:odbc:csdn");
            // Bound parameters instead of string concatenation: scraped text
            // routinely contains quotes and must never become part of the SQL.
            stmt = conn.prepareStatement(
                "insert into develop (title,aboutkey,pathurl,body) values (?,?,?,?)");
            stmt.setString(1, title);
            stmt.setString(2, aboutkey);
            stmt.setString(3, pathurl);
            stmt.setString(4, body);
            // Statement.execute() returns false for an INSERT, so the old code
            // reported failure even on success; the update count is the real signal.
            return stmt.executeUpdate() > 0;
        } catch (SQLException e) {
            System.out.print (e);
            return false;
        } finally {
            closeQuietly(stmt);
            closeQuietly(conn);
        }
    }

    /** Closes a statement, ignoring a failure to close. */
    private static void closeQuietly(Statement stmt){
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException ignored) {
                // Best effort during cleanup.
            }
        }
    }

    /** Closes a connection, ignoring a failure to close. */
    private static void closeQuietly(Connection conn){
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException ignored) {
                // Best effort during cleanup.
            }
        }
    }

    /**
     * Writes {@code body} followed by a newline to {@code path/FileName}, creating
     * the directory first if necessary.
     *
     * @param path     target directory
     * @param FileName name of the file inside {@code path}
     * @param body     text to write
     * @return {@code true} if the file was written completely, {@code false} otherwise
     */
    public static boolean scwj(String path,String FileName,String body){
        PrintWriter out = null;
        try {
            File dir = new File(path);
            dir.mkdirs();
            // File(parent, child) picks the platform separator instead of a
            // hard-coded backslash.
            out = new PrintWriter(new FileWriter(new File(dir, FileName)));
            out.print(body + "\n");
            // PrintWriter swallows IOExceptions; checkError() flushes and reports them.
            return !out.checkError();
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /** Runs the article-list example. */
    public static void main(String args[])
    {
        OpenUrl.test2();
    }

}
完毕

本文地址：http://com.8s8s.com/it/it14920.htm

Java：使用java.net的方法得到网站页面内的文章 并生成文件的方法

使用java.net的方法得到网站页面内的文章 并生成文件的方法

Java：使用java.net的方法得到网站页面内的文章并生成文件的方法

使用java.net的方法得到网站页面内的文章并生成文件的方法