如何用 Java 实现抓取网页？

真爱图书 2010-07-06

展开全文

import java.net.*;
import java.io.*;

/**
 * Minimal page grabber: downloads one web page and appends its text to a
 * local file.
 *
 * <p>Text is decoded and encoded with the platform default charset, so the
 * saved copy is only faithful when the page uses that charset.
 */
public class Catch1 {
    /** Address of the page downloaded by {@link #test()}. */
    private static final String PAGE_URL = "http://www.sohu.com";

    /** Directory that receives the saved page. */
    private static final String OUTPUT_DIR = "E:\\text";

    /** Name of the saved file inside {@link #OUTPUT_DIR}. */
    private static final String OUTPUT_FILE = "test.html";

    /**
     * Downloads {@link #PAGE_URL} and appends its text to
     * {@code OUTPUT_DIR/OUTPUT_FILE}.
     *
     * <p>Failures are reported on standard error and never propagate to the
     * caller. If the download fails, nothing is written, so a truncated page
     * is never appended to the output file.
     */
    public void test() {
        String page;
        try {
            page = fetch(PAGE_URL);
        } catch (IOException e) {
            // Also covers MalformedURLException, which is an IOException.
            e.printStackTrace();
            return;
        }

        try {
            appendToFile(new File(OUTPUT_DIR, OUTPUT_FILE), page);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the resource at the given address as text.
     *
     * @param address absolute URL of the resource
     * @return the text of the resource, every line terminated by {@code '\n'}
     * @throws IOException if the URL is malformed or the transfer fails
     */
    static String fetch(String address) throws IOException {
        URLConnection conn = new URL(address).openConnection();
        Reader in = new InputStreamReader(conn.getInputStream());
        try {
            return readAll(in);
        } finally {
            // Close on every path so a failed read does not leak the stream.
            in.close();
        }
    }

    /**
     * Reads all remaining lines from a reader. The caller keeps ownership of
     * the reader and is responsible for closing it.
     *
     * @param source reader to drain
     * @return the lines read, each followed by {@code '\n'}; empty if the
     *         reader has no content
     * @throws IOException if reading fails
     */
    static String readAll(Reader source) throws IOException {
        BufferedReader reader = new BufferedReader(source);
        StringBuilder text = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            text.append(line).append('\n');
        }
        return text.toString();
    }

    /**
     * Appends text to a file, creating the file and any missing parent
     * directories first.
     *
     * @param target  file to append to
     * @param content text to append
     * @throws IOException if the parent directory cannot be created or the
     *                     write fails
     */
    static void appendToFile(File target, String content) throws IOException {
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Cannot create directory " + parent);
        }

        // FileWriter in append mode creates the file when it does not exist.
        BufferedWriter writer = new BufferedWriter(new FileWriter(target, true));
        try {
            writer.write(content);
        } finally {
            // close() flushes; running it here also releases the handle on failure.
            writer.close();
        }
    }

    /**
     * Runs the grabber once.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Catch1 grabber = new Catch1();
        grabber.test();
    }
}