package com.util.file;
/**
 * Static helper exposing the application's root directory.
 */
public class Files {

    /**
     * Looks up the application's root directory.
     *
     * @return the value of the {@code user.dir} system property
     */
    public static String getSysPath() {
        final String workingDir = System.getProperty("user.dir");
        return workingDir;
    }
}
package com.util.file;
/** Static helper exposing the application's root directory. */
public class Files {
/*** * Returns the application's root directory, read from the "user.dir" system property. * @return the value of System.getProperty("user.dir") */ public static String getSysPath(){ return System.getProperty("user.dir"); }
}view plaincopy to clipboardprint?
<PRE class=csharp name="code">package com.core.crawl;
import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL;
import com.core.http.Http;
/**
 * Runnable task that requests webAddress over HTTP and writes the response
 * body to a file whose name is destFile followed by the string returned by
 * Http.fileType(contentType).
 */
public class WebSpider implements Runnable{
/* Helper used only for fileType(contentType); its implementation is not visible in this listing. */
private Http http = new Http();
/* Source URL and destination path prefix; both start out as empty strings. */
private String webAddress = ""; private String destFile = "";
/** Sets the URL that download() will request. */
public void setWebAddress(String webAddress){ this.webAddress = webAddress; }
/** Sets the destination path prefix; download() appends the file-type string to it. */
public void setDestFile (String destFile){ this.destFile = destFile; }
/**
 * Opens an HttpURLConnection to webAddress, issues a GET with a fixed Firefox
 * User-Agent header, prints the file-type string, and copies the response to
 * the destination file one byte at a time.
 *
 * @return true in every case, including when an exception was caught and printed
 *
 * NOTE(review): neither the InputStream nor the FileOutputStream is closed.
 * NOTE(review): the finally clause calls httpConn.disconnect() unconditionally,
 * so if new URL(...) or openConnection() throws, httpConn is still null and a
 * NullPointerException escapes from finally.
 * NOTE(review): as laid out here, the "//System.out.println(chByte);" marker is
 * followed by live code on the same physical line, so the rest of that line is
 * lexically a comment; this looks like a line-collapse artifact of the listing.
 */
public boolean download() throws IOException, InterruptedException {
HttpURLConnection httpConn = null;
try { URL url = new URL(webAddress);
httpConn = (HttpURLConnection) url.openConnection(); httpConn.setRequestMethod("GET"); httpConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14"); InputStream in = httpConn.getInputStream(); String fileType = http.fileType(httpConn.getContentType()); System.out.println(fileType); FileOutputStream out = new FileOutputStream(new File(destFile + fileType)); int chByte = in.read(); while (chByte != -1) { out.write(chByte); //System.out.println(chByte); chByte = in.read(); } } catch (Exception ex) { System.out.println(ex.toString()); } finally { httpConn.disconnect(); } return true; }
/**
 * Thread entry point: calls download() and prints the stack trace of any
 * IOException or InterruptedException it declares.
 * NOTE(review): the "//" debug marker on this line has the same line-collapse
 * problem as the one in download().
 */
public void run() { try { //System.out.println(Thread.currentThread().getName()); download(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } } </PRE>
// NOTE(review): the next line is a single-line rendering of the WebSpider listing (package, imports and class fused with page-widget text). Because it contains "//" markers, everything after the first one is lexically a comment in this form.
view plaincopy to clipboardprint?package com.core.crawl; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import com.core.http.Http; public class WebSpider implements Runnable{ private Http http = new Http(); private String webAddress = ""; private String destFile = ""; public void setWebAddress(String webAddress){ this.webAddress = webAddress; } public void setDestFile (String destFile){ this.destFile = destFile; } public boolean download() throws IOException, InterruptedException { HttpURLConnection httpConn = null; try { URL url = new URL(webAddress); httpConn = (HttpURLConnection) url.openConnection(); httpConn.setRequestMethod("GET"); httpConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14"); InputStream in = httpConn.getInputStream(); String fileType = http.fileType(httpConn.getContentType()); System.out.println(fileType); FileOutputStream out = new FileOutputStream(new File(destFile + fileType)); int chByte = in.read(); while (chByte != -1) { out.write(chByte); //System.out.println(chByte); chByte = in.read(); } } catch (Exception ex) { System.out.println(ex.toString()); } finally { httpConn.disconnect(); } return true; } public void run() { try { //System.out.println(Thread.currentThread().getName()); download(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } } package com.core.crawl;
import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL;
import com.core.http.Http;
public class WebSpider implements Runnable{
private Http http = new Http();
private String webAddress = ""; private String destFile = "";
public void setWebAddress(String webAddress){ this.webAddress = webAddress; }
public void setDestFile (String destFile){ this.destFile = destFile; }
public boolean download() throws IOException, InterruptedException {
HttpURLConnection httpConn = null;
try { URL url = new URL(webAddress);
httpConn = (HttpURLConnection) url.openConnection(); httpConn.setRequestMethod("GET"); httpConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14"); InputStream in = httpConn.getInputStream(); String fileType = http.fileType(httpConn.getContentType()); System.out.println(fileType); FileOutputStream out = new FileOutputStream(new File(destFile + fileType)); int chByte = in.read(); while (chByte != -1) { out.write(chByte); //System.out.println(chByte); chByte = in.read(); } } catch (Exception ex) { System.out.println(ex.toString()); } finally { httpConn.disconnect(); } return true; }
public void run() { try { //System.out.println(Thread.currentThread().getName()); download(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } }
<PRE class=csharp name="code">package com.core.crawl;
import java.io.IOException;
import com.util.file.Files;
/**
 * Command-line driver that runs two WebSpider downloads in parallel and
 * reports the total elapsed time.
 */
public class Crawl {
/**
 * Configures two spiders with fixed URLs and destination prefixes under
 * Files.getSysPath(), starts each on its own thread, waits for both to finish,
 * then prints "the end" and the elapsed milliseconds.
 *
 * @param args not read by this method
 * @throws IOException declared by the signature; no statement visible here throws it
 * @throws InterruptedException if the main thread is interrupted while joining
 */
public static void main(String[] args) throws IOException, InterruptedException {
/* Start the wall-clock timer, then configure the first spider. */
long begin = System.currentTimeMillis(); WebSpider spider2 = new WebSpider(); WebSpider spider1 = new WebSpider(); spider1.setWebAddress("http://www.163.com"); spider1.setDestFile(Files.getSysPath() + "/"+"spider1.");
/* Configure the second spider. */
spider2.setWebAddress("http://blog.csdn.net/longronglin"); spider2.setDestFile(Files.getSysPath() + "/"+"spider2.");
/* Run both downloads concurrently, one thread per spider. */
Thread t1 = new Thread(spider1); Thread t2 = new Thread(spider2); t1.start(); t2.start();
/* Block until both worker threads have finished. */
t1.join(); t2.join();
/* Report completion and the total elapsed time in milliseconds. */
System.out.println("the end"); System.out.println(System.currentTimeMillis() - begin); } }</PRE> <PRE class=csharp name="code"> </PRE> <PRE class=csharp name="code">测试通过:</PRE> <PRE class=csharp name="code"></PRE> |