package cn.ksource.core.util; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import org.apache.commons.lang.StringUtils; public class HtmlDownloader { //URL will be downloaded. private static String url = "http://www.modaotea.com.cn/business/website/favoritetea.html?mold=2"; //workspace folder. private static String workspace = "download"; //sub css and js resources sign private static String urlSign = "Note: don't use Java Character Writers, * otherwise you can't get pictures correctly. * * @param subUrl */ private static void downloadChild (String subUrl) { if (StringUtils.isNotEmpty (subUrl)) { if (subUrl.startsWith ("http:")) { System.out.println ("subUrl not support yet."); } else { long start = System.nanoTime (); try { String forUrl = subUrl.replace (" ", "%20"); if (!forUrl.startsWith ("/")) { forUrl = "/" + forUrl; } URL u = new URL (rootUrl + forUrl); InputStream reader = u.openStream (); File f = createDownloadFile (subUrl); FileOutputStream writer = new FileOutputStream (f); byte[] buff = new byte[1024]; int size = -1; while ((size = reader.read (buff)) != -1) { writer.write (buff, 0, size); } reader.close (); writer.close (); } catch (Exception e) { e.printStackTrace (); } System.out.println ("Source:" + subUrl +"download time(s):" + String.format ("%.3f", (double)(System.nanoTime () - start)/ 1000000000.00)); } } else { System.out.println ("subUrl is Empty."); } } /** * create sub file,create parent folders if necessary. * * @param url related path of a url source. * @return created file. */ private static File createDownloadFile (String url) { File f = new File (workspace, url); f.getParentFile ().mkdirs (); return f; } }