import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Objects;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Whitelist;

public class App { |
static Document doc; |
public static String getContent( int id) throws Exception { |
doc = Jsoup.connect( "http://www.xstxt.com/fanrenxiuxianchuan/" + id) |
.timeout( 30000 ).get(); |
String title = doc.title(); |
title = title.substring( 6 , title.length() - 8 ); |
// System.out.println(title); |
String txt = doc.getElementById( "booktext" ).toString(); |
txt = Jsoup.clean(txt, Whitelist.none()); |
txt = txt.replaceAll( " " , "\\n" ); |
txt = txt.replace( "\\n\\n" , "\\n" ).replace( "\\n\\n" , "\\n" ) |
.replace( "\\n\\n" , "\\n" ).replace( "\\n \\n " , "\\n" ) |
.replace( "\\n\\n" , "\\n" ); |
// System.out.println(txt); |
return title + txt; |
} |
public static void main(String[] args) throws Exception { |
String filename = "z:/dd.txt" ; |
BufferedWriter bw = new BufferedWriter( new FileWriter(filename)); |
String str = "" ; |
for ( int i = 0 ; i < 1000 ; i++) { |
System.out.println(i); |
str = getContent( 1071907 +i); |
bw.write(str); |
bw.write( "\\n\\n" ); |
} |
bw.close(); |
} |
} |
// Source code snippet from Yuncode (云代码): http://yuncode.net
|