用户注册



邮箱:

密码:

用户登录


邮箱:

密码:
记住登录一个月 | 忘记密码?

发表随想


还能输入:200字
云代码 - java代码库

Java读取html页面文件解析邮箱地址

2015-01-07 作者: java源代码大全 | 举报

[java]代码库

package com.alpha.test;import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/**
 * 读取html页面文件解析邮箱地址
 * 
 * @author JavaAlpha 2012-12-19 13:45:11
 */
public class ReadHtmlToTxt {

    /**
     * Matches a plain e-mail address: a local part, an "@", one or more
     * dot-separated host labels and an alphabetic top-level domain of at
     * least two letters. Compiled once because it is reused on every scan.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile(
            "[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)*\\.[A-Za-z]{2,}");

    /** Matches a single character in the range U+4E00..U+9FA5. */
    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fa5]");

    /**
     * Reads a file and extracts every e-mail address found in it.
     *
     * <p>The whole file is read before scanning, so an address that happens to
     * straddle a read-buffer boundary is still found. Each address is also
     * echoed to standard output.
     *
     * @param path path of the file to read
     * @return the addresses found, separated by the platform line separator;
     *         an empty string if the path is not a readable regular file or
     *         no address is present
     */
    public static String readHtml(String path) {
        File htmlFile = new File(path);
        if (!htmlFile.exists() || !htmlFile.isFile() || !htmlFile.canRead()) {
            return "";
        }

        StringBuilder html = new StringBuilder();
        // try-with-resources guarantees the reader is closed even if read() fails.
        try (Reader in = new FileReader(htmlFile)) {
            char[] buff = new char[4096];
            int nch;
            while ((nch = in.read(buff, 0, buff.length)) != -1) {
                html.append(buff, 0, nch);
            }
        } catch (IOException e) {
            // Best effort: report the failure and scan whatever was read so far.
            e.printStackTrace();
        }

        StringBuilder emails = new StringBuilder();
        Matcher matcher = EMAIL_PATTERN.matcher(html);
        while (matcher.find()) {
            if (emails.length() > 0) {
                emails.append(System.lineSeparator());
            }
            String address = matcher.group();
            System.out.println(address);
            emails.append(address);
        }
        return emails.toString();
    }

    /**
     * Extracts the first e-mail address contained in a string.
     *
     * <p>Surrounding markup and punctuation (angle brackets, slashes, commas,
     * and so on) are not part of the match, so they never end up in the
     * result. When an address is found it is also echoed to standard output.
     *
     * @param str text to search; may be {@code null}
     * @return the first address found, or an empty string if {@code str} is
     *         {@code null} or contains no address
     */
    public static String checkEmail(String str) {
        if (str == null) {
            return "";
        }
        Matcher matcher = EMAIL_PATTERN.matcher(str);
        if (!matcher.find()) {
            return "";
        }
        String address = matcher.group();
        System.out.println(address);
        return address;
    }

    /**
     * Tells whether a string contains at least one character in the range
     * U+4E00..U+9FA5.
     *
     * @param str text to inspect; may be {@code null}
     * @return {@code true} if such a character is present, {@code false}
     *         otherwise (including for {@code null})
     */
    public static boolean checkChinese(String str) {
        return str != null && CHINESE_PATTERN.matcher(str).find();
    }

    /**
     * Writes text to a file, creating the file if needed and replacing any
     * previous content. I/O failures are reported on standard error and not
     * propagated.
     *
     * @param cont text to write; {@code null} is written as empty content
     * @param path path of the destination file
     */
    public static void writerFile(String cont, String path) {
        File emailFile = new File(path);
        // FileWriter creates the file when it does not exist, and
        // try-with-resources flushes and closes it on every exit path.
        try (Writer out = new FileWriter(emailFile)) {
            out.write(cont == null ? "" : cont);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the content behind a URL and prints it to standard output,
     * with {@code </br>} appended after every line read. Failures are reported
     * on standard error; whatever was read before the failure is still printed.
     *
     * @param strUrl address of the resource to fetch
     */
    public static void readUrlCont(String strUrl) {
        StringBuilder cont = new StringBuilder();

        try {
            URL url = new URL(strUrl);
            URLConnection conn = url.openConnection();
            // The reader (and the underlying stream) is closed on every exit path.
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
                String lineCont;
                while ((lineCont = reader.readLine()) != null) {
                    cont.append(lineCont).append("</br>");
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        System.out.println(cont.toString());
    }

    /**
     * Demo entry point: fetches a web page and prints it.
     *
     * <p>Other usages of this class:
     * <pre>
     * String cont = readHtml("e://test.htm"); // extract addresses from a file
     * writerFile(cont, "e://test.txt");       // save them to a file
     * checkChinese("qwe123");                 // test for Chinese characters
     * </pre>
     *
     * @param args unused
     */
    public static void main(String[] args) {
        readUrlCont("http://www.163.com");
    }
}

//源代码片段来自云代码http://yuncode.net
			


网友评论    (发表评论)


发表评论:

评论须知:

  • 1、评论每次加2分,每天上限为30;
  • 2、请文明用语,共同创建干净的技术交流环境;
  • 3、若被发现提交非法信息,评论将会被删除,并且给予扣分处理,严重者给予封号处理;
  • 4、请勿发布广告信息或其他无关评论,否则将会删除评论并扣分,严重者给予封号处理。


扫码下载

加载中,请稍候...

输入口令后可复制整站源码

加载中,请稍候...