import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts category entries of the form
 * {@code <Chinese name>@#/Sort-0-<digits>.htm} from text and prints them.
 *
 * <p>{@link #main(String[])} first prints every entry found in a built-in sample
 * string, then scans a GBK-encoded text file and prints each distinct id with its
 * name, one {@code id<TAB>name} pair per line.
 */
public class PraseSort0htm {

    /**
     * One entry: group 1 is the name (one or more characters in the range
     * U+4E00..U+9FA5), group 2 is the numeric id that follows {@code Sort-0-}.
     * The dot before {@code htm} is escaped so that only a literal '.' matches.
     */
    private static final Pattern ENTRY_PATTERN =
            Pattern.compile("([\u4E00-\u9FA5]+)[@#]+/Sort-0-([0-9]+)\\.htm");

    /** Character encoding the input file is decoded with. */
    private static final String INPUT_ENCODING = "gbk";

    /** File scanned by {@link #main(String[])} when no path argument is supplied. */
    private static final String DEFAULT_FILE_PATH = "d:/xxxx.txt";

    /**
     * Prints the entries found in a sample string, a separator line, and then the
     * distinct entries found in a file.
     *
     * @param args optional; when present, {@code args[0]} is the path of the file to
     *             scan, otherwise {@code d:/xxxx.txt} is used
     */
    public static void main(String[] args) {
        String str = "奇异果@#/Sort-0-012345.htm,橙子@#/Sort-0-012345.htm,苹果@#/Sort-0-012345.htm,热带水果@#/Sort-0-012345.htm,";
        Matcher mat = ENTRY_PATTERN.matcher(str);
        while (mat.find()) {
            System.out.println(mat.group(0));
        }
        System.out.println("xxxxxxxxxxxxxxxxx");

        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        readTxtFile(filePath);
    }

    /**
     * Reads the file line by line (decoded as GBK) and prints, for every entry whose
     * id has not been printed before during this call, the id, a tab, the name and a
     * newline to standard output.
     *
     * <p>This method does not throw: if the path does not denote an existing regular
     * file a "file not found" message is printed, and any exception raised while
     * reading is reported on standard output followed by its stack trace.
     *
     * @param filePath path of the text file to scan
     */
    public static void readTxtFile(String filePath) {
        try {
            File file = new File(filePath);
            // isFile() is only true for an existing regular file, so no separate
            // exists() check is needed.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return;
            }

            Set<String> seenIds = new HashSet<String>();
            FileInputStream in = new FileInputStream(file);
            try {
                BufferedReader reader =
                        new BufferedReader(new InputStreamReader(in, INPUT_ENCODING));
                String line;
                while ((line = reader.readLine()) != null) {
                    printNewEntries(line, seenIds);
                }
            } finally {
                // Close the underlying stream on every path, including read failures
                // and an unsupported encoding.
                in.close();
            }
        } catch (Exception e) {
            // Top-level boundary: keep the report-and-continue contract callers rely on.
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
    }

    /**
     * Prints {@code id<TAB>name<NEWLINE>} for each entry in {@code line} whose id is
     * not yet in {@code seenIds}, adding the id to the set as it goes.
     */
    private static void printNewEntries(String line, Set<String> seenIds) {
        Matcher entry = ENTRY_PATTERN.matcher(line);
        while (entry.find()) {
            String id = entry.group(2);
            if (!seenIds.add(id)) {
                // Already printed: skip only this entry and keep scanning the rest of
                // the line, so later unseen ids are not lost.
                continue;
            }
            System.out.print(id);
            System.out.print("\t");
            System.out.print(entry.group(1));
            System.out.print("\n");
        }
    }
}
|