分享

Java_功能实现_WordCount

 funson 2014-10-23
代码:
package com.test.wordcount;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/**
 * Counts how often each word occurs in a text file.
 *
 * <p>The program reads the source file line by line, splits every line on
 * spaces, commas and periods, prints each word and its frequency to standard
 * output, and finally writes the alphabetically sorted frequencies to a
 * result file (one {@code word: X, times: N} line per distinct word).
 *
 * <p>NOTE: {@link FileReader} and {@link FileWriter} are used without an
 * explicit charset, so the platform default encoding applies.
 */
public class WordCount {
    /** Source file used when no path is passed on the command line. */
    private static final String DEFAULT_SOURCE_PATH = "/home/CORPUSERS/xp009835/shenma.txt";
    /** Result file used when no path is passed on the command line. */
    private static final String DEFAULT_RESULT_PATH = "/home/CORPUSERS/xp009835/shenma_result.txt";
    /** Words are separated by a single space, comma or period. */
    private static final String WORD_SEPARATOR_REGEX = " |,|\\.";

    /**
     * Writer that {@link #writeResult(String)} appends to. It is non-null only
     * while {@link #printMap(Map, String)} is running.
     */
    private static BufferedWriter bw = null;

    /**
     * Program entry point.
     *
     * @param args optional: {@code args[0]} is the source file and
     *             {@code args[1]} the result file; the built-in default paths
     *             are used for whichever argument is missing
     */
    public static void main(String[] args) {
        String sourcePath = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_PATH;
        String resultPath = (args != null && args.length > 1) ? args[1] : DEFAULT_RESULT_PATH;

        File file = new File(sourcePath);
        if (!file.exists()) {
            System.out.println("file " + file + " is not existed, exit");
            return;
        }

        try {
            // The word list is local so repeated invocations never accumulate state.
            List<String> wordList = readWords(file);
            for (String word : wordList) {
                System.out.println(word);
            }

            // Unsorted frequencies, computed in a single pass over the words.
            Map<String, Integer> counts = countWords(wordList);
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                System.out.println("word: " + entry.getKey()
                        + ", occur times: " + entry.getValue());
            }

            // A TreeMap orders the words alphabetically for the result file.
            printMap(new TreeMap<String, Integer>(counts), resultPath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads every non-empty word of the given file, in file order.
     *
     * @param file the text file to read
     * @return all words of the file; empty tokens produced by adjacent
     *         separators are skipped
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> readWords(File file) throws IOException {
        List<String> wordList = new ArrayList<String>();
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String token : line.split(WORD_SEPARATOR_REGEX)) {
                    if (!token.isEmpty()) {
                        wordList.add(token);
                    }
                }
            }
        } finally {
            closeInputStream(reader);
        }
        return wordList;
    }

    /**
     * Builds a word-to-frequency map.
     *
     * @param words the words to count
     * @return a map from each distinct word to its number of occurrences
     */
    private static Map<String, Integer> countWords(List<String> words) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String word : words) {
            Integer count = counts.get(word);
            counts.put(word, (count == null) ? 1 : count + 1);
        }
        return counts;
    }

    /**
     * Prints the map to standard output and records it in the default result file.
     *
     * @param map word frequencies, written in the map's iteration order
     * @throws IOException if the result file cannot be opened or written
     */
    public static void printMap(Map<String, Integer> map) throws IOException {
        printMap(map, DEFAULT_RESULT_PATH);
    }

    /**
     * Prints the map to standard output and records it in {@code resultPath},
     * one {@code word: X, times: N} line per entry. The result file is
     * overwritten, and the writer is always closed before this method returns.
     *
     * @param map        word frequencies, written in the map's iteration order
     * @param resultPath path of the file that receives the result
     * @throws IOException if the result file cannot be opened or written
     */
    public static void printMap(Map<String, Integer> map, String resultPath) throws IOException {
        bw = new BufferedWriter(new FileWriter(resultPath));
        try {
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                String line = "word: " + entry.getKey() + ", times: " + entry.getValue();
                System.out.println(line);
                writeResult(line);
            }
            // Flush explicitly: closeOutputStream() would swallow a failure
            // raised by the implicit flush performed during close().
            bw.flush();
        } finally {
            closeOutputStream(bw);
            bw = null;
        }
    }

    /**
     * Appends one line to the result file that is currently open.
     * Does nothing when no result file is open.
     *
     * @param line the text to write, without a line terminator
     * @throws IOException if writing fails
     */
    public static void writeResult(String line) throws IOException {
        if (bw == null) {
            return;
        }
        bw.write(line);
        bw.newLine();
    }

    /**
     * Closes the writer, printing (not propagating) any failure.
     *
     * @param writer the writer to close; may be {@code null}
     */
    public static void closeOutputStream(BufferedWriter writer) {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the reader, printing (not propagating) any failure.
     *
     * @param reader the reader to close; may be {@code null}
     */
    public static void closeInputStream(BufferedReader reader) {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

shenma.txt文件内容:
ni hao ma, ni
aaa bb mam ma
li hao yu
luan tao, very good
luan luan ni is a bad man.

输出(节选):
word: a, times: 1
word: aaa, times: 1
word: bad, times: 1
word: bb, times: 1
word: good, times: 1
word: hao, times: 2
word: is, times: 1
word: li, times: 1
word: luan, times: 3
word: ma, times: 2
word: mam, times: 1
word: man, times: 1
word: ni, times: 3
word: tao, times: 1
word: very, times: 1
word: yu, times: 1

 

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0)

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多