Java单词计数器

类别:Java 点击:0 评论:0 推荐:

花了一个中午搞定了一个单词计数器,可以按字典序和词频两种方式排序,功能还比较强大。
package treeroot.util;
//author treeroot
//since  2004-12-3
/**
 * A word together with the number of times it has been counted.
 *
 * The text is normalized to lower case at construction, so two instances built
 * from "The" and "the" are equal and share a hash code. Equality and hashing
 * depend only on the text, never on the count.
 */
public class Word {
    /** Lower-cased text of the word; fixed at construction. */
    private final String value;

    /** Number of occurrences recorded so far; a new word starts at 1. */
    private int count = 1;

    /**
     * Creates a word with an occurrence count of 1.
     *
     * @param value the text of the word, in any case
     * @throws NullPointerException if value is null
     */
    public Word(String value) {
        if (value == null) {
            throw new NullPointerException("value must not be null");
        }
        // Use a fixed locale: with the JVM default locale the result depends on
        // the machine (under a Turkish locale "I" lower-cases to a dotless i),
        // which would make the same text count as different words.
        this.value = value.toLowerCase(java.util.Locale.ENGLISH);
    }

    /** Records one more occurrence of this word. */
    protected void increase() {
        count++;
    }

    /**
     * @return the lower-cased text of this word
     */
    public String getWord() {
        return value;
    }

    /**
     * @return the number of occurrences recorded so far
     */
    public int getCount() {
        return count;
    }

    /**
     * Two words are equal when their lower-cased text is equal; the count is
     * deliberately ignored.
     */
    public boolean equals(Object o) {
        if (!(o instanceof Word)) {
            return false;
        }
        return value.equals(((Word) o).value);
    }

    /** Hash code of the lower-cased text, consistent with equals. */
    public int hashCode() {
        return value.hashCode();
    }
}


package treeroot.util;
//author treeroot
//since  2004-12-3
import java.util.*;

public class WordCount
{
 private static class WordSet implements Set{
 
  private Map map=new HashMap();

  public int size(){
   return map.size();
  }
  public boolean isEmpty(){
   return map.isEmpty();
  }
  public boolean contains(Object o){
   return map.containsKey(o);
  }
  public Iterator iterator(){
   return map.keySet().iterator();
  } 
  public Object[] toArray(){
   return map.keySet().toArray();
  }
  public Object[] toArray(Object[] a){
   return map.keySet().toArray(a);
  }
  public boolean add(Object o){
   if(map.containsKey(o)){
    ((Word)map.get(o)).increase();
   }
   else {
    map.put(o,o);
   }
   return true;
  }
  public boolean remove(Object o){
   return false;
  }
  public boolean addAll(Collection c){
   return false;
  }
  public boolean removeAll(Collection c){
   return false;
  }
  public boolean retainAll(Collection c){
   return false;
  }
  public boolean containsAll(Collection c){
   return map.keySet().containsAll(c);
  }
  public void clear(){}
  public boolean equals(Object c){
   return map.keySet().equals(c);
  }
  public int hashCode(){
   return map.keySet().hashCode();
  } 
 }

 

 
 public static Set getWordCount(String s,Comparator order){
  Set set=new WordSet();
  String split1="[^a-zA-Z\\-_']+";
  String split2="[^a-zA-Z]+[\\-_'][^a-zA-Z]*";
  String split3="[^a-zA-Z]*[\\-_'][^a-zA-Z]+";
  
  String regex = "("+split2+")|("+split3+")|("+split1+")";
  String[] words = s.split(regex);
  for(int i=0;i<words.length;i++){
   set.add(new Word(words[i]));
  }
  Set sort=new TreeSet(order);
  sort.addAll(set);
  
  return Collections.synchronizedSet(sort);
 }
 public static Comparator DICTIONARY_ORDER=new Comparator(){
  public int compare(Object o1,Object o2){
   Word w1=(Word)o1;
   Word w2=(Word)o2;
   return w1.getWord().compareTo(w2.getWord());
  }
 };
 public static Comparator FREQUENCY_ORDER =new Comparator(){
  public int compare(Object o1,Object o2){
   Word w1=(Word)o1;
   Word w2=(Word)o2;
   int i=w2.getCount()-w1.getCount();
   if(i==0){
    return w1.getWord().compareTo(w2.getWord());
   }
   return i;
  }
 };
 public static void main(String[] args)
 {
  String s="A regular expression, specified as a string, must first be compiled into an instance of this class. The resulting pattern can then be used to create a Matcher object that can match arbitrary character sequences against the regular expression. All of the state involved in performing a match resides in the matcher, so many matchers can share the same pattern. ";
    Set set=WordCount.getWordCount(s,WordCount.FREQUENCY_ORDER);
    for(Iterator it=set.iterator();it.hasNext();){
     Word w=(Word)it.next();
     int i=4-w.getWord().length()/8;
     String tab="";
     for(int j=0;j<i;j++)
      tab+="\t";
     System.out.println(w.getWord()+tab+w.getCount());
    }
 }
}

 

本文地址:http://com.8s8s.com/it/it12949.htm