⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 porter.java

📁 这是一个用于测试用的搜索引擎的案例
💻 JAVA
字号:
package ir.utilities;

import java.io.*;
import java.util.Locale;

/*
 author:   Fotis Lazarinis (actually I translated from C to Java)
 date:     June 1997
 address:  Psilovraxou 12, Agrinio, 30100
 comments: Compile it, import the Porter class into your program and create an instance.
           Then use the stripAffixes method of this class, which takes a String as
           input and returns the stem of this String again as a String.
*/

/** Mutable String holder; {@link Porter} uses it as an out-parameter for the stem of a matched suffix. */
class NewString {
  public String str;

  NewString() {
    str = "";
  }
}

/**
 * The Porter stemmer for reducing words to their base stem form.
 *
 * <p>Usage: create an instance and call {@link #stripAffixes(String)}.
 *
 * <p>This port deviates from the published algorithm in a few places (for example step 4
 * removes "sion"/"tion" as whole suffixes). Those deviations are kept on purpose so that
 * stems stay identical to the ones earlier versions of this class produced.
 *
 * @author Fotis Lazarinis
 */
public class Porter {

  /** Prefixes removed by {@link #stripPrefixes(String)}; the first one that matches wins. */
  private static final String[] PREFIXES = {
    "kilo", "micro", "milli", "intra", "ultra", "mega", "nano", "pico", "pseudo"
  };

  /** Step 2 rewrite rules as { suffix, replacement }, tried in this order. */
  private static final String[][] STEP2_RULES = {
    { "ational", "ate" },
    { "tional",  "tion" },
    { "enci",    "ence" },
    { "anci",    "ance" },
    { "izer",    "ize" },
    { "iser",    "ize" },
    { "abli",    "able" },
    { "alli",    "al" },
    { "entli",   "ent" },
    { "eli",     "e" },
    { "ousli",   "ous" },
    { "ization", "ize" },
    { "isation", "ize" },
    { "ation",   "ate" },
    { "ator",    "ate" },
    { "alism",   "al" },
    { "iveness", "ive" },
    { "fulness", "ful" },
    { "ousness", "ous" },
    { "aliti",   "al" },
    { "iviti",   "ive" },
    { "biliti",  "ble" }
  };

  /** Step 3 rewrite rules as { suffix, replacement }, tried in this order. */
  private static final String[][] STEP3_RULES = {
    { "icate", "ic" },
    { "ative", "" },
    { "alize", "al" },
    { "alise", "al" },
    { "iciti", "ic" },
    { "ical",  "ic" },
    { "ful",   "" },
    { "ness",  "" }
  };

  /** Suffixes dropped outright by step 4, tried in this order. */
  private static final String[] STEP4_SUFFIXES = {
    "al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent",
    "sion", "tion", "ou", "ism", "ate", "iti", "ous", "ive", "ize", "ise"
  };

  /**
   * Returns {@code str} with every character that is not a letter or digit removed.
   * An empty input yields an empty result.
   */
  private String clean(String str) {
    StringBuilder kept = new StringBuilder(str.length());
    for (int i = 0; i < str.length(); i++) {
      char ch = str.charAt(i);
      if (Character.isLetterOrDigit(ch)) {
        kept.append(ch);
      }
    }
    return kept.toString();
  }

  /** Returns {@code str} without its last {@code count} characters. */
  private static String dropLast(String str, int count) {
    return str.substring(0, str.length() - count);
  }

  /**
   * Tests whether {@code word} is strictly longer than {@code suffix} and ends with it.
   *
   * @param word   the word to inspect
   * @param suffix the ending to look for
   * @param stem   on a successful match receives the part of {@code word} before the suffix;
   *               left untouched otherwise
   * @return {@code true} on a match
   */
  private boolean hasSuffix(String word, String suffix, NewString stem) {
    if (word.length() <= suffix.length() || !word.endsWith(suffix)) {
      return false;
    }
    stem.str = dropLast(word, suffix.length());
    return true;
  }

  /**
   * Classifies a character as vowel or consonant.
   *
   * @param ch   the character to classify
   * @param prev the character preceding it; only consulted when {@code ch} is 'y'
   * @return {@code true} for a, e, i, o, u, and for 'y' unless {@code prev} is one of
   *         a, e, i, o, u
   */
  private boolean vowel(char ch, char prev) {
    switch (ch) {
      case 'a': case 'e': case 'i': case 'o': case 'u':
        return true;
      case 'y':
        switch (prev) {
          case 'a': case 'e': case 'i': case 'o': case 'u':
            return false;
          default:
            return true;
        }
      default:
        return false;
    }
  }

  /**
   * Classifies the character at {@code index} of {@code text}. The first character is judged
   * as if it followed an 'a', which makes a leading 'y' a consonant.
   */
  private boolean vowelAt(String text, int index) {
    char prev = index > 0 ? text.charAt(index - 1) : 'a';
    return vowel(text.charAt(index), prev);
  }

  /**
   * Computes the measure of {@code stem}: the number of positions where a vowel is
   * immediately followed by a consonant.
   */
  private int measure(String stem) {
    int count = 0;
    boolean previousWasVowel = false;
    for (int i = 0; i < stem.length(); i++) {
      boolean currentIsVowel = vowelAt(stem, i);
      if (previousWasVowel && !currentIsVowel) {
        count++;
      }
      previousWasVowel = currentIsVowel;
    }
    return count;
  }

  /** Returns {@code true} if any position of {@code word} is classified as a vowel. */
  private boolean containsVowel(String word) {
    for (int i = 0; i < word.length(); i++) {
      if (vowelAt(word, i)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Tests whether {@code str} ends consonant-vowel-consonant where the final consonant is not
   * 'w', 'x' or 'y'. Strings shorter than three characters never qualify.
   */
  private boolean cvc(String str) {
    int length = str.length();
    if (length < 3) {
      return false;
    }

    char last = str.charAt(length - 1);
    char middle = str.charAt(length - 2);
    char first = str.charAt(length - 3);

    boolean endsVowelConsonant =
        !vowel(last, middle)
            && last != 'w' && last != 'x' && last != 'y'
            && vowel(middle, first);
    if (!endsVowelConsonant) {
      return false;
    }

    // A three-letter string has nothing before its first character, so '?' stands in for it.
    // Because '?' is not a vowel, a leading 'y' is classified as a vowel here (unlike in
    // measure/containsVowel). Kept as is so stems do not change.
    char beforeFirst = length == 3 ? '?' : str.charAt(length - 4);
    return !vowel(first, beforeFirst);
  }

  /**
   * Step 1: handles plural "s" endings, then "eed"/"ed"/"ing", then turns a final "y" into
   * "i" when the rest of the word contains a vowel.
   *
   * @param str a non-empty word
   * @return the rewritten word; empty when {@code str} is exactly "s"
   */
  private String step1(String str) {
    NewString stem = new NewString();

    // Plurals: "sses"/"ies" lose two characters, any other single trailing "s" loses one.
    if (str.charAt(str.length() - 1) == 's') {
      if (hasSuffix(str, "sses", stem) || hasSuffix(str, "ies", stem)) {
        str = dropLast(str, 2);
      } else {
        if (str.length() == 1) {
          return "";
        }
        if (str.charAt(str.length() - 2) != 's') {
          str = dropLast(str, 1);
        }
      }
    }

    if (hasSuffix(str, "eed", stem)) {
      if (measure(stem.str) > 0) {
        str = dropLast(str, 1);
      }
    } else if (hasSuffix(str, "ed", stem) || hasSuffix(str, "ing", stem)) {
      if (containsVowel(stem.str)) {
        str = stem.str;
        if (str.length() == 1) {
          return str;
        }
        if (hasSuffix(str, "at", stem) || hasSuffix(str, "bl", stem) || hasSuffix(str, "iz", stem)) {
          str += "e";
        } else {
          int length = str.length();
          char last = str.charAt(length - 1);
          boolean doubled = last == str.charAt(length - 2);
          if (doubled && last != 'l' && last != 's' && last != 'z') {
            // Undouble the final letter, e.g. "runn" becomes "run".
            str = dropLast(str, 1);
          } else if (measure(str) == 1 && cvc(str)) {
            str += "e";
          }
        }
      }
    }

    if (hasSuffix(str, "y", stem) && containsVowel(stem.str)) {
      str = dropLast(str, 1) + "i";
    }
    return str;
  }

  /**
   * Applies the first rule of {@code rules} whose suffix matches {@code str} and whose
   * remaining stem has a measure greater than {@code minMeasure}. A rule that matches but
   * fails the measure test does not stop the search.
   */
  private String applyFirstRule(String str, String[][] rules, int minMeasure) {
    NewString stem = new NewString();
    for (int i = 0; i < rules.length; i++) {
      if (hasSuffix(str, rules[i][0], stem) && measure(stem.str) > minMeasure) {
        return stem.str + rules[i][1];
      }
    }
    return str;
  }

  /** Step 2: rewrites double suffixes (e.g. "ational" to "ate") when the stem measure exceeds 0. */
  private String step2(String str) {
    return applyFirstRule(str, STEP2_RULES, 0);
  }

  /** Step 3: rewrites or drops suffixes such as "icate", "ful", "ness" when the stem measure exceeds 0. */
  private String step3(String str) {
    return applyFirstRule(str, STEP3_RULES, 0);
  }

  /** Step 4: drops the first matching suffix of the step 4 list whose stem measure exceeds 1. */
  private String step4(String str) {
    NewString stem = new NewString();
    for (int i = 0; i < STEP4_SUFFIXES.length; i++) {
      if (hasSuffix(str, STEP4_SUFFIXES[i], stem) && measure(stem.str) > 1) {
        return stem.str;
      }
    }
    return str;
  }

  /**
   * Step 5: removes a final "e" (depending on measure and the cvc test) and reduces a final
   * "ll" to "l" when the measure exceeds 1.
   *
   * @param str a non-empty word
   */
  private String step5(String str) {
    if (str.charAt(str.length() - 1) == 'e') {
      // The trailing 'e' is a vowel, so measure(str) equals the measure of the word without it.
      int m = measure(str);
      if (m > 1) {
        str = dropLast(str, 1);
      } else if (m == 1) {
        String stem = dropLast(str, 1);
        if (!cvc(stem)) {
          str = stem;
        }
      }
    }

    if (str.length() == 1) {
      return str;
    }

    int length = str.length();
    boolean endsInDoubleL = str.charAt(length - 1) == 'l' && str.charAt(length - 2) == 'l';
    if (endsInDoubleL && measure(str) > 1) {
      str = dropLast(str, 1);
    }
    return str;
  }

  /** Removes the first prefix from the prefix list that {@code str} starts with, if any. */
  private String stripPrefixes(String str) {
    for (int i = 0; i < PREFIXES.length; i++) {
      if (str.startsWith(PREFIXES[i])) {
        return str.substring(PREFIXES[i].length());
      }
    }
    return str;
  }

  /** Runs steps 1 to 5 in order, skipping the remaining steps once the word becomes empty. */
  private String stripSuffixes(String str) {
    str = step1(str);
    if (!str.isEmpty()) {
      str = step2(str);
    }
    if (!str.isEmpty()) {
      str = step3(str);
    }
    if (!str.isEmpty()) {
      str = step4(str);
    }
    if (!str.isEmpty()) {
      str = step5(str);
    }
    return str;
  }

  /**
   * Takes a String as input and returns its stem as a String.
   *
   * <p>The input is lower-cased independently of the default locale and stripped of every
   * character that is not a letter or digit. Words of one or two characters (after that
   * clean-up) are returned without stemming; an empty or all-punctuation input yields "".
   *
   * @param str the word to stem; must not be {@code null}
   * @return the stem of {@code str}
   * @throws NullPointerException if {@code str} is {@code null}
   */
  public String stripAffixes(String str) {
    // Locale.ROOT keeps the result stable, e.g. "I" must not become a dotless i under a Turkish locale.
    str = clean(str.toLowerCase(Locale.ROOT));

    if (str.length() > 2) {
      str = stripPrefixes(str);
      if (!str.isEmpty()) {
        str = stripSuffixes(str);
      }
    }
    return str;
  } //stripAffixes

  /**
   * For testing, print the stemmed version of a word.
   *
   * @param args the word to stem as the first element; a usage line is printed to standard
   *             error when it is missing
   */
  public static void main(String[] args) throws IOException {
    if (args.length == 0) {
      System.err.println("Usage: java ir.utilities.Porter <word>");
      return;
    }
    Porter stemmer = new Porter();
    System.out.println(stemmer.stripAffixes(args[0]));
  }
} //class

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -