rulebasedcollator.java

来自「kaffe Java 解释器语言,源码,Java的子集系统,开放源代码」· Java 代码 · 共 948 行 · 第 1/2 页

JAVA
948
字号
	    throw new ParseException	      ("operator missing at " + (i+base_offset), i+base_offset);	  if (!eatingChars &&	      ((c >= 0x21 && c <= 0x2F) 	       || (c >= 0x3A && c <= 0x40)	       || (c >= 0x5B && c <= 0x60)	       || (c >= 0x7B && c <= 0x7E)))	    throw new ParseException	      ("unquoted punctuation character '"+c+"'", i+base_offset);	  //type = ignoreChars ? CollationSorter.IGNORE : -1;	  sb.append(c);	  break;	}	if (type  < 0)	  continue;	if (operator < 0)	  {	    operator = type;	    continue;	  }	if (sb.length() == 0 && !isModifier)	  throw new ParseException	    ("text element empty at " + (i+base_offset), i+base_offset);	if (operator == CollationSorter.RESET)	  {	    /* Reposition in the sorting list at the position	     * indicated by the text element.	     */	    String subrules = rules.substring(i);	    Vector sorted_rules = new Vector();	    int idx;	    // Parse the subrules but do not iterate through all	    // sublist. This is the priviledge of the first call.	    idx = subParseString(true, sorted_rules, base_offset+i, subrules);    	    // Merge new parsed rules into the list.	    mergeRules(base_offset+i, sb.toString(), v, sorted_rules);	    sb.setLength(0);	    	    // Reset state to none.	    operator = -1;	    type = -1;	    // We have found a new subrule at 'idx' but it has not been parsed.	    if (idx >= 0)	      {		i += idx-1;		continue main_parse_loop;	      }	    else		// No more rules.		
break main_parse_loop;
          }

        // Record the accumulated text element together with the operator
        // that preceded it, then remember the operator that was just read.
        CollationSorter sorter = new CollationSorter();

        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        sorter.offset = base_offset+rules.length();
        sb.setLength(0);

        v.add(sorter);
        operator = type;
      }

    // An operator is still pending once the loop is over: emit the final
    // sorting instruction for whatever text is left in the buffer.
    if (operator >= 0)
      {
        CollationSorter sorter = new CollationSorter();
        int pos = rules.length() + base_offset;

        if ((sb.length() != 0 && nextIsModifier)
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
          throw new ParseException("text element empty at " + pos, pos);

        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        // NOTE(review): pos already contains base_offset, so base_offset is
        // counted twice here, unlike the in-loop assignment above -- confirm
        // whether this is intended.
        sorter.offset = base_offset+pos;
        v.add(sorter);
      }

    // -1 tells the caller that the whole string has been consumed; any other
    // value is the index at which parsing stopped.
    if (i == rules.length())
      return -1;
    else
      return i;
  }

  /**
   * This method creates a copy of this object.
   *
   * @return A copy of this object, as produced by the superclass
   * <code>clone()</code>.
   */
  public Object clone()
  {
    return super.clone();
  }

  /**
   * This method completely parses a string 'rules' containing sorting rules.
   * It delegates to <code>subParseString</code> with a base offset of 0 and
   * a fresh, empty Vector.
   *
   * @param rules String containing the rules to be parsed.
   * @return A set of sorting instructions stored in a Vector.
   * @throws ParseException if something went wrong during the parsing. To
   * get details decode the message.
   */
  private Vector parseString(String rules)
    throws ParseException
  {
    Vector v = new Vector();

    // result of the first subParseString is not absolute (may be -1 or a
    // positive integer). But we do not care.
    subParseString(false, v, 0, rules);

    return v;
  }

  /**
   * This method uses the sorting instructions built by {@link #parseString}
   * to build collation elements which can be directly used to sort strings.
   *
   * @param parsedElements Parsed instructions stored in a Vector.
   * @throws ParseException if the order of the instructions is not valid.
*/  private void buildCollationVector(Vector parsedElements)    throws ParseException  {    int primary_seq = 0;    short secondary_seq = 0;    short tertiary_seq = 0;    short equality_seq = 0;    boolean inverseComparisons = false;    final boolean DECREASING = false;    final boolean INCREASING = true;    boolean secondaryType = INCREASING;    Vector v = new Vector();    // elts is completely sorted.element_loop:    for (int i = 0; i < parsedElements.size(); i++)      {	CollationSorter elt = (CollationSorter) parsedElements.elementAt(i);	boolean ignoreChar = false;	switch (elt.comparisonType)	  {	  case CollationSorter.GREATERP:	    primary_seq++;	    if (inverseComparisons)	      {		secondary_seq = Short.MAX_VALUE;		secondaryType = DECREASING;	      }	    else	      {		secondary_seq = 0;		secondaryType = INCREASING;	      }	    tertiary_seq = 0;	    equality_seq = 0;	    inverseComparisons = false;	    break;	  case CollationSorter.GREATERS:	    if (secondaryType == DECREASING)	      secondary_seq--;	    else	      secondary_seq++;	    tertiary_seq = 0;	    equality_seq = 0;	    break;	  case CollationSorter.INVERSE_SECONDARY:	    inverseComparisons = true;	    continue element_loop;	  case CollationSorter.GREATERT:	    tertiary_seq++;	    equality_seq = 0;	    break;	  case CollationSorter.IGNORE:	    ignoreChar = true;	  case CollationSorter.EQUAL:	    equality_seq++;	    break;	  case CollationSorter.RESET:	    throw new ParseException	      ("Invalid reached state 'RESET'. 
Internal error", elt.offset);	  default:	    throw new ParseException	      ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);	  }	CollationElement e;	if (!ignoreChar)	  {	    e = new CollationElement(elt.textElement, primary_seq,				     secondary_seq, tertiary_seq,				     equality_seq, elt.expansionOrdering);	  }	else	  e = new CollationElement(elt.textElement);	v.add(e);      }    this.inverseAccentComparison = inverseComparisons;     ce_table = v.toArray();    last_primary_value = primary_seq+1;  }  /**   * Build a tree where all keys are the texts of collation elements and data is   * the collation element itself. The tree is used when extracting all prefix   * for a given text.   */  private void buildPrefixAccess()  {    prefix_tree = new HashMap();    for (int i = 0; i < ce_table.length; i++)      {	CollationElement e = (CollationElement) ce_table[i];	prefix_tree.put(e.key, e);      }  }  /**   * This method returns an integer which indicates whether the first   * specified <code>String</code> is less than, greater than, or equal to   * the second.  The value depends not only on the collation rules in   * effect, but also the strength and decomposition settings of this object.   *   * @param source The first <code>String</code> to compare.   * @param target A second <code>String</code> to compare to the first.   *   * @return A negative integer if source &lt; target, a positive integer   * if source &gt; target, or 0 if source == target.   
*/  public int compare(String source, String target)  {    CollationElementIterator cs, ct;    cs = getCollationElementIterator(source);    ct = getCollationElementIterator(target);    for(;;)      {        CollationElement ord1block = cs.nextBlock();         CollationElement ord2block = ct.nextBlock(); 	int ord1;	int ord2;	if (ord1block != null)	  ord1 = ord1block.getValue();	else	  {	    if (ord2block == null)	      return 0;	    return -1;	  }	if (ord2block == null)	  return 1;		ord2 = ord2block.getValue();		// We know chars are totally equal, so skip        if (ord1 == ord2)	  {	    if (getStrength() == IDENTICAL)	      if (!ord1block.key.equals(ord2block.key))		return ord1block.key.compareTo(ord2block.key);	    continue;	  }        // Check for primary strength differences        int prim1 = cs.primaryOrder(ord1);         int prim2 = ct.primaryOrder(ord2);         if (prim1 < prim2)          return -1;        else if (prim1 > prim2)          return 1;        else if (getStrength() == PRIMARY)          continue;        // Check for secondary strength differences        int sec1 = cs.secondaryOrder(ord1);        int sec2 = ct.secondaryOrder(ord2);        if (sec1 < sec2)          return -1;        else if (sec1 > sec2)          return 1;        else if (getStrength() == SECONDARY)          continue;        // Check for tertiary differences        int tert1 = cs.tertiaryOrder(ord1);        int tert2 = ct.tertiaryOrder(ord2);        if (tert1 < tert2)          return -1;        else if (tert1 > tert2)          return 1;	else if (getStrength() == TERTIARY)	  continue;	// Apparently JDK does this (at least for my test case).	return ord1block.key.compareTo(ord2block.key);          }  }  /**   * This method tests this object for equality against the specified    * object.  This will be true if and only if the specified object is   * another reference to this object.   *   * @param obj The <code>Object</code> to compare against this object.   
*   * @return <code>true</code> if the specified object is equal to this object,   * <code>false</code> otherwise.   */  public boolean equals(Object obj)  {    if (obj == this)      return true;    else      return false;  }  /**   * This method builds a default collation element without invoking   * the database created from the rules passed to the constructor.   *   * @param c Character which needs a collation element.   * @return A valid brand new CollationElement instance.   */  CollationElement getDefaultElement(char c)  {    int v;    // Preliminary support for generic accent sorting inversion (I don't know if all    // characters in the range should be sorted backward). This is the place    // to fix this if needed.    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))      v = 0x0361 - ((int) c - 0x02B9);    else      v = (short) c;    return new CollationElement("" + c, last_primary_value + v,				(short) 0, (short) 0, (short) 0, null);  }  /**   * This method returns an instance for <code>CollationElementIterator</code>   * for the specified <code>String</code> under the collation rules for this   * object.   *   * @param source The <code>String</code> to return the   * <code>CollationElementIterator</code> instance for.   *   * @return A <code>CollationElementIterator</code> for the specified   * <code>String</code>.   */  public CollationElementIterator getCollationElementIterator(String source)  {    return new CollationElementIterator(this, source);  }  /**   * This method returns an instance of <code>CollationElementIterator</code>   * for the <code>String</code> represented by the specified   * <code>CharacterIterator</code>.   *   * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.   *   * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.   
*/
  public CollationElementIterator getCollationElementIterator(CharacterIterator source)
  {
    StringBuffer expand = new StringBuffer("");

    // Right now we assume that we will read from the beginning of the string.
    for (char c = source.first();
         c != CharacterIterator.DONE;
         c = source.next())
      decomposeCharacter(c, expand);

    return getCollationElementIterator(expand.toString());
  }

  /**
   * This method returns an instance of <code>CollationKey</code> for the
   * specified <code>String</code>.  The object returned will have a
   * more efficient mechanism for its comparison function that could
   * provide speed benefits if multiple comparisons are performed, such
   * as during a sort.
   *
   * The key is the sequence of collation orders of the string, each stored
   * as a 4-byte big-endian integer. What is stored depends on the strength:
   * the primary order only for PRIMARY, the primary order followed by the
   * secondary order for SECONDARY, and the full order otherwise.
   *
   * @param source The <code>String</code> to create a <code>CollationKey</code> for.
   *
   * @return A <code>CollationKey</code> for the specified <code>String</code>.
   */
  public CollationKey getCollationKey(String source)
  {
    CollationElementIterator cei = getCollationElementIterator(source);
    Vector vect = new Vector(25);
    int strength = getStrength();

    // Read each order exactly once. Calling next() and then reset() before
    // the loop would put the first order into the key twice.
    for (int ord = cei.next();
         ord != CollationElementIterator.NULLORDER;
         ord = cei.next())
      {
        switch (strength)
          {
          case PRIMARY:
            vect.add(new Integer(cei.primaryOrder(ord)));
            break;

          case SECONDARY:
            // Keep the primary weight next to the secondary one. The
            // secondary order alone would make strings that differ only in
            // primary weight produce equal keys, whereas compare() checks
            // the primary order before the secondary order.
            vect.add(new Integer(cei.primaryOrder(ord)));
            vect.add(new Integer(cei.secondaryOrder(ord)));
            break;

          default:
            vect.add(new Integer(ord));
            break;
          }
      }

    // Serialize every order most significant byte first.
    int count = vect.size();
    byte[] key = new byte[count * 4];

    for (int i = 0; i < count; i++)
      {
        int j = ((Integer) vect.elementAt(i)).intValue();

        key[i * 4] = (byte) (j >>> 24);
        key[i * 4 + 1] = (byte) (j >>> 16);
        key[i * 4 + 2] = (byte) (j >>> 8);
        key[i * 4 + 3] = (byte) j;
      }

    return new CollationKey(this, source, key);
  }

  /**
   * This method returns a <code>String</code> containing the collation rules
   * for this object.
   *
   * @return The collation rules for this object.
   */
  public String getRules()
  {
    return rules;
  }

  /**
   * This method returns a hash value for this object. The value is the
   * identity hash code, which matches the reference-equality semantics of
   * <code>equals</code>.
   *
   * @return A hash value for this object.
   */
  public int hashCode()
  {
    return System.identityHashCode(this);
  }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?