xmlreader.java

来自「一个很不错的词频统计程序,目前只支持英文,中文的本人正在修改中.改好后上传给大家」· Java 代码 · 共 672 行 · 第 1/2 页
JAVA
672 行
            if (c == -1)
                exception(UNEXPECTED_EOF);

            String attrName = readName();

            if (attrName.length() == 0)
                exception("attr name expected");

            skip();
            read('=');

            skip();
            int delimiter = read();

            if (delimiter != '\'' && delimiter != '"') {
                if (!relaxed)
                    exception("<" + name + ">: invalid delimiter: " + (char) delimiter);

                delimiter = ' ';
            }

            int i = (attributeCount++) << 1;

            attributes = ensureCapacity(attributes, i + 4);

            attributes[i++] = attrName;

            int p = txtPos;
            pushText(delimiter);

            attributes[i] = pop(p);

            if (delimiter != ' ')
                read(); // skip endquote
        }
    }

    /**
     * Consumes one entity reference ("&amp;...;") from the input and pushes
     * its replacement onto the text buffer.
     *
     * <ul>
     * <li>Numeric references ("#123" decimal, "#x7B" hexadecimal) push the
     * referenced character.</li>
     * <li>Named references are looked up in the entity map.</li>
     * <li>Unknown names and malformed numeric references (for example
     * "&amp;#;", "&amp;#x;", "&amp;#xZZ;" or a negative value) are pushed
     * back literally as "&amp;" + name + ";", so that bad input never
     * surfaces as an unchecked StringIndexOutOfBoundsException or
     * NumberFormatException.</li>
     * </ul>
     *
     * @return true if every character pushed is whitespace (code &lt;= ' ')
     * @throws IOException if reading from the underlying input fails
     */
    public final boolean pushEntity() throws IOException {

        read(); // &

        int pos = txtPos;

        // Collect the entity name in the text buffer, up to ';' or end of input.
        while (!eof && peek0 != ';')
            push(read());

        String code = pop(pos);

        read(); // ;

        if (code.length() > 1 && code.charAt(0) == '#') {
            int c = -1;
            try {
                c = (code.charAt(1) == 'x' ? Integer.parseInt(code.substring(2), 16) : Integer.parseInt(code
                        .substring(1)));
            } catch (NumberFormatException ignored) {
                // Malformed number: c stays negative and the reference is
                // handled by the literal fallback below.
            }

            // parseInt accepts a leading sign; a negative value is not a character.
            if (c >= 0) {
                push(c);
                return c <= ' ';
            }
        }

        String result = (String) entityMap.get(code);
        boolean whitespace = true;

        // Not resolvable: keep the reference text as it appeared.
        if (result == null)
            result = "&" + code + ";";

        for (int i = 0; i < result.length(); i++) {
            char c = result.charAt(i);
            if (c > ' ')
                whitespace = false;
            push(c);
        }

        return whitespace;
    }

    /**
     * Copies input characters into the text buffer until the given delimiter
     * is the next character or the input is exhausted. Entity references met
     * on the way are expanded through pushEntity().
     *
     * <p>
     * Delimiter modes used by the callers in this class: '&lt;' reads up to
     * the next markup, a quote character reads up to the matching quote, and
     * ' ' (unquoted value) stops at any character &lt;= ' ' or at '&gt;'.
     *
     * @param delimiter the character that ends the run; it is not consumed
     * @return true if everything pushed was whitespace
     */
    private final boolean pushText(int delimiter) throws IOException {

        final boolean unquoted = (delimiter == ' ');
        boolean onlyWhitespace = true;

        for (int ch = peek0; !eof && ch != delimiter; ch = peek0) {

            // An unquoted value ends at whitespace or at the tag's closing '>'.
            if (unquoted && (ch <= ' ' || ch == '>')) {
                break;
            }

            if (ch == '&') {
                // '&=' always evaluates its right-hand side, so the entity is
                // consumed even when non-whitespace has already been seen.
                onlyWhitespace &= pushEntity();
            } else {
                onlyWhitespace &= (ch <= ' ');
                push(read());
            }
        }

        return onlyWhitespace;
    }

    // --------------- public part starts here... ---------------

    /**
     * Creates a parser on top of the given reader. Two characters of
     * lookahead are read immediately, the five predefined XML entities are
     * registered, and the position counters start at line 1, column 1.
     *
     * @param reader the character source to parse
     * @throws IOException if filling the lookahead fails
     */
    public XmlReader(Reader reader) throws IOException {
        // Position reporting is 1-based.
        line = 1;
        column = 1;

        // Predefined entities; more can be added via defineCharacterEntity().
        entityMap = new Hashtable();
        entityMap.put("amp", "&");
        entityMap.put("apos", "'");
        entityMap.put("gt", ">");
        entityMap.put("lt", "<");
        entityMap.put("quot", "\"");

        // Prime the two-character lookahead.
        this.reader = reader;
        peek0 = reader.read();
        peek1 = reader.read();

        // An empty input is at end-of-file right away.
        eof = (peek0 == -1);
    }

    /**
     * Registers (or replaces) the replacement text for a named entity.
     *
     * @param entity the entity name as looked up by pushEntity(), i.e.
     *               without the leading '&amp;' and trailing ';'
     * @param value  the text pushed in place of the reference
     */
    public void defineCharacterEntity(String entity, String value) {
        this.entityMap.put(entity, value);
    }

    /**
     * Returns the parser's current depth counter.
     */
    public int getDepth() {
        return this.depth;
    }

    /**
     * Builds a human-readable description of the current event in the form
     * {@code TYPE @line:column: detail}. The detail is the tag
     * ({@code <name>} or {@code </name>}) for start/end tags,
     * {@code [whitespace]} for whitespace-only events, and the event text
     * otherwise.
     *
     * @return the description of the current parser position
     */
    public String getPositionDescription() {

        // Types beyond the TYPES table are labelled generically.
        String label = "Other";
        if (type < TYPES.length) {
            label = TYPES[type];
        }

        StringBuffer description = new StringBuffer(label);
        description.append(" @").append(line).append(':').append(column).append(": ");

        boolean endTag = (type == END_TAG);

        if (endTag || type == START_TAG) {
            description.append(endTag ? "</" : "<");
            description.append(name);
            description.append('>');
        } else if (!isWhitespace) {
            description.append(getText());
        } else {
            description.append("[whitespace]");
        }

        return description.toString();
    }

    /**
     * Returns the current line counter (initialised to 1 by the constructor).
     */
    public int getLineNumber() {
        return this.line;
    }

    /**
     * Returns the current column counter (initialised to 1 by the constructor).
     */
    public int getColumnNumber() {
        return this.column;
    }

    /**
     * Returns the whitespace flag of the current event. After next() it is
     * true only for a TEXT event in which nothing non-whitespace was pushed.
     */
    public boolean isWhitespace() {
        return this.isWhitespace;
    }

    /**
     * Returns the text of the current event. The string is taken from the
     * text buffer on first use and cached until next() clears it.
     */
    public String getText() {

        if (text != null) {
            return text;
        }

        // First request for this event: materialise the buffer and cache it.
        text = pop(0);
        return text;
    }

    /**
     * Returns the name recorded for the current event; next() resets it to
     * null before each token is parsed.
     */
    public String getName() {
        return this.name;
    }

    /**
     * Returns the "degenerated" flag. While it is set, the following call to
     * next() reports an END_TAG without consuming input.
     */
    public boolean isEmptyElementTag() {
        return this.degenerated;
    }

    /**
     * Returns the number of attributes recorded for the current event.
     */
    public int getAttributeCount() {
        return this.attributeCount;
    }

    /**
     * Returns the name of the attribute at the given position.
     *
     * <p>
     * Attributes are stored as name/value pairs in one array, the name at
     * slot {@code 2 * index}. The lower bound is checked explicitly because
     * {@code index << 1} wraps around for Integer.MIN_VALUE and would
     * otherwise address slot 0.
     *
     * @param index zero-based attribute position
     * @return the attribute name
     * @throws IndexOutOfBoundsException if index is negative or not less
     *         than getAttributeCount()
     */
    public String getAttributeName(int index) {
        if (index < 0 || index >= attributeCount)
            throw new IndexOutOfBoundsException("attribute index " + index + ", count " + attributeCount);
        return attributes[index << 1];
    }

    /**
     * Returns the value of the attribute at the given position.
     *
     * <p>
     * Attributes are stored as name/value pairs in one array, the value at
     * slot {@code 2 * index + 1}. The lower bound is checked explicitly
     * because {@code index << 1} wraps around for Integer.MIN_VALUE and
     * would otherwise address slot 1.
     *
     * @param index zero-based attribute position
     * @return the attribute value
     * @throws IndexOutOfBoundsException if index is negative or not less
     *         than getAttributeCount()
     */
    public String getAttributeValue(int index) {
        if (index < 0 || index >= attributeCount)
            throw new IndexOutOfBoundsException("attribute index " + index + ", count " + attributeCount);
        return attributes[(index << 1) + 1];
    }

    /**
     * Looks up an attribute value by name. The pairs are scanned from the
     * last one to the first, so if a name occurs more than once the value
     * of the last occurrence is returned.
     *
     * @param name the attribute name to search for
     * @return the matching value, or null if no attribute has that name
     */
    public String getAttributeValue(String name) {

        // Names sit in the even slots; begin at the last recorded pair.
        int nameSlot = (attributeCount << 1) - 2;

        while (nameSlot >= 0) {
            if (attributes[nameSlot].equals(name)) {
                return attributes[nameSlot + 1];
            }
            nameSlot -= 2;
        }

        return null;
    }

    /**
     * Returns the type of the current event, as last set by next().
     */
    public int getType() {
        return this.type;
    }

    /**
     * Advances to the next event and returns its type.
     *
     * <p>
     * If the degenerated flag is set, an END_TAG is reported without reading
     * any input. Otherwise tokens are parsed in a loop: entity references
     * and CDATA sections are reported as TEXT, adjacent text-like tokens are
     * merged into a single TEXT event, and tokens whose type is greater than
     * TEXT after handling are passed over.
     *
     * @return the type of the new current event
     * @throws IOException if reading or parsing fails
     */
    public int next() throws IOException {

        // Pending synthetic END_TAG: report it and clear the flag; no input
        // is consumed on this path.
        if (degenerated) {
            type = END_TAG;
            degenerated = false;
            depth--;
            return type;
        }

        // Start a fresh event: reset the text buffer position and assume
        // whitespace-only until something proves otherwise.
        txtPos = 0;
        isWhitespace = true;

        do {
            // Per-token state is cleared on every iteration.
            attributeCount = 0;

            name = null;
            text = null;
            type = peekType();

            switch (type) {

            case ENTITY_REF:
                // A reference in content is reported as TEXT.
                isWhitespace &= pushEntity();
                type = TEXT;
                break;

            case START_TAG:
                parseStartTag();
                break;

            case END_TAG:
                parseEndTag();
                break;

            case END_DOCUMENT:
                break;

            case TEXT:
                // Character data up to the next '<'.
                isWhitespace &= pushText('<');
                break;

            case CDSECT:
                // CDATA is reported as TEXT and never counts as whitespace.
                parseLegacy(true);
                isWhitespace = false;
                type = TEXT;
                break;

            default:
                // Any other token type is handed to parseLegacy(false).
                // NOTE(review): presumably comments, processing instructions
                // and the like - confirm against the type constants.
                parseLegacy(false);
            }
            // Repeat while the token's type is still greater than TEXT, or
            // while a TEXT token is followed by one whose peeked type is
            // >= TEXT, which merges the pieces into one event.
        } while (type > TEXT || type == TEXT && peekType() >= TEXT);

        // Only a TEXT event can be reported as whitespace.
        isWhitespace &= type == TEXT;

        return type;
    }

    // -----------------------------------------------------------------------------
    // utility methods to make XML parsing easier ...

    /**
     * Checks that the current event has the given type and, if a name is
     * supplied, the given name. A null name matches any name.
     *
     * <p>
     * If the current event is whitespace-only TEXT and the required type is
     * not TEXT, next() is called once before the check. If the check fails,
     * the mismatch is reported through exception(...) with a message naming
     * the expected type and name.
     *
     * <p>
     * Essentially it does this:
     *
     * <pre>
     *     if (getType() == TEXT &amp;&amp; type != TEXT &amp;&amp; isWhitespace())
     *         next();
     *
     *     if (type != getType()
     *             || (name != null &amp;&amp; !name.equals(getName())))
     *         exception(&quot;expected: ...&quot;);
     * </pre>
     *
     * @param type the required event type
     * @param name the required name, or null to accept any name
     * @throws IOException if advancing the parser fails or the requirement
     *         is not met
     */
    public void require(int type, String name) throws IOException {

        if (this.type == TEXT && type != TEXT && isWhitespace())
            next();

        if (type != this.type || (name != null && !name.equals(getName()))) {
            // Guard the table lookup the same way getPositionDescription()
            // does, so an out-of-table type still produces the parse error
            // instead of an ArrayIndexOutOfBoundsException.
            String expected = (type >= 0 && type < TYPES.length) ? TYPES[type] : "Other";
            exception("expected: " + expected + "/" + name);
        }
    }

    /**
     * Convenience accessor for element content. When the current event is
     * TEXT, its text is returned and the parser is advanced with next();
     * for any other event an empty string is returned and the parser is
     * left where it is. This spares callers a separate check for empty
     * elements.
     *
     * @return the text of the current TEXT event, or "" if the current
     *         event is not TEXT
     * @throws IOException if advancing the parser fails
     */
    public String readText() throws IOException {

        String content = "";

        if (type == TEXT) {
            // Fetch the text before moving on; next() clears the cached text.
            content = getText();
            next();
        }

        return content;
    }
}
xmlreader.java - 源码说明

本页面展示了「一个很不错的词频统计程序,目前只支持英文,中文的本人正在修改中.改好后上传给大家分享」中的 xmlreader.java 源码文件，采用 Java 编程语言编写，共 672 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与词频统计相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?