📄 dtdparser.java

📁 XML的DTD的解析、对XML文件作用的JAVA源代码。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package com.wutka.dtd;import java.util.*;import java.io.*;import java.net.*;/** Parses a DTD file and returns a DTD object * * @author Mark Wutka * @version $Revision: 1.19 $ $Date: 2002/10/01 12:48:47 $ by $Author: wutka $ */public class DTDParser implements EntityExpansion{    protected Scanner scanner;    protected DTD dtd;    protected Object defaultLocation;/** Creates a parser that will read from the specified Reader object */    public DTDParser(Reader in)    {        scanner = new Scanner(in, false, this);        dtd = new DTD();    }/** Creates a parser that will read from the specified Reader object * @param in The input stream to read * @param trace True if the parser should print out tokens as it reads them *  (used for debugging the parser) */    public DTDParser(Reader in, boolean trace)    {        scanner = new Scanner(in, trace, this);        dtd = new DTD();    }/** Creates a parser that will read from the specified File object */    public DTDParser(File in)        throws IOException    {        defaultLocation = in.getParentFile();        scanner = new Scanner(new BufferedReader(new FileReader(in)),            false, this);        dtd = new DTD();    }/** Creates a parser that will read from the specified File object * @param in The file to read * @param trace True if the parser should print out tokens as it reads them *  (used for debugging the parser) */    public DTDParser(File in, boolean trace)        throws IOException    {        defaultLocation = in.getParentFile();        scanner = new Scanner(new BufferedReader(new FileReader(in)),            trace, this);        dtd = new DTD();    }/** Creates a parser that will read from the specified URL object */    public DTDParser(URL in)        throws IOException    {    //LAM: we need to set the defaultLocation to the directory where    //the dtd is found so that we don't run into problems parsing any    //relative external files referenced by the dtd.        String file = in.getFile();        defaultLocation = new URL(in.getProtocol(), in.getHost(), in.getPort(), file.substring(0, file.lastIndexOf('/') + 1));        scanner = new Scanner(new BufferedReader(            new InputStreamReader(in.openStream())), false, this);        dtd = new DTD();    }/** Creates a parser that will read from the specified URL object * @param in The URL to read * @param trace True if the parser should print out tokens as it reads them *  (used for debugging the parser) */    public DTDParser(URL in, boolean trace)        throws IOException    {    //LAM: we need to set the defaultLocation to the directory where    //the dtd is found so that we don't run into problems parsing any    //relative external files referenced by the dtd.        String file = in.getFile();        defaultLocation = new URL(in.getProtocol(), in.getHost(), in.getPort(), file.substring(0, file.lastIndexOf('/') + 1));        scanner = new Scanner(new BufferedReader(            new InputStreamReader(in.openStream())), trace, this);        dtd = new DTD();    }/** Parses the DTD file and returns a DTD object describing the DTD.    This invocation of parse does not try to guess the root element    (for efficiency reasons) */    public DTD parse()        throws IOException    {        return parse(false);    }/** Parses the DTD file and returns a DTD object describing the DTD. * @param guessRootElement If true, tells the parser to try to guess the          root element of the document by process of elimination */    public DTD parse(boolean guessRootElement)        throws IOException    {        Token token;        for (;;)        {            token = scanner.peek();            if (token.type == Scanner.EOF) break;            parseTopLevelElement();        }        if (guessRootElement)        {            Hashtable roots = new Hashtable();            Enumeration e = dtd.elements.elements();            while (e.hasMoreElements())            {                DTDElement element = (DTDElement) e.nextElement();                roots.put(element.name, element);            }            e = dtd.elements.elements();            while (e.hasMoreElements())            {                DTDElement element = (DTDElement) e.nextElement();                if (!(element.content instanceof DTDContainer)) continue;                Enumeration items = ((DTDContainer) element.content).                    getItemsVec().  elements();                while (items.hasMoreElements())                {                    removeElements(roots, dtd, (DTDItem) items.nextElement());                }            }            if (roots.size() == 1)            {                e = roots.elements();                dtd.rootElement = (DTDElement) e.nextElement();            }            else            {                dtd.rootElement = null;            }        }        else        {            dtd.rootElement = null;        }        return dtd;    }    protected void removeElements(Hashtable h, DTD dtd, DTDItem item)    {        if (item instanceof DTDName)        {            h.remove(((DTDName) item).value);        }        else if (item instanceof DTDContainer)        {            Enumeration e = ((DTDContainer) item).getItemsVec().elements();            while (e.hasMoreElements())            {                removeElements(h, dtd, (DTDItem) e.nextElement());            }        }    }    protected void parseTopLevelElement()        throws IOException    {        Token token = scanner.get();// Is <? xxx ?> even valid in a DTD?  I'll ignore it just in case it's there        if (token.type == Scanner.LTQUES)        {            StringBuffer textBuffer = new StringBuffer();            for (;;)            {                String text = scanner.getUntil('?');                textBuffer.append(text);                token = scanner.peek();                if (token.type == Scanner.GT)                {                    scanner.get();                    break;                }                textBuffer.append('?');            }            DTDProcessingInstruction instruct =                new DTDProcessingInstruction(textBuffer.toString());            dtd.items.addElement(instruct);            return;        }        else if (token.type == Scanner.CONDITIONAL)        {            token = expect(Scanner.IDENTIFIER);            if (token.value.equals("IGNORE"))            {                scanner.skipConditional();            }            else            {                if (token.value.equals("INCLUDE"))                {                    scanner.skipUntil('[');                }                else                {                    throw new DTDParseException(scanner.getUriId(),                        "Invalid token in conditional: "+token.value,                        scanner.getLineNumber(), scanner.getColumn());                }            }        }        else if (token.type == Scanner.ENDCONDITIONAL)        {            // Don't need to do anything for this token        }        else if (token.type == Scanner.COMMENT)        {            dtd.items.addElement(                new DTDComment(token.value));        }        else if (token.type == Scanner.LTBANG)        {            token = expect(Scanner.IDENTIFIER);            if (token.value.equals("ELEMENT"))            {                parseElement();            }            else if (token.value.equals("ATTLIST"))            {                parseAttlist();            }            else if (token.value.equals("ENTITY"))            {                parseEntity();            }            else if (token.value.equals("NOTATION"))            {                parseNotation();            }            else            {                skipUntil(Scanner.GT);            }        }        else        {// MAW Version 1.17// Previously, the parser would skip over unexpected tokens at the// upper level. Some invalid DTDs would still show up as valid.            throw new DTDParseException(scanner.getUriId(),                        "Unexpected token: "+ token.type.name+"("+token.value+")",                        scanner.getLineNumber(), scanner.getColumn());        }    }    protected void skipUntil(TokenType stopToken)        throws IOException    {        Token token = scanner.get();        while (token.type != stopToken)        {            token = scanner.get();        }    }    protected Token expect(TokenType expected)        throws IOException    {        Token token = scanner.get();        if (token.type != expected)        {            if (token.value == null)            {                throw new DTDParseException(scanner.getUriId(),                            "Expected "+expected.name+" instead of "+token.type.name,                            scanner.getLineNumber(), scanner.getColumn());            }            else            {                throw new DTDParseException(scanner.getUriId(),                            "Expected "+expected.name+                                " instead of "+ token.type.name+"("+token.value+")",                            scanner.getLineNumber(), scanner.getColumn());            }        }        return token;    }    protected void parseElement()        throws IOException    {        Token name = expect(Scanner.IDENTIFIER);        DTDElement element = (DTDElement) dtd.elements.get(name.value);        if (element == null)        {            element = new DTDElement(name.value);            dtd.elements.put(element.name, element);        }        else if (element.content != null)        {// 070501 MAW: Since the ATTLIST tag can also cause an element to be created,// only throw this exception if the element has content defined, which// won't happen when you just create an ATTLIST. Thanks to// Jags Krishnamurthy of Object Edge for pointing out this problem - // originally the parser would let you define an element more than once.            throw new DTDParseException(scanner.getUriId(),                "Found second definition of element: "+name.value,                        scanner.getLineNumber(), scanner.getColumn());        }        dtd.items.addElement(element);        parseContentSpec(scanner, element);        expect(Scanner.GT);    }    protected void parseContentSpec(Scanner scanner, DTDElement element)        throws IOException    {        Token token = scanner.get();        if (token.type == Scanner.IDENTIFIER)        {            if (token.value.equals("EMPTY"))            {                element.content = new DTDEmpty();            }            else if (token.value.equals("ANY"))            {                element.content = new DTDAny();            }            else            {                throw new DTDParseException(scanner.getUriId(),                    "Invalid token in entity content spec "+                        token.value,                        scanner.getLineNumber(), scanner.getColumn());            }        }        else if (token.type == Scanner.LPAREN)        {            token = scanner.peek();            if (token.type == Scanner.IDENTIFIER)            {                if (token.value.equals("#PCDATA"))                {                    parseMixed(element);                }                else                {                    parseChildren(element);                }            }            else if (token.type == Scanner.LPAREN)            {                parseChildren(element);            }        }    }    protected void parseMixed(DTDElement element)        throws IOException    {        // MAW Version 1.19        // Keep track of whether the mixed is #PCDATA only        // Don't allow * after (#PCDATA), but allow after        // (#PCDATA|foo|bar|baz)*        boolean isPcdataOnly = true;        DTDMixed mixed = new DTDMixed();        mixed.add(new DTDPCData());        scanner.get();        element.content = mixed;        for (;;)        {            Token token = scanner.get();            if (token.type == Scanner.RPAREN)            {                token = scanner.peek();                if (token.type == Scanner.ASTERISK)                {                    scanner.get();                    mixed.cardinal = DTDCardinal.ZEROMANY;                }                else                {                    if (!isPcdataOnly)                    {                        throw new DTDParseException(scanner.getUriId(),                                        "Invalid token in Mixed content type, '*' required after (#PCDATA|xx ...): "+                                        token.type.name, scanner.getLineNumber(), scanner.getColumn());                    }                    mixed.cardinal = DTDCardinal.NONE;                }                return;            }            else if (token.type == Scanner.PIPE)            {                token = scanner.get();                mixed.add(new DTDName(token.value));                // MAW Ver. 1.19                isPcdataOnly = false;            }            else            {                throw new DTDParseException(scanner.getUriId(),                                "Invalid token in Mixed content type: "+                                token.type.name, scanner.getLineNumber(), scanner.getColumn());            }        }    }    protected void parseChildren(DTDElement element)        throws IOException    {        DTDContainer choiceSeq = parseChoiceSequence();        Token token = scanner.peek();        choiceSeq.cardinal = parseCardinality();        if (token.type == Scanner.QUES)        {            choiceSeq.cardinal = DTDCardinal.OPTIONAL;        }        else if (token.type == Scanner.ASTERISK)        {            choiceSeq.cardinal = DTDCardinal.ZEROMANY;        }        else if (token.type == Scanner.PLUS)        {            choiceSeq.cardinal = DTDCardinal.ONEMANY;        }        else        {            choiceSeq.cardinal = DTDCardinal.NONE;        }        element.content = choiceSeq;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -