⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 libxmlparser.pas

📁 用AIML语言开发的一款中文自动聊天软件
💻 PAS
📖 第 1 页 / 共 2 页
字号:
                  ptEndTag,          // End Tag                     XmlSpec 3.1
                  ptContent,         // Text Content between Tags
                  ptCData);          // CDATA Section               XmlSpec 2.7

  // Kinds of item that can be reported for the DTD (tag type of the TDtdElementRec variant record)
  TDtdElemType = (
    deElement,     // !ELEMENT declaration
    deAttList,     // !ATTLIST declaration
    deEntity,      // !ENTITY declaration
    deNotation,    // !NOTATION declaration
    dePI,          // Processing instruction inside the DTD
    deComment,     // Comment inside the DTD
    deError        // An error was found in the DTD
  );

TYPE
  // Forward class declarations: these classes are referenced further down
  // (record fields, class members) before their full definitions appear.
  TAttrList    = CLASS;
  TElemDef     = CLASS;
  TElemList    = CLASS;
  TEntityDef   = CLASS;
  TEntityStack = CLASS;
  TNotationDef = CLASS;
  TNvpList     = CLASS;

  // Record that is returned by the DTD parser callback function.
  // ElementType is the variant tag: it tells which of the variant fields below is meaningful.
  TDtdElementRec = RECORD
    Start : PChar;                              // Start of the element's declaration
    Final : PChar;                              // End of the element's declaration
    CASE ElementType : TDtdElemType OF          // Type of the element
      deElement,                                // <!ELEMENT> shares its field with <!ATTLIST>
      deAttList  : (ElemDef     : TElemDef);
      deEntity   : (EntityDef   : TEntityDef);  // <!ENTITY>
      deNotation : (NotationDef : TNotationDef);// <!NOTATION>
      dePI       : (Target      : PChar;        // <?PI ?>
                    Content     : PChar;
                    AttrList    : TAttrList);
      deError    : (Pos         : PChar);       // Error
      // deComment has no additional fields     // <!-- Comment -->
  END;

  // TXmlParser holds an XML document buffer and exposes a scanner over it.
  // A document is supplied through LoadFromFile, LoadFromBuffer or SetBuffer. StartScan and Scan
  // are used for scanning through the document; the Cur* fields describe the current part.
  // DTD objects are exposed through Elements, Entities, ParEntities and Notations.
  // The three VIRTUAL methods at the end of the class can be overridden by descendants.
  TXmlParser = CLASS                             // --- Internal Properties and Methods
               PROTECTED
                 FBuffer      : PChar;           // NIL if there is no buffer available
                 FBufferSize  : INTEGER;         // 0 if the buffer is not owned by the Document instance
                 FSource      : STRING;          // Name of Source of document. Filename for Documents loaded with LoadFromFile

                 FXmlVersion  : STRING;          // XML version from Document header. Default is '1.0'
                 FEncoding    : STRING;          // Encoding from Document header. Default is 'UTF-8'
                 FStandalone  : BOOLEAN;         // Standalone declaration from Document header. Default is 'yes'
                                                 // NOTE(review): the field is a BOOLEAN, so 'yes' presumably means TRUE.
                                                 // XML 1.0 (section 2.9) assumes "no" when the declaration is absent -
                                                 // confirm the actual default against the implementation.
                 FRootName    : STRING;          // Name of the Root Element (= DTD name)
                 FDtdcFinal   : PChar;           // Pointer to the '>' character terminating the DTD declaration

                 FNormalize   : BOOLEAN;         // If true: Pack Whitespace and don't return empty contents
                 EntityStack  : TEntityStack;    // Entity Stack for Parameter and General Entities
                 FCurEncoding : STRING;          // Current Encoding during parsing (always uppercase)

                 // --- Analyzers for the individual parts of a document.
                 // Those taking (Start; VAR Final) receive a start pointer and hand a pointer back through Final.
                 PROCEDURE AnalyzeProlog;                                         // Analyze XML Prolog or Text Declaration
                 PROCEDURE AnalyzeComment (Start : PChar; VAR Final : PChar);     // Analyze Comments
                 PROCEDURE AnalyzePI      (Start : PChar; VAR Final : PChar);     // Analyze Processing Instructions (PI)
                 PROCEDURE AnalyzeDtdc;                                           // Analyze Document Type Declaration
                 PROCEDURE AnalyzeDtdElements (Start : PChar; VAR Final : PChar); // Analyze DTD declarations
                 PROCEDURE AnalyzeTag;                                            // Analyze Start/End/Empty-Element Tags
                 PROCEDURE AnalyzeCData;                                          // Analyze CDATA Sections
                 PROCEDURE AnalyzeText (VAR IsDone : BOOLEAN);                    // Analyze Text Content between Tags
                 // NOTE(review): judging by their names, the next four handle the !ELEMENT, !ATTLIST,
                 // !ENTITY and !NOTATION declarations of the DTD - confirm against the implementation.
                 PROCEDURE AnalyzeElementDecl  (Start : PChar; VAR Final : PChar);
                 PROCEDURE AnalyzeAttListDecl  (Start : PChar; VAR Final : PChar);
                 PROCEDURE AnalyzeEntityDecl   (Start : PChar; VAR Final : PChar);
                 PROCEDURE AnalyzeNotationDecl (Start : PChar; VAR Final : PChar);

                 // --- Entity handling. The Replace* procedures modify Str in place (VAR parameter).
                 PROCEDURE PushPE (VAR Start : PChar);
                 PROCEDURE ReplaceCharacterEntities (VAR Str : STRING);
                 PROCEDURE ReplaceParameterEntities (VAR Str : STRING);
                 PROCEDURE ReplaceGeneralEntities   (VAR Str : STRING);

                 FUNCTION GetDocBuffer : PChar;  // Returns FBuffer or a pointer to a NUL char if Buffer is empty

               PUBLIC                         // --- Document Properties
                 PROPERTY XmlVersion : STRING  READ FXmlVersion;                 // XML version from the Document Prolog
                 PROPERTY Encoding   : STRING  READ FEncoding;                   // Document Encoding from Prolog
                 PROPERTY Standalone : BOOLEAN READ FStandalone;                 // Standalone Declaration from Prolog
                 PROPERTY RootName   : STRING  READ FRootName;                   // Name of the Root Element
                 PROPERTY Normalize  : BOOLEAN READ FNormalize WRITE FNormalize; // True if Content is to be normalized
                 PROPERTY Source     : STRING  READ FSource;                     // Name of Document Source (Filename)
                 PROPERTY DocBuffer  : PChar   READ GetDocBuffer;                // Returns document buffer
               PUBLIC                         // --- DTD Objects
                 Elements    : TElemList;     // Elements: List of TElemDef (contains Attribute Definitions)
                 Entities    : TNvpList;      // General Entities: List of TEntityDef
                 ParEntities : TNvpList;      // Parameter Entities: List of TEntityDef
                 Notations   : TNvpList;      // Notations: List of TNotationDef
               PUBLIC
                 CONSTRUCTOR Create;
                 DESTRUCTOR Destroy;                                      OVERRIDE;

                 // --- Document Handling
                 FUNCTION  LoadFromFile   (Filename : STRING;
                                           FileMode : INTEGER = fmOpenRead OR fmShareDenyNone) : BOOLEAN;
                                                                          // Loads Document from given file
                 FUNCTION  LoadFromBuffer (Buffer : PChar) : BOOLEAN;     // Loads Document from another buffer
                 PROCEDURE SetBuffer      (Buffer : PChar);               // References another buffer
                 PROCEDURE Clear;                                         // Clear Document

               PUBLIC
                 // --- Scanning through the document
                 CurPartType : TPartType;                         // Current Type
                 CurName     : STRING;                            // Current Name
                 CurContent  : STRING;                            // Current Normalized Content
                 CurStart    : PChar;                             // Current First character
                 CurFinal    : PChar;                             // Current Last character
                 CurAttr     : TAttrList;                         // Current Attribute List
                 PROPERTY CurEncoding : STRING READ FCurEncoding; // Current Encoding
                 PROCEDURE StartScan;
                 // NOTE(review): presumably Scan advances to the next part and its BOOLEAN result
                 // tells whether a part was found - confirm against the implementation.
                 FUNCTION  Scan : BOOLEAN;

                 // --- Events / Callbacks
                 // All three are VIRTUAL so that descendant classes can override them.
                 FUNCTION  LoadExternalEntity (SystemId, PublicId,
                                               Notation : STRING) : TXmlParser;     VIRTUAL;
                 FUNCTION  TranslateEncoding  (CONST Source : STRING) : STRING;     VIRTUAL;
                 PROCEDURE DtdElementFound (DtdElementRec : TDtdElementRec);        VIRTUAL;
               END;

  // Attribute value type
  TValueType = (
    vtNormal,     // Normal specified attribute
    vtImplied,    // #IMPLIED attribute value
    vtFixed,      // #FIXED attribute value
    vtDefault     // Value taken from the default value in the !ATTLIST declaration
  );

  // Attribute default type
  TAttrDefault = (
    adDefault,    // Normal default value
    adRequired,   // #REQUIRED attribute
    adImplied,    // #IMPLIED attribute
    adFixed       // #FIXED attribute
  );

  // Type of an attribute
  TAttrType = (
    atUnknown,       // Unknown type
    atCData,         // Character data only
    atID,            // ID
    atIdRef,         // ID reference
    atIdRefs,        // Several ID references, separated by whitespace
    atEntity,        // Name of an unparsed entity
    atEntities,      // Several unparsed entity names, separated by whitespace
    atNmToken,       // Name token
    atNmTokens,      // Several name tokens, separated by whitespace
    atNotation,      // A selection of notation names (unparsed entity)
    atEnumeration    // Enumeration
  );

  // Element content type
  TElemType = (
    etEmpty,       // Element is always empty
    etAny,         // Element can have any mixture of PCDATA and any elements
    etChildren,    // Element must contain only elements
    etMixed        // Mixed PCDATA and elements
  );

  // TObjectList is the base class of the list classes declared below.
  // When HAS_CONTNRS_UNIT is defined, the identifier is an alias for Contnrs.TObjectList;
  // otherwise a replacement class derived from TList is declared here.
  (*$IFDEF HAS_CONTNRS_UNIT *)
  TObjectList = Contnrs.TObjectList;    // Re-Export this identifier
  (*$ELSE *)
  // NOTE(review): presumably this fallback frees the stored objects on Delete/Clear/Destroy,
  // like Contnrs.TObjectList does - confirm against the implementation.
  TObjectList = CLASS (TList)
                  DESTRUCTOR Destroy; OVERRIDE;
                  PROCEDURE Delete (Index : INTEGER);   // Declared without OVERRIDE
                  PROCEDURE Clear; OVERRIDE;
                END;
  (*$ENDIF *)

  // Node holding one name-value pair.
  // Both constructor parameters are optional and default to the empty string.
  TNvpNode = CLASS
    Name  : STRING;
    Value : STRING;
    CONSTRUCTOR Create (TheName : STRING = ''; TheValue : STRING = '');
  END;

  // List of name-value pair nodes (TNvpNode).
  // Node and Value are overloaded: each accepts either a name or an index.
  TNvpList = CLASS (TObjectList)
    PROCEDURE Add   (Node  : TNvpNode);
    FUNCTION  Node  (Name  : STRING)  : TNvpNode; OVERLOAD;
    FUNCTION  Node  (Index : INTEGER) : TNvpNode; OVERLOAD;
    FUNCTION  Value (Name  : STRING)  : STRING;   OVERLOAD;
    FUNCTION  Value (Index : INTEGER) : STRING;   OVERLOAD;
    FUNCTION  Name  (Index : INTEGER) : STRING;
  END;

  // Attribute of a Start-Tag or Empty-Element-Tag.
  // Name and Value are inherited from TNvpNode.
  TAttr = CLASS (TNvpNode)
    ValueType : TValueType;
    AttrType  : TAttrType;
  END;

  // List of attributes.
  // Analyze receives a start pointer and hands a pointer back through the VAR parameter Final.
  TAttrList = CLASS (TNvpList)
    PROCEDURE Analyze (Start : PChar; VAR Final : PChar);
  END;

  // Stack on which the current position is stored before entities are parsed
  TEntityStack = CLASS (TObjectList)
  PROTECTED
    Owner : TXmlParser;
  PUBLIC
    CONSTRUCTOR Create (TheOwner : TXmlParser);
    PROCEDURE Push (LastPos : PChar);                     OVERLOAD;
    PROCEDURE Push (Instance : TObject; LastPos : PChar); OVERLOAD;
    // Returns next char or NIL if EOF is reached. Frees Instance.
    FUNCTION  Pop : PChar;
  END;

  // Represents a <!ATTLIST definition.
  // The inherited "Value" field is the default value.
  TAttrDef = CLASS (TNvpNode)
    TypeDef     : STRING;         // Type definition from the DTD
    Notations   : STRING;         // Notation list, separated by pipe symbols '|'
    AttrType    : TAttrType;      // Attribute type
    DefaultType : TAttrDefault;   // Default type
  END;

  // Represents a <!ELEMENT definition. Is a list of TAttrDef nodes.
  // Note: the Name field declared here hides the Name (Index) function declared in TNvpList.
  TElemDef = CLASS (TNvpList)
    Name       : STRING;      // Element name
    ElemType   : TElemType;   // Element type
    Definition : STRING;      // Element definition from the DTD
  END;

  // List of TElemDef nodes
  TElemList = CLASS (TObjectList)
    FUNCTION  Node (Name : STRING) : TElemDef;
    PROCEDURE Add  (Node : TElemDef);
  END;

  // Represents a <!ENTITY definition.
  // Name and Value are inherited from TNvpNode.
  TEntityDef = CLASS (TNvpNode)
    SystemId     : STRING;
    PublicId     : STRING;
    NotationName : STRING;
  END;

  // Represents a <!NOTATION definition.
  // The inherited Value field is the System ID.
  TNotationDef = CLASS (TNvpNode)
    PublicId : STRING;
  END;

  // Set-of-characters type
  TCharset = SET OF CHAR;


CONST
  // --- Character classes, written as character sets
  CWhitespace   = [#9, #10, #13, #32];                // Whitespace characters (XmlSpec 2.3): TAB, LF, CR, space
  // ASCII letters plus #$C0..#$FF without #$D7 and #$F7
  CLetter       = ['A'..'Z', 'a'..'z', #$C0..#$D6, #$D8..#$F6, #$F8..#$FF];
  CDigit        = ['0'..'9'];
  CNameChar     = CLetter + CDigit + ['.', '-', '_', ':', #$B7];
  CNameStart    = CLetter + ['_', ':'];
  CQuoteChar    = ['"', ''''];                        // Double quote and single quote
  // NOTE(review): this set contains TAB (#9), which the PubidChar production of XML 1.0 does not list
  CPubidChar    = [#32, #13, #10, #9,
                   'a'..'z', 'A'..'Z', '0'..'9',
                   '-', '''', '(', ')', '+', ',', '.', '/', ':',
                   '=', '?', ';', '!', '*', '#', '@', '$', '_', '%'];

  // --- Delimiters of a CDATA section
  CDStart       = '<![CDATA[';
  CDEnd         = ']]>';

  // --- Display names, one per value of the corresponding enumeration type
  CPartType_Name : ARRAY [TPartType] OF STRING = (
    '',
    'XML Prolog',
    'Comment',
    'PI',
    'DTD Declaration',
    'Start Tag',
    'Empty Tag',
    'End Tag',
    'Text',
    'CDATA');
  CValueType_Name : ARRAY [TValueType] OF STRING = (
    'Normal', 'Implied', 'Fixed', 'Default');
  CAttrDefault_Name : ARRAY [TAttrDefault] OF STRING = (
    'Default', 'Required', 'Implied', 'Fixed');
  CElemType_Name : ARRAY [TElemType] OF STRING = (
    'Empty', 'Any', 'Childs only', 'Mixed');
  CAttrType_Name : ARRAY [TAttrType] OF STRING = (
    'Unknown',
    'CDATA',
    'ID',
    'IDREF',
    'IDREFS',
    'ENTITY',
    'ENTITIES',
    'NMTOKEN',
    'NMTOKENS',
    'Notation',
    'Enumeration');

// Convert whitespace to spaces (#x20)
FUNCTION ConvertWs (Source : STRING; PackWs : BOOLEAN) : STRING;
// SetString by start/final pointer of a buffer
PROCEDURE SetStringSF (VAR S : STRING; BufferStart, BufferFinal : PChar);
// Convert a part of a buffer to a Pascal string
FUNCTION StrSFPas (Start, Finish : PChar) : STRING;
// Trim whitespace
FUNCTION TrimWs (Source : STRING) : STRING;

// Convert Win-1252 to UTF-8
FUNCTION AnsiToUtf8 (Source : ANSISTRING) : STRING;
FUNCTION  Utf8ToAnsi  (Source : STRING; UnknownChar : CHAR = '

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -