⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 libxmlparser.pas

📁 Delphi VCL Component Pack
💻 PAS
📖 第 1 页 / 共 2 页
字号:
      ptEndTag, // End Tag                     XmlSpec 3.1
      ptContent, // Text Content between Tags
      ptCData); // CDATA Section               XmlSpec 2.7

   TDtdElemType = // --- DTD Elements
      (deElement, // !ELEMENT declaration
      deAttList, // !ATTLIST declaration
      deEntity, // !ENTITY declaration
      deNotation, // !NOTATION declaration
      dePI, // PI in DTD
      deComment, // Comment in DTD
      deError); // Error found in the DTD

type
   TAttrList = class;
   TEntityStack = class;
   TNvpList = class;
   TElemDef = class;
   TElemList = class;
   TEntityDef = class;
   TNotationDef = class;

   TDtdElementRec = record // --- This Record is returned by the DTD parser callback function
      Start, Final: PChar; // Start/End of the Element's Declaration
      case ElementType: TDtdElemType of // Type of the Element
         deElement, // <!ELEMENT>
            deAttList: (ElemDef: TElemDef); // <!ATTLIST>
         deEntity: (EntityDef: TEntityDef); // <!ENTITY>
         deNotation: (NotationDef: TNotationDef); // <!NOTATION>
         dePI: (Target: PChar; // <?PI ?>
            Content: PChar;
            AttrList: TAttrList);
         deError: (Pos: PChar); // Error
                       // deComment : ((No additional fields here));   // <!-- Comment -->
   end;

   TXmlParser = class // --- Internal Properties and Methods
   protected
      FBuffer: PChar; // NIL if there is no buffer available
      FBufferSize: INTEGER; // 0 if the buffer is not owned by the Document instance
      FSource: string; // Name of Source of document. Filename for Documents loaded with LoadFromFile

      FXmlVersion: string; // XML version from Document header. Default is '1.0'
      FEncoding: string; // Encoding from Document header. Default is 'UTF-8'
      FStandalone: BOOLEAN; // Standalone declaration from Document header. Default is 'yes'
      FRootName: string; // Name of the Root Element (= DTD name)
      FDtdcFinal: PChar; // Pointer to the '>' character terminating the DTD declaration

      FNormalize: BOOLEAN; // If true: Pack Whitespace and don't return empty contents
      EntityStack: TEntityStack; // Entity Stack for Parameter and General Entities
      FCurEncoding: string; // Current Encoding during parsing (always uppercase)

      procedure AnalyzeProlog; // Analyze XML Prolog or Text Declaration
      procedure AnalyzeComment(Start: PChar; var Final: PChar); // Analyze Comments
      procedure AnalyzePI(Start: PChar; var Final: PChar); // Analyze Processing Instructions (PI)
      procedure AnalyzeDtdc; // Analyze Document Type Declaration
      procedure AnalyzeDtdElements(Start: PChar; var Final: PChar); // Analyze DTD declarations
      procedure AnalyzeTag; // Analyze Start/End/Empty-Element Tags
      procedure AnalyzeCData; // Analyze CDATA Sections
      procedure AnalyzeText(var IsDone: BOOLEAN); // Analyze Text Content between Tags
      procedure AnalyzeElementDecl(Start: PChar; var Final: PChar);
      procedure AnalyzeAttListDecl(Start: PChar; var Final: PChar);
      procedure AnalyzeEntityDecl(Start: PChar; var Final: PChar);
      procedure AnalyzeNotationDecl(Start: PChar; var Final: PChar);

      procedure PushPE(var Start: PChar);
      procedure ReplaceCharacterEntities(var Str: string);
      procedure ReplaceParameterEntities(var Str: string);
      procedure ReplaceGeneralEntities(var Str: string);

      function GetDocBuffer: PChar; // Returns FBuffer or a pointer to a NUL char if Buffer is empty

   public // --- Document Properties
      property XmlVersion: string read FXmlVersion; // XML version from the Document Prolog
      property Encoding: string read FEncoding; // Document Encoding from Prolog
      property Standalone: BOOLEAN read FStandalone; // Standalone Declaration from Prolog
      property RootName: string read FRootName; // Name of the Root Element
      property Normalize: BOOLEAN read FNormalize write FNormalize; // True if Content is to be normalized
      property Source: string read FSource; // Name of Document Source (Filename)
      property DocBuffer: PChar read GetDocBuffer; // Returns document buffer
   public // --- DTD Objects
      Elements: TElemList; // Elements: List of TElemDef (contains Attribute Definitions)
      Entities: TNvpList; // General Entities: List of TEntityDef
      ParEntities: TNvpList; // Parameter Entities: List of TEntityDef
      Notations: TNvpList; // Notations: List of TNotationDef
   public
      constructor Create;
      destructor Destroy; override;

                 // --- Document Handling
      function LoadFromFile(Filename: string;
         FileMode: INTEGER = fmOpenRead or fmShareDenyNone): BOOLEAN;
                                                                          // Loads Document from given file
      function LoadFromBuffer(Buffer: PChar): BOOLEAN; // Loads Document from another buffer
      procedure SetBuffer(Buffer: PChar); // References another buffer
      procedure Clear; // Clear Document

   public
                 // --- Scanning through the document
      CurPartType: TPartType; // Current Type
      CurName: string; // Current Name
      CurContent: string; // Current Normalized Content
      CurStart: PChar; // Current First character
      CurFinal: PChar; // Current Last character
      CurAttr: TAttrList; // Current Attribute List
      property CurEncoding: string read FCurEncoding; // Current Encoding
      procedure StartScan;
      function Scan: BOOLEAN;

                 // --- Events / Callbacks
      function LoadExternalEntity(SystemId, PublicId,
         Notation: string): TXmlParser; virtual;
      function TranslateEncoding(const Source: string): string; virtual;
      procedure DtdElementFound(DtdElementRec: TDtdElementRec); virtual;
   end;

   TValueType = // --- Attribute Value Type
      (vtNormal, // Normal specified Attribute
      vtImplied, // #IMPLIED attribute value
      vtFixed, // #FIXED attribute value
      vtDefault); // Attribute value from default value in !ATTLIST declaration

   TAttrDefault = // --- Attribute Default Type
      (adDefault, // Normal default value
      adRequired, // #REQUIRED attribute
      adImplied, // #IMPLIED attribute
      adFixed); // #FIXED attribute

   TAttrType = // --- Type of attribute
      (atUnknown, // Unknown type
      atCData, // Character data only
      atID, // ID
      atIdRef, // ID Reference
      atIdRefs, // Several ID References, separated by Whitespace
      atEntity, // Name of an unparsed Entity
      atEntities, // Several unparsed Entity names, separated by Whitespace
      atNmToken, // Name Token
      atNmTokens, // Several Name Tokens, separated by Whitespace
      atNotation, // A selection of Notation names (Unparsed Entity)
      atEnumeration); // Enumeration

   TElemType = // --- Element content type
      (etEmpty, // Element is always empty
      etAny, // Element can have any mixture of PCDATA and any elements
      etChildren, // Element must contain only elements
      etMixed); // Mixed PCDATA and elements

  (*$IFDEF HAS_CONTNRS_UNIT *)
   TObjectList = Contnrs.TObjectList; // Re-Export this identifier
  (*$ELSE *)
   TObjectList = class(TList)
      destructor Destroy; override;
      procedure Delete(Index: INTEGER);
      procedure Clear; override;
   end;
  (*$ENDIF *)

   TNvpNode = class // Name-Value Pair Node
      Name: string;
      Value: string;
      constructor Create(TheName: string = ''; TheValue: string = '');
   end;

   TNvpList = class(TObjectList) // Name-Value Pair List
      procedure Add(Node: TNvpNode);
      function Node(Name: string): TNvpNode; overload;
      function Node(Index: INTEGER): TNvpNode; overload;
      function Value(Name: string): string; overload;
      function Value(Index: INTEGER): string; overload;
      function Name(Index: INTEGER): string;
   end;

   TAttr = class(TNvpNode) // Attribute of a Start-Tag or Empty-Element-Tag
      ValueType: TValueType;
      AttrType: TAttrType;
   end;

   TAttrList = class(TNvpList) // List of Attributes
      procedure Analyze(Start: PChar; var Final: PChar);
   end;

   TEntityStack = class(TObjectList) // Stack where current position is stored before parsing entities
   protected
      Owner: TXmlParser;
   public
      constructor Create(TheOwner: TXmlParser);
      procedure Push(LastPos: PChar); overload;
      procedure Push(Instance: TObject; LastPos: PChar); overload;
      function Pop: PChar; // Returns next char or NIL if EOF is reached. Frees Instance.
   end;

   TAttrDef = class(TNvpNode) // Represents a <!ATTLIST Definition. "Value" is the default value
      TypeDef: string; // Type definition from the DTD
      Notations: string; // Notation List, separated by pipe symbols '|'
      AttrType: TAttrType; // Attribute Type
      DefaultType: TAttrDefault; // Default Type
   end;

   TElemDef = class(TNvpList) // Represents a <!ELEMENT Definition. Is a list of TAttrDef-Nodes
      Name: string; // Element name
      ElemType: TElemType; // Element type
      Definition: string; // Element definition from DTD
   end;

   TElemList = class(TObjectList) // List of TElemDef nodes
      function Node(Name: string): TElemDef;
      procedure Add(Node: TElemDef);
   end;

   TEntityDef = class(TNvpNode) // Represents a <!ENTITY Definition.
      SystemId: string;
      PublicId: string;
      NotationName: string;
   end;

   TNotationDef = class(TNvpNode) // Represents a <!NOTATION Definition. Value is the System ID
      PublicId: string;
   end;

   TCharset = set of CHAR;

const
   CWhitespace = [#32, #9, #13, #10]; // Whitespace characters (XmlSpec 2.3)
   CLetter = [#$41..#$5A, #$61..#$7A, #$C0..#$D6, #$D8..#$F6, #$F8..#$FF];
   CDigit = [#$30..#$39];
   CNameChar = CLetter + CDigit + ['.', '-', '_', ':', #$B7];
   CNameStart = CLetter + ['_', ':'];
   CQuoteChar = ['"', ''''];
   CPubidChar = [#32, ^M, ^J, #9, 'a'..'z', 'A'..'Z', '0'..'9',
      '-', '''', '(', ')', '+', ',', '.', '/', ':',
      '=', '?', ';', '!', '*', '#', '@', '$', '_', '%'];

   CDStart = '<![CDATA[';
   CDEnd = ']]>';

  // --- Name Constants for the above enumeration types
   CPartType_Name: array[TPartType] of string =
   ('', 'XML Prolog', 'Comment', 'PI',
      'DTD Declaration', 'Start Tag', 'Empty Tag', 'End Tag',
      'Text', 'CDATA');
   CValueType_Name: array[TValueType] of string = ('Normal', 'Implied', 'Fixed', 'Default');
   CAttrDefault_Name: array[TAttrDefault] of string = ('Default', 'Required', 'Implied', 'Fixed');
   CElemType_Name: array[TElemType] of string = ('Empty', 'Any', 'Childs only', 'Mixed');
   CAttrType_Name: array[TAttrType] of string = ('Unknown', 'CDATA',
      'ID', 'IDREF', 'IDREFS',
      'ENTITY', 'ENTITIES',
      'NMTOKEN', 'NMTOKENS',
      'Notation', 'Enumeration');

function ConvertWs(Source: string; PackWs: BOOLEAN): string; // Convert WS to spaces #x20
procedure SetStringSF(var S: string; BufferStart, BufferFinal: PChar); // SetString by Start/Final of buffer
function StrSFPas(Start, Finish: PChar): string; // Convert buffer part to Pascal string
function TrimWs(Source: string): string; // Trim Whitespace

function AnsiToUtf8(Source: ANSISTRING): string; // Convert Win-1252 to UTF-8
function Utf8ToAnsi(Source: string; UnknownChar: CHAR = '

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -