📄 libxmlparser.pas
字号:
ptEndTag, // End Tag XmlSpec 3.1
ptContent, // Text Content between Tags
ptCData); // CDATA Section XmlSpec 2.7
TDtdElemType = // --- DTD Elements
(deElement, // !ELEMENT declaration
deAttList, // !ATTLIST declaration
deEntity, // !ENTITY declaration
deNotation, // !NOTATION declaration
dePI, // PI in DTD
deComment, // Comment in DTD
deError); // Error found in the DTD
type
TAttrList = class;
TEntityStack = class;
TNvpList = class;
TElemDef = class;
TElemList = class;
TEntityDef = class;
TNotationDef = class;
TDtdElementRec = record // --- This Record is returned by the DTD parser callback function
Start, Final: PChar; // Start/End of the Element's Declaration
case ElementType: TDtdElemType of // Type of the Element
deElement, // <!ELEMENT>
deAttList: (ElemDef: TElemDef); // <!ATTLIST>
deEntity: (EntityDef: TEntityDef); // <!ENTITY>
deNotation: (NotationDef: TNotationDef); // <!NOTATION>
dePI: (Target: PChar; // <?PI ?>
Content: PChar;
AttrList: TAttrList);
deError: (Pos: PChar); // Error
// deComment : ((No additional fields here)); // <!-- Comment -->
end;
TXmlParser = class // --- Internal Properties and Methods
protected
FBuffer: PChar; // NIL if there is no buffer available
FBufferSize: INTEGER; // 0 if the buffer is not owned by the Document instance
FSource: string; // Name of Source of document. Filename for Documents loaded with LoadFromFile
FXmlVersion: string; // XML version from Document header. Default is '1.0'
FEncoding: string; // Encoding from Document header. Default is 'UTF-8'
FStandalone: BOOLEAN; // Standalone declaration from Document header. Default is 'yes'
FRootName: string; // Name of the Root Element (= DTD name)
FDtdcFinal: PChar; // Pointer to the '>' character terminating the DTD declaration
FNormalize: BOOLEAN; // If true: Pack Whitespace and don't return empty contents
EntityStack: TEntityStack; // Entity Stack for Parameter and General Entities
FCurEncoding: string; // Current Encoding during parsing (always uppercase)
procedure AnalyzeProlog; // Analyze XML Prolog or Text Declaration
procedure AnalyzeComment(Start: PChar; var Final: PChar); // Analyze Comments
procedure AnalyzePI(Start: PChar; var Final: PChar); // Analyze Processing Instructions (PI)
procedure AnalyzeDtdc; // Analyze Document Type Declaration
procedure AnalyzeDtdElements(Start: PChar; var Final: PChar); // Analyze DTD declarations
procedure AnalyzeTag; // Analyze Start/End/Empty-Element Tags
procedure AnalyzeCData; // Analyze CDATA Sections
procedure AnalyzeText(var IsDone: BOOLEAN); // Analyze Text Content between Tags
procedure AnalyzeElementDecl(Start: PChar; var Final: PChar);
procedure AnalyzeAttListDecl(Start: PChar; var Final: PChar);
procedure AnalyzeEntityDecl(Start: PChar; var Final: PChar);
procedure AnalyzeNotationDecl(Start: PChar; var Final: PChar);
procedure PushPE(var Start: PChar);
procedure ReplaceCharacterEntities(var Str: string);
procedure ReplaceParameterEntities(var Str: string);
procedure ReplaceGeneralEntities(var Str: string);
function GetDocBuffer: PChar; // Returns FBuffer or a pointer to a NUL char if Buffer is empty
public // --- Document Properties
property XmlVersion: string read FXmlVersion; // XML version from the Document Prolog
property Encoding: string read FEncoding; // Document Encoding from Prolog
property Standalone: BOOLEAN read FStandalone; // Standalone Declaration from Prolog
property RootName: string read FRootName; // Name of the Root Element
property Normalize: BOOLEAN read FNormalize write FNormalize; // True if Content is to be normalized
property Source: string read FSource; // Name of Document Source (Filename)
property DocBuffer: PChar read GetDocBuffer; // Returns document buffer
public // --- DTD Objects
Elements: TElemList; // Elements: List of TElemDef (contains Attribute Definitions)
Entities: TNvpList; // General Entities: List of TEntityDef
ParEntities: TNvpList; // Parameter Entities: List of TEntityDef
Notations: TNvpList; // Notations: List of TNotationDef
public
constructor Create;
destructor Destroy; override;
// --- Document Handling
function LoadFromFile(Filename: string;
FileMode: INTEGER = fmOpenRead or fmShareDenyNone): BOOLEAN;
// Loads Document from given file
function LoadFromBuffer(Buffer: PChar): BOOLEAN; // Loads Document from another buffer
procedure SetBuffer(Buffer: PChar); // References another buffer
procedure Clear; // Clear Document
public
// --- Scanning through the document
CurPartType: TPartType; // Current Type
CurName: string; // Current Name
CurContent: string; // Current Normalized Content
CurStart: PChar; // Current First character
CurFinal: PChar; // Current Last character
CurAttr: TAttrList; // Current Attribute List
property CurEncoding: string read FCurEncoding; // Current Encoding
procedure StartScan;
function Scan: BOOLEAN;
// --- Events / Callbacks
function LoadExternalEntity(SystemId, PublicId,
Notation: string): TXmlParser; virtual;
function TranslateEncoding(const Source: string): string; virtual;
procedure DtdElementFound(DtdElementRec: TDtdElementRec); virtual;
end;
TValueType = // --- Attribute Value Type
(vtNormal, // Normal specified Attribute
vtImplied, // #IMPLIED attribute value
vtFixed, // #FIXED attribute value
vtDefault); // Attribute value from default value in !ATTLIST declaration
TAttrDefault = // --- Attribute Default Type
(adDefault, // Normal default value
adRequired, // #REQUIRED attribute
adImplied, // #IMPLIED attribute
adFixed); // #FIXED attribute
TAttrType = // --- Type of attribute
(atUnknown, // Unknown type
atCData, // Character data only
atID, // ID
atIdRef, // ID Reference
atIdRefs, // Several ID References, separated by Whitespace
atEntity, // Name of an unparsed Entity
atEntities, // Several unparsed Entity names, separated by Whitespace
atNmToken, // Name Token
atNmTokens, // Several Name Tokens, separated by Whitespace
atNotation, // A selection of Notation names (Unparsed Entity)
atEnumeration); // Enumeration
TElemType = // --- Element content type
(etEmpty, // Element is always empty
etAny, // Element can have any mixture of PCDATA and any elements
etChildren, // Element must contain only elements
etMixed); // Mixed PCDATA and elements
(*$IFDEF HAS_CONTNRS_UNIT *)
TObjectList = Contnrs.TObjectList; // Re-Export this identifier
(*$ELSE *)
TObjectList = class(TList)
destructor Destroy; override;
procedure Delete(Index: INTEGER);
procedure Clear; override;
end;
(*$ENDIF *)
TNvpNode = class // Name-Value Pair Node
Name: string;
Value: string;
constructor Create(TheName: string = ''; TheValue: string = '');
end;
TNvpList = class(TObjectList) // Name-Value Pair List
procedure Add(Node: TNvpNode);
function Node(Name: string): TNvpNode; overload;
function Node(Index: INTEGER): TNvpNode; overload;
function Value(Name: string): string; overload;
function Value(Index: INTEGER): string; overload;
function Name(Index: INTEGER): string;
end;
TAttr = class(TNvpNode) // Attribute of a Start-Tag or Empty-Element-Tag
ValueType: TValueType;
AttrType: TAttrType;
end;
TAttrList = class(TNvpList) // List of Attributes
procedure Analyze(Start: PChar; var Final: PChar);
end;
TEntityStack = class(TObjectList) // Stack where current position is stored before parsing entities
protected
Owner: TXmlParser;
public
constructor Create(TheOwner: TXmlParser);
procedure Push(LastPos: PChar); overload;
procedure Push(Instance: TObject; LastPos: PChar); overload;
function Pop: PChar; // Returns next char or NIL if EOF is reached. Frees Instance.
end;
TAttrDef = class(TNvpNode) // Represents a <!ATTLIST Definition. "Value" is the default value
TypeDef: string; // Type definition from the DTD
Notations: string; // Notation List, separated by pipe symbols '|'
AttrType: TAttrType; // Attribute Type
DefaultType: TAttrDefault; // Default Type
end;
TElemDef = class(TNvpList) // Represents a <!ELEMENT Definition. Is a list of TAttrDef-Nodes
Name: string; // Element name
ElemType: TElemType; // Element type
Definition: string; // Element definition from DTD
end;
TElemList = class(TObjectList) // List of TElemDef nodes
function Node(Name: string): TElemDef;
procedure Add(Node: TElemDef);
end;
TEntityDef = class(TNvpNode) // Represents a <!ENTITY Definition.
SystemId: string;
PublicId: string;
NotationName: string;
end;
TNotationDef = class(TNvpNode) // Represents a <!NOTATION Definition. Value is the System ID
PublicId: string;
end;
TCharset = set of CHAR;
const
CWhitespace = [#32, #9, #13, #10]; // Whitespace characters (XmlSpec 2.3)
CLetter = [#$41..#$5A, #$61..#$7A, #$C0..#$D6, #$D8..#$F6, #$F8..#$FF];
CDigit = [#$30..#$39];
CNameChar = CLetter + CDigit + ['.', '-', '_', ':', #$B7];
CNameStart = CLetter + ['_', ':'];
CQuoteChar = ['"', ''''];
CPubidChar = [#32, ^M, ^J, #9, 'a'..'z', 'A'..'Z', '0'..'9',
'-', '''', '(', ')', '+', ',', '.', '/', ':',
'=', '?', ';', '!', '*', '#', '@', '$', '_', '%'];
CDStart = '<![CDATA[';
CDEnd = ']]>';
// --- Name Constants for the above enumeration types
CPartType_Name: array[TPartType] of string =
('', 'XML Prolog', 'Comment', 'PI',
'DTD Declaration', 'Start Tag', 'Empty Tag', 'End Tag',
'Text', 'CDATA');
CValueType_Name: array[TValueType] of string = ('Normal', 'Implied', 'Fixed', 'Default');
CAttrDefault_Name: array[TAttrDefault] of string = ('Default', 'Required', 'Implied', 'Fixed');
CElemType_Name: array[TElemType] of string = ('Empty', 'Any', 'Childs only', 'Mixed');
CAttrType_Name: array[TAttrType] of string = ('Unknown', 'CDATA',
'ID', 'IDREF', 'IDREFS',
'ENTITY', 'ENTITIES',
'NMTOKEN', 'NMTOKENS',
'Notation', 'Enumeration');
function ConvertWs(Source: string; PackWs: BOOLEAN): string; // Convert WS to spaces #x20
procedure SetStringSF(var S: string; BufferStart, BufferFinal: PChar); // SetString by Start/Final of buffer
function StrSFPas(Start, Finish: PChar): string; // Convert buffer part to Pascal string
function TrimWs(Source: string): string; // Trim Whitespace
function AnsiToUtf8(Source: ANSISTRING): string; // Convert Win-1252 to UTF-8
function Utf8ToAnsi(Source: string; UnknownChar: CHAR = '
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -