📄 csvtoxmlv1_1.lss.txt
字号:
'CSVtoXML Class:
Option Public
Option Explicit
Class CSVtoXML
'** The CSVtoXML class allows you to convert the contents of a CSV file
'** to XML. It is assumed that the first non-blank line in the file contains
'** header information with the names of all the fields/columns in the file.
'**
'** Julian Robichaux -- http://www.nsftools.com
'** version 1.1
'** August 9, 2004
'**
'** Release History
'** 1.1 (Aug 9, 2004)
'** -- added replaceBadTagNameChars method
'**
'** 1.0 (Aug 8, 2004)
'** -- initial release
Public delim As String
Public quoteChar As String
Public newLine As String
Public indent As String
Public topNodeName As String
Public childNodeName As String
Public xmlEncoding As String
Public Sub New ()
'** initialize the public members
delim = |,|
quoteChar = |"|
newLine = Chr(13) & Chr(10)
indent = " "
topNodeName = "csvtoxml"
childNodeName = "data"
xmlEncoding = "ISO-8859-1"
End Sub
Public Function convertFile (csvFileName As String, xmlFileName As String) As Integer
'** read a CSV file and output the contents to an XML file
'** (actually, it doesn't necessarily have to be comma-separated
'** values, since you can set whatever delimiter string you want;
'** however, most files of this type are delimited by commas, with
'** a small percentage delimited by Chr(9) tab characters)
Dim csvFile As Integer
Dim xmlFile As Integer
Dim lineText As String
Dim dataText As String
Dim headerArray As Variant
Dim dataArray As Variant
Dim i As Integer
Dim qcount As Integer
Dim pos As Integer
'** open the input and output files
csvFile = Freefile()
Open csvFileName For Input As csvFile
xmlFile = Freefile()
Open xmlFileName For Output As xmlFile
'** go through the csvFile line by line
Do While Not Eof(csvFile)
Line Input #csvFile, lineText
dataText = dataText & lineText
'** if there are an even number of quoteChars on this line, we can process it;
'** otherwise, there's probably just a linefeed embedded in a quoted line, and
'** we should skip the line processing and append the next line to this one
'** (see the rules for using quotes in the DelimSplit function)
qcount = 0
pos = Instr(1, dataText, quoteChar)
Do While (pos > 0)
qcount = qcount + 1
pos = Instr(pos + 1, dataText, quoteChar)
Loop
If (qcount Mod 2 = 1) And Not Eof(csvFile) Then
'** quoted line with embedded linefeeds: we'll need to loop and append
dataText = dataText & newline
Elseif (Len(Trim(dataText)) > 0) Then
'** if there's data in the buffer and the quote count is even (or we're at the
'** end of the file), process the data
If Not Isarray(headerArray) Then
'** if we don't have any header information yet, use the first non-blank line
'** as the header line with the list of column names
headerArray = DelimSplit(dataText, delim, quoteChar)
For i = 0 To Ubound(headerArray)
headerArray(i) = replaceBadTagNameChars(Trim(headerArray(i)))
If (headerArray(i) = "") Then
headerArray(i) = "untitled_" & i
End If
Next
Print #xmlFile, |<?xml version="1.0" encoding="| & xmlEncoding & |"?>|
Print #xmlFile, |<| & topNodeName & |>|
Else
'** if we've already captured the header information, this must be data
dataArray = DelimSplit(removeBadChars(dataText), delim, quoteChar)
Print #xmlFile, indent & |<| & childNodeName & |>|
For i = 0 To Ubound(dataArray)
If (i <= Ubound(headerArray)) Then
Print #xmlFile, indent & indent & |<| & headerArray(i) & |>|;
Print #xmlFile, dataArray(i);
Print #xmlFile, |</| & headerArray(i) & |>|
Else
Print #xmlFile, indent & indent & |<untitled_| & i & |>|;
Print #xmlFile, dataArray(i);
Print #xmlFile, |</untitled_| & i & |>|
End If
Next
Print #xmlFile, indent & |</| & childNodeName & |>|
End If
'** we processed this line, so reset dataText to ""
dataText = ""
End If
Loop
'** if we were able to capture any data at all, try to close the xml file properly
If Isarray(headerArray) Then
Print #xmlFile, |</| & topNodeName & |>|
End If
Close csvFile
Close xmlFile
convertFile = True
Exit Function
processError:
'** add your favorite error-handling routine here
Print "Error " & Err & " on line " & Erl & " parsing file " & csvFile & ": " & Error$
convertFile = False
Reset
Exit Function
End Function
Public Function delimSplit (fullString As String, delim As String, quoteChar As String) As Variant
'** Split a string at the specified delimiters, adjusting for delimiters that are
'** within quoted strings (you can use any quoteChar you want, but it's
'** virtually always going to be the " character)
'**
'** RULES FOR USING / INTERPRETING QUOTES
'** 1. If you want to encapsulate a data string in quotes, there should be
'** no whitespace before or after the quoted string, unless that string
'** is the first or last element on the line (in other words, there should
'** be no spaces or tabs or characters between the delimiter characters
'** that begin and end the string and the quotes that encapsulate it)
'** 2. If you want to include a literal quote character within a string, you need
'** to encapsulate the string in quotes (per #1 above) and use a pair of
'** quote characters for every single quote character you wish to represent
'** (for example, "" in a quoted string represents " )
'** 3. There should be no single instances of a quote character within a
'** quoted string, unless you are ending the quoted string with that character
'** 4. If you want to use the delimited character as a literal within a string,
'** you must encapsulate the string in quotes (per #1 above)
'**
'** You could also do this sort of thing with the Input # statement, but that would
'** require you to know the number and type of fields in each line in advance,
'** and you could run into problems if one of the lines doesn't have the proper
'** number of fields.
'**
'** This function uses the following ND6-specific functions: Split and Join
'** (write your own equivalents if you need this for R5)
Dim tempArray As Variant
Dim quotedArray As Variant
Dim returnArray As Variant
Dim emptyArray(0) As String
Dim count As Integer
Dim i As Integer, j As Integer
'** split the string along the delimiter
'** if there is no quote char, or it doesn't appear in the string, we're done
tempArray = Split(fullString, delim)
If (Len(quoteChar) = 0) Or (Instr(fullString, quoteChar) = 0) Then
delimSplit = tempArray
Exit Function
End If
'** initialize the temporary arrays
Redim finalArray(0) As String
quotedArray = emptyArray
'** start processing
For i = 0 To Ubound(tempArray)
quotedArray = Arrayappend(quotedArray, Split(tempArray(i), quoteChar))
If (Ubound(quotedArray) Mod 2 = 1) Or (i = Ubound(tempArray)) Then
'** ignore any single quoteChars at the beginning or end of the string,
'** and convert double quoteChars to single quoteChars
For j = 2 To (Ubound(quotedArray) - 1)
If (quotedArray(j) = "") And (quotedArray(j-1) <> quoteChar)Then
quotedArray(j) = quoteChar
End If
Next
'** add the string to the array that we'll return
Redim Preserve finalArray(0 To count)
finalArray(count) = Join(quotedArray, "")
count = count + 1
quotedArray = emptyArray
Else
quotedArray = Arrayappend(quotedArray, delim)
End If
Next
delimSplit = finalArray
End Function
Public Function removeBadChars (fullString As String) As String
'** remove the characters that commonly cause XML parsing errors
'** (there are much more generic ways to do this, but I don't feel like
'** writing something that generic right now)
Dim returnString As String
returnString = Replace(fullString, "&", "&")
returnString = Replace(returnString, "<", "<")
returnString = Replace(returnString, ">", ">")
removeBadChars = returnString
End Function
Public Function replaceBadTagNameChars (fullString As String) As String
'** remove some common characters that are illegal in XML tag names,
'** per:
'** http://www.w3.org/TR/REC-xml/#sec-starttags
'** Keep in mind that this is an incomplete list; for a more complete list,
'** compare against the character strings produced by the XMLChars
'** class at:
'** http://www.nsftools.com/tips/NotesTips.htm#xmlchars
Dim i As Integer
Dim charCode As Long
Dim returnString As String
returnString = fullString
For i = 1 To Len(returnString)
charCode = Uni(Mid$(returnString, i, 1))
If (charCode < 65) Or _
(charCode > 90 And charCode < 97) Or _
(charCode > 122 And charCode < 192) Then
If (i = 1) Then
Mid$(returnString, i, 1) = "_"
Elseif (Instr("0123456789.-:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -