Figure 1 SAX Interface Descriptions
Interface |
Description |
ContentHandler |
Primary SAX interface that models the Infoset's core information items |
ErrorHandler |
Models fatal errors, errors, and warnings (as per XML 1.0) |
DTDHandler |
Models unparsed entities and notations |
EntityResolver |
Allows an application to perform custom resolution of external entity identifiers |
LexicalHandler |
Models noncore lexical information (comments, CDATA sections, entity references, and so on) |
DeclHandler |
Models element and attribute declarations |
XMLReader |
Makes it possible to tie the previously listed interfaces together in order to process a complete document information item |
Attributes |
Models a collection of attributes |
Locator |
Provides contextual information about the caller |
Figure 2 SAX Interface Names
Java Language |
MSXML COM and C++ |
MSXML COM and Visual Basic |
ContentHandler |
ISAXContentHandler |
IVBSAXContentHandler |
ErrorHandler |
ISAXErrorHandler |
IVBSAXErrorHandler |
DTDHandler |
ISAXDTDHandler |
IVBSAXDTDHandler |
EntityResolver |
ISAXEntityResolver |
IVBSAXEntityResolver |
LexicalHandler |
ISAXLexicalHandler |
IVBSAXLexicalHandler |
DeclHandler |
ISAXDeclHandler |
IVBSAXDeclHandler |
XMLReader |
ISAXXMLReader |
IVBSAXXMLReader |
Attributes |
ISAXAttributes |
IVBSAXAttributes |
Locator |
ISAXLocator |
IVBSAXLocator |
Figure 3 ContentHandler Members
Method |
Description |
startDocument |
Signals the beginning of a document. |
endDocument |
Signals the end of a document. |
startElement |
Signals the beginning of an element. |
endElement |
Signals the end of an element. |
startPrefixMapping |
Signals the beginning of a prefix-URI Namespace mapping scope. |
endPrefixMapping |
Signals the end of a prefix-URI Namespace mapping scope. |
characters |
Signals character data. |
ignorableWhitespace |
Signals ignorable whitespace in element content. This method is not called in the current implementation because the parser is nonvalidating. |
processingInstruction |
Signals a processing instruction. |
skippedEntity |
Signals a skipped entity. |
Property |
Description |
documentLocator |
Receives a Locator interface reference, which provides methods for returning the column number, line number, PublicID, or SystemID from the caller for a current item. |
Figure 4 Using a Locator Object
' Shows how a SAX event producer hands a Locator to a content handler and
' then emits an (empty) document.
'   ch - content handler that receives the locator and the document events
Public Sub GenerateXML(ch As IVBSAXContentHandler)
    Dim atts As New CVBSAXAttributesImpl   ' declared by the excerpt; not used below
    Dim loc As New CVBSAXLocatorImpl

    ' give the handler its locator before the first event is sent
    Set ch.documentLocator = loc

    loc.setLineNumber 0
    loc.setColumnNumber 0
    ' string literals need double quotes in VB; a single quote starts a
    ' comment and would leave these calls without an argument
    loc.setSystemId "https://www.develop.com/foo.xml"
    loc.setPublicId ""

    ch.startDocument
    ' the ch implementation can look at the Locator
    ' information during any method call
    ch.endDocument
End Sub
Figure 5 CSerializer Excerpt
' CSerializer is a generic XML 1.0 serializer for IVBSAXContentHandler
Implements IVBSAXContentHandler
' some helper methods omitted...
•••
' IVBSAXContentHandler method implementations
' Character data is appended to the output exactly as received.
' NOTE(review): no escaping of "<" or "&" happens here - confirm whether
' appendXML (not shown) takes care of it.
Private Sub IVBSAXContentHandler_characters(ByVal strChars As String)
    Call appendXML(strChars)
End Sub
' Keeps a reference to the supplied locator in m_locator.
Private Property Set IVBSAXContentHandler_documentLocator( _
        ByVal loc As MSXML2.IVBSAXLocator)
    Set m_locator = loc
End Property
' End of document: this handler takes no action.
Private Sub IVBSAXContentHandler_endDocument()
    ' intentionally empty
End Sub
' Writes the element's end tag using its qualified name.
Private Sub IVBSAXContentHandler_endElement( _
        ByVal strNamespaceURI As String, _
        ByVal strLocalName As String, _
        ByVal strQName As String)
    Dim closeTag As String

    closeTag = "</" & strQName & ">"
    appendXML closeTag
End Sub
' End of a prefix mapping scope: this handler takes no action.
Private Sub IVBSAXContentHandler_endPrefixMapping( _
        ByVal strPrefix As String)
    ' intentionally empty
End Sub
' Ignorable whitespace is appended to the output unchanged.
Private Sub IVBSAXContentHandler_ignorableWhitespace( _
        ByVal strChars As String)
    Call appendXML(strChars)
End Sub
' Writes a processing instruction as "<?target data?>".
Private Sub IVBSAXContentHandler_processingInstruction( _
        ByVal strTarget As String, ByVal strData As String)
    Dim piText As String

    piText = "<?" & strTarget & " " & strData & "?>"
    appendXML piText
End Sub
' Shows a warning message box naming the skipped entity, then calls
' printLocation.
Private Sub IVBSAXContentHandler_skippedEntity(ByVal strName As String)
    Dim warningText As String

    warningText = "###warning: skipped entity: " & strName
    MsgBox warningText
    Call printLocation
End Sub
' initialize state: start of document resets the pending namespace
' declarations
Private Sub IVBSAXContentHandler_startDocument()
    Call resetNSDecls
End Sub
' Serializes the element's start tag, including its attributes and any
' namespace declarations collected in m_currentNSDecls.
'   strNamespaceURI - namespace URI of the element (not used here)
'   strLocalName    - local name of the element (not used here)
'   strQName        - qualified name written into the tag
'   oAttributes     - attributes written as name='value' pairs
' Attribute values and namespace URIs are delimited with single quotes, so
' "&", "<" and "'" inside them are replaced by entity references; writing
' them raw would produce malformed XML.
Private Sub IVBSAXContentHandler_startElement(ByVal strNamespaceURI _
        As String, ByVal strLocalName As String, ByVal strQName As _
        String, ByVal oAttributes As MSXML2.IVBSAXAttributes)
    Dim i As Long
    Dim escaped As String

    ' begin tag
    appendXML "<" & strQName

    ' serialize attributes
    For i = 0 To oAttributes.length - 1
        escaped = oAttributes.getValue(i)
        ' "&" must be replaced first so the entities added below survive
        escaped = Replace(escaped, "&", "&amp;")
        escaped = Replace(escaped, "<", "&lt;")
        escaped = Replace(escaped, "'", "&apos;")
        appendXML " " & oAttributes.getQName(i) & "='" & escaped & "'"
    Next

    ' serialize namespace decls
    If Not m_currentNSDecls Is Nothing Then
        Dim nsdecl As CNSMapping
        For Each nsdecl In m_currentNSDecls
            escaped = nsdecl.uri
            escaped = Replace(escaped, "&", "&amp;")
            escaped = Replace(escaped, "<", "&lt;")
            escaped = Replace(escaped, "'", "&apos;")
            If nsdecl.prefix = "" Then
                ' empty prefix means a default namespace declaration
                appendXML " xmlns='" & escaped & "'"
            Else
                appendXML " xmlns:" & nsdecl.prefix & "='" & escaped & "'"
            End If
        Next
    End If
    ' the declarations belong to this element only
    resetNSDecls

    ' close the start tag
    appendXML ">"
End Sub
' Records the prefix/URI pair in m_currentNSDecls so that the next
' startElement call can write it out as a namespace declaration.
Private Sub IVBSAXContentHandler_startPrefixMapping( _
        ByVal strPrefix As String, ByVal strURI As String)
    Dim mapping As CNSMapping

    Set mapping = New CNSMapping
    With mapping
        .prefix = strPrefix
        .uri = strURI
    End With
    m_currentNSDecls.Add mapping
End Sub
Figure 6 startElement
' Excerpt: pushes a state identifier onto m_ctx for each recognized element
' in the invoice namespace. The "•••" line marks cases left out of this
' excerpt.
Private Sub IVBSAXContentHandler_startElement(ByVal _
strNamespaceURI As String, ByVal strLocalName As String, _
ByVal strQName As String, ByVal oAttributes As _
MSXML2.IVBSAXAttributes)
' elements from any other namespace are ignored
If strNamespaceURI = "urn:www-develop-com:invoices" Then
Select Case strLocalName
Case "Invoice"
m_ctx.Push stateInvoice
' a new CInvoice is created when the Invoice element starts
Set m_inv = New CInvoice
Case "InvoiceID"
m_ctx.Push stateInvoiceID
Case "CustomerName"
m_ctx.Push stateCustomerName
•••
End Select
End If
End Sub
Figure 7 CDeserializer Implementation
Option Explicit
Implements IVBSAXContentHandler

' CDeserializer builds a CInvoice from SAX events. startElement/endElement
' drive a stack of state identifiers; character data is buffered per element
' and committed when the element ends, because a SAX parser is allowed to
' deliver one run of text in several characters calls.

' namespace of the elements this handler understands
Private Const INVOICE_NS As String = "urn:www-develop-com:invoices"

' reference to deserialized CInvoice instance
Dim m_inv As CInvoice
' current LineItem state
Dim m_curSku As String
Dim m_curDesc As String
Dim m_curPrice As String
' text collected so far for the element currently being read
Dim m_text As String
' stack of state identifiers
Dim m_ctx As CStack
' state identifiers
Private Enum InvoiceStates
    stateInvoice = 1
    stateInvoiceID
    stateCustomerName
    stateLineItems
    stateLineItem
    stateSku
    stateDescription
    statePrice
    stateUnknown
End Enum

' returns the generated CInvoice instance (Nothing if no Invoice element
' has been seen since the last startDocument)
Public Function getInvoice() As CInvoice
    Set getInvoice = m_inv
End Function

Private Sub Class_Initialize()
    Set m_ctx = New CStack ' stack for managing element states
End Sub

' Buffers text for the elements whose content is data. Nothing is written
' to the invoice here; endElement commits the complete text.
Private Sub IVBSAXContentHandler_characters(ByVal strChars As String)
    If m_inv Is Nothing Then Exit Sub

    Select Case m_ctx.Peek()
        Case stateInvoiceID, stateCustomerName, stateSku, _
             stateDescription, statePrice
            m_text = m_text & strChars
        Case Else
            ' do nothing
    End Select
End Sub

' manage state machine in startElement/endElement
Private Sub IVBSAXContentHandler_startElement(ByVal strNamespaceURI As _
        String, ByVal strLocalName As String, ByVal strQName As String, _
        ByVal oAttributes As MSXML2.IVBSAXAttributes)
    If strNamespaceURI = INVOICE_NS Then
        ' if we're processing the right document (per namespace) then
        ' push the state identifier onto stack for the current element
        Select Case strLocalName
            Case "Invoice"
                m_ctx.Push stateInvoice
                Set m_inv = New CInvoice
            Case "InvoiceID"
                m_ctx.Push stateInvoiceID
            Case "CustomerName"
                m_ctx.Push stateCustomerName
            Case "LineItems"
                m_ctx.Push stateLineItems
            Case "LineItem"
                m_ctx.Push stateLineItem
            Case "Sku"
                m_ctx.Push stateSku
            Case "Description"
                m_ctx.Push stateDescription
            Case "Price"
                m_ctx.Push statePrice
            Case Else
                m_ctx.Push stateUnknown
        End Select
        ' a new element starts with an empty text buffer
        m_text = ""
    End If
End Sub

Private Sub IVBSAXContentHandler_endElement(ByVal strNamespaceURI As _
        String, ByVal strLocalName As String, ByVal strQName As String)
    If strNamespaceURI <> INVOICE_NS Then Exit Sub

    ' commit the buffered text; elements without text leave the invoice
    ' untouched, exactly as when characters was never called for them
    If Len(m_text) > 0 Then
        If Not m_inv Is Nothing Then
            Select Case m_ctx.Peek()
                Case stateInvoiceID
                    m_inv.invoiceID = m_text
                Case stateCustomerName
                    m_inv.customerName = m_text
                Case stateSku
                    m_curSku = m_text
                Case stateDescription
                    m_curDesc = m_text
                Case statePrice
                    ' the item is added once its complete price is known
                    m_curPrice = m_text
                    m_inv.addItem m_curSku, m_curDesc, CDbl(m_curPrice)
                Case Else
                    ' do nothing
            End Select
        End If
        m_text = ""
    End If

    ' pop the current element state off stack
    m_ctx.Pop

    ' if a LineItem element, reset LineItem context variables
    If strLocalName = "LineItem" Then
        m_curSku = ""
        m_curDesc = ""
        m_curPrice = ""
    End If
End Sub

' Resets all per-document state so that a previous parse - including one
' that stopped part-way through - cannot affect this one.
Private Sub IVBSAXContentHandler_startDocument()
    Set m_inv = Nothing
    Set m_ctx = New CStack
    m_curSku = ""
    m_curDesc = ""
    m_curPrice = ""
    m_text = ""
End Sub

' the remaining IVBSAXContentHandler members are required by the interface
' but need no behavior here
Private Sub IVBSAXContentHandler_startPrefixMapping(ByVal strPrefix _
        As String, ByVal strURI As String)
End Sub

Private Sub IVBSAXContentHandler_endPrefixMapping(ByVal strPrefix _
        As String)
End Sub

Private Sub IVBSAXContentHandler_ignorableWhitespace(ByVal strChars _
        As String)
End Sub

Private Sub IVBSAXContentHandler_processingInstruction(ByVal strTarget _
        As String, ByVal strData As String)
End Sub

Private Sub IVBSAXContentHandler_skippedEntity(ByVal strName As String)
End Sub

Private Sub IVBSAXContentHandler_endDocument()
End Sub

Private Property Set IVBSAXContentHandler_documentLocator(ByVal RHS _
        As MSXML2.IVBSAXLocator)
End Property
Figure 9 IVBSAXErrorHandler Implementation
Implements IVBSAXContentHandler
Implements IVBSAXErrorHandler
' Errors are passed to processError together with the locator and code.
Private Sub IVBSAXErrorHandler_error( _
        ByVal oLocator As MSXML2.IVBSAXLocator, _
        ByVal strError As String, _
        ByVal nErrorCode As Long)
    Call processError(oLocator, strError, nErrorCode)
End Sub
' Fatal errors are passed to processError, the same routine used for errors.
Private Sub IVBSAXErrorHandler_fatalError( _
        ByVal oLocator As MSXML2.IVBSAXLocator, _
        ByVal strError As String, _
        ByVal nErrorCode As Long)
    Call processError(oLocator, strError, nErrorCode)
End Sub
' Warnings are passed to logWarning rather than processError.
Private Sub IVBSAXErrorHandler_warning( _
        ByVal oLocator As MSXML2.IVBSAXLocator, _
        ByVal strError As String, _
        ByVal nErrorCode As Long)
    Call logWarning(oLocator, strError, nErrorCode)
End Sub
' IVBSAXContentHandler methods omitted for clarity
Figure 10 Creating a SAX Pipeline
' builds a SAX pipeline of 3 filters (CVBSAXXMLFilterImpl) and the
' VBSAXXMLReader class, parses the document named in txtFileName through
' it, and shows the serialized result in txtResult. Each filter is given
' the same text stream opened on c:\temp\out.txt.
Private Sub cmdTestXMLFilters_Click()
    ' clear TextBox
    txtResult = ""

    Dim reader As IVBSAXXMLReader
    Dim filter1 As CVBSAXXMLFilterImpl, filter2 As CVBSAXXMLFilterImpl
    Dim filter3 As CVBSAXXMLFilterImpl
    Dim xmlFilter1 As IVBSAXXMLFilter, xmlFilter2 As IVBSAXXMLFilter
    Dim xmlFilter3 As IVBSAXXMLFilter
    Dim txtStream As Scripting.TextStream

    Set txtStream = createTextStream("c:\temp\out.txt")

    ' create the VBSAXXMLReader30 implementation of IVBSAXXMLReader
    Set reader = New VBSAXXMLReader30

    ' create the first filter & set its message
    Set filter1 = New CVBSAXXMLFilterImpl
    Set xmlFilter1 = filter1
    filter1.setFilterID "FILTER 1"
    filter1.setOutputStream txtStream
    ' hook first filter up to VBSAXXMLReader30 instance
    Set xmlFilter1.Parent = reader

    ' create the second filter & set its message
    Set filter2 = New CVBSAXXMLFilterImpl
    Set xmlFilter2 = filter2
    filter2.setFilterID "FILTER 2"
    filter2.setOutputStream txtStream
    ' hook second filter up to first filter
    Set xmlFilter2.Parent = filter1

    ' create the third filter & set its message
    Set filter3 = New CVBSAXXMLFilterImpl
    Set xmlFilter3 = filter3
    filter3.setFilterID "FILTER 3"
    filter3.setOutputStream txtStream
    ' hook third filter up to second filter
    Set xmlFilter3.Parent = filter2

    ' the pipeline now looks like this:
    ' filter3 <-> filter2 <-> filter1 <-> reader

    ' create an IVBSAXContentHandler implementation (CSerializer)
    ' and register with the last filter in the pipeline
    Set reader = filter3
    Dim ser As CSerializer
    Set ser = New CSerializer
    ser.cacheXML = True
    Set reader.contentHandler = ser
    ' SAX2 property names are fixed identifiers that use the http scheme;
    ' an https spelling is a different string and is not the SAX2 name
    reader.putProperty "http://xml.org/sax/properties" & _
        "/lexical-handler", ser

    ' parse and display output
    reader.parseURL txtFileName.Text
    txtResult = "Output from filter pipeline written to..." & _
        vbNewLine & "c:\temp\out.txt" & vbNewLine & vbNewLine
    txtResult = txtResult & ser.getSerializedXML()
    txtStream.Close
End Sub
Figure 11 Flow of Method Calls