~singpolyma/haskell-libxml-sax

5946d5acee5b9c3abf0d03bd8369412240b159c8 — John Millikin 13 years ago 4a743c0 + ba5bd1f
Version 0.7
9 files changed, 1743 insertions(+), 424 deletions(-)

D Text/XML/LibXML/SAX.chs
A Text/XML/LibXML/SAX.hs
A hslibxml-shim.c
A hslibxml-shim.h
M libxml-sax.cabal
A scripts/common.bash
A scripts/run-tests
A tests/Properties.hs
A tests/libxml-sax-tests.cabal
D Text/XML/LibXML/SAX.chs => Text/XML/LibXML/SAX.chs +0 -419
@@ 1,419 0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module: Text.XML.LibXML.SAX
-- Copyright: 2010 John Millikin
-- License: MIT
--
-- Maintainer: jmillikin@gmail.com
-- Portability: portable
--
-- Bindings for the libXML2 SAX interface
--
-----------------------------------------------------------------------------

{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE RankNTypes #-}
module Text.XML.LibXML.SAX
	(
	-- * Parser
	  Parser
	, newParserIO
	, newParserST
	
	-- ** Callbacks
	, Callback
	, setCallback
	, clearCallback
	
	, parsedBeginDocument
	, parsedEndDocument
	, parsedBeginElement
	, parsedEndElement
	, parsedCharacters
	, parsedComment
	, parsedInstruction
	, parsedDoctype
	
	-- *** Buffer-based callbacks
	, parsedCharactersBuffer
	, parsedCommentBuffer
	
	-- ** Parser input
	, parseText
	, parseLazyText
	, parseBytes
	, parseLazyBytes
	, parseBuffer
	, parseComplete
	) where
import qualified Control.Exception as E
import Control.Monad (when, unless)
import qualified Control.Monad.ST as ST
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as BU
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.XML.Types as X
import Data.IORef (IORef, newIORef, readIORef, writeIORef)
import Foreign hiding (free)
import Foreign.C
import qualified Foreign.Concurrent as FC

#include <libxml/parser.h>
#include <string.h>

{# pointer *xmlParserCtxt as ParserHandle newtype #}

data Parser m = Parser
	{ parserHandle :: ForeignPtr ParserHandle
	, parserCallbacks :: ForeignPtr ()
	, parserErrorRef :: IORef (Maybe E.SomeException)
	, parserOnError :: T.Text -> m ()
	, parserToIO :: forall a. m a -> IO a
	, parserFromIO :: forall a. IO a -> m a
	}

newParserIO :: (T.Text -> IO ()) -- ^ An error handler, called if parsing fails
            -> Maybe T.Text -- ^ An optional filename or URI
            -> IO (Parser IO)
newParserIO onError filename = E.block $ do
	ref <- newIORef Nothing
	
	ParserHandle handlePtr <-
		maybeWith withUTF8 filename $ \cFilename ->
		allocaBytes {# sizeof xmlSAXHandler #} $ \sax -> do
		void $ {# call memset #} sax 0 {# sizeof xmlSAXHandler #}
		{#set xmlSAXHandler->initialized #} sax xmlSax2Magic
		{#call xmlCreatePushParserCtxt #} sax nullPtr nullPtr 0 cFilename
	
	sax <- {#get xmlParserCtxt->sax #} handlePtr
	cCallbacks <- FC.newForeignPtr sax $ freeParserCallbacks sax
	
	{#set xmlParserCtxt->replaceEntities #} handlePtr 1
	
	parserFP <- newForeignPtr cParserFree handlePtr
	return $ Parser parserFP cCallbacks ref onError id id

newParserST :: (T.Text -> ST.ST s ()) -- ^ An error handler, called if parsing fails
            -> Maybe T.Text -- ^ An optional filename or URI
            -> ST.ST s (Parser (ST.ST s))
newParserST onError filename = ST.unsafeIOToST $ do
	p <- newParserIO (\_ -> return ()) filename
	return $ p
		{ parserToIO = ST.unsafeSTToIO
		, parserFromIO = ST.unsafeIOToST
		, parserOnError = onError
		}

freeParserCallbacks :: Ptr () -> IO ()
freeParserCallbacks raw = do
	{# get xmlSAXHandler->startElementNs #} raw >>= freeFunPtr
	{# get xmlSAXHandler->endElementNs #} raw >>= freeFunPtr
	{# get xmlSAXHandler->characters #} raw >>= freeFunPtr
	{# get xmlSAXHandler->comment #} raw >>= freeFunPtr
	{# get xmlSAXHandler->processingInstruction #} raw >>= freeFunPtr

foreign import ccall "libxml/parser.h &xmlFreeParserCtxt"
	cParserFree :: FunPtr (Ptr ParserHandle -> IO ())

-- | A callback should return 'True' to continue parsing, or 'False'
-- to cancel.
--
data Callback m a = Callback (Parser m -> a -> IO ()) (Parser m -> IO ())

setCallback :: Parser m -> Callback m a -> a -> m ()
setCallback p (Callback set _) io = parserFromIO p $ set p io

clearCallback :: Parser m -> Callback m a -> m ()
clearCallback p (Callback _ clear) = parserFromIO p $ clear p

catchRef :: Parser m -> m Bool -> IO ()
catchRef p io = do
	continue <- E.catch (E.unblock (parserToIO p io)) $ \e -> do
		writeIORef (parserErrorRef p) $ Just e
		return False
	unless continue $ withParserIO p $ {# call xmlStopParser #}

callback :: (Parser m -> a -> IO (FunPtr b))
         -> (Ptr () -> IO (FunPtr b))
         -> (Ptr () -> FunPtr b -> IO ())
         -> Callback m a
callback wrap getPtr setPtr = Callback set clear where
	set parser io = withForeignPtr (parserCallbacks parser) $ \p -> do
		free p
		wrap parser io >>= setPtr p
	clear parser = withForeignPtr (parserCallbacks parser) $ \p -> do
		free p
		setPtr p nullFunPtr
	free p = getPtr p >>= freeFunPtr

-- Callback wrappers
type CUString = Ptr CUChar

type Callback0 = Ptr () -> IO ()

type StartElementNsSAX2Func = (Ptr () -> CUString -> CUString
                               -> CUString -> CInt -> Ptr CUString -> CInt
                               -> CInt -> Ptr CUString -> IO ())
type EndElementNsSAX2Func = (Ptr () -> CUString -> CUString -> CUString
                             -> IO ())
type CharactersSAXFunc = (Ptr () -> CUString -> CInt -> IO ())

type CommentSAXFunc = Ptr () -> CUString -> IO ()

type ProcessingInstructionSAXFunc = Ptr () -> CUString -> CUString -> IO ()

type ExternalSubsetSAXFunc = Ptr () -> CUString -> CUString -> CUString -> IO ()

foreign import ccall "wrapper"
	allocCallback0 :: Callback0 -> IO (FunPtr Callback0)

foreign import ccall "wrapper"
	allocCallbackBeginElement :: StartElementNsSAX2Func -> IO (FunPtr StartElementNsSAX2Func)

foreign import ccall "wrapper"
	allocCallbackEndElement :: EndElementNsSAX2Func -> IO (FunPtr EndElementNsSAX2Func)

foreign import ccall "wrapper"
	allocCallbackCharacters :: CharactersSAXFunc -> IO (FunPtr CharactersSAXFunc)

foreign import ccall "wrapper"
	allocCallbackComment :: CommentSAXFunc -> IO (FunPtr CommentSAXFunc)

foreign import ccall "wrapper"
	allocCallbackInstruction :: ProcessingInstructionSAXFunc -> IO (FunPtr ProcessingInstructionSAXFunc)

foreign import ccall "wrapper"
	allocCallbackExternalSubset :: ExternalSubsetSAXFunc -> IO (FunPtr ExternalSubsetSAXFunc)

-- localname, prefix, namespace, value_begin, value_end
data CAttribute = CAttribute CString CString CString CString CString

splitCAttributes :: CInt -> Ptr CString -> IO [CAttribute]
splitCAttributes = loop 0 where
	loop _      0 _     = return []
	loop offset n attrs = do
		c_ln <- peekElemOff attrs (offset + 0)
		c_prefix <- peekElemOff attrs (offset + 1)
		c_ns <- peekElemOff attrs (offset + 2)
		c_vbegin <- peekElemOff attrs (offset + 3)
		c_vend <- peekElemOff attrs (offset + 4)
		as <- loop (offset + 5) (n - 1) attrs
		return (CAttribute c_ln c_prefix c_ns c_vbegin c_vend : as)

convertCAttribute :: CAttribute -> IO X.Attribute
convertCAttribute (CAttribute c_ln c_pfx c_ns c_vbegin c_vend) = do
	ln <- peekUTF8 c_ln
	pfx <- maybePeek peekUTF8 c_pfx
	ns <- maybePeek peekUTF8 c_ns
	val <- peekUTF8Len (c_vbegin, minusPtr c_vend c_vbegin)
	return (X.Attribute (X.Name ln ns pfx) [X.ContentText val])

-- Exposed callbacks

wrapCallback0 :: Parser m -> m Bool -> IO (FunPtr Callback0)
wrapCallback0 p io = allocCallback0 $ \_ -> catchRef p io

parsedBeginDocument :: Callback m (m Bool)
parsedBeginDocument = callback wrapCallback0
	{# get xmlSAXHandler->startDocument #}
	{# set xmlSAXHandler->startDocument #}

parsedEndDocument :: Callback m (m Bool)
parsedEndDocument = callback wrapCallback0
	{# get xmlSAXHandler->endDocument #}
	{# set xmlSAXHandler->endDocument #}

wrapBeginElement :: Parser m -> (X.Name -> [X.Attribute] -> m Bool)
                 -> IO (FunPtr StartElementNsSAX2Func)
wrapBeginElement p io =
	allocCallbackBeginElement $ \_ cln cpfx cns _ _ n_attrs _ raw_attrs ->
	catchRef p $ parserFromIO p $ do
		ns <- maybePeek peekUTF8 $ castPtr cns
		pfx <- maybePeek peekUTF8 $ castPtr cpfx
		ln <- peekUTF8 $ castPtr cln
		c_attrs <- splitCAttributes n_attrs (castPtr raw_attrs)
		attrs <- mapM convertCAttribute c_attrs
		parserToIO p $ io (X.Name ln ns pfx) attrs

parsedBeginElement :: Callback m (X.Name -> [X.Attribute] -> m Bool)
parsedBeginElement = callback wrapBeginElement
	{# get xmlSAXHandler->startElementNs #}
	{# set xmlSAXHandler->startElementNs #}

wrapEndElement :: Parser m -> (X.Name -> m Bool)
               -> IO (FunPtr EndElementNsSAX2Func)
wrapEndElement p io =
	allocCallbackEndElement $ \_ cln cpfx cns ->
	catchRef p $ parserFromIO p $ do
		ns <- maybePeek peekUTF8 $ castPtr cns
		pfx <- maybePeek peekUTF8 $ castPtr cpfx
		ln <- peekUTF8 $ castPtr cln
		parserToIO p $ io $ X.Name ln ns pfx

parsedEndElement :: Callback m (X.Name -> m Bool)
parsedEndElement = callback wrapEndElement
	{# get xmlSAXHandler->endElementNs #}
	{# set xmlSAXHandler->endElementNs #}

wrapCharacters :: Parser m -> (T.Text -> m Bool)
               -> IO (FunPtr CharactersSAXFunc)
wrapCharacters p io =
	allocCallbackCharacters $ \_ cstr clen ->
	catchRef p $ parserFromIO p $ do
		text <- peekStrictUTF8Len (castPtr cstr, fromIntegral clen)
		parserToIO p $ io text

parsedCharacters :: Callback m (T.Text -> m Bool)
parsedCharacters = callback wrapCharacters
	{# get xmlSAXHandler->characters #}
	{# set xmlSAXHandler->characters #}

wrapComment :: Parser m -> (T.Text -> m Bool)
            -> IO (FunPtr CommentSAXFunc)
wrapComment p io =
	allocCallbackComment $ \_ cstr ->
	catchRef p $ parserFromIO p $ do
		text <- peekStrictUTF8 (castPtr cstr)
		parserToIO p $ io text

parsedComment :: Callback m (T.Text -> m Bool)
parsedComment = callback wrapComment
	{# get xmlSAXHandler->comment #}
	{# set xmlSAXHandler->comment #}

wrapInstruction :: Parser m -> (X.Instruction -> m Bool)
                -> IO (FunPtr ProcessingInstructionSAXFunc)
wrapInstruction p io =
	allocCallbackInstruction $ \_ ctarget cdata ->
	catchRef p $ parserFromIO p $ do
		target <- peekUTF8 (castPtr ctarget)
		value <- peekUTF8 (castPtr cdata)
		parserToIO p $ io $ X.Instruction target value

parsedInstruction :: Callback m (X.Instruction -> m Bool)
parsedInstruction = callback wrapInstruction
	{# get xmlSAXHandler->processingInstruction #}
	{# set xmlSAXHandler->processingInstruction #}

wrapExternalSubset :: Parser m -> (X.Doctype -> m Bool) -> IO (FunPtr ExternalSubsetSAXFunc)
wrapExternalSubset p io =
	allocCallbackExternalSubset $ \_ cname cpublic csystem ->
	catchRef p $ parserFromIO p $ do
		name <- peekUTF8 (castPtr cname)
		public <- maybePeek peekUTF8 (castPtr cpublic)
		system <- maybePeek peekUTF8 (castPtr csystem)
		let external = case (public, system) of
			(Nothing, Just s) -> Just $ X.SystemID s
			(Just p', Just s) -> Just $ X.PublicID p' s
			_ -> Nothing
		parserToIO p $ io $ X.Doctype name external []

parsedDoctype :: Callback m (X.Doctype -> m Bool)
parsedDoctype = callback wrapExternalSubset
	{# get xmlSAXHandler->externalSubset #}
	{# set xmlSAXHandler->externalSubset #}

wrapCharactersBuffer :: Parser m -> ((Ptr Word8, Integer) -> m Bool)
                     -> IO (FunPtr CharactersSAXFunc)
wrapCharactersBuffer p io =
	allocCallbackCharacters $ \_ cstr clen ->
	catchRef p $ do
		io (castPtr cstr, fromIntegral clen)

parsedCharactersBuffer :: Callback m ((Ptr Word8, Integer) -> m Bool)
parsedCharactersBuffer = callback wrapCharactersBuffer
	{# get xmlSAXHandler->characters #}
	{# set xmlSAXHandler->characters #}

wrapCommentBuffer :: Parser m -> ((Ptr Word8, Integer) -> m Bool)
            -> IO (FunPtr CommentSAXFunc)
wrapCommentBuffer p io =
	allocCallbackComment $ \_ cstr ->
	catchRef p $ parserFromIO p $ do
		clen <- {# call xmlStrlen #} cstr
		parserToIO p $ io (castPtr cstr, fromIntegral clen)

parsedCommentBuffer :: Callback m ((Ptr Word8, Integer) -> m Bool)
parsedCommentBuffer = callback wrapCommentBuffer
	{# get xmlSAXHandler->comment #}
	{# set xmlSAXHandler->comment #}

withParserIO :: Parser m -> (ParserHandle -> IO a) -> IO a
withParserIO p io = withForeignPtr (parserHandle p) $ io . ParserHandle

parseImpl :: Parser m -> (ParserHandle -> IO CInt) -> m ()
parseImpl p io = parserFromIO p $ do
	writeIORef (parserErrorRef p) Nothing
	rc <- E.block $ withParserIO p io
	touchForeignPtr $ parserCallbacks p
	
	threw <- readIORef $ parserErrorRef p
	case threw of
		Nothing -> return ()
		Just exc -> E.throwIO exc
	
	when (rc /= 0) $ do
		err <- getParseError p
		parserToIO p $ parserOnError p $ err

parseText :: Parser m -> T.Text -> m ()
parseText p = parseBytes p . TE.encodeUtf8

parseLazyText :: Parser m -> TL.Text -> m ()
parseLazyText p = parseText p . T.concat . TL.toChunks

parseBytes :: Parser m -> B.ByteString -> m ()
parseBytes p bytes = parseImpl p $ \h ->
	BU.unsafeUseAsCStringLen bytes $ \(cstr, len) ->
	{# call xmlParseChunk #} h cstr (fromIntegral len) 0

parseLazyBytes :: Parser m -> BL.ByteString -> m ()
parseLazyBytes p = parseBytes p . B.concat . BL.toChunks

parseBuffer :: Parser m -> (Ptr Word8, Integer) -> m ()
parseBuffer p (ptr, len) = parseImpl p $ \h ->
	{# call xmlParseChunk #} h (castPtr ptr) (fromIntegral len) 0

-- | Finish parsing any buffered data, and check that the document was
-- closed correctly.
-- 
parseComplete :: Parser m -> m ()
parseComplete p = parseImpl p $ \h ->
	{#call xmlParseChunk #} h nullPtr 0 1

getParseError :: Parser m -> IO T.Text
getParseError p = withParserIO p $ \h -> do
	let ParserHandle h' = h
	errInfo <- {#call xmlCtxtGetLastError #} $ castPtr h'
	peekStrictUTF8 =<< {#get xmlError->message #} errInfo
	

peekStrictUTF8 :: CString -> IO T.Text
peekStrictUTF8 = fmap (TE.decodeUtf8) . B.packCString

peekStrictUTF8Len :: CStringLen -> IO T.Text
peekStrictUTF8Len = fmap (TE.decodeUtf8) . B.packCStringLen

peekUTF8 :: CString -> IO TL.Text
peekUTF8 = fmap (fromStrict . TE.decodeUtf8) . B.packCString

peekUTF8Len :: CStringLen -> IO TL.Text
peekUTF8Len = fmap (fromStrict . TE.decodeUtf8) . B.packCStringLen

withUTF8 :: T.Text -> (CString -> IO a) -> IO a
withUTF8 = B.useAsCString . TE.encodeUtf8

fromStrict :: T.Text -> TL.Text
fromStrict t = TL.fromChunks [t]

freeFunPtr :: FunPtr a -> IO ()
freeFunPtr ptr = if ptr == nullFunPtr
	then return ()
	else freeHaskellFunPtr ptr

-- XML_SAX2_MAGIC
xmlSax2Magic :: CUInt
xmlSax2Magic = 0xDEEDBEAF

A Text/XML/LibXML/SAX.hs => Text/XML/LibXML/SAX.hs +724 -0
@@ 0,0 1,724 @@
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE RankNTypes #-}

-----------------------------------------------------------------------------
-- |
-- Module: Text.XML.LibXML.SAX
-- Copyright: 2010 John Millikin
-- License: MIT
--
-- Maintainer: jmillikin@gmail.com
-- Portability: portable
--
-- Bindings for the libXML2 SAX interface
--
-----------------------------------------------------------------------------

module Text.XML.LibXML.SAX
	(
	-- * Parser
	  Parser
	, newParserIO
	, newParserST
	
	-- ** Parser input
	, parseBytes
	, parseComplete
	
	-- * Callbacks
	, Callback
	, setCallback
	, clearCallback
	
	-- ** Parse events
	, parsedBeginDocument
	, parsedEndDocument
	, parsedBeginElement
	, parsedEndElement
	, parsedCharacters
	, parsedReference
	, parsedComment
	, parsedInstruction
	, parsedCDATA
	, parsedWhitespace
	, parsedInternalSubset
	, parsedExternalSubset
	
	-- ** Warning and error reporting
	, reportWarning
	, reportError
	
	) where

import qualified Control.Exception as E
import           Control.Monad (when, unless)
import qualified Control.Monad.ST as ST
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as BU
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.XML.Types as X
import           Data.IORef (IORef, newIORef, readIORef, writeIORef)
import           Foreign hiding (free, void)
import           Foreign.C
import qualified Foreign.Concurrent as FC
import           Text.ParserCombinators.ReadP ((+++))
import qualified Text.ParserCombinators.ReadP as ReadP

data Context = Context

-- | A 'Parser' tracks the internal state of a LibXML parser context.
--
-- As LibXML is a very stateful library, parsers must operate within either
-- the 'IO' or 'ST.ST' monad. Use 'newParserIO' or 'newParserST' to create
-- parsers in the appropriate monad.
--
-- In general, clients should prefer 'newParserST', because ST values can be
-- safely computed with no side effects.
data Parser m = Parser
	{ parserHandle :: ForeignPtr Context
	, parserErrorRef :: IORef (Maybe E.SomeException)
	, parserToIO :: forall a. m a -> IO a
	, parserFromIO :: forall a. IO a -> m a
	}

newParserIO :: Maybe T.Text -- ^ An optional filename or URI
            -> IO (Parser IO)
newParserIO filename = E.block $ do
	ref <- newIORef Nothing
	
	raw <- maybeWith withUTF8 filename cAllocParser
	managed <- newForeignPtr_ raw
	
	FC.addForeignPtrFinalizer managed (cFreeParser raw)
	FC.addForeignPtrFinalizer managed (freeCallbacks raw)
	
	return (Parser managed ref id id)

newParserST :: Maybe T.Text -- ^ An optional filename or URI
            -> ST.ST s (Parser (ST.ST s))
newParserST filename = ST.unsafeIOToST $ do
	p <- newParserIO filename
	return $ p
		{ parserToIO = ST.unsafeSTToIO
		, parserFromIO = ST.unsafeIOToST
		}

parseImpl :: Parser m -> (Ptr Context -> IO a) -> m ()
parseImpl p io = parserFromIO p $ do
	writeIORef (parserErrorRef p) Nothing
	E.block (void (withParserIO p io))
	
	threw <- readIORef (parserErrorRef p)
	case threw of
		Nothing -> return ()
		Just exc -> E.throwIO exc

parseBytes :: Parser m -> B.ByteString -> m ()
parseBytes p bytes = parseImpl p $ \h ->
	BU.unsafeUseAsCStringLen bytes $ \(cstr, len) ->
	cParseChunk h cstr (fromIntegral len) 0

-- | Finish parsing any buffered data, and check that the document was
-- closed correctly.
-- 
parseComplete :: Parser m -> m ()
parseComplete p = parseImpl p (\h -> cParseChunk h nullPtr 0 1)

-- Callbacks {{{

freeCallbacks :: Ptr Context -> IO ()
freeCallbacks ctx = do
	getcb_startDocument ctx >>= freeFunPtr
	getcb_endDocument ctx >>= freeFunPtr
	getcb_startElementNs ctx >>= freeFunPtr
	getcb_endElementNs ctx >>= freeFunPtr
	getcb_characters ctx >>= freeFunPtr
	getcb_reference ctx >>= freeFunPtr
	getcb_comment ctx >>= freeFunPtr
	getcb_processingInstruction ctx >>= freeFunPtr
	getcb_cdataBlock ctx >>= freeFunPtr
	getcb_ignorableWhitespace ctx >>= freeFunPtr
	getcb_internalSubset ctx >>= freeFunPtr
	getcb_externalSubset ctx >>= freeFunPtr
	getcb_warning ctx >>= freeFunPtr
	getcb_error ctx >>= freeFunPtr

data Callback m a = Callback (Parser m -> a -> IO ()) (Parser m -> IO ())

-- | Set a callback computation to run when a particular parse event occurs.
-- The callback should return 'True' to continue parsing, or 'False'
-- to abort.
--
-- Alternatively, callbacks may throw an 'E.Exception' to abort parsing. The
-- exception will be propagated through to the caller of 'parseBytes' or
-- 'parseComplete'.
setCallback :: Parser m -> Callback m a -> a -> m ()
setCallback p (Callback set _) io = parserFromIO p (set p io)

-- | Remove a callback from the parser. This might also change the parser's
-- behavior, such as automatically expanding entity references when no
-- 'parsedReference' callback is set.
clearCallback :: Parser m -> Callback m a -> m ()
clearCallback p (Callback _ clear) = parserFromIO p (clear p)

catchRef :: Parser m -> Ptr Context -> m Bool -> IO ()
catchRef p cb_ctx io = withParserIO p $ \ctx ->
	(cWantCallback ctx cb_ctx >>=) $ \want ->
	when (want == 1) $ do
		continue <- E.catch (E.unblock (parserToIO p io)) $ \e -> do
			writeIORef (parserErrorRef p) (Just e)
			return False
		unless continue (cStopParser ctx)

catchRefIO :: Parser m -> Ptr Context -> IO Bool -> IO ()
catchRefIO p cb_ctx io = catchRef p cb_ctx (parserFromIO p io)

callback :: (Parser m -> a -> IO (FunPtr b))
         -> (Ptr Context -> IO (FunPtr b))
         -> (Ptr Context -> FunPtr b -> IO ())
         -> Callback m a
callback wrap getPtr setPtr = Callback set clear where
	set p io = withForeignPtr (parserHandle p) $ \ctx -> do
		free ctx
		wrap p io >>= setPtr ctx
	clear p = withForeignPtr (parserHandle p) $ \ctx -> do
		free ctx
		setPtr ctx nullFunPtr
	free ctx = getPtr ctx >>= freeFunPtr

-- begin document {{{

parsedBeginDocument :: Callback m (m Bool)
parsedBeginDocument = callback wrap_startDocument
	getcb_startDocument
	setcb_startDocument

type StartDocumentSAXFunc = Ptr Context -> IO ()

wrap_startDocument :: Parser m -> m Bool -> IO (FunPtr StartDocumentSAXFunc)
wrap_startDocument p io = newcb_startDocument (\ctx -> catchRef p ctx io)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_startDocument"
	getcb_startDocument :: Ptr Context -> IO (FunPtr StartDocumentSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_startDocument"
	setcb_startDocument :: Ptr Context -> FunPtr StartDocumentSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_startDocument  :: StartDocumentSAXFunc -> IO (FunPtr StartDocumentSAXFunc)

-- }}}

-- end document {{{

parsedEndDocument :: Callback m (m Bool)
parsedEndDocument = callback wrap_endDocument
	getcb_endDocument
	setcb_endDocument

type EndDocumentSAXFunc = Ptr Context -> IO ()

wrap_endDocument :: Parser m -> m Bool -> IO (FunPtr EndDocumentSAXFunc)
wrap_endDocument p io = newcb_endDocument (\ctx -> catchRef p ctx io)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_endDocument"
	getcb_endDocument :: Ptr Context -> IO (FunPtr EndDocumentSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_endDocument"
	setcb_endDocument :: Ptr Context -> FunPtr EndDocumentSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_endDocument  :: EndDocumentSAXFunc -> IO (FunPtr EndDocumentSAXFunc)

-- }}}

-- begin element {{{

parsedBeginElement :: Callback m (X.Name -> [(X.Name, [X.Content])] -> m Bool)
parsedBeginElement = callback wrap_beginElement
	getcb_startElementNs
	setcb_startElementNs

type StartElementNsSAX2Func = (Ptr Context -> CString -> CString -> CString -> CInt -> Ptr CString -> CInt -> CInt -> Ptr CString -> IO ())

wrap_beginElement :: Parser m -> (X.Name -> [(X.Name, [X.Content])] -> m Bool) -> IO (FunPtr StartElementNsSAX2Func)
wrap_beginElement p io =
	newcb_startElementNs $ \ctx cln cpfx cns _ _ n_attrs _ raw_attrs ->
	catchRefIO p ctx $ do
		ns <- maybePeek peekUTF8 (castPtr cns)
		pfx <- maybePeek peekUTF8 (castPtr cpfx)
		ln <- peekUTF8 (castPtr cln)
		attrs <- peekAttributes (castPtr raw_attrs) n_attrs
		parserToIO p (io (X.Name ln ns pfx) attrs)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_startElementNs"
	getcb_startElementNs :: Ptr Context -> IO (FunPtr StartElementNsSAX2Func)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_startElementNs"
	setcb_startElementNs :: Ptr Context -> FunPtr StartElementNsSAX2Func -> IO ()

foreign import ccall "wrapper"
	newcb_startElementNs :: StartElementNsSAX2Func -> IO (FunPtr StartElementNsSAX2Func)

peekAttributes :: Ptr CString -> CInt -> IO [(X.Name, [X.Content])]
peekAttributes ptr = loop 0 where
	loop _      0 = return []
	loop offset n = do
		local <- peekUTF8 =<< peekElemOff ptr (offset + 0)
		prefix <- maybePeek peekUTF8 =<< peekElemOff ptr (offset + 1)
		ns <- maybePeek peekUTF8 =<< peekElemOff ptr (offset + 2)
		
		val_begin <- peekElemOff ptr (offset + 3)
		val_end <- peekElemOff ptr (offset + 4)
		val <- peekUTF8Len (val_begin, minusPtr val_end val_begin)
		
		let content = parseAttributeContent val
		let attr = (X.Name local ns prefix, content)
		attrs <- loop (offset + 5) (n - 1)
		
		return (attr:attrs)

parseAttributeContent :: T.Text -> [X.Content]
parseAttributeContent = parse . T.unpack where
	parse chars = case ReadP.readP_to_S parser chars of
		(cs,_):_ -> cs
		_ -> error "parseAttributeContent: no parse"
	parser = ReadP.manyTill content ReadP.eof
	content = reference +++ text
	reference = do
		void (ReadP.char '&')
		name <- ReadP.munch1 (/= ';')
		void (ReadP.char ';')
		return (X.ContentEntity (T.pack name))
	text = do
		chars <- ReadP.munch1 (/= '&')
		return (X.ContentText (T.pack chars))

-- }}}

-- end element {{{

parsedEndElement :: Callback m (X.Name -> m Bool)
parsedEndElement = callback wrap_endElementNs
	getcb_endElementNs
	setcb_endElementNs

type EndElementNsSAX2Func = (Ptr Context -> CString -> CString -> CString -> IO ())

wrap_endElementNs :: Parser m -> (X.Name -> m Bool) -> IO (FunPtr EndElementNsSAX2Func)
wrap_endElementNs p io =
	newcb_endElementNs $ \ctx cln cpfx cns ->
	catchRefIO p ctx $ do
		ns <- maybePeek peekUTF8 (castPtr cns)
		prefix <- maybePeek peekUTF8 (castPtr cpfx)
		local <- peekUTF8 (castPtr cln)
		parserToIO p (io (X.Name local ns prefix))

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_endElementNs"
	getcb_endElementNs :: Ptr Context -> IO (FunPtr EndElementNsSAX2Func)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_endElementNs"
	setcb_endElementNs :: Ptr Context -> FunPtr EndElementNsSAX2Func -> IO ()

foreign import ccall "wrapper"
	newcb_endElementNs :: EndElementNsSAX2Func -> IO (FunPtr EndElementNsSAX2Func)

-- }}}

-- characters, cdata, and whitespace {{{

parsedCharacters :: Callback m (T.Text -> m Bool)
parsedCharacters = callback wrap_characters
	getcb_characters
	setcb_characters

-- | If 'parsedCDATA' is set, it receives any text contained in CDATA
-- blocks. By default, all text is received by 'parsedCharacters'.
parsedCDATA :: Callback m (T.Text -> m Bool)
parsedCDATA = callback wrap_characters
	getcb_cdataBlock
	setcb_cdataBlock

-- | If 'parsedWhitespace' is set, it receives any whitespace marked as
-- ignorable by the document's DTD. By default, all text is received by
-- 'parsedCharacters'.
parsedWhitespace :: Callback m (T.Text -> m Bool)
parsedWhitespace = callback wrap_characters
	getcb_ignorableWhitespace
	setcb_ignorableWhitespace

type CharactersSAXFunc = (Ptr Context -> CString -> CInt -> IO ())

wrap_characters :: Parser m -> (T.Text -> m Bool) -> IO (FunPtr CharactersSAXFunc)
wrap_characters p io =
	newcb_characters $ \ctx cstr clen ->
	catchRefIO p ctx $ do
		text <- peekUTF8Len (castPtr cstr, fromIntegral clen)
		parserToIO p (io text)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_characters"
	getcb_characters :: Ptr Context -> IO (FunPtr CharactersSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_cdataBlock"
	getcb_cdataBlock :: Ptr Context -> IO (FunPtr CharactersSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_ignorableWhitespace"
	getcb_ignorableWhitespace :: Ptr Context -> IO (FunPtr CharactersSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_characters"
	setcb_characters :: Ptr Context -> FunPtr CharactersSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_cdataBlock"
	setcb_cdataBlock :: Ptr Context -> FunPtr CharactersSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_ignorableWhitespace"
	setcb_ignorableWhitespace :: Ptr Context -> FunPtr CharactersSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_characters :: CharactersSAXFunc -> IO (FunPtr CharactersSAXFunc)

-- }}}

-- entity reference {{{

-- | If 'parsedReference' is set, entity references in element and attribute
-- content will reported separately from text, and will not be automatically
-- expanded.
--
-- Use this when processing documents in passthrough mode, to preserve
-- existing entity references.
parsedReference :: Callback m (T.Text -> m Bool)
parsedReference = callback wrap_reference
	getcb_reference
	setcb_reference

type ReferenceSAXFunc = Ptr Context -> CString -> IO ()

wrap_reference :: Parser m -> (T.Text -> m Bool) -> IO (FunPtr ReferenceSAXFunc)
wrap_reference p io =
	newcb_reference $ \ctx cstr ->
	catchRefIO p ctx $ do
		text <- peekUTF8 (castPtr cstr)
		parserToIO p (io text)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_reference"
	getcb_reference :: Ptr Context -> IO (FunPtr ReferenceSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_reference"
	setcb_reference :: Ptr Context -> FunPtr ReferenceSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_reference :: ReferenceSAXFunc -> IO (FunPtr ReferenceSAXFunc)

-- }}}

-- comment {{{

parsedComment :: Callback m (T.Text -> m Bool)
parsedComment = callback wrap_comment
	getcb_comment
	setcb_comment

type CommentSAXFunc = Ptr Context -> CString -> IO ()

wrap_comment :: Parser m -> (T.Text -> m Bool) -> IO (FunPtr CommentSAXFunc)
wrap_comment p io =
	newcb_comment $ \ctx cstr ->
	catchRefIO p ctx $ do
		text <- peekUTF8 (castPtr cstr)
		parserToIO p (io text)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_comment"
	getcb_comment :: Ptr Context -> IO (FunPtr CommentSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_comment"
	setcb_comment :: Ptr Context -> FunPtr CommentSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_comment :: CommentSAXFunc -> IO (FunPtr CommentSAXFunc)

-- }}}

-- processing instruction {{{

parsedInstruction :: Callback m (X.Instruction -> m Bool)
parsedInstruction = callback wrap_processingInstruction
	getcb_processingInstruction
	setcb_processingInstruction

type ProcessingInstructionSAXFunc = Ptr Context -> CString -> CString -> IO ()

wrap_processingInstruction :: Parser m -> (X.Instruction -> m Bool) -> IO (FunPtr ProcessingInstructionSAXFunc)
wrap_processingInstruction p io =
	newcb_processingInstruction $ \ctx ctarget cdata ->
	catchRefIO p ctx $ do
		target <- peekUTF8 (castPtr ctarget)
		value <- peekUTF8 (castPtr cdata)
		parserToIO p (io (X.Instruction target value))

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_processingInstruction"
	getcb_processingInstruction :: Ptr Context -> IO (FunPtr ProcessingInstructionSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_processingInstruction"
	setcb_processingInstruction :: Ptr Context -> FunPtr ProcessingInstructionSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_processingInstruction :: ProcessingInstructionSAXFunc -> IO (FunPtr ProcessingInstructionSAXFunc)

-- }}}

-- external subset {{{

parsedExternalSubset :: Callback m (T.Text -> Maybe X.ExternalID -> m Bool)
parsedExternalSubset = callback wrap_externalSubset
	getcb_externalSubset
	setcb_externalSubset

type ExternalSubsetSAXFunc = Ptr Context -> CString -> CString -> CString -> IO ()

wrap_externalSubset :: Parser m -> (T.Text -> Maybe X.ExternalID -> m Bool) -> IO (FunPtr ExternalSubsetSAXFunc)
wrap_externalSubset p io =
	newcb_externalSubset $ \ctx cname cpublic csystem ->
	catchRefIO p ctx $ do
		name <- peekUTF8 (castPtr cname)
		public <- maybePeek peekUTF8 (castPtr cpublic)
		system <- maybePeek peekUTF8 (castPtr csystem)
		let external = case (public, system) of
			(Nothing, Just s) -> Just (X.SystemID s)
			(Just p', Just s) -> Just (X.PublicID p' s)
			_ -> Nothing
		parserToIO p (io name external)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_externalSubset"
	getcb_externalSubset :: Ptr Context -> IO (FunPtr ExternalSubsetSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_externalSubset"
	setcb_externalSubset :: Ptr Context -> FunPtr ExternalSubsetSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_externalSubset :: ExternalSubsetSAXFunc -> IO (FunPtr ExternalSubsetSAXFunc)

-- }}}

-- internal subset {{{

parsedInternalSubset :: Callback m (T.Text -> Maybe X.ExternalID -> m Bool)
parsedInternalSubset = callback wrap_internalSubset
	getcb_internalSubset
	setcb_internalSubset

type InternalSubsetSAXFunc = Ptr Context -> CString -> CString -> CString -> IO ()

wrap_internalSubset :: Parser m -> (T.Text -> Maybe X.ExternalID -> m Bool) -> IO (FunPtr InternalSubsetSAXFunc)
wrap_internalSubset p io =
	newcb_internalSubset $ \ctx cname cpublic csystem ->
	catchRefIO p ctx $ do
		name <- peekUTF8 (castPtr cname)
		public <- maybePeek peekUTF8 (castPtr cpublic)
		system <- maybePeek peekUTF8 (castPtr csystem)
		let external = case (public, system) of
			(Nothing, Just s) -> Just (X.SystemID s)
			(Just p', Just s) -> Just (X.PublicID p' s)
			_ -> Nothing
		parserToIO p (io name external)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_internalSubset"
	getcb_internalSubset :: Ptr Context -> IO (FunPtr InternalSubsetSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_internalSubset"
	setcb_internalSubset :: Ptr Context -> FunPtr InternalSubsetSAXFunc -> IO ()

foreign import ccall "wrapper"
	newcb_internalSubset :: InternalSubsetSAXFunc -> IO (FunPtr InternalSubsetSAXFunc)

-- }}}

-- warning and error {{{

reportWarning :: Callback m (T.Text -> m Bool)
reportWarning = callback wrap_FixedError
	getcb_warning
	setcb_warning

reportError :: Callback m (T.Text -> m Bool)
reportError = callback wrap_FixedError
	getcb_error
	setcb_error

type FixedErrorFunc = Ptr Context -> CString -> IO ()

wrap_FixedError :: Parser m -> (T.Text -> m Bool) -> IO (FunPtr FixedErrorFunc)
wrap_FixedError p io =
	newcb_FixedError $ \ctx cmsg ->
	catchRefIO p ctx $ do
		msg <- peekUTF8 cmsg
		parserToIO p (io msg)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_warning"
	getcb_warning :: Ptr Context -> IO (FunPtr FixedErrorFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_error"
	getcb_error :: Ptr Context -> IO (FunPtr FixedErrorFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_warning"
	setcb_warning :: Ptr Context -> FunPtr FixedErrorFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_error"
	setcb_error :: Ptr Context -> FunPtr FixedErrorFunc -> IO ()

foreign import ccall "wrapper"
	newcb_FixedError :: FixedErrorFunc -> IO (FunPtr FixedErrorFunc)

-- }}}

-- }}}

withParserIO :: Parser m -> (Ptr Context -> IO a) -> IO a
withParserIO p io = withForeignPtr (parserHandle p) io

peekUTF8 :: CString -> IO T.Text
peekUTF8 = fmap (TE.decodeUtf8) . B.packCString

peekUTF8Len :: CStringLen -> IO T.Text
peekUTF8Len = fmap (TE.decodeUtf8) . B.packCStringLen

withUTF8 :: T.Text -> (CString -> IO a) -> IO a
withUTF8 = BU.unsafeUseAsCString . TE.encodeUtf8

freeFunPtr :: FunPtr a -> IO ()
freeFunPtr ptr = if ptr == nullFunPtr
	then return ()
	else freeHaskellFunPtr ptr

void :: Functor f => f a -> f ()
void = fmap (const ())

foreign import ccall unsafe "hslibxml-shim.h hslibxml_alloc_parser"
	cAllocParser :: CString -> IO (Ptr Context)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_free_parser"
	cFreeParser :: Ptr Context -> IO ()

foreign import ccall safe "libxml/parser.h xmlParseChunk"
	cParseChunk :: Ptr Context -> CString -> CInt -> CInt -> IO CInt

foreign import ccall safe "libxml/parser.h xmlStopParser"
	cStopParser :: Ptr Context -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_want_callback"
	cWantCallback :: Ptr Context -> Ptr a -> IO CInt

-- Unbound callback FFI definitions {{{

{-

data Entity = Entity

data ParserInput = ParserInput

data Enumeration = Enumeration

data ElementContent = ElementContent

data XmlError = XmlError

type IsStandaloneSAXFunc = Ptr Context -> IO CInt

type HasInternalSubsetSAXFunc = Ptr Context -> IO CInt

type HasExternalSubsetSAXFunc = Ptr Context -> IO CInt

type ExternalEntityLoader = CString -> CString -> Ptr Context -> IO (Ptr ParserInput)

type GetEntitySAXFunc = Ptr Context -> CString -> IO (Ptr Entity)

type EntityDeclSAXFunc = Ptr Context -> CString -> CInt -> CString -> CString -> CString -> IO ()

type NotationDeclSAXFunc = Ptr Context -> CString -> CString -> CString -> IO ()

type AttributeDeclSAXFunc = Ptr Context -> CString -> CString -> CInt -> CInt -> CString -> Ptr Enumeration -> IO ()

type ElementDeclSAXFunc = Ptr Context -> CString -> CInt -> Ptr ElementContent -> IO ()

type UnparsedEntityDeclSAXFunc = Ptr Context -> CString -> CString -> CString -> CString -> IO ()

type GetParameterEntitySAXFunc = Ptr Context -> CString -> IO (Ptr Entity)

type XmlStructuredErrorFunc = Ptr Context -> Ptr XmlError -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_isStandalone"
	getcb_isStandalone :: Ptr Context -> IO (FunPtr IsStandaloneSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_hasInternalSubset"
	getcb_hasInternalSubset :: Ptr Context -> IO (FunPtr HasInternalSubsetSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_hasExternalSubset"
	getcb_hasExternalSubset :: Ptr Context -> IO (FunPtr HasExternalSubsetSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_resolveEntity"
	getcb_resolveEntity :: Ptr Context -> IO (FunPtr ResolveEntitySAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_getEntity"
	getcb_getEntity :: Ptr Context -> IO (FunPtr GetEntitySAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_entityDecl"
	getcb_entityDecl :: Ptr Context -> IO (FunPtr EntityDeclSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_notationDecl"
	getcb_notationDecl :: Ptr Context -> IO (FunPtr NotationDeclSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_attributeDecl"
	getcb_attributeDecl :: Ptr Context -> IO (FunPtr AttributeDeclSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_elementDecl"
	getcb_elementDecl :: Ptr Context -> IO (FunPtr ElementDeclSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_unparsedEntityDecl"
	getcb_unparsedEntityDecl :: Ptr Context -> IO (FunPtr UnparsedEntityDeclSAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_getParameterEntity"
	getcb_getParameterEntity :: Ptr Context -> IO (FunPtr GetParameterEntitySAXFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_serror"
	getcb_serror :: Ptr Context -> IO (FunPtr XmlStructuredErrorFunc)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_isStandalone"
	setcb_isStandalone :: Ptr Context -> FunPtr IsStandaloneSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_hasInternalSubset"
	setcb_hasInternalSubset :: Ptr Context -> FunPtr HasInternalSubsetSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_hasExternalSubset"
	setcb_hasExternalSubset :: Ptr Context -> FunPtr HasExternalSubsetSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_resolveEntity"
	setcb_resolveEntity :: Ptr Context -> FunPtr ResolveEntitySAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_getEntity"
	setcb_getEntity :: Ptr Context -> FunPtr GetEntitySAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_entityDecl"
	setcb_entityDecl :: Ptr Context -> FunPtr EntityDeclSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_notationDecl"
	setcb_notationDecl :: Ptr Context -> FunPtr NotationDeclSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_attributeDecl"
	setcb_attributeDecl :: Ptr Context -> FunPtr AttributeDeclSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_elementDecl"
	setcb_elementDecl :: Ptr Context -> FunPtr ElementDeclSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_unparsedEntityDecl"
	setcb_unparsedEntityDecl :: Ptr Context -> FunPtr UnparsedEntityDeclSAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_getParameterEntity"
	setcb_getParameterEntity :: Ptr Context -> FunPtr GetParameterEntitySAXFunc -> IO ()

foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_serror"
	setcb_serror :: Ptr Context -> FunPtr XmlStructuredErrorFunc -> IO ()

-}

-- }}}

A hslibxml-shim.c => hslibxml-shim.c +464 -0
@@ 0,0 1,464 @@
#define _GNU_SOURCE

#include "hslibxml-shim.h"
#include <string.h>
#include <stdio.h>

typedef struct UserData UserData;
struct UserData
{
	FixedErrorFunc warning;
	FixedErrorFunc error;
	xmlExternalEntityLoader resolveEntity;
};

static xmlParserInput *
hslibxml_entity_loader(const char *publicId, const char *systemId, xmlParserCtxt *ctx)
{
	UserData *user_data = (UserData *)ctx->_private;
	if (user_data && user_data->resolveEntity)
	{
		return user_data->resolveEntity(publicId, systemId, ctx);
	}
	return NULL;
}

xmlParserCtxt *
hslibxml_alloc_parser(const char *filename)
{
	xmlSAXHandler sax;
	xmlParserCtxt *ctx;
	UserData *user_data;
	
	static int entity_resolver_set = 0;
	if (entity_resolver_set == 0)
	{
		entity_resolver_set = 1;
		xmlSetExternalEntityLoader(hslibxml_entity_loader);
	}
	
	user_data = calloc(1, sizeof(UserData));
	
	memset(&sax, 0, sizeof(xmlSAXHandler));
	sax.initialized = XML_SAX2_MAGIC;
	
	ctx = xmlCreatePushParserCtxt(&sax, NULL, NULL, 0, filename);
	ctx->replaceEntities = 1;
	ctx->_private = user_data;
	return ctx;
}

void
hslibxml_free_parser(xmlParserCtxt *ctx)
{
	xmlFreeParserCtxt(ctx);
}

int
hslibxml_want_callback(xmlParserCtxt *ctx, void *cb_ctx)
{
	if (ctx->replaceEntities)
	{
		return 1;
	}
	
	return (ctx == cb_ctx);
}

internalSubsetSAXFunc
hslibxml_getcb_internalSubset(xmlParserCtxt *ctx)
{
	return ctx->sax->internalSubset;
}

isStandaloneSAXFunc
hslibxml_getcb_isStandalone(xmlParserCtxt *ctx)
{
	return ctx->sax->isStandalone;
}

hasInternalSubsetSAXFunc
hslibxml_getcb_hasInternalSubset(xmlParserCtxt *ctx)
{
	return ctx->sax->hasInternalSubset;
}

hasExternalSubsetSAXFunc
hslibxml_getcb_hasExternalSubset(xmlParserCtxt *ctx)
{
	return ctx->sax->hasExternalSubset;
}

xmlExternalEntityLoader
hslibxml_getcb_resolveEntity(xmlParserCtxt *ctx)
{
	UserData *user_data = (UserData *)ctx->_private;
	return user_data->resolveEntity;
}

getEntitySAXFunc
hslibxml_getcb_getEntity(xmlParserCtxt *ctx)
{
	return ctx->sax->getEntity;
}

entityDeclSAXFunc
hslibxml_getcb_entityDecl(xmlParserCtxt *ctx)
{
	return ctx->sax->entityDecl;
}

notationDeclSAXFunc
hslibxml_getcb_notationDecl(xmlParserCtxt *ctx)
{
	return ctx->sax->notationDecl;
}

attributeDeclSAXFunc
hslibxml_getcb_attributeDecl(xmlParserCtxt *ctx)
{
	return ctx->sax->attributeDecl;
}

elementDeclSAXFunc
hslibxml_getcb_elementDecl(xmlParserCtxt *ctx)
{
	return ctx->sax->elementDecl;
}

unparsedEntityDeclSAXFunc
hslibxml_getcb_unparsedEntityDecl(xmlParserCtxt *ctx)
{
	return ctx->sax->unparsedEntityDecl;
}

startDocumentSAXFunc
hslibxml_getcb_startDocument(xmlParserCtxt *ctx)
{
	return ctx->sax->startDocument;
}

endDocumentSAXFunc
hslibxml_getcb_endDocument(xmlParserCtxt *ctx)
{
	return ctx->sax->endDocument;
}

referenceSAXFunc
hslibxml_getcb_reference(xmlParserCtxt *ctx)
{
	return ctx->sax->reference;
}

charactersSAXFunc
hslibxml_getcb_characters(xmlParserCtxt *ctx)
{
	return ctx->sax->characters;
}

ignorableWhitespaceSAXFunc
hslibxml_getcb_ignorableWhitespace(xmlParserCtxt *ctx)
{
	return ctx->sax->ignorableWhitespace;
}

processingInstructionSAXFunc
hslibxml_getcb_processingInstruction(xmlParserCtxt *ctx)
{
	return ctx->sax->processingInstruction;
}

commentSAXFunc
hslibxml_getcb_comment(xmlParserCtxt *ctx)
{
	return ctx->sax->comment;
}

FixedErrorFunc
hslibxml_getcb_warning(xmlParserCtxt *ctx)
{
	UserData *user_data = (UserData *)ctx->_private;
	return user_data->warning;
}

FixedErrorFunc
hslibxml_getcb_error(xmlParserCtxt *ctx)
{
	UserData *user_data = (UserData *)ctx->_private;
	return user_data->error;
}

fatalErrorSAXFunc
hslibxml_getcb_fatalError(xmlParserCtxt *ctx)
{
	return ctx->sax->fatalError;
}

getParameterEntitySAXFunc
hslibxml_getcb_getParameterEntity(xmlParserCtxt *ctx)
{
	return ctx->sax->getParameterEntity;
}

cdataBlockSAXFunc
hslibxml_getcb_cdataBlock(xmlParserCtxt *ctx)
{
	return ctx->sax->cdataBlock;
}

externalSubsetSAXFunc
hslibxml_getcb_externalSubset(xmlParserCtxt *ctx)
{
	return ctx->sax->externalSubset;
}

startElementNsSAX2Func
hslibxml_getcb_startElementNs(xmlParserCtxt *ctx)
{
	return ctx->sax->startElementNs;
}

endElementNsSAX2Func
hslibxml_getcb_endElementNs(xmlParserCtxt *ctx)
{
	return ctx->sax->endElementNs;
}

xmlStructuredErrorFunc
hslibxml_getcb_serror(xmlParserCtxt *ctx)
{
	return ctx->sax->serror;
}

void
hslibxml_setcb_internalSubset(xmlParserCtxt *ctx, internalSubsetSAXFunc cb)
{
	ctx->sax->internalSubset = cb;
}

void
hslibxml_setcb_isStandalone(xmlParserCtxt *ctx, isStandaloneSAXFunc cb)
{
	ctx->sax->isStandalone = cb;
}

void
hslibxml_setcb_hasInternalSubset(xmlParserCtxt *ctx, hasInternalSubsetSAXFunc cb)
{
	ctx->sax->hasInternalSubset = cb;
}

void
hslibxml_setcb_hasExternalSubset(xmlParserCtxt *ctx, hasExternalSubsetSAXFunc cb)
{
	ctx->sax->hasExternalSubset = cb;
}

void
hslibxml_setcb_resolveEntity(xmlParserCtxt *ctx, xmlExternalEntityLoader cb)
{
	UserData *user_data = (UserData *)ctx->_private;
	user_data->resolveEntity = cb;
}

void
hslibxml_setcb_getEntity(xmlParserCtxt *ctx, getEntitySAXFunc cb)
{
	ctx->sax->getEntity = cb;
}

void
hslibxml_setcb_entityDecl(xmlParserCtxt *ctx, entityDeclSAXFunc cb)
{
	ctx->sax->entityDecl = cb;
}

void
hslibxml_setcb_notationDecl(xmlParserCtxt *ctx, notationDeclSAXFunc cb)
{
	ctx->sax->notationDecl = cb;
}

void
hslibxml_setcb_attributeDecl(xmlParserCtxt *ctx, attributeDeclSAXFunc cb)
{
	ctx->sax->attributeDecl = cb;
}

void
hslibxml_setcb_elementDecl(xmlParserCtxt *ctx, elementDeclSAXFunc cb)
{
	ctx->sax->elementDecl = cb;
}

void
hslibxml_setcb_unparsedEntityDecl(xmlParserCtxt *ctx, unparsedEntityDeclSAXFunc cb)
{
	ctx->sax->unparsedEntityDecl = cb;
}

void
hslibxml_setcb_startDocument(xmlParserCtxt *ctx, startDocumentSAXFunc cb)
{
	ctx->sax->startDocument = cb;
}

void
hslibxml_setcb_endDocument(xmlParserCtxt *ctx, endDocumentSAXFunc cb)
{
	ctx->sax->endDocument = cb;
}

void
hslibxml_setcb_reference(xmlParserCtxt *ctx, referenceSAXFunc cb)
{
	ctx->sax->reference = cb;
	
	if (cb == NULL)
	{ ctx->replaceEntities = 1; }
	
	else
	{ ctx->replaceEntities = 0; }
}

void
hslibxml_setcb_characters(xmlParserCtxt *ctx, charactersSAXFunc cb)
{
	ctx->sax->characters = cb;
}

void
hslibxml_setcb_ignorableWhitespace(xmlParserCtxt *ctx, ignorableWhitespaceSAXFunc cb)
{
	ctx->sax->ignorableWhitespace = cb;
}

void
hslibxml_setcb_processingInstruction(xmlParserCtxt *ctx, processingInstructionSAXFunc cb)
{
	ctx->sax->processingInstruction = cb;
}

void
hslibxml_setcb_comment(xmlParserCtxt *ctx, commentSAXFunc cb)
{
	ctx->sax->comment = cb;
}

static void
hslibxml_on_warning(void *data, const char *format, ...)
{
	xmlParserCtxt *ctx;
	UserData *user_data;
	char *msg;
	va_list params;
	int rc;
	
	ctx = (xmlParserCtxt *)data;
	user_data = (UserData *)ctx->_private;
	
	va_start(params, format);
	rc = vasprintf(&msg, format, params);
	if (rc == -1)
	{
		/* try to get something to the user */
		user_data->warning(ctx, format);
		return;
	}
	
	user_data->warning(ctx, msg);
	free(msg);
}

static void
hslibxml_on_error(void *data, const char *format, ...)
{
	xmlParserCtxt *ctx;
	UserData *user_data;
	char *msg;
	va_list params;
	int rc;
	
	ctx = (xmlParserCtxt *)data;
	user_data = (UserData *)ctx->_private;
	
	va_start(params, format);
	rc = vasprintf(&msg, format, params);
	if (rc == -1)
	{
		/* try to get something to the user */
		user_data->error(ctx, format);
		return;
	}
	
	user_data->error(ctx, msg);
	free(msg);
}

void
hslibxml_setcb_warning(xmlParserCtxt *ctx, FixedErrorFunc cb)
{
	UserData *user_data = (UserData *)ctx->_private;
	if (cb == NULL)
	{ ctx->sax->warning = NULL; }
	
	else
	{ ctx->sax->warning = hslibxml_on_warning; }
	
	user_data->warning = cb;
}

void
hslibxml_setcb_error(xmlParserCtxt *ctx, FixedErrorFunc cb)
{
	UserData *user_data = (UserData *)ctx->_private;
	if (cb == NULL)
	{ ctx->sax->error = NULL; }
	
	else
	{ ctx->sax->error = hslibxml_on_error; }
	
	user_data->error = cb;
}

void
hslibxml_setcb_fatalError(xmlParserCtxt *ctx, fatalErrorSAXFunc cb)
{
	ctx->sax->fatalError = cb;
}

void
hslibxml_setcb_getParameterEntity(xmlParserCtxt *ctx, getParameterEntitySAXFunc cb)
{
	ctx->sax->getParameterEntity = cb;
}

void
hslibxml_setcb_cdataBlock(xmlParserCtxt *ctx, cdataBlockSAXFunc cb)
{
	ctx->sax->cdataBlock = cb;
}

void
hslibxml_setcb_externalSubset(xmlParserCtxt *ctx, externalSubsetSAXFunc cb)
{
	ctx->sax->externalSubset = cb;
}

void
hslibxml_setcb_startElementNs(xmlParserCtxt *ctx, startElementNsSAX2Func cb)
{
	ctx->sax->startElementNs = cb;
}

void
hslibxml_setcb_endElementNs(xmlParserCtxt *ctx, endElementNsSAX2Func cb)
{
	ctx->sax->endElementNs = cb;
}

void
hslibxml_setcb_serror(xmlParserCtxt *ctx, xmlStructuredErrorFunc cb)
{
	ctx->sax->serror = cb;
}

A hslibxml-shim.h => hslibxml-shim.h +174 -0
@@ 0,0 1,174 @@
#ifndef HSLIBXML_SHIM_H
#define HSLIBXML_SHIM_H

#include <libxml/parser.h>

/* Versions of the error handling callbacks with fixed arity */
typedef void(*FixedErrorFunc)(void *ctx, const char *msg);

xmlParserCtxt *
hslibxml_alloc_parser(const char *filename);

void
hslibxml_free_parser(xmlParserCtxt *ctx);

int
hslibxml_want_callback(xmlParserCtxt *ctx, void *cb_ctx);

internalSubsetSAXFunc
hslibxml_getcb_internalSubset(xmlParserCtxt *ctx);

isStandaloneSAXFunc
hslibxml_getcb_isStandalone(xmlParserCtxt *ctx);

hasInternalSubsetSAXFunc
hslibxml_getcb_hasInternalSubset(xmlParserCtxt *ctx);

hasExternalSubsetSAXFunc
hslibxml_getcb_hasExternalSubset(xmlParserCtxt *ctx);

xmlExternalEntityLoader
hslibxml_getcb_resolveEntity(xmlParserCtxt *ctx);

getEntitySAXFunc
hslibxml_getcb_getEntity(xmlParserCtxt *ctx);

entityDeclSAXFunc
hslibxml_getcb_entityDecl(xmlParserCtxt *ctx);

notationDeclSAXFunc
hslibxml_getcb_notationDecl(xmlParserCtxt *ctx);

attributeDeclSAXFunc
hslibxml_getcb_attributeDecl(xmlParserCtxt *ctx);

elementDeclSAXFunc
hslibxml_getcb_elementDecl(xmlParserCtxt *ctx);

unparsedEntityDeclSAXFunc
hslibxml_getcb_unparsedEntityDecl(xmlParserCtxt *ctx);

startDocumentSAXFunc
hslibxml_getcb_startDocument(xmlParserCtxt *ctx);

endDocumentSAXFunc
hslibxml_getcb_endDocument(xmlParserCtxt *ctx);

referenceSAXFunc
hslibxml_getcb_reference(xmlParserCtxt *ctx);

charactersSAXFunc
hslibxml_getcb_characters(xmlParserCtxt *ctx);

ignorableWhitespaceSAXFunc
hslibxml_getcb_ignorableWhitespace(xmlParserCtxt *ctx);

processingInstructionSAXFunc
hslibxml_getcb_processingInstruction(xmlParserCtxt *ctx);

commentSAXFunc
hslibxml_getcb_comment(xmlParserCtxt *ctx);

FixedErrorFunc
hslibxml_getcb_warning(xmlParserCtxt *ctx);

FixedErrorFunc
hslibxml_getcb_error(xmlParserCtxt *ctx);

getParameterEntitySAXFunc
hslibxml_getcb_getParameterEntity(xmlParserCtxt *ctx);

cdataBlockSAXFunc
hslibxml_getcb_cdataBlock(xmlParserCtxt *ctx);

externalSubsetSAXFunc
hslibxml_getcb_externalSubset(xmlParserCtxt *ctx);

startElementNsSAX2Func
hslibxml_getcb_startElementNs(xmlParserCtxt *ctx);

endElementNsSAX2Func
hslibxml_getcb_endElementNs(xmlParserCtxt *ctx);

xmlStructuredErrorFunc
hslibxml_getcb_serror(xmlParserCtxt *ctx);

void
hslibxml_setcb_internalSubset(xmlParserCtxt *ctx, internalSubsetSAXFunc cb);

void
hslibxml_setcb_isStandalone(xmlParserCtxt *ctx, isStandaloneSAXFunc cb);

void
hslibxml_setcb_hasInternalSubset(xmlParserCtxt *ctx, hasInternalSubsetSAXFunc cb);

void
hslibxml_setcb_hasExternalSubset(xmlParserCtxt *ctx, hasExternalSubsetSAXFunc cb);

void
hslibxml_setcb_resolveEntity(xmlParserCtxt *ctx, xmlExternalEntityLoader cb);

void
hslibxml_setcb_getEntity(xmlParserCtxt *ctx, getEntitySAXFunc cb);

void
hslibxml_setcb_entityDecl(xmlParserCtxt *ctx, entityDeclSAXFunc cb);

void
hslibxml_setcb_notationDecl(xmlParserCtxt *ctx, notationDeclSAXFunc cb);

void
hslibxml_setcb_attributeDecl(xmlParserCtxt *ctx, attributeDeclSAXFunc cb);

void
hslibxml_setcb_elementDecl(xmlParserCtxt *ctx, elementDeclSAXFunc cb);

void
hslibxml_setcb_unparsedEntityDecl(xmlParserCtxt *ctx, unparsedEntityDeclSAXFunc cb);

void
hslibxml_setcb_startDocument(xmlParserCtxt *ctx, startDocumentSAXFunc cb);

void
hslibxml_setcb_endDocument(xmlParserCtxt *ctx, endDocumentSAXFunc cb);

void
hslibxml_setcb_reference(xmlParserCtxt *ctx, referenceSAXFunc cb);

void
hslibxml_setcb_characters(xmlParserCtxt *ctx, charactersSAXFunc cb);

void
hslibxml_setcb_ignorableWhitespace(xmlParserCtxt *ctx, ignorableWhitespaceSAXFunc cb);

void
hslibxml_setcb_processingInstruction(xmlParserCtxt *ctx, processingInstructionSAXFunc cb);

void
hslibxml_setcb_comment(xmlParserCtxt *ctx, commentSAXFunc cb);

void
hslibxml_setcb_warning(xmlParserCtxt *ctx, FixedErrorFunc cb);

void
hslibxml_setcb_error(xmlParserCtxt *ctx, FixedErrorFunc cb);

void
hslibxml_setcb_getParameterEntity(xmlParserCtxt *ctx, getParameterEntitySAXFunc cb);

void
hslibxml_setcb_cdataBlock(xmlParserCtxt *ctx, cdataBlockSAXFunc cb);

void
hslibxml_setcb_externalSubset(xmlParserCtxt *ctx, externalSubsetSAXFunc cb);

void
hslibxml_setcb_startElementNs(xmlParserCtxt *ctx, startElementNsSAX2Func cb);

void
hslibxml_setcb_endElementNs(xmlParserCtxt *ctx, endElementNsSAX2Func cb);

void
hslibxml_setcb_serror(xmlParserCtxt *ctx, xmlStructuredErrorFunc cb);

#endif

M libxml-sax.cabal => libxml-sax.cabal +6 -5
@@ 1,5 1,5 @@
name: libxml-sax
version: 0.6.1.1
version: 0.7
synopsis: Bindings for the libXML2 SAX interface
license: MIT
license-file: license.txt


@@ 12,6 12,7 @@ category: Foreign, Text, XML, Parsing
stability: experimental
homepage: http://john-millikin.com/software/bindings/libxml-sax/
tested-with: GHC==6.12.1
extra-source-files: hslibxml-shim.h

source-repository head
  type: bazaar


@@ 19,18 20,18 @@ source-repository head

library
  ghc-options: -Wall
  cc-options: -Wall

  build-depends:
      base >= 4 && < 5
    , bytestring >= 0.9 && < 0.10
    , text >= 0.7 && < 0.12
    , xml-types >= 0.1 && < 0.2

  build-tools:
    c2hs >= 0.15
    , xml-types >= 0.3 && < 0.4

  exposed-modules:
    Text.XML.LibXML.SAX

  c-sources: hslibxml-shim.c

  extra-libraries: xml2
  pkgconfig-depends: libxml-2.0

A scripts/common.bash => scripts/common.bash +63 -0
@@ 0,0 1,63 @@
PATH="$PATH:$PWD/cabal-dev/bin/"

VERSION=$(awk '/^version:/{print $2}' libxml-sax.cabal)

CABAL_DEV=$(which cabal-dev)
ANANSI=$(which anansi)
XELATEX=$(which xelatex)
XZ=$(which xz)

require_cabal_dev()
{
	if [ -z "$CABAL_DEV" ]; then
		echo "Can't find 'cabal-dev' executable; make sure it exists on your "'$PATH'
		echo "Cowardly refusing to fuck with the global package database"
		exit 1
	fi
}

require_anansi()
{
	if [ -z "$ANANSI" ]; then
		echo "Can't find 'anansi' executable; running '$CABAL_DEV install anansi'"
		require_cabal_dev
		$CABAL_DEV install anansi &> /dev/null
		if [ "$?" -ne "0" ]; then
			echo "Installation failed; please install Anansi manually somehow"
			exit 1
		fi
		ANANSI=$(which anansi)
		echo "Success; anansi = $ANANSI"
	fi
}

require_xelatex()
{
	if [ -z "$XELATEX" ]; then
		echo "Can't find 'xelatex' executable; make sure it exists on your "'$PATH'
		exit 1
	fi
}

make_pdf()
{
	require_anansi
	require_xelatex
	
	rm -f *.{aux,tex,idx,log,out,toc,pdf}
	$ANANSI -w -l latex-noweb -o libxml-sax.tex libxml-sax.anansi || exit 1
	$XELATEX libxml-sax.tex > /dev/null || exit 1
	$XELATEX libxml-sax.tex > /dev/null || exit 1
	rm -f *.{aux,tex,idx,log,out,toc}
	mv libxml-sax.pdf "libxml-sax_$VERSION.pdf"
}

clean_dev_install()
{
	# require_anansi
	require_cabal_dev
	
	rm -rf hs dist
	# $ANANSI -o hs libxml-sax.anansi || exit 1
	$CABAL_DEV install || exit 1
}

A scripts/run-tests => scripts/run-tests +20 -0
@@ 0,0 1,20 @@
#!/bin/bash
if [ ! -f 'libxml-sax.cabal' ]; then
	echo -n "Can't find libxml-sax.cabal; please run this script as"
	echo -n " ./scripts/run-tests from within the libxml-sax source"
	echo " directory"
	exit 1
fi

. scripts/common.bash

require_cabal_dev

clean_dev_install

pushd tests
rm -rf dist
$CABAL_DEV -s ../cabal-dev install || exit 1
popd

cabal-dev/bin/libxml-sax_tests

A tests/Properties.hs => tests/Properties.hs +273 -0
@@ 0,0 1,273 @@
{-# LANGUAGE OverloadedStrings #-}

-- Copyright (C) 2011 John Millikin <jmillikin@gmail.com>
--
-- See license.txt for details
module Main (tests, main) where

import           Control.Monad (forM_)

import qualified Data.ByteString.Char8 as B8
import           Data.IORef (newIORef, readIORef, writeIORef, modifyIORef)
import qualified Data.Text as T

import qualified Text.XML.LibXML.SAX as SAX
import qualified Data.XML.Types as X

import           Test.HUnit
import qualified Test.Framework as F
import           Test.Framework.Providers.HUnit (testCase)

tests :: [F.Test]
tests = [ test_Instruction
        , test_Comment
        , test_InternalSubset
        , test_InternalSubsetEmpty
        , test_ExternalSubset
        , test_Element
        , test_Content
        , test_ContentNoReference
        , test_PlainCDATA
        , test_PassthroughCDATA
        , test_AttributeContent
        , test_AttributeContentNoReference
        , test_AttributeOrder
        ]

main :: IO ()
main = F.defaultMain tests

test_Instruction :: F.Test
test_Instruction = test_Chunks "instruction"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedInstruction (\pi -> add (X.EventInstruction pi))
	)
	[ ("<?something foo bar?>",
	   [ X.EventInstruction (X.Instruction "something" "foo bar")
	   ])
	, ("<doc/>", [])
	]

test_Comment :: F.Test
test_Comment = test_Chunks "comment"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedComment (\txt -> add (X.EventComment txt))
	)
	[ ("<!-- something foo bar -->",
	   [ X.EventComment " something foo bar "
	   ])
	, ("<doc/>", [])
	]

test_InternalSubsetEmpty :: F.Test
test_InternalSubsetEmpty = test_Chunks "internal subset (empty)"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedComment (\txt -> add (X.EventComment txt))
		set SAX.parsedInternalSubset (\name id -> add (X.EventBeginDoctype name id))
	)
	[ ("<!DOCTYPE SOME_DOCTYPE PUBLIC \"foo\" \"bar\" [\n",
	   [
	   ])
	, ("]", [])
	, (">",
	   [ X.EventBeginDoctype "SOME_DOCTYPE" (Just (X.PublicID "foo" "bar"))
	   ])
	, ("<doc/>", [])
	]

test_InternalSubset :: F.Test
test_InternalSubset = test_Chunks "internal subset"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedComment (\txt -> add (X.EventComment txt))
		set SAX.parsedInternalSubset (\name id -> add (X.EventBeginDoctype name id))
	)
	[ ("<!DOCTYPE SOME_DOCTYPE PUBLIC \"foo\" \"bar\" [\n",
	   [
	   ])
	, ("<!ENTITY ent \"some entity\">",
	   [ X.EventBeginDoctype "SOME_DOCTYPE" (Just (X.PublicID "foo" "bar"))
	   ])
	, ("]", [])
	, (">", [])
	, ("<doc/>", [])
	]

test_ExternalSubset :: F.Test
test_ExternalSubset = test_Chunks "external subset"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedComment (\txt -> add (X.EventComment txt))
		set SAX.parsedExternalSubset (\name id -> add (X.EventBeginDoctype name id))
	)
	[ ("<!DOCTYPE SOME_DOCTYPE PUBLIC \"foo\" \"bar\" [\n",
	   [
	   ])
	, ("<!ENTITY ent \"some entity\">",
	   [
	   ])
	, ("]", [])
	, (">",
	   [ X.EventBeginDoctype "SOME_DOCTYPE" (Just (X.PublicID "foo" "bar"))
	   ])
	, ("<doc/>", [])
	]

test_Element :: F.Test
test_Element = test_Chunks "element begin/end"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedBeginElement (\n as -> add (X.EventBeginElement n as))
		set SAX.parsedEndElement (\n -> add (X.EventEndElement n))
	)
	[ ("<doc>",
	   [ X.EventBeginElement "doc" []
	   ])
	, ("</doc>",
	   [ X.EventEndElement "doc"
	   ])
	]

test_Content :: F.Test
test_Content = test_Chunks "content"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedBeginElement (\n as -> add (X.EventBeginElement n as))
		set SAX.parsedEndElement (\n -> add (X.EventEndElement n))
		set SAX.parsedCharacters (\txt -> add (X.EventContent (X.ContentText txt)))
		set SAX.parsedReference (\name -> add (X.EventContent (X.ContentEntity name)))
	)
	[ ("<!DOCTYPE SOME_DOCTYPE [<!ENTITY ref \"some reference\">]>",
	  [
	  ])
	, ("<doc>",
	   [ X.EventBeginElement "doc" []
	   ])
	, (" text &ref; <",
	   [ X.EventContent (X.ContentText " text ")
	   , X.EventContent (X.ContentEntity "ref")
	   , X.EventContent (X.ContentText " ")
	   ])
	, ("/doc>",
	   [ X.EventEndElement "doc"
	   ])
	]

test_ContentNoReference :: F.Test
test_ContentNoReference = test_Chunks "content (no reference CB)"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedCharacters (\txt -> add (X.EventContent (X.ContentText txt)))
	)
	[ ("<!DOCTYPE SOME_DOCTYPE [<!ENTITY ref \"some reference\">]>",
	  [
	  ])
	, ("<doc>", [])
	, (" text &ref; <",
	   [ X.EventContent (X.ContentText " text ")
	   , X.EventContent (X.ContentText "some reference")
	   , X.EventContent (X.ContentText " ")
	   ])
	, ("/doc>",
	   [
	   ])
	]

test_PlainCDATA :: F.Test
test_PlainCDATA = test_Chunks "cdata (plain)"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedCharacters (\txt -> add (X.EventContent (X.ContentText txt)))
	)
	[ ("<doc>", [])
	, ("<![CDATA[<cdata>]]>",
	   [ X.EventContent (X.ContentText "<cdata>")
	   ])
	, ("</doc>", [])
	]

test_PassthroughCDATA :: F.Test
test_PassthroughCDATA = test_Chunks "cdata (passthrough)"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedCharacters (\txt -> add (X.EventContent (X.ContentText txt)))
		set SAX.parsedCDATA (\txt -> add (X.EventCDATA txt))
	)
	[ ("<doc>", [])
	, ("<![CDATA[<cdata>]]>",
	   [ X.EventCDATA "<cdata>"
	   ])
	, ("</doc>", [])
	]

test_AttributeContent :: F.Test
test_AttributeContent = test_Chunks "attribute content"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedBeginElement (\n as -> add (X.EventBeginElement n as))
		set SAX.parsedEndElement (\n -> add (X.EventEndElement n))
		set SAX.parsedReference (\name -> add (X.EventContent (X.ContentEntity name)))
	)
	[ ("<!DOCTYPE SOME_DOCTYPE [<!ENTITY ref \"some reference\">]>",
	  [
	  ])
	, ("<doc a='text &ref; text'/>",
	   [ X.EventBeginElement "doc" [("a", [X.ContentText "text ", X.ContentEntity "ref", X.ContentText " text"])]
	   , X.EventEndElement "doc"
	   ])
	]

test_AttributeContentNoReference :: F.Test
test_AttributeContentNoReference = test_Chunks "attribute content (no reference CB)"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedBeginElement (\n as -> add (X.EventBeginElement n as))
		set SAX.parsedEndElement (\n -> add (X.EventEndElement n))
	)
	[ ("<!DOCTYPE SOME_DOCTYPE [<!ENTITY ref \"some reference\">]>",
	  [
	  ])
	, ("<doc a='text &ref; text'/>",
	   [ X.EventBeginElement "doc" [("a", [X.ContentText "text some reference text"])]
	   , X.EventEndElement "doc"
	   ])
	]

test_AttributeOrder :: F.Test
test_AttributeOrder = test_Chunks "attribute order"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedBeginElement (\n as -> add (X.EventBeginElement n as))
		set SAX.parsedEndElement (\n -> add (X.EventEndElement n))
		set SAX.parsedCharacters (\txt -> add (X.EventContent (X.ContentText txt)))
		set SAX.parsedReference (\name -> add (X.EventContent (X.ContentEntity name)))
		set SAX.parsedCDATA (\txt -> add (X.EventCDATA txt))
	)
	[ ("<doc z='1' a='2' b='3'/>",
	   [ X.EventBeginElement "doc" [("z", [X.ContentText "1"]), ("a", [X.ContentText "2"]), ("b", [X.ContentText "3"])]
	   , X.EventEndElement "doc"
	   ])
	]

test_Chunks :: String -> (SAX.Parser IO -> (X.Event -> IO Bool) -> IO ()) -> [(String, [X.Event])] -> F.Test
test_Chunks name setup chunks = testCase name $ do
	ref <- newIORef []
	p <- SAX.newParserIO Nothing
	
	SAX.setCallback p SAX.reportError (error . T.unpack)
	
	let add ev = modifyIORef ref (ev:) >> return True
	setup p add
	
	forM_ chunks $ \(chunk, expected) -> do
		SAX.parseBytes p (B8.pack chunk)
		result <- fmap reverse (readIORef ref)
		writeIORef ref []
		assertEqual ("chunk " ++ show chunk) expected result
	
	SAX.parseComplete p
	result <- fmap reverse (readIORef ref)
	assertEqual "eof" [] result

A tests/libxml-sax-tests.cabal => tests/libxml-sax-tests.cabal +19 -0
@@ 0,0 1,19 @@
name: libxml-sax-tests
version: 0
build-type: Simple
cabal-version: >= 1.6

executable libxml-sax_tests
  main-is: Properties.hs
  ghc-options: -Wall -O2

  build-depends:
      base > 3 && < 5
    , bytestring
    , containers
    , libxml-sax
    , text
    , xml-types
    , HUnit == 1.2.*
    , test-framework >= 0.2 && < 0.4
    , test-framework-hunit == 0.2.6