~singpolyma/haskell-libxml-sax

6c5566c315f17eea3608cdd9355400bf0e77015f — John Millikin 12 years ago 5946d5a
Handle magic entity references like ``&`` in attribute values when
reference preservation is enabled.
3 files changed, 35 insertions(+), 9 deletions(-)

M Text/XML/LibXML/SAX.hs
M libxml-sax.cabal
M tests/Properties.hs
M Text/XML/LibXML/SAX.hs => Text/XML/LibXML/SAX.hs +16 -5
@@ 55,6 55,7 @@ import           Control.Monad (when, unless)
import qualified Control.Monad.ST as ST
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as BU
import           Data.Char (chr, isDigit)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.XML.Types as X


@@ 246,10 247,13 @@ wrap_beginElement :: Parser m -> (X.Name -> [(X.Name, [X.Content])] -> m Bool) -
wrap_beginElement p io =
	newcb_startElementNs $ \ctx cln cpfx cns _ _ n_attrs _ raw_attrs ->
	catchRefIO p ctx $ do
		refCB <- getcb_reference ctx
		let hasRefCB = refCB /= nullFunPtr
		
		ns <- maybePeek peekUTF8 (castPtr cns)
		pfx <- maybePeek peekUTF8 (castPtr cpfx)
		ln <- peekUTF8 (castPtr cln)
		attrs <- peekAttributes (castPtr raw_attrs) n_attrs
		attrs <- peekAttributes hasRefCB (castPtr raw_attrs) n_attrs
		parserToIO p (io (X.Name ln ns pfx) attrs)

foreign import ccall unsafe "hslibxml-shim.h hslibxml_getcb_startElementNs"


@@ 261,8 265,8 @@ foreign import ccall unsafe "hslibxml-shim.h hslibxml_setcb_startElementNs"
foreign import ccall "wrapper"
	newcb_startElementNs :: StartElementNsSAX2Func -> IO (FunPtr StartElementNsSAX2Func)

peekAttributes :: Ptr CString -> CInt -> IO [(X.Name, [X.Content])]
peekAttributes ptr = loop 0 where
peekAttributes :: Bool -> Ptr CString -> CInt -> IO [(X.Name, [X.Content])]
peekAttributes hasRefCB ptr = loop 0 where
	loop _      0 = return []
	loop offset n = do
		local <- peekUTF8 =<< peekElemOff ptr (offset + 0)


@@ 273,7 277,9 @@ peekAttributes ptr = loop 0 where
		val_end <- peekElemOff ptr (offset + 4)
		val <- peekUTF8Len (val_begin, minusPtr val_end val_begin)
		
		let content = parseAttributeContent val
		let content = if hasRefCB
			then parseAttributeContent val
			else [X.ContentText val]
		let attr = (X.Name local ns prefix, content)
		attrs <- loop (offset + 5) (n - 1)
		


@@ 285,7 291,12 @@ parseAttributeContent = parse . T.unpack where
		(cs,_):_ -> cs
		_ -> error "parseAttributeContent: no parse"
	parser = ReadP.manyTill content ReadP.eof
	content = reference +++ text
	content = charRef +++ reference +++ text
	charRef = do
		void (ReadP.string "&#")
		val <- ReadP.munch1 (isDigit)
		void (ReadP.char ';')
		return (X.ContentText (T.singleton (chr (read val))))
	reference = do
		void (ReadP.char '&')
		name <- ReadP.munch1 (/= ';')

M libxml-sax.cabal => libxml-sax.cabal +2 -2
@@ 1,5 1,5 @@
name: libxml-sax
version: 0.7
version: 0.7.1
synopsis: Bindings for the libXML2 SAX interface
license: MIT
license-file: license.txt


@@ 19,7 19,7 @@ source-repository head
  location: http://john-millikin.com/software/bindings/libxml-sax/

library
  ghc-options: -Wall
  ghc-options: -Wall -O2
  cc-options: -Wall

  build-depends:

M tests/Properties.hs => tests/Properties.hs +17 -2
@@ 32,6 32,7 @@ tests = [ test_Instruction
        , test_AttributeContent
        , test_AttributeContentNoReference
        , test_AttributeOrder
        , test_AttributeContentAmpersand
        ]

main :: IO ()


@@ 214,8 215,9 @@ test_AttributeContent = test_Chunks "attribute content"
	[ ("<!DOCTYPE SOME_DOCTYPE [<!ENTITY ref \"some reference\">]>",
	  [
	  ])
	, ("<doc a='text &ref; text'/>",
	   [ X.EventBeginElement "doc" [("a", [X.ContentText "text ", X.ContentEntity "ref", X.ContentText " text"])]
	, ("<doc a='text &amp; &ref; text'/>",
	   [ X.EventBeginElement "doc" [("a", [ X.ContentText "text ", X.ContentText "&", X.ContentText " "
	                                      , X.ContentEntity "ref", X.ContentText " text"])]
	   , X.EventEndElement "doc"
	   ])
	]


@@ 236,6 238,19 @@ test_AttributeContentNoReference = test_Chunks "attribute content (no reference 
	   ])
	]

test_AttributeContentAmpersand :: F.Test
test_AttributeContentAmpersand = test_Chunks "attribute content (with ampersand)"
	(\p add -> do
		let set cb st = SAX.setCallback p cb st
		set SAX.parsedBeginElement (\n as -> add (X.EventBeginElement n as))
		set SAX.parsedEndElement (\n -> add (X.EventEndElement n))
	)
	[ ("<doc a='&amp;foo'/>",
	   [ X.EventBeginElement "doc" [("a", [X.ContentText "&foo"])]
	   , X.EventEndElement "doc"
	   ])
	]

test_AttributeOrder :: F.Test
test_AttributeOrder = test_Chunks "attribute order"
	(\p add -> do