~singpolyma/network-protocol-xmpp

ac4ef197fc37e386b3fea58cf57b64bacb24c2f7 — John Millikin 14 years ago 2f2cd14
Extracted incremental XML parsing to a separate library.
5 files changed, 51 insertions(+), 288 deletions(-)

D Network/Protocol/XMPP/IncrementalXML.hs
M Network/Protocol/XMPP/Stream.hs
M Network/Protocol/XMPP/Util.hs
D incremental-xml.c
D incremental-xml.h
D Network/Protocol/XMPP/IncrementalXML.hs => Network/Protocol/XMPP/IncrementalXML.hs +0 -149
@@ 1,149 0,0 @@
{- Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
   
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}

{-# LANGUAGE ForeignFunctionInterface #-}
module Network.Protocol.XMPP.IncrementalXML (
	 Parser
	,Event(..)
	,Attribute(..)
	,newParser
	,incrementalParse
	) where

import Data.IORef (newIORef, readIORef, writeIORef, IORef)
import Foreign.C (CInt, CString, withCStringLen, peekCString, peekCStringLen)
import qualified Foreign as F
import Control.Exception (bracket)
import Text.XML.HXT.DOM.QualifiedName (mkQName, QName)

data ParserStruct = ParserStruct
data Parser = Parser !(F.ForeignPtr ParserStruct)

data Event =
	  BeginElement QName [Attribute]
	| EndElement QName
	| Characters String
	| ParseError String
	deriving (Show, Eq)

data Attribute = Attribute QName String
	deriving (Show, Eq)

newParser :: IO Parser
newParser = do
	ptr <- c_incremental_parser_new
	autoptr <- F.newForeignPtr c_incremental_parser_free ptr
	return $ Parser autoptr

incrementalParse :: Parser -> String -> IO [Event]
incrementalParse (Parser autoptr) s = do
	events <- newIORef []
	
	withCStringLen s $ \(cs, cs_len) -> do
	F.withForeignPtr autoptr $ \ptr -> do
	withFunPtr (onBeginElement events) wrappedBegin $ \b -> do
	withFunPtr (onEndElement events) wrappedEnd $ \e -> do
	withFunPtr (onCharacters events) wrappedText $ \t -> do
		retval <- (c_incremental_parse ptr cs (fromIntegral cs_len) b e t)
		(readIORef events) >>= (return . checkReturn retval)

checkReturn :: CInt -> [Event] -> [Event]
checkReturn r es = es ++ case r of
	0 -> []
	_ -> [ParseError (show r)]

withFunPtr :: a -> (a -> IO (F.FunPtr a)) -> (F.FunPtr a -> IO b) -> IO b
withFunPtr f mkPtr block = bracket (mkPtr f) F.freeHaskellFunPtr block

-- localname, prefix, namespace, value_begin, value_end
data CAttribute = CAttribute CString CString CString CString CString

splitCAttributes :: CInt -> F.Ptr CString -> IO [CAttribute]
splitCAttributes = splitCAttributes' 0

splitCAttributes' _      0 _     = return []
splitCAttributes' offset n attrs = do
	c_ln <- F.peekElemOff attrs (offset + 0)
	c_prefix <- F.peekElemOff attrs (offset + 1)
	c_ns <- F.peekElemOff attrs (offset + 2)
	c_vbegin <- F.peekElemOff attrs (offset + 3)
	c_vend <- F.peekElemOff attrs (offset + 4)
	as <- splitCAttributes' (offset + 5) (n - 1) attrs
	return (CAttribute c_ln c_prefix c_ns c_vbegin c_vend : as)

convertCAttribute :: CAttribute -> IO Attribute
convertCAttribute (CAttribute c_ln c_pfx c_ns c_vbegin c_vend) = do
	ln <- peekCString c_ln
	pfx <- peekNullable c_pfx
	ns <- peekNullable c_ns
	val <- peekCStringLen (c_vbegin, F.minusPtr c_vend c_vbegin)
	return (Attribute (mkQName pfx ln ns) val)

peekNullable :: CString -> IO String
peekNullable ptr
	| ptr == F.nullPtr = return ""
	| otherwise        = peekCString ptr

onBeginElement :: IORef [Event] -> F.Ptr () -> CString -> CString -> CString -> CInt -> F.Ptr () -> CInt -> CInt -> F.Ptr CString -> IO ()
onBeginElement eventref _ cln cpfx cns _ _ n_attrs _ raw_attrs = do
	ns <- peekCString cns
	pfx <- peekNullable cpfx
	ln <- peekCString cln
	es <- readIORef eventref
	c_attrs <- splitCAttributes n_attrs raw_attrs
	attrs <- mapM convertCAttribute c_attrs
	writeIORef eventref (es ++ [BeginElement (mkQName pfx ln ns) attrs])

onEndElement :: IORef [Event] -> F.Ptr () -> CString -> CString -> CString -> IO ()
onEndElement eventref _ cln cpfx cns = do
	ns <- peekCString cns
	pfx <- peekNullable cpfx
	ln <- peekCString cln
	es <- readIORef eventref
	writeIORef eventref (es ++ [EndElement (mkQName pfx ln ns)])

onCharacters :: IORef [Event] -> F.Ptr () -> CString -> CInt -> IO ()
onCharacters eventref _ ctext ctextlen = do
	text <- (peekCStringLen (ctext, fromIntegral ctextlen))
	es <- readIORef eventref
	writeIORef eventref (es ++ [Characters text])

type StartElementNsSAX2Func = (F.Ptr () -> CString -> CString -> CString -> CInt -> F.Ptr () -> CInt -> CInt -> F.Ptr CString -> IO ())
type EndElementNsSAX2Func = (F.Ptr () -> CString -> CString -> CString -> IO ())
type CharactersSAXFunc = (F.Ptr () -> CString -> CInt -> IO ())

foreign import ccall "wrapper"
	wrappedBegin :: StartElementNsSAX2Func -> IO (F.FunPtr StartElementNsSAX2Func)

foreign import ccall "wrapper"
	wrappedEnd :: EndElementNsSAX2Func -> IO (F.FunPtr EndElementNsSAX2Func)

foreign import ccall "wrapper"
	wrappedText :: CharactersSAXFunc -> IO (F.FunPtr CharactersSAXFunc)

foreign import ccall "incremental-xml.h incremental_parser_new"
	c_incremental_parser_new :: IO (F.Ptr ParserStruct)

foreign import ccall "incremental-xml.h incremental_parse"
	c_incremental_parse :: F.Ptr ParserStruct -> CString -> CInt
	                    -> F.FunPtr StartElementNsSAX2Func
	                    -> F.FunPtr EndElementNsSAX2Func
	                    -> F.FunPtr CharactersSAXFunc
	                    -> IO CInt

foreign import ccall "incremental-xml.h &incremental_parser_free"
	c_incremental_parser_free :: F.FunPtr (F.Ptr ParserStruct -> IO ())


M Network/Protocol/XMPP/Stream.hs => Network/Protocol/XMPP/Stream.hs +19 -18
@@ 41,7 41,7 @@ import Text.XML.HXT.Arrow ((>>>))
import qualified Text.XML.HXT.Arrow as A
import qualified Text.XML.HXT.DOM.Interface as DOM
import qualified Text.XML.HXT.DOM.XmlNode as XN
import qualified Network.Protocol.XMPP.IncrementalXML as XML
import qualified Text.XML.LibXML.SAX as SAX

-- TLS support
import qualified Network.GnuTLS as GnuTLS


@@ 50,7 50,7 @@ import Foreign.C (peekCAStringLen)

import Network.Protocol.XMPP.JID (JID)
import Network.Protocol.XMPP.SASL (Mechanism, findMechanism)
import Network.Protocol.XMPP.Util (eventsToTree, mkQName, mkElement)
import qualified Network.Protocol.XMPP.Util as Util

maxXMPPVersion :: XMPPVersion
maxXMPPVersion = XMPPVersion 1 0


@@ 59,7 59,7 @@ data Stream = Stream
	{
		 streamHandle   :: Handle
		,streamJID      :: JID
		,streamParser   :: XML.Parser
		,streamParser   :: SAX.Parser
		,streamLanguage :: XMLLanguage
		,streamVersion  :: XMPPVersion
		,streamFeatures :: [StreamFeature]


@@ 120,7 120,7 @@ beginStream' jid h = do
		" version='1.0'" ++
		" xmlns:stream='http://etherx.jabber.org/streams'>"
	
	parser <- XML.newParser
	parser <- SAX.newParser
	hPutStr h xmlHeader
	[startStreamEvent] <- readEventsUntil startOfStream h parser
	featureTree <- getTree' h parser


@@ 131,13 131,14 @@ beginStream' jid h = do
	return $ Stream h jid parser language version features
	
	where
		streamName = mkQName "http://etherx.jabber.org/streams" "stream"
		streamName = Util.mkQName "http://etherx.jabber.org/streams" "stream"
		
		startOfStream depth event = case (depth, event) of
			(1, (XML.BeginElement elemName _)) -> streamName == elemName
			(1, (SAX.BeginElement elemName _)) ->
				streamName == Util.convertQName elemName
			_ -> False

parseStartStream :: XML.Event -> (XMLLanguage, XMPPVersion)
parseStartStream :: SAX.Event -> (XMLLanguage, XMPPVersion)
parseStartStream e = (XMLLanguage "en", XMPPVersion 1 0) -- TODO

parseFeatures :: DOM.XmlTree -> [StreamFeature]


@@ 147,7 148,7 @@ parseFeatures t =
		>>> A.getChildren
		>>> A.arrL (\t' -> [parseFeature t'])) t
	where
		featuresName = mkQName "http://etherx.jabber.org/streams" "features"
		featuresName = Util.mkQName "http://etherx.jabber.org/streams" "features"

parseFeature :: DOM.XmlTree -> StreamFeature
parseFeature t = lookupDef FeatureUnknown qname [


@@ 165,7 166,7 @@ parseFeatureTLS t = FeatureStartTLS True -- TODO: detect whether or not required

parseFeatureSASL :: DOM.XmlTree -> StreamFeature
parseFeatureSASL t = let
	mechName = mkQName "urn:ietf:params:xml:ns:xmpp-sasl" "mechanism"
	mechName = Util.mkQName "urn:ietf:params:xml:ns:xmpp-sasl" "mechanism"
	rawMechanisms = A.runLA (
		A.getChildren
		>>> A.hasQName mechName


@@ 181,12 182,12 @@ parseFeatureSASL t = let
getTree :: Stream -> IO DOM.XmlTree
getTree s = getTree' (streamHandle s) (streamParser s)

getTree' :: Handle -> XML.Parser -> IO DOM.XmlTree
getTree' :: Handle -> SAX.Parser -> IO DOM.XmlTree
getTree' h p = do
	events <- readEventsUntil finished h p
	return $ eventsToTree events
	return $ Util.eventsToTree events
	where
		finished 0 (XML.EndElement _) = True
		finished 0 (SAX.EndElement _) = True
		finished _ _ = False

putTree :: Stream -> DOM.XmlTree -> IO ()


@@ 200,12 201,12 @@ putTree s t = do

-------------------------------------------------------------------------------

readEventsUntil :: (Int -> XML.Event -> Bool) -> Handle -> XML.Parser -> IO [XML.Event]
readEventsUntil :: (Int -> SAX.Event -> Bool) -> Handle -> SAX.Parser -> IO [SAX.Event]
readEventsUntil done h parser = readEventsUntil' done 0 [] $ do
	char <- hGetChar h
	XML.incrementalParse parser [char]
	SAX.incrementalParse parser [char]

readEventsUntil' :: (Int -> XML.Event -> Bool) -> Int -> [XML.Event] -> IO [XML.Event] -> IO [XML.Event]
readEventsUntil' :: (Int -> SAX.Event -> Bool) -> Int -> [SAX.Event] -> IO [SAX.Event] -> IO [SAX.Event]
readEventsUntil' done depth accum getEvents = do
	events <- getEvents
	let (done', depth', accum') = readEventsStep done events depth accum


@@ 213,12 214,12 @@ readEventsUntil' done depth accum getEvents = do
		then return accum'
		else readEventsUntil' done depth' accum' getEvents

readEventsStep :: (Int -> XML.Event -> Bool) -> [XML.Event] -> Int -> [XML.Event] -> (Bool, Int, [XML.Event])
readEventsStep :: (Int -> SAX.Event -> Bool) -> [SAX.Event] -> Int -> [SAX.Event] -> (Bool, Int, [SAX.Event])
readEventsStep _ [] depth accum = (False, depth, accum)
readEventsStep done (e:es) depth accum = let
	depth' = depth + case e of
		(XML.BeginElement _ _) -> 1
		(XML.EndElement _) -> (- 1)
		(SAX.BeginElement _ _) -> 1
		(SAX.EndElement _) -> (- 1)
		_ -> 0
	accum' = accum ++ [e]
	in if done depth' e then (True, depth', accum')

M Network/Protocol/XMPP/Util.hs => Network/Protocol/XMPP/Util.hs +32 -24
@@ 16,36 16,37 @@

module Network.Protocol.XMPP.Util (
	 eventsToTree
	,convertAttr
	,convertQName
	,mkElement
	,mkAttr
	,mkQName
	) where

import qualified Text.XML.HXT.DOM.XmlNode as XN
import qualified Text.XML.HXT.DOM.QualifiedName as QN
import Text.XML.HXT.DOM.TypeDefs (XmlTree)
import qualified Network.Protocol.XMPP.IncrementalXML as XML
import qualified Text.XML.HXT.DOM.Interface as DOM
import qualified Text.XML.LibXML.SAX as SAX

-------------------------------------------------------------------------------
-- For converting incremental XML event lists to HXT trees
-------------------------------------------------------------------------------

-- This function assumes the input list is valid. No validation is performed.
eventsToTree :: [XML.Event] -> XmlTree
eventsToTree :: [SAX.Event] -> DOM.XmlTree
eventsToTree es = XN.mkRoot [] (eventsToTrees es)

eventsToTrees :: [XML.Event] -> [XmlTree]
eventsToTrees :: [SAX.Event] -> [DOM.XmlTree]
eventsToTrees es = map blockToTree (splitBlocks es)

-- Split event list into a sequence of "blocks", which are the events including
-- and between a pair of tags. <start><start2/></start> and <start/> are both
-- single blocks.
splitBlocks :: [XML.Event] -> [[XML.Event]]
splitBlocks :: [SAX.Event] -> [[SAX.Event]]
splitBlocks es = ret where (_, _, ret) = foldl splitBlocks' (0, [], []) es

splitBlocks' :: (Int, [XML.Event], [[XML.Event]])
                -> XML.Event
                -> (Int, [XML.Event], [[XML.Event]])
splitBlocks' :: (Int, [SAX.Event], [[SAX.Event]])
                -> SAX.Event
                -> (Int, [SAX.Event], [[SAX.Event]])
splitBlocks' (depth, accum, allAccum) e =
	if depth' == 0 then
		(depth', [], allAccum ++ [accum'])


@@ 54,36 55,43 @@ splitBlocks' (depth, accum, allAccum) e =
	where
		accum' = accum ++ [e]
		depth' = depth + case e of
			(XML.BeginElement _ _) -> 1
			(XML.EndElement _) -> (- 1)
			(SAX.BeginElement _ _) -> 1
			(SAX.EndElement _) -> (- 1)
			_ -> 0

blockToTree :: [XML.Event] -> XmlTree
blockToTree :: [SAX.Event] -> DOM.XmlTree
blockToTree [] = error "No blocks"
blockToTree (begin:rest) = let end = (last rest) in case (begin, end) of
	(XML.BeginElement qname attrs, XML.EndElement _) ->
		XN.mkElement qname (map convertAttr attrs) (eventsToTrees (init rest))
	(XML.Characters s, _) -> XN.mkText s
	(_, XML.ParseError _) -> undefined
	fff -> error ("Got unexpected: " ++ (show fff))
	(SAX.BeginElement qname attrs, SAX.EndElement _) ->
		XN.mkElement (convertQName qname)
			(map convertAttr attrs)
			(eventsToTrees (init rest))
	(SAX.Characters s, _) -> XN.mkText s
	(_, SAX.ParseError text) -> error text
	unexpected -> error ("Got unexpected: " ++ (show unexpected))

convertAttr :: XML.Attribute -> XmlTree
convertAttr (XML.Attribute qname value) = XN.NTree (XN.mkAttrNode qname) [XN.mkText value]
convertAttr :: SAX.Attribute -> DOM.XmlTree
convertAttr (SAX.Attribute qname value) = XN.NTree
	(XN.mkAttrNode (convertQName qname))
	[XN.mkText value]

convertQName :: SAX.QName -> DOM.QName
convertQName (SAX.QName ns _ local) = mkQName ns local

-------------------------------------------------------------------------------
-- Utility function for building XML trees
-------------------------------------------------------------------------------

mkElement :: (String, String) -> [(String, String, String)] -> [XmlTree] -> XmlTree
mkElement :: (String, String) -> [(String, String, String)] -> [DOM.XmlTree] -> DOM.XmlTree
mkElement (ns, localpart) attrs children = let
	qname = mkQName ns localpart
	attrs' = [mkAttr ans alp text | (ans, alp, text) <- attrs]
	in XN.mkElement qname attrs' children

mkAttr :: String -> String -> String -> XmlTree
mkAttr :: String -> String -> String -> DOM.XmlTree
mkAttr ns localpart text = XN.mkAttr (mkQName ns localpart) [XN.mkText text]

mkQName :: String -> String -> QN.QName
mkQName :: String -> String -> DOM.QName
mkQName ns localpart = case ns of
	"" -> QN.mkName localpart
	_ -> QN.mkNsName localpart ns
	"" -> DOM.mkName localpart
	_ -> DOM.mkNsName localpart ns

D incremental-xml.c => incremental-xml.c +0 -66
@@ 1,66 0,0 @@
/* Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
   
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <string.h>
#include <assert.h>
#include "incremental-xml.h"

struct _IncrementalParser
{
	xmlSAXHandler handler;
	xmlParserCtxt *context;
};

IncrementalParser *
incremental_parser_new ()
{
	IncrementalParser *parser;
	parser = malloc (sizeof (IncrementalParser));
	assert (parser != NULL);
	
	memset (&(parser->handler), 0, sizeof (parser->handler));
	parser->handler.initialized = XML_SAX2_MAGIC;
	
	parser->context = xmlCreatePushParserCtxt (
		&(parser->handler), parser, NULL, 0, NULL);
	assert (parser->context != NULL);
	
	return parser;
}

void
incremental_parser_free (IncrementalParser *p)
{
	xmlClearParserCtxt (p->context);
	xmlFreeParserCtxt (p->context);
	free (p);
}

int
incremental_parse (
	IncrementalParser *parser,
	const char *text,
	int text_len,
	startElementNsSAX2Func begin,
	endElementNsSAX2Func end,
	charactersSAXFunc text_handler)
{
	xmlParserCtxt *c = parser->context;
	c->sax->startElementNs = begin;
	c->sax->endElementNs = end;
	c->sax->characters = text_handler;
	return xmlParseChunk (c, text, text_len, 0);
}

D incremental-xml.h => incremental-xml.h +0 -31
@@ 1,31 0,0 @@
/* Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
   
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <libxml/parser.h>

typedef struct _IncrementalParser IncrementalParser;

IncrementalParser *
incremental_parser_new ();

void
incremental_parser_free (IncrementalParser *);

int
incremental_parse (IncrementalParser *, const char *, int,
                   startElementNsSAX2Func,
                   endElementNsSAX2Func,
                   charactersSAXFunc);