~singpolyma/haskell-libxml-sax

f93a9ad06ea9633525604ba44a1e62639fd026c7 — John Millikin 11 years ago 8867930
Free parser input streams in parseComplete.

These buffers can be quite large, so if they are retained until the 
Haskell garbage collector runs hslibxml_free_parser, then memory
use might become very large when parsing many documents in a loop.

Fixes a memory growth issue reported by Bram Schuur.
3 files changed, 32 insertions(+), 1 deletions(-)

M cbits/hslibxml-shim.c
M cbits/hslibxml-shim.h
M lib/Text/XML/LibXML/SAX.hs
M cbits/hslibxml-shim.c => cbits/hslibxml-shim.c +25 -0
@@ 4,6 4,10 @@
#include <string.h>
#include <stdio.h>

/* for hslibxml_parse_complete */
#include <libxml/parserInternals.h>


typedef struct UserData UserData;
struct UserData
{


@@ 55,6 59,27 @@ hslibxml_free_parser(xmlParserCtxt *ctx)
}

int
hslibxml_parse_complete(xmlParserCtxt *ctx)
{
	/* A NULL chunk of size 0 with the terminate flag set tells libxml2
	 * that the document is finished. The result is handed back to the
	 * caller unchanged. */
	int status = xmlParseChunk(ctx, NULL, 0, 1);
	xmlParserInputPtr stream;

	/* Parsing is over, so release every input stream still on the
	 * context's stack right away instead of waiting for
	 * hslibxml_free_parser to be run by the Haskell garbage collector.
	 * The stream buffers can be large, and leaving them allocated lets
	 * memory use balloon when many documents are parsed in a loop.
	 */
	for (stream = inputPop(ctx); stream != NULL; stream = inputPop(ctx)) {
		xmlFreeInputStream(stream);
	}

	return status;
}

int
hslibxml_want_callback(xmlParserCtxt *ctx, void *cb_ctx)
{
	if (ctx->replaceEntities)

M cbits/hslibxml-shim.h => cbits/hslibxml-shim.h +3 -0
@@ 13,6 13,9 @@ void
hslibxml_free_parser(xmlParserCtxt *ctx);

/* Finish a parse: calls xmlParseChunk with no data and the terminate flag
 * set, then pops and frees every input stream left on the context.
 * Returns the result code of that xmlParseChunk call. */
int
hslibxml_parse_complete(xmlParserCtxt *ctx);

int
hslibxml_want_callback(xmlParserCtxt *ctx, void *cb_ctx);

internalSubsetSAXFunc

M lib/Text/XML/LibXML/SAX.hs => lib/Text/XML/LibXML/SAX.hs +4 -1
@@ 132,7 132,7 @@ parseBytes p bytes = parseImpl p $ \h ->
-- closed correctly.
-- 
parseComplete :: Parser m -> m ()
-- NOTE(review): this diff view has lost its +/- markers. The commit summary
-- reports one deletion for this file, so the cParseChunk equation below is
-- presumably the removed line and the cParseComplete equation its
-- replacement — confirm against the raw patch.
parseComplete p = parseImpl p (\h -> cParseChunk h nullPtr 0 1)
parseComplete p = parseImpl p (\h -> cParseComplete h)

-- Callbacks {{{



@@ 635,6 635,9 @@ foreign import ccall unsafe "hslibxml-shim.h hslibxml_free_parser"
-- Binding to libxml2's xmlParseChunk. Arguments, in order: parser context,
-- chunk pointer, chunk size, terminate flag; the result is libxml2's
-- status code.
-- NOTE(review): imported `safe`, presumably because parsing can call back
-- into Haskell through the SAX callbacks — confirm.
foreign import ccall safe "libxml/parser.h xmlParseChunk"
	cParseChunk :: Ptr Context -> CString -> CInt -> CInt -> IO CInt

-- Binding to the C shim hslibxml_parse_complete, which is declared in
-- hslibxml-shim.h and takes only the parser context, returning an int
-- status code.
foreign import ccall safe "hslibxml-shim.h hslibxml_parse_complete"
	cParseComplete :: Ptr Context -> IO CInt

-- Binding to libxml2's xmlStopParser for the given parser context.
foreign import ccall safe "libxml/parser.h xmlStopParser"
	cStopParser :: Ptr Context -> IO ()