~singpolyma/biboumi

3d92360310d8e35394109058ff723da57af5b380 — Florent Le Coz 10 years ago 8acd7a0
Use the Expat library directly instead of relying on expatpp

And now we handle namespaces, yay.
And a nice little test.
M CMakeLists.txt => CMakeLists.txt +5 -1
@@ 13,11 13,14 @@ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -fsanitize=address")
#
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
find_package(Cryptopp REQUIRED)
include(FindEXPAT)
find_package(EXPAT REQUIRED)

include_directories("src/")
# the SYSTEM flag tells the compiler that we don't care about warnings
# coming from these headers.
include_directories(SYSTEM ${CRYPTO++_INCLUDE_DIR})
include_directories(SYSTEM ${EXPAT_INCLUDE_DIRS})

#
## utils


@@ 47,7 50,8 @@ target_link_libraries(irc network utils)
file(GLOB source_xmpp
  src/xmpp/*.[hc]pp)
add_library(xmpp STATIC ${source_xmpp})
target_link_libraries(xmpp bridge network utils ${CRYPTO++_LIBRARIES} expatpp)
target_link_libraries(xmpp bridge network utils
  ${CRYPTO++_LIBRARIES} ${EXPAT_LIBRARIES})

#
## bridge

M src/test.cpp => src/test.cpp +19 -1
@@ 10,7 10,7 @@
#include <utils/encoding.hpp>
#include <string.h>

#include <fstream>
#include <xmpp/xmpp_parser.hpp>

int main()
{


@@ 44,5 44,23 @@ int main()
  std::string coucou("\u0002\u0002COUCOU\u0003");
  remove_irc_colors(coucou);
  assert(coucou == "COUCOU");

  /**
   * XML parsing
   */
  XmppParser xml;
  const std::string doc = "<stream xmlns='stream_ns'><stanza b='c'>inner<child1/><child2 xmlns='child2_ns'/>tail</stanza></stream>";
  xml.add_stanza_callback([](const Stanza& stanza)
      {
        assert(stanza.get_name() == "stream_ns:stanza");
        assert(stanza["b"] == "c");
        assert(stanza.get_inner() == "inner");
        assert(stanza.get_tail() == "");
        assert(stanza.get_child("stream_ns:child1") != nullptr);
        assert(stanza.get_child("stream_ns:child2") == nullptr);
        assert(stanza.get_child("child2_ns:child2") != nullptr);
        assert(stanza.get_child("child2_ns:child2")->get_tail() == "tail");
      });
  xml.feed(doc.data(), doc.size(), true);
  return 0;
}

M src/xmpp/xmpp_component.cpp => src/xmpp/xmpp_component.cpp +20 -11
@@ 10,6 10,15 @@
#include <hex.h>
#include <sha.h>

// XML namespace URIs used by the component. They stay plain macros (rather
// than typed constants) so they can be glued to adjacent string literals at
// compile time, e.g. COMPONENT_NS":handshake".
#define STREAM_NS "http://etherx.jabber.org/streams"
#define COMPONENT_NS "jabber:component:accept"

// MUC namespace and its "#user" variant.
#define MUC_NS "http://jabber.org/protocol/muc"
#define MUC_USER_NS MUC_NS "#user"

// Disco namespace and its "#items" / "#info" variants.
#define DISCO_NS "http://jabber.org/protocol/disco"
#define DISCO_ITEMS_NS DISCO_NS "#items"
#define DISCO_INFO_NS DISCO_NS "#info"


XmppComponent::XmppComponent(const std::string& hostname, const std::string& secret):
  served_hostname(hostname),
  secret(secret),


@@ 21,11 30,11 @@ XmppComponent::XmppComponent(const std::string& hostname, const std::string& sec
                                                  std::placeholders::_1));
  this->parser.add_stream_close_callback(std::bind(&XmppComponent::on_remote_stream_close, this,
                                                  std::placeholders::_1));
  this->stanza_handlers.emplace("handshake",
  this->stanza_handlers.emplace(COMPONENT_NS":handshake",
                                std::bind(&XmppComponent::handle_handshake, this,std::placeholders::_1));
  this->stanza_handlers.emplace("presence",
  this->stanza_handlers.emplace(COMPONENT_NS":presence",
                                std::bind(&XmppComponent::handle_presence, this,std::placeholders::_1));
  this->stanza_handlers.emplace("message",
  this->stanza_handlers.emplace(COMPONENT_NS":message",
                                std::bind(&XmppComponent::handle_message, this,std::placeholders::_1));
}



@@ 49,8 58,8 @@ void XmppComponent::on_connected()
{
  std::cout << "connected to XMPP server" << std::endl;
  XmlNode node("stream:stream", nullptr);
  node["xmlns"] = "jabber:component:accept";
  node["xmlns:stream"] = "http://etherx.jabber.org/streams";
  node["xmlns"] = COMPONENT_NS;
  node["xmlns:stream"] = STREAM_NS;
  node["to"] = "irc.abricot";
  this->send_stanza(node);
}


@@ 62,7 71,7 @@ void XmppComponent::on_connection_close()

void XmppComponent::parse_in_buffer()
{
  this->parser.XML_Parse(this->in_buf.data(), this->in_buf.size(), false);
  this->parser.feed(this->in_buf.data(), this->in_buf.size(), false);
  this->in_buf.clear();
}



@@ 122,7 131,7 @@ void XmppComponent::send_stream_error(const std::string& name, const std::string
{
  XmlNode node("stream:error", nullptr);
  XmlNode error(name, nullptr);
  error["xmlns"] = "urn:ietf:params:xml:ns:xmpp-streams";
  error["xmlns"] = STREAM_NS;
  if (!explanation.empty())
    error.set_inner(explanation);
  error.close();


@@ 161,7 170,7 @@ void XmppComponent::handle_presence(const Stanza& stanza)
        bridge->join_irc_channel(iid, to.resource);
      else if (type == "unavailable")
        {
          XmlNode* status = stanza.get_child("status");
          XmlNode* status = stanza.get_child(MUC_USER_NS":status");
          bridge->leave_irc_channel(std::move(iid), status ? std::move(status->get_inner()) : "");
        }
    }


@@ 172,7 181,7 @@ void XmppComponent::handle_message(const Stanza& stanza)
  Bridge* bridge = this->get_user_bridge(stanza["from"]);
  Jid to(stanza["to"]);
  Iid iid(to.local);
  XmlNode* body = stanza.get_child("body");
  XmlNode* body = stanza.get_child(COMPONENT_NS":body");
  if (stanza["type"] == "groupchat")
    {
      if (to.resource.empty())


@@ 214,7 223,7 @@ void XmppComponent::send_user_join(const std::string& from, const std::string& n
  node["from"] = from + "@" + this->served_hostname + "/" + nick;

  XmlNode x("x");
  x["xmlns"] = "http://jabber.org/protocol/muc#user";
  x["xmlns"] = MUC_USER_NS;

  // TODO: put real values here
  XmlNode item("item");


@@ 235,7 244,7 @@ void XmppComponent::send_self_join(const std::string& from, const std::string& n
  node["from"] = from + "@" + this->served_hostname + "/" + nick;

  XmlNode x("x");
  x["xmlns"] = "http://jabber.org/protocol/muc#user";
  x["xmlns"] = MUC_USER_NS;

  // TODO: put real values here
  XmlNode item("item");

M src/xmpp/xmpp_parser.cpp => src/xmpp/xmpp_parser.cpp +42 -18
@@ 1,21 1,56 @@
#include <xmpp/xmpp_parser.hpp>
#include <xmpp/xmpp_stanza.hpp>

#include <iostream>
/**
 * Expat handlers. Called by the Expat library, never by ourselves.
 * They just forward the call to the XmppParser corresponding methods.
 */

static void start_element_handler(void* user_data, const XML_Char* name, const XML_Char** atts)
{
  static_cast<XmppParser*>(user_data)->start_element(name, atts);
}

static void end_element_handler(void* user_data, const XML_Char* name)
{
  static_cast<XmppParser*>(user_data)->end_element(name);
}

static void character_data_handler(void *user_data, const XML_Char *s, int len)
{
  static_cast<XmppParser*>(user_data)->char_data(s, len);
}

/**
 * XmppParser class
 */

/**
 * Build a namespace-aware Expat parser and route its element and
 * character-data events to this object.
 */
XmppParser::XmppParser():
  level(0),
  current_node(nullptr)
{
  // UTF-8 input; ':' is the separator Expat puts between the namespace and
  // the element name in the names it reports.
  // NOTE(review): the handle returned here is not checked for failure.
  this->parser = XML_ParserCreateNS("UTF-8", ':');

  // The static handlers receive this pointer back as their user_data.
  XML_SetUserData(this->parser, this);

  // Hook up the forwarding handlers.
  XML_SetElementHandler(this->parser, start_element_handler, end_element_handler);
  XML_SetCharacterDataHandler(this->parser, character_data_handler);
}

/**
 * Release the node currently held by the parser (if any), then the Expat
 * parser itself.
 */
XmppParser::~XmppParser()
{
  // delete on a null pointer does nothing, so no explicit test is needed.
  delete this->current_node;
  XML_ParserFree(this->parser);
}

void XmppParser::startElement(const XML_Char* name, const XML_Char** attribute)
/**
 * Hand len bytes starting at data to Expat. is_final tells the parser
 * whether this is the last piece of the document.
 */
void XmppParser::feed(const char* data, const int len, const bool is_final)
{
  // Expat expects the "final" marker as an int.
  const int final_flag = is_final ? 1 : 0;
  // NOTE(review): the status returned by XML_Parse is discarded, so a
  // parse error is not reported to the caller from here.
  XML_Parse(this->parser, data, len, final_flag);
}

void XmppParser::start_element(const XML_Char* name, const XML_Char** attribute)
{
  level++;



@@ 29,9 64,9 @@ void XmppParser::startElement(const XML_Char* name, const XML_Char** attribute)
    this->stream_open_event(*this->current_node);
}

void XmppParser::endElement(const XML_Char* name)
void XmppParser::end_element(const XML_Char* name)
{
  assert(name == this->current_node->get_name());
  (void)name;
  level--;
  this->current_node->close();
  if (level == 1)


@@ 50,18 85,12 @@ void XmppParser::endElement(const XML_Char* name)
    this->current_node->delete_all_children();
}

void XmppParser::charData(const XML_Char* data, int len)
void XmppParser::char_data(const XML_Char* data, int len)
{
  if (this->current_node->has_children())
    this->current_node->get_last_child()->set_tail(std::string(data, len));
    this->current_node->get_last_child()->add_to_tail(std::string(data, len));
  else
    this->current_node->set_inner(std::string(data, len));
}

void XmppParser::startNamespace(const XML_Char* prefix, const XML_Char* uri)
{
  std::cout << "startNamespace: " << prefix << ":" << uri << std::endl;
  this->namespaces.emplace(std::make_pair(prefix, uri));
    this->current_node->add_to_inner(std::string(data, len));
}

void XmppParser::stanza_event(const Stanza& stanza) const


@@ 82,11 111,6 @@ void XmppParser::stream_close_event(const XmlNode& node) const
    callback(node);
}

void XmppParser::endNamespace(const XML_Char* coucou)
{
  std::cout << "endNamespace: " << coucou << std::endl;
}

void XmppParser::add_stanza_callback(std::function<void(const Stanza&)>&& callback)
{
  this->stanza_callbacks.emplace_back(std::move(callback));

M src/xmpp/xmpp_parser.hpp => src/xmpp/xmpp_parser.hpp +23 -23
@@ 1,29 1,33 @@
#ifndef XMPP_PARSER_INCLUDED
# define XMPP_PARSER_INCLUDED

#include <functional>
#include <stack>

#include <xmpp/xmpp_stanza.hpp>

#include <expatpp.h>
#include <functional>

#include <expat.h>

/**
 * A SAX XML parser that builds XML nodes and spawns events when a complete
 * stanza is received (an element of level 2), or when the document is
 * opened (an element of level 1)
 * opened/closed (an element of level 1)
 *
 * After a stanza_event has been spawned, we delete the whole stanza. This
 * means that even with a very long document (in XMPP the document is
 * potentially infinite), the memory then is never exhausted as long as each
 * potentially infinite), the memory is never exhausted as long as each
 * stanza is reasonably short.
 *
 * The element names generated by expat contain the namespace of the
 * element, a colon (':') and then the actual name of the element.  To get
 * an element "x" with a namespace of "http://jabber.org/protocol/muc", you
 * just look for an XmlNode named "http://jabber.org/protocol/muc:x"
 *
 * TODO: enforce the size-limit for the stanza (limit the number of children
 * it can contain). For example forbid the parser going further than level
 * 20 (arbitrary number here), and each XML node to have more than 15
 * children (arbitrary number again).
 */
class XmppParser: public expatpp
class XmppParser
{
public:
  explicit XmppParser();


@@ 31,13 35,16 @@ public:

public:
  /**
   * Feed the parser with some XML data
   */
  void feed(const char* data, const int len, const bool is_final);
  /**
   * Add one callback for the various events that this parser can spawn.
   */
  void add_stanza_callback(std::function<void(const Stanza&)>&& callback);
  void add_stream_open_callback(std::function<void(const XmlNode&)>&& callback);
  void add_stream_close_callback(std::function<void(const XmlNode&)>&& callback);

private:
  /**
   * Called when a new XML element has been opened. We instantiate a new
   * XmlNode and set it as our current node. The parent of this new node is


@@ 46,7 53,7 @@ private:
   *
   * We spawn a stream_event with this node if this is a level-1 element.
   */
  void startElement(const XML_Char* name, const XML_Char** attribute);
  void start_element(const XML_Char* name, const XML_Char** attribute);
  /**
   * Called when an XML element has been closed. We close the current_node,
   * set our current_node as the parent of the current_node, and if that was


@@ 55,19 62,11 @@ private:
   * And we then delete the stanza (and everything under it, its children,
   * attribute, etc).
   */
  void endElement(const XML_Char* name);
  void end_element(const XML_Char* name);
  /**
   * Some inner or tail data has been parsed
   */
  void charData(const XML_Char* data, int len);
  /**
   * TODO use that.
   */
  void startNamespace(const XML_Char* prefix, const XML_Char* uri);
  /**
   * TODO and that.
   */
  void endNamespace(const XML_Char* prefix);
  void char_data(const XML_Char* data, int len);
  /**
   * Calls all the stanza_callbacks one by one.
   */


@@ 82,6 81,11 @@ private:
   */
  void stream_close_event(const XmlNode& node) const;

private:
  /**
   * Expat structure.
   */
  XML_Parser parser;
  /**
   * The current depth in the XML document
   */


@@ 98,10 102,6 @@ private:
  std::vector<std::function<void(const Stanza&)>> stanza_callbacks;
  std::vector<std::function<void(const XmlNode&)>> stream_open_callbacks;
  std::vector<std::function<void(const XmlNode&)>> stream_close_callbacks;
  /**
   * TODO: also use that.
   */
  std::stack<std::pair<std::string, std::string>> namespaces;
};

#endif // XMPP_PARSER_INCLUDED

M src/xmpp/xmpp_stanza.cpp => src/xmpp/xmpp_stanza.cpp +15 -0
@@ 72,16 72,31 @@ void XmlNode::set_tail(const std::string& data)
  this->tail = data;
}

/**
 * Append one more chunk of text to the tail. The data is stored as given;
 * no escaping is applied here.
 */
void XmlNode::add_to_tail(const std::string& data)
{
  this->tail.append(data);
}

/**
 * Replace the inner text with the xml_escape()d form of data.
 */
void XmlNode::set_inner(const std::string& data)
{
  this->inner.assign(xml_escape(data));
}

/**
 * Append the xml_escape()d form of data to the inner text already stored.
 */
void XmlNode::add_to_inner(const std::string& data)
{
  this->inner.append(xml_escape(data));
}

/**
 * Return a copy of the inner text exactly as stored. set_inner and
 * add_to_inner store it after passing it through xml_escape, and nothing
 * is undone here.
 */
std::string XmlNode::get_inner() const
{
  return this->inner;
}

/**
 * Return a copy of the tail text exactly as stored (set_tail and
 * add_to_tail keep the data they are given unchanged).
 */
std::string XmlNode::get_tail() const
{
  return this->tail;
}

XmlNode* XmlNode::get_child(const std::string& name) const
{
  for (auto& child: this->children)

M src/xmpp/xmpp_stanza.hpp => src/xmpp/xmpp_stanza.hpp +16 -4
@@ 5,8 5,6 @@
#include <string>
#include <vector>

#include <expatpp.h>

std::string xml_escape(const std::string& data);

/**


@@ 52,16 50,30 @@ public:
   */
  void set_tail(const std::string& data);
  /**
   * Set the content of the inner, that is the text inside this node
   * TODO: escape it here.
   * Append the given data to the content of the tail. This exists because
   * the expat library may provide the complete text of an element in more
   * than one call
   */
  void add_to_tail(const std::string& data);
  /**
   * Set the content of the inner, that is the text inside this node.
   */
  void set_inner(const std::string& data);
  /**
   * Append the given data to the content of the inner. For the reason
   * described in add_to_tail comment.
   */
  void add_to_inner(const std::string& data);
  /**
   * Get the content of inner
   * TODO: unescape it here.
   */
  std::string get_inner() const;
  /**
   * Get the content of the tail
   */
  std::string get_tail() const;
  /**
   * Get a pointer to the first child element with that name
   */
  XmlNode* get_child(const std::string& name) const;