think more more

导航

c++引擎中xml库的使用选择

c++引擎中xml库的使用选择

最近需要在 C++ 引擎中使用 XML 解析及生成功能,这里记录一下选择 XML 库的过程。

xml benchmark

  • 参考 XML Benchmark Results 10.10.2009 的结果,初步选择使用 RapidXML
  • 查看 RapidXML 自己的 benchmark 数据,其解析速度接近对同一数据执行 strlen 的速度

使用示例

参考各种数据对比,决定使用RapidXML,解析及生成的示例代码如下

#include <iostream>
#include <string>
#include <vector>
#include <sstream> // for stringstream

#include "rapidxml.hpp"
#include "rapidxml_print.hpp"

using namespace std;

void parse() {
    string s =
        "<xml>"
        "<doc length=\"5\">"
        "<para id=\"0\" offset=\"0\">"
        "<sent id=\"0\" cont=\"apple\">"
        "<word id=\"0\" cont=\"apple\" pos=\"n\" ne=\"O\" head=\"2\" relation=\"ATT\"/>"
        "</sent>"
        "</para>"
        "</doc>"
        "</xml>";

    vector<char> xml(s.begin(), s.end());
    xml.emplace_back('\0');
    rapidxml::xml_document<> xml_doc;
    try {
        xml_doc.parse<rapidxml::parse_default>(xml.data());

        auto dump_attribute = [](rapidxml::xml_node<> * node) {
            for (rapidxml::xml_attribute<> * attribute = node->first_attribute(); attribute; attribute = attribute->next_attribute()) {
                std::cout << "    attribute name=" << attribute->name() << " value=" << attribute->value() << endl;
            }
        };
        rapidxml::xml_node<>* xml4nlp = xml_doc.first_node("xml");
        for (rapidxml::xml_node<> * doc = xml4nlp->first_node("doc"); doc; doc = doc->next_sibling()) {
            for (rapidxml::xml_node<>* para = doc->first_node("para"); para; para = para->next_sibling()) {
                for (rapidxml::xml_node<>* sent = para->first_node("sent"); sent; sent = sent->next_sibling()) {
                    for (rapidxml::xml_node<>* word = sent->first_node("word"); word; word = word->next_sibling()) {
                        dump_attribute(word);
                    }
                }
            }
        }
    }
    catch (rapidxml::parse_error e) {
        std::cout << e.what() << std::endl;
    }
}

void generate() {
    // 1.DOM
    rapidxml::xml_document<> doc;

    // 2.node_declaration
    rapidxml::xml_node<>* declaration = doc.allocate_node(rapidxml::node_declaration);
    declaration->append_attribute(doc.allocate_attribute("version", "1.0"));
    declaration->append_attribute(doc.allocate_attribute("encoding", "UTF-8"));
    doc.append_node(declaration);

    // 3. node_doctype
    rapidxml::xml_node<>* document_type = doc.allocate_node(rapidxml::node_doctype, "name", "document");
    doc.append_node(document_type);

    // 4.node_element
    rapidxml::xml_node<>* root = doc.allocate_node(rapidxml::node_element, "xml");
    doc.append_node(root);

    // 5.node_comment
    rapidxml::xml_node<>* comment = doc.allocate_node(rapidxml::node_comment, 0, "This is a node for comment!");
    root->append_node(comment);

    rapidxml::xml_node<>* para = doc.allocate_node(rapidxml::node_element, "para");
    root->append_node(para);

    // 6.set attribute
    rapidxml::xml_node<>* sent = doc.allocate_node(rapidxml::node_element, "sent");
    para->append_node(sent);
    sent->append_attribute(doc.allocate_attribute("id", doc.allocate_string("0")));
    sent->append_attribute(doc.allocate_attribute("cont", doc.allocate_string("apple")));

    stringstream output;
    output << doc;
    cout << output.str() << endl;
}

// Program entry point: runs the parsing demo. The generation demo is left
// disabled; uncomment the call below to run it as well.
int main(int argc, char** argv) {
    // The command-line arguments are not used by this example.
    static_cast<void>(argc);
    static_cast<void>(argv);

    parse();
    // generate();

    return 0;
}

Reference

posted on 2021-01-21 11:11  duothink  阅读(652)  评论(0)    收藏  举报