类xml数据格式解析

需要解析一种类xml的数据文件,数据格式1如下:

<head> //文件头
    <type>xtype</type>
    <condition>
        key1=value1
        key2=value2
    </condition>
    <mea>
        key3=value3
        key4=value4
    </mea>
    <xxxx>//多个
     ...
     </xxxx>
</head>
<data> //数据域,多个
phi rcs ang
1    2    3
2    3    4
</data>
<data>
phi rcs ang
3    4    5
4    5    6
</data>
数据格式2:
#xtype
//comment
[condition]
    项1=值1
    项2=值2
[/condition]
[mea]
    key3=value3
    key4=value4
[/mea]
[data]
phi rcs ang
1    1    1
2    2    2
[/data]
[data]
phi rcs ang
3    1    1
4    2    2
[/data]

该数据格式类似xml,我们需要解析的是head中的所有标签,及标签中的键值对(key=value),并将data域中的数据保存成浮点型数组。

采用类似xml的解析方式,递归进行解析

 具体代码如下:

#ifndef MYPARSEGJBDATA_H_20170114
#define MYPARSEGJBDATA_H_20170114

#include <string>
#include <map>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;

/*file format1:
<head>
    <type>xtype</type>
    <condition>
        key1=value1
        key2=value2
    </condition>
    <mea>
        key3=value3
        key4=value4
    </mea>
</head>
<data>
phi rcs ang
1    1    1
2    2    2
</data>
*/

/**
 * One node of the parsed tag tree.
 *
 * A node holds the tag name together with everything collected for that tag:
 * plain string tokens, numeric values, key=value pairs and nested child tags.
 */
typedef struct stMyDataLabel{
    std::string Label;                         // tag name
    std::vector<std::string> Content;          // plain strings (the <label>content</label> form), not key=value pairs
    std::vector<float> Values;                 // data-block numbers: a 2-D table stored row by row; column count is Content.size()
    std::map<std::string,std::string> KeyVal;  // key=value pairs found inside the tag
    std::vector<stMyDataLabel> SubItems;       // child nodes

    /** Creates an empty node (empty name, no content, no children). */
    stMyDataLabel(){}

    /**
     * Resets the node to the same state as a freshly constructed one.
     *
     * The tag name is reset as well, so a cleared node can be reused as a
     * parse target. Children are destroyed by SubItems.clear(), so no
     * explicit recursion is needed.
     */
    void Clear(){
        Label.clear();
        Content.clear();
        Values.clear();
        KeyVal.clear();
        SubItems.clear();
    }

    /**
     * Writes the node and, recursively, all of its children to `myout`.
     *
     * Layout: the tag name on one line, the Content tokens on the next, the
     * key=value pairs on the next, then Values with a line break after every
     * Content.size() numbers (after every number when Content is empty),
     * followed by each child.
     *
     * @param myout  destination stream; all output, including the row breaks
     *               of the value table, goes to this stream
     * @param m_Data node to print
     * @return `myout`, to allow chaining
     */
    friend std::ostream& operator <<(std::ostream& myout, const stMyDataLabel& m_Data){
        myout << m_Data.Label << '\n';

        for (const std::string& token : m_Data.Content)
            myout << "\t" << token << "\t";
        myout << '\n';

        for (const auto& kv : m_Data.KeyVal)
            myout << "\t" << kv.first << " = " << kv.second << "\t";
        myout << '\n';

        const std::vector<float>::size_type columns = m_Data.Content.size();
        std::vector<float>::size_type column = 0;
        for (float value : m_Data.Values){
            myout << "\t" << value << "\t";
            if (++column >= columns){   // end of a table row
                myout << '\n';
                column = 0;
            }
        }
        myout << '\n';

        for (const stMyDataLabel& child : m_Data.SubItems)
            myout << child << '\n';
        return myout;
    }
}stMyDataLabel;

class MyDataParse{//数据类型1的解析类
public:
    vector<stMyDataLabel> m_Data;
    stMyDataLabel m_Header;
    char m_StartLabel;
    char m_EndLabel;
public:
    MyDataParse(){
        m_StartLabel = '<';
        m_EndLabel = '>';
    }
    void Init(){
        m_Header.Clear();
        for (vector<stMyDataLabel>::iterator itr = m_Data.begin();itr!=m_Data.end();itr++)
        {
            itr->Clear();
        }
    }

    void Print(ostream& myout){
        myout<<m_Header<<endl;
        for (vector<stMyDataLabel>::iterator ditr = m_Data.begin();ditr!=m_Data.end();ditr++)
            myout<<*ditr<<endl;
    }

    bool Parse(char* filename){
        if(filename == NULL) return false;
        ifstream myin(filename,ios::in);
        bool flag = ParseLabel(myin,m_Header);
        if(!flag) return false;
        while(!myin.eof()){
            stMyDataLabel data;
            flag = ParseLabel(myin,data);
            m_Data.push_back(data);
        }
        return flag;
    }

    bool ParseLabel(istream& myin,stMyDataLabel& label){
        string str;
        bool flag = true;
        do{
            myin >> str;
            int sidx=0,eidx=0;
            sidx = str.find(m_StartLabel);
            eidx = str.find(m_EndLabel);
            if(sidx >= 0 && eidx >= 0 && eidx > sidx){//<lable> or </label>
                if(str.at(sidx+1) != '/')//start of <label>
                {
                    //string substr(int pos = 0,int n = npos) const;//返回pos开始的n个字符组成的字符串
                    bool isSub=false;
                    string lableName = str.substr(sidx+1,eidx-sidx-1);
                    if(label.Label != "") isSub = true;//start of subitem's <label>

                    //find </label>
                    int sidx2=0,eidx2=0;
                    sidx2 = str.rfind(m_StartLabel);
                    eidx2 = str.rfind(m_EndLabel);                
                    if(eidx != eidx2 && str.at(sidx2+1) == '/')//<label>content</label>, has </label>
                    {
                        string strelab = str.substr(sidx2+2,eidx2-sidx2-2);
                        if(strelab == lableName){
                            if(isSub){
                                stMyDataLabel sublabel;
                                sublabel.Label = lableName;
                                sublabel.Content.clear();
                                sublabel.Content.push_back( str.substr(eidx+1,sidx2-eidx-1) );
                                label.SubItems.push_back(sublabel);
                            }
                            else{
                                label.Label = lableName;
                                label.Content.clear();
                                label.Content.push_back( str.substr(eidx+1,sidx2-eidx-1) );
                            }
                            continue;
                        }
                        else{
                            return false;
                        }                        
                    }
                    else{
                        if(isSub){
                            stMyDataLabel sublabel;
                            sublabel.Label = lableName;
                            label.SubItems.push_back(sublabel);
                            int curIdx = label.SubItems.size()-1;
                            bool bres = ParseLabel(myin,label.SubItems.at(curIdx));
                            if(!bres) return false;//subitem format error
                            else continue;//
                        }
                        else
                            label.Label = lableName;
                    }
                }
                else{// </lable>
                    string elabel = str.substr(sidx+2,eidx-sidx-2);
                    if(elabel == label.Label){//end of this label
                        return true;
                    }
                    else{//format error
                        return false;
                    }
                }
            }
            else{//content
                if(label.Label == "data" || label.Label == "DATA"){//data block
                    // 判断字符串是不是数字
                    stringstream sin(str);
                    float val;
                    if(!(sin >> val))//不是数字
                        label.Content.push_back(str);
                    else
                        label.Values.push_back(val);
                }
                else//header
                {
                    int idx = str.find('=');
                    if(idx >= 0){//key=value
                        string strkey = str.substr(0,idx);
                        string strval = str.substr(idx+1);
                        label.KeyVal.insert(make_pair(strkey,strval));
                    }
                    else{
                        label.Content.push_back(str);
                    }
                }
            }
        }while(flag);        
    }
    // 判断字符串是不是数字
    bool isNum(string str)
    {
        stringstream sin(str);
        double d;
        char c;
        if(!(sin >> d))
            return false;
        if (sin >> c)
            return false;
        return true;
    }
};

/*file format2:
#xtype
//comment
[condition]
    项1=值1
    项2=值2
[/condition]
[mea]
    key3=value3
    key4=value4
[/mea]
[data]
phi rcs ang
1    1    1
2    2    2
[/data]
*/
class MyDataParse2{//数据类型2的解析类
public:
    stMyDataLabel m_Data;//single root
    char m_StartLabel;
    char m_EndLabel;
public:
    MyDataParse2(){
        m_StartLabel = '[';
        m_EndLabel = ']';
    }
    void Init(){
        m_Data.Clear();
    }

    void Print(ostream& myout){
        myout<<m_Data;
    }

    bool Parse(char* filename){
        if(filename == NULL) return false;
        ifstream myin(filename,ios::in);
        string str;
        myin>>str;
        if(str.at(0) == '#'){
            m_Data.Label = str.substr(1);
        }
        bool flag = true;
        while(!myin.eof() && flag){
            flag = ParseLabel(myin,m_Data);
        }
        return flag;
    }

    bool ParseLabel(istream& myin,stMyDataLabel& label){
        string str;
        bool flag = true;
        do{
            myin >> str;
            if(str.substr(0,2) == "//")//comment,skip
                continue;

            int sidx=0,eidx=0;
            sidx = str.find(m_StartLabel);
            eidx = str.find(m_EndLabel);
            if(sidx >= 0 && eidx >= 0 && eidx > sidx){//[lable] or [/label]
                if(str.at(sidx+1) != '/')//start of <label>
                {
                    //string substr(int pos = 0,int n = npos) const;//返回pos开始的n个字符组成的字符串
                    bool isSub=false;
                    string lableName = str.substr(sidx+1,eidx-sidx-1);
                    if(label.Label != "") isSub = true;//start of subitem's [label]

                    if(isSub){
                        stMyDataLabel sublabel;
                        sublabel.Label = lableName;
                        label.SubItems.push_back(sublabel);
                        int curIdx = label.SubItems.size()-1;
                        bool bres = ParseLabel(myin,label.SubItems.at(curIdx));
                        if(!bres) return false;//subitem format error
                        else continue;//
                    }
                    else
                        label.Label = lableName;
                }
                else{// </lable>
                    string elabel = str.substr(sidx+2,eidx-sidx-2);
                    if(elabel == label.Label){//end of this label
                        return true;
                    }
                    else{//format error
                        return false;
                    }
                }
            }
            else{//content
                if(label.Label == "data" || label.Label == "DATA"){//data block
                    // 判断字符串是不是数字
                    stringstream sin(str);
                    float val;
                    if(!(sin >> val))//不是数字
                        label.Content.push_back(str);
                    else
                        label.Values.push_back(val);
                }
                else//header
                {
                    int idx = str.find('=');
                    if(idx >= 0){//key=value
                        string strkey = str.substr(0,idx);
                        string strval = str.substr(idx+1);
                        label.KeyVal.insert(make_pair(strkey,strval));
                    }
                    else{
                        label.Content.push_back(str);
                    }
                }
            }
        }while(!myin.eof());        
    }
};
#endif

 

测试(命令行方式)

MyDataParse parse;
parse.Init();
parse.Parse("test.txt");//待解析的数据文件
parse.Print(cout);

 测试数据:

//test11.txt
<head>
    <type>xtype</type>
    <condition>
        key1=value1
        key2=value2
    </condition>
    <mea>
        key3=value3
        key4=value4
    </mea>
</head>
<data>
phi rcs ang
1    2    3
2    3    4
</data>
<data>
phi rcs ang
3    4    5
4    5    6
</data>
//test12.txt
<head>
    <type>xtype</type>
    <condition>
        key1=value1
        key2=value2
    </condition>
    <group>
        <part1>
            key3=value3
            key4=value4
        </part1>
        <part2>
            key3=value3
            key4=value4
        </part2>
    </group>
</head>
<data>
phi rcs ang
1    2    3
2    3    4
</data>
<data>
phi rcs ang
3    4    5
4    5    6
</data>
//test21.txt
#xtype
//comment
[condition]
    项1=值1
    项2=值2
[/condition]
[mea]
    key3=value3
    key4=value4
[/mea]
[data]
phi rcs ang
1    1    1
2    2    2
[/data]
[data]
phi rcs ang
3    1    1
4    2    2
[/data]
//test22.txt
#xtype
//comment
[condition]
    项1=值1
    项2=值2
[/condition]
[group]
    [part1]
        key3=value3
    [/part1]
    [part2]
        key4=value4
        key5=value5
    [/part2]
[/group]
[data]
phi rcs ang
1    1    1
2    2    2
[/data]
[data]
phi rcs ang
3    1    1
4    2    2
[/data]

 

posted @ 2017-01-15 14:23  小小鸟的大梦想  阅读(825)  评论(0编辑  收藏  举报