XML 在 Web 上的应用非常广泛,但普通 C++ 程序员恐怕用得不多。XML 灵活的格式使得设置文件的描述变得很容易,但要在程序里应用它却总是很麻烦:网络上的 XML 解析器庞大得吓人,如果只为了解析很简单的 XML 也要学习一大堆库,实在有点恐怖,反正我是没兴趣;使用 MSXML 时版本不同也很烦人,COM 的调用也很啰嗦。所以我写了一个简单的解析器,可供简单应用使用,高手就不必看了。这个解析器只支持如下的 XML 子集(用于设置文件已经足够,还可以扩展):
<?xml?> <根元素> <元素 属性列/> <元素 属性列/> <元素 属性列>文本</元素> <元素><子元素>文本</子元素><子元素>文本</子元素></元素> </根元素>
可以解析成
<SXmlUnknow> <SXmlElement> <SXmlElement 属性列/> <SXmlElement 属性列/> <SXmlElement 属性列>SXmlText</SXmlElement> <SXmlElement><SXmlElement>SXmlText</SXmlElement><SXmlElement>SXmlText</SXmlElement></SXmlElement> </SXmlElement>
例:
解析器源文件:SXML.h
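/*
 * Supports a subset of XML - elements, attributes and text only - which is enough for
 * parsing simple XML configuration files. Written in standard C++; it does not depend on
 * any particular platform or compiler.
 *
 * Documents of the following shape are supported:
 *   <SXmlElement>
 *     <SXmlElement attributes/>
 *     <SXmlElement attributes/>
 *     <SXmlElement attributes>SXmlText</SXmlElement>
 *     <SXmlElement><SXmlElement>SXmlText</SXmlElement><SXmlElement>SXmlText</SXmlElement></SXmlElement>
 *   </SXmlElement>
 *
 * Two functions are provided for entity references:
 *   SXmlEntityXml   converts the characters  < " > & '   ======>  &lt; &quot; &gt; &amp; &apos;
 *   SXmlEntityParse converts  &lt; &quot; &gt; &amp; &apos;   =====>  the characters  < " > & '
 *
 * Beta version    llbird    wushaojian@21cn.com    http://blog.csdn.net/wujian53
 */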
#ifndef _SXML_H
#define _SXML_H

#pragma warning(disable: 4530)
#pragma warning(disable: 4786)

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <istream>
#include <iterator>
#include <list>
#include <map>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
/// Attribute value stored as text, assignable from any streamable type and
/// implicitly convertible back to the common scalar and string types.
struct AttributeType
{
    std::string _Text;  ///< textual form of the value

    /// Stores the stream-formatted form of `val` (whatever `operator<<` prints for it).
    /// @return `val` itself, so the expression keeps the type of its right-hand side.
    template<typename T>
    T operator=(T val)
    {
        std::stringstream ss;
        ss << val;
        _Text = ss.str();
        return val;
    }

    /// Copy assignment. Accepts const sources and temporaries and returns `*this`,
    /// as the usual assignment contract requires.
    AttributeType& operator=(const AttributeType& val)
    {
        _Text = val._Text;
        return *this;
    }

    // The conversions only read _Text, so they are usable on const objects too.
    // The returned pointer stays valid only as long as _Text is not modified.
    operator const char*() const { return _Text.c_str(); }
    operator std::string() const { return _Text; }
    operator int() const { return std::atoi(_Text.c_str()); }
    operator long() const { return std::atol(_Text.c_str()); }
    operator float() const { return static_cast<float>(std::atof(_Text.c_str())); }
    operator double() const { return std::atof(_Text.c_str()); }
};

/// Writes the stored text to `out`.
inline std::ostream& operator<<(std::ostream &out, const AttributeType &x)
{
    return out << x._Text;
}

/// Reads one whitespace-delimited token from `in` into `x`.
/// `x` is taken by reference so that the caller's object actually receives the token.
inline std::istream& operator>>(std::istream &in, AttributeType &x)
{
    return in >> x._Text;
}
#ifdef _USE_STRING typedef string ValueType; #else typedef AttributeType ValueType; #endif
enum SXML_Type{ XML_UNKNOW, XML_ELEMENT, XML_PI, XML_COMMENT, XML_Text, XML_CHAR_DATA, XML_DOC_Type, XML_DOCUMENT}; ///异常 class SXmlExecption : public exception { string _Src; public: SXmlExecption(string s) : _Src(s) { } ~SXmlExecption()throw() { } const char* what() const throw() { return _Src.c_str(); } }; //除去头尾空格 inline void trim_c_str(const char *&fptr, const char *&lptr) { while(fptr<lptr && isspace(*fptr)) fptr++; while(fptr<lptr && isspace(*(lptr-1))) lptr--; } //除去头尾空格 inline string& trim_string(string& str) { string::size_type pos; for(pos=0; pos<str.length() && isspace(str.at(pos)); pos++); str.erase(0, pos); for(pos=str.length()-1; pos>=0 && isspace(str.at(pos)); pos--); str.erase(pos + 1); return str; } ///将字符转换为实体引用 '<' --> "<" inline string SXmlEntityXml(string str) { char *entity[] = {"lt", "quot", "gt", "amp", "apos"}; char *ref = "</">&/'", *f; for(string::size_type pos=0; pos < str.length(); pos++) if((f = strchr(ref, str[pos]))) str.replace(pos, 1, string("&") + entity[f-ref] + ";"); return str; } ///将实体引用转换为字符 "<" --> '<' inline string SXmlEntityParse(string str) { char *entity[] = {"lt", "quot", "gt", "amp", "apos"}; char *ref = "</">&/'", i; string r; for(string::size_type pos = 0, bpos, epos, rlen = 1; (bpos=str.find('&', pos))!=string::npos; pos = bpos + rlen) { epos = str.find(';', bpos); if(epos == string::npos) throw SXmlExecption("找不到实体引用的右边界';'"); r.assign(str.begin()+bpos+1, str.begin()+epos); for(i = 0; i < 5; i++) if(r == entity[i]) { str.replace(bpos, epos - bpos + 1, 1, ref[i]); break; } if(i == 5) throw SXmlExecption("不支持的实体引用!"); } return str; } ///属性列 struct SXmlAttribute { map<string, ValueType> _Map; void clear() { _Map.clear(); } map<string, ValueType>::size_type size() { return _Map.size(); } ValueType& value(string key) { return _Map[key]; } string xml() { string str; for(map<string, ValueType>::iterator iter=_Map.begin(); iter!=_Map.end(); iter++) str += " " + iter->first + "=/"" + (string)(iter->second) + "/""; return 
str; } ///属性列处理: key1="val_1" key2="val_2" key3="&#XXX;" void parse(const char *first, const char *last) { trim_c_str(first, last); for(const char *ptr=first, *fptr, *L, *R; ptr < last; ptr=R+1) { if((fptr = find(ptr, last, '='))==last) break; trim_c_str(ptr, fptr); if((L = find(fptr+1, last, '/"')) != last) R = find(L+1, last, '/"'); if(L==last || R==last) break; _Map[string(ptr, fptr)] = string(L+1, R); } } }; ///节点基类 struct SXmlNode { SXML_Type _Type; SXmlNode* _pParent; string _Text; SXmlNode(SXmlNode *parent=NULL) : _pParent(parent) { } virtual ~SXmlNode(){} SXmlNode* get_parent() { return _pParent; } SXML_Type& type() { return _Type; } string& operator=(const char *s) { return _Text=s, _Text; } string& operator=(string& s) { return _Text=s, _Text; } virtual string xml() { return _Text; } //>内存数据转换为XML operator string&() { return _Text; } void parse(const char *p) { parse(p, p+strlen(p)); } //>解析XML void parse(const char *f, const char *l) { _Text.assign(f, l); } }; ///字符块节点 struct SXmlText : SXmlNode { SXmlText(SXmlNode *parent=NULL) : SXmlNode(parent) { _Type = XML_Text; } }; ///其他节点 struct SXmlUnknow : SXmlNode { SXmlUnknow(SXmlNode *parent=NULL) : SXmlNode(parent) { _Type = XML_UNKNOW; } };
#ifdef _USE_LIST typedef list<SXmlNode*> SXmlNodeList ; #else typedef vector<SXmlNode*> SXmlNodeList ; #endif ///元素节点 struct SXmlElement : public SXmlNode { SXmlNodeList _pNodes; SXmlAttribute _Attributes; typedef SXmlNodeList::iterator iterator; SXmlElement(SXmlNode *parent=NULL) : SXmlNode(parent) { _Type = XML_ELEMENT; } virtual ~SXmlElement() { clear(); } string& tag() { return _Text; } SXmlNodeList::size_type size() { return _pNodes.size(); } SXmlNodeList& nodes() { return _pNodes; } iterator begin() { return _pNodes.begin(); } iterator end() { return _pNodes.end(); } ValueType& value(string key) { return _Attributes.value(key); } ValueType& operator[](string key) { return _Attributes.value(key); } SXmlElement& operator =(const SXmlElement& x) { _Text = x._Text; _pNodes = x._pNodes; return *this; } SXmlNode& at(SXmlNodeList::size_type n = 0) { if(n<0 || n>=size()) throw SXmlExecption("子节点编号超出范围!"); SXmlNodeList::iterator iter = _pNodes.begin(); advance(iter, n); return *(*iter); } SXmlNodeList::size_type count() { return size(); } SXmlNodeList::size_type count(string tag_name) { SXmlNodeList::iterator iter; SXmlNodeList::size_type i; for(iter = _pNodes.begin(), i = 0; iter != _pNodes.end(); iter++) if((*iter)->_Type==XML_ELEMENT&& (*iter)->_Text==tag_name ) i++; return i; } SXmlElement& newItem(string tag_name) { string &str = trim_string(tag_name); if(!str.length()) throw SXmlExecption("元素节点标签不应为空字符串!"); SXmlElement* p = new SXmlElement(this); p->tag() = str; _pNodes.push_back((SXmlNode*)p); return (*p); } SXmlElement& item(string name, SXmlNodeList::size_type n = 0) { SXmlNodeList::iterator iter; SXmlNodeList::size_type i; for(iter = _pNodes.begin(), i = 0; iter != _pNodes.end(); iter++) if((*iter)->_Type==XML_ELEMENT&& (*iter)->_Text==name && i++==n) return *((SXmlElement*)(*iter)); throw SXmlExecption("找不到/"" + name +"/"子节点!"); } string& text() { for(SXmlNodeList::iterator iter = _pNodes.begin(); iter != _pNodes.end(); iter++) { if((*iter)->_Type == 
XML_Text) return (string&)(*((SXmlText*)(*iter))); } SXmlNode* p = (SXmlNode*)new SXmlText(this); _pNodes.insert(_pNodes.begin(), 1, p); return (string&)(*p); } ///清除所有节点 void clear() { _Attributes.clear(); for(SXmlNodeList::iterator iter=_pNodes.begin(); iter!=_pNodes.end(); iter++) delete (*iter); _pNodes.clear(); } virtual string xml() { string str; if(type() == XML_ELEMENT) { str = string("<") + _Text; if(_Attributes.size()) str += _Attributes.xml(); if(_pNodes.size()) { SXmlNodeList::iterator iter; for(iter=_pNodes.begin(), str += '>'; iter!=_pNodes.end(); iter++) str += (*iter)->xml(); str += "</" + _Text + ">"; } else str += "/>"; } else if(_pNodes.size()) { SXmlNodeList::iterator iter; for(iter=_pNodes.begin(); iter!=_pNodes.end(); iter++) str += (*iter)->xml(); } return str; } void parse(const char *str) { parse(str, str + strlen(str)); } void parse(const char *first, const char *last) { const char *ptr = first;//ptr:base pointer const char *fptr, *nptr, *tptr; //fptr:find pointer nptr:next pointer tptr : temp pointer SXmlNode *newptr;//create SXmlNode while(ptr < last) { fptr = find(ptr, last, '<'); if(ptr != fptr) { newptr = new SXmlText(this); newptr->_Text.assign(ptr, fptr); _pNodes.push_back(newptr); } if(fptr == last) break; nptr = find(fptr, last, '>'); if(nptr == last) throw SXmlExecption(string(fptr, nptr+1) + "找不到标签的右边界'>'"); switch(*(fptr + 1)) { case '?': newptr = new SXmlUnknow(this); newptr->parse(fptr, nptr + 1); _pNodes.push_back(newptr); ptr = nptr + 1; break; case '!': if(!(*(fptr + 2)=='-' && *(fptr + 3)=='-'))//不是寻常的注释 { const char *cdata_L = "CDATA", *cdata_R = "]]>",*doc_Type_L = "DOCTYPE", *doc_Type_R = "]>"; if((tptr = search(fptr, nptr, cdata_L, cdata_L+4)) != nptr) if((tptr = search(tptr, last, cdata_R, cdata_R+3)) != last) nptr = tptr + 2; else throw SXmlExecption("CDataSection can not find /"]]>/""); else if((tptr = search(fptr, nptr, doc_Type_L, doc_Type_L+7)) != nptr) { if((tptr = search(tptr, last, doc_Type_R, doc_Type_R+2)) 
!= last) nptr = tptr + 1; else throw SXmlExecption("DOCTYPE can not find /"]>/""); } } newptr = new SXmlUnknow(this); newptr->parse(fptr, nptr + 1); _pNodes.push_back(newptr); ptr = nptr + 1; break; default: char find_str[] = "/n/r/t/x20/>"; SXmlElement *new_elem = new SXmlElement(this); _pNodes.push_back(new_elem); tptr = find_first_of(fptr, nptr, find_str, find_str + 6); new_elem->_Text = string(fptr+1, tptr-fptr-1); if(*(nptr-1) == '/')//<elemet .../> { new_elem->_Attributes.parse(tptr, nptr-1); ptr = nptr + 1; } else //</elemet> { new_elem->_Attributes.parse(tptr, nptr); string str = string("</") + new_elem->_Text; tptr = search(nptr+1, last, str.begin(), str.end()); if(tptr == last) throw SXmlExecption(string(fptr, nptr+1) + "找不到结束标签</element>"); new_elem->parse(nptr+1, tptr); nptr = find(tptr, last, '>'); if(nptr == last) throw SXmlExecption(string(fptr, nptr+1) + "找不到标签的右边界'>'"); ptr = nptr + 1; } } //switch(*(fptr + 1)) mean: <X }//while(ptr < last) } }; ///DOM解析 class SXmlDOM : public SXmlElement { public: SXmlDOM() { _Type = XML_DOCUMENT; } void parseFile(string file_name) { vector<char> buf; ifstream rf(file_name.c_str()); if(rf) { rf >> noskipws; copy(istream_iterator<char>(rf), istream_iterator<char>(), back_inserter(buf)); clear(); parse(&*buf.begin(), &*buf.end()); } else throw SXmlExecption("无法打开读入文件!"); } void saveFile(string file_name) { ofstream rf(file_name.c_str()); if(rf) rf << xml(); else throw SXmlExecption("无法打开写入文件!"); } SXmlElement& root() { for(SXmlNodeList::iterator iter = _pNodes.begin(); iter != _pNodes.end(); iter++) if((*iter)->_Type == XML_ELEMENT) return *((SXmlElement*)(*iter)); throw SXmlExecption("找不到根元素!"); } };
#endif //_SXML_H
|