bencoding编码解析器

类别:编程语言 点击:0 评论:0 推荐:

BT客户端开始一个下载首先要处理的就是torrent文件.
而torrent文件使用bencoding编码.
所以实现bencoding编码的解析器,就是第一步工作.

Bencoding is done as follows:
Strings are length-prefixed base ten followed by a colon and the string. For example '4:spam' corresponds to 'spam'.
Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example 'i3e' corresponds to 3 and 'i-3e' corresponds to -3. Integers have no size limitation. 'i-0e' is invalid. All encodings with a leading zero, such as 'i03e', are invalid, other than 'i0e', which of course corresponds to 0.
Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example 'l4:spam4:eggse' corresponds to ['spam', 'eggs'].
Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, 'd3:cow3:moo4:spam4:eggse' corresponds to {'cow': 'moo', 'spam': 'eggs'} and 'd4:spaml1:a1:bee' corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).

下面是实现的bencoding解码器的VC++源代码:

// BEncode.h: interface for the CBEncode class.
//
// Object model and parser for bencoded text (the encoding used by
// .torrent files).  Parsed objects do not copy any text: they keep
// pointers into the buffer handed to CBEncode::parse(), so that buffer
// must outlive every object produced from it.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
#define AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

#pragma warning( disable : 4786 )
#pragma warning( disable : 4355 )

#include <stddef.h> // NULL
#include <list>
#include <map>
#include <string>
#include <vector>

// Kept for backward compatibility: BEncode.cpp and existing callers use
// unqualified list/map/string after including this header.
using namespace std;

// Error state recorded on each parsed object.
enum BEncodeParserErrorCode
{
    enm_BEncodeErr_noerr = 0,   // no error
    enm_BEncodeErr_errString,   // malformed string
    enm_BEncodeErr_errInt,      // malformed integer
    enm_BEncodeErr_errList,     // malformed list
    enm_BEncodeErr_errDict,     // malformed dictionary
    enm_BEncodeErr_End,         // end of text
    enm_BEncodeErr_unknown      // unknown error
};

// Discriminator stored in CBEncodeObjectBase::m_type.
enum BEncodeObjectType
{
    enum_BEncodeType_Objectbase = 0,
    enum_BEncodeType_String,
    enum_BEncodeType_Int,
    enum_BEncodeType_List,
    enum_BEncodeType_Dict
};

// Common base of every parsed object: its type tag, where it sits in the
// source text, how many characters it spans, and its error state.
class CBEncodeObjectBase
{
public:
    CBEncodeObjectBase(BEncodeObjectType type = enum_BEncodeType_Objectbase)
    {
        m_type = type;
        clear();
    }
    virtual ~CBEncodeObjectBase() {}

    // Resets position, length and error state; the type tag is kept.
    void clear()
    {
        szPos = NULL;
        ilen = 0; // previously left uninitialised
        m_error = enm_BEncodeErr_noerr;
    }

public:
    BEncodeObjectType m_type;       // object type
    char * szPos;                   // position of the object in the source text
    int ilen;                       // length of the object's encoded data
    BEncodeParserErrorCode m_error; // error value
};

// A bencoded integer ("i...e").
class CBEncodeInt : public CBEncodeObjectBase
{
public:
    CBEncodeInt() : CBEncodeObjectBase(enum_BEncodeType_Int)
    {
        m_iValue = 0; // defined value even when parsing fails
    }
    virtual ~CBEncodeInt() {}

public:
    int m_iValue; // value of the integer object
};

// A bencoded string ("<len>:<bytes>").  m_szData points into the parsed
// buffer and is NOT NUL-terminated; use m_ilen or getstring().
class CBEncodeString : public CBEncodeObjectBase
{
public:
    CBEncodeString() : CBEncodeObjectBase(enum_BEncodeType_String)
    {
        m_szData = NULL;
        m_ilen = 0; // previously left uninitialised
    }
    virtual ~CBEncodeString() {}

public:
    // Copies the payload into strValue.
    // Returns false, leaving strValue untouched, when the object carries
    // an error, has no data pointer, or has a negative length.
    bool getstring(string & strValue)
    {
        if (m_error == enm_BEncodeErr_noerr && m_szData != NULL && m_ilen >= 0)
        {
            strValue.assign(m_szData, m_ilen);
            return true;
        }
        return false;
    }

    char * m_szData; // first byte of the payload inside the parsed buffer
    int m_ilen;      // payload length in bytes
};

// A bencoded list ("l...e").  Owns its elements and deletes them in
// clear() and in the destructor.
// NOTE: copying is not disabled; a copy would hold the same element
// pointers and both copies would delete them.
class CBEncodeList : public CBEncodeObjectBase
{
public:
    CBEncodeList() : CBEncodeObjectBase(enum_BEncodeType_List) {}
    virtual ~CBEncodeList() { clear(); }

    // Deletes every element and empties the list.
    void clear()
    {
        list<CBEncodeObjectBase *>::iterator it;
        for (it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();
    }

public:
    list<CBEncodeObjectBase*> m_listObj;
};

// A bencoded dictionary ("d...e").
// NOTE: copying is not disabled; see CBEncodeList.
class CBEncodeDict : public CBEncodeObjectBase
{
public:
    CBEncodeDict() : CBEncodeObjectBase(enum_BEncodeType_Dict) {}
    virtual ~CBEncodeDict() { clear(); }

    // Looks up the value stored under szName.
    // Returns NULL when szName is NULL or the key is absent.
    CBEncodeObjectBase* getvalue(const char * szName)
    {
        if (szName == NULL) // std::string cannot be built from a null pointer
            return NULL;
        map<string,CBEncodeObjectBase*>::iterator it = m_mapObj.find(szName);
        if (it != m_mapObj.end())
            return it->second;
        return NULL;
    }

    // Deletes every owned object and empties both containers.
    void clear()
    {
        list<CBEncodeObjectBase *>::iterator it;
        for (it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();
        m_mapObj.clear();
    }

public:
    // Lookup table from key text to value object; it only references
    // pointers that m_listObj owns.
    map<string,CBEncodeObjectBase*> m_mapObj;
    // The real objects live here, alternating a name object and a value
    // object.
    list<CBEncodeObjectBase*> m_listObj;
};

// Parser for bencoded text.
//
// Each read* function parses one object starting at szCurPos, appends
// the new object to listObj (also when parsing fails, with its m_error
// set), stores the number of characters consumed in iendpos, records
// the object in m_plastObj, and returns true on success.
// NOTE: copying is not disabled; see CBEncodeList.
class CBEncode
{
public:
    bool readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);

    // Parses the NUL-terminated text szData into m_listObj, discarding
    // any previous result.
    bool parse(const char * szData);

    CBEncode();
    virtual ~CBEncode();

    // Deletes every parsed object.
    void clear()
    {
        list<CBEncodeObjectBase *>::iterator it;
        for (it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();
        // The last parsed object was just deleted (directly or through
        // its container); do not leave a dangling pointer behind.
        m_plastObj = NULL;
    }

public:
    list<CBEncodeObjectBase*> m_listObj; // top-level parsed objects (owned)
    CBEncodeObjectBase* m_plastObj;      // the last object that was parsed
    char * m_szTxt;                      // the text passed to parse() (not owned)
};

#endif // !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)

// BEncode.cpp: implementation of the CBEncode class. // ////////////////////////////////////////////////////////////////////// #include "stdafx.h" #include "BEncode.h" ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// CBEncode::CBEncode() { m_plastObj = NULL; m_szTxt = NULL; } CBEncode::~CBEncode() { clear(); } bool CBEncode::parse(const char *szData) { if(szData == NULL||*szData==NULL) return false; clear(); m_szTxt = (char*)szData; char * szCurPos = (char*)szData; int iendpos; while(*szCurPos) { if(*szCurPos== 'i') { if(!readint(szCurPos,iendpos,m_listObj)) break;//遇到任何错误都终止整个解析 szCurPos+=iendpos; } else if(*szCurPos== 'l') { if(!readlist(szCurPos,iendpos,m_listObj)) break; szCurPos+=iendpos; } else if(*szCurPos== 'd') { if(!readdict(szCurPos,iendpos,m_listObj)) break; szCurPos+=iendpos; } else { if(!readstring(szCurPos,iendpos,m_listObj)) break; szCurPos+=iendpos; } } if(*szCurPos==0&&m_plastObj->m_error == enm_BEncodeErr_noerr) return true; return false; } //从当前位置读取一个字符串 bool CBEncode::readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeString * pNewString = new CBEncodeString; pNewString->szPos = szCurPos; char szLen[20]; int i = 0; while(*szTmp>='0'&&*szTmp<='9') szLen[i++]=*(szTmp++); szLen[i]=0; if(*szTmp==':') { int ilen = atoi(szLen); if(ilen>0) { pNewString->m_szData = ++szTmp; pNewString->m_ilen = ilen; szTmp+=ilen; } else pNewString->m_error = enm_BEncodeErr_errString; } else pNewString->m_error = enm_BEncodeErr_errString; listObj.push_back(pNewString); iendpos = szTmp-szCurPos; m_plastObj = pNewString; m_plastObj->ilen = iendpos; return pNewString->m_error == enm_BEncodeErr_noerr?true:false; } //读取一个整型数据 bool CBEncode::readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeInt * pNewInt= new CBEncodeInt; pNewInt->szPos = 
szCurPos; if(*szTmp == 'i') { szTmp++; char szLen[20]; int i = 0; while(*szTmp>='0'&&*szTmp<='9') szLen[i++]=*(szTmp++); szLen[i]=0; if(*szTmp=='e') { pNewInt->m_iValue = atoi(szLen); ++szTmp; } else pNewInt->m_error = enm_BEncodeErr_errInt; } else pNewInt->m_error = enm_BEncodeErr_errInt; listObj.push_back(pNewInt); iendpos = szTmp-szCurPos; m_plastObj = pNewInt; m_plastObj->ilen = iendpos; return pNewInt->m_error == enm_BEncodeErr_noerr?true:false; }
//读取一个列表 bool CBEncode::readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeList * pNewList= new CBEncodeList; pNewList->szPos = szCurPos; if(*szTmp == 'l') { szTmp++; int ilistendpos; while(*szTmp!='e') { if(*szTmp== 'i') { if(!readint(szTmp,ilistendpos,pNewList->m_listObj)) break;//遇到任何错误都终止整个解析 szTmp+=ilistendpos; } else if(*szTmp== 'l') { if(!readlist(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } else if(*szTmp== 'd') { if(!readdict(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } else { if(!readstring(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } } if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr) pNewList->m_error = enm_BEncodeErr_errList; else szTmp++; } else pNewList->m_error = enm_BEncodeErr_errList; listObj.push_back(pNewList); iendpos = szTmp-szCurPos; m_plastObj = pNewList; m_plastObj->ilen = iendpos; return pNewList->m_error == enm_BEncodeErr_noerr?true:false; } //读取一个字典 bool CBEncode::readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeDict * pNewDict= new CBEncodeDict; pNewDict->szPos = szCurPos; if(*szTmp == 'd') { szTmp++; int ilistendpos; string strname; while(*szTmp!='e') { if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj)) break; if(m_plastObj->m_type !=enum_BEncodeType_String) break; strname.assign(((CBEncodeString *)m_plastObj)->m_szData,((CBEncodeString *)m_plastObj)->m_ilen); szTmp+=ilistendpos; if(*szTmp== 'i') { if(!readint(szTmp,ilistendpos,pNewDict->m_listObj)) break;//遇到任何错误都终止整个解析 szTmp+=ilistendpos; } else if(*szTmp== 'l') { if(!readlist(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } else if(*szTmp== 'd') { if(!readdict(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } else { if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } 
pNewDict->m_mapObj.insert(pair<string,CBEncodeObjectBase*>(strname,m_plastObj)); } if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr) pNewDict->m_error = enm_BEncodeErr_errDict; else szTmp++; } else pNewDict->m_error = enm_BEncodeErr_errDict; listObj.push_back(pNewDict); iendpos = szTmp-szCurPos; m_plastObj = pNewDict; m_plastObj->ilen = iendpos; return pNewDict->m_error == enm_BEncodeErr_noerr?true:false; }


 

本文地址:http://com.8s8s.com/it/it23738.htm