00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "gloox.h"
00016
00017 #include "parser.h"
00018
00019 namespace gloox
00020 {
00021
00022 Parser::Parser( TagHandler *ph )
00023 : m_tagHandler( ph ), m_current( 0 ), m_root( 0 ), m_state( Initial ),
00024 m_preamble( 0 ), m_quote( false )
00025 {
00026 }
00027
00028 Parser::~Parser()
00029 {
00030 delete m_root;
00031 }
00032
00033 bool Parser::feed( const std::string& data )
00034 {
00035 std::string::const_iterator it = data.begin();
00036 for( ; it != data.end(); ++it )
00037 {
00038 const unsigned char c = (*it);
00039
00040
00041 if( !isValid( c ) )
00042 {
00043 cleanup();
00044 return false;
00045 }
00046
00047 switch( m_state )
00048 {
00049 case Initial:
00050 m_tag = "";
00051 if( isWhitespace( c ) )
00052 break;
00053
00054 switch( c )
00055 {
00056 case '<':
00057 m_state = TagOpening;
00058 break;
00059 case '>':
00060 default:
00061
00062
00063 break;
00064 }
00065 break;
00066 case TagOpening:
00067 if( isWhitespace( c ) )
00068 break;
00069
00070 switch( c )
00071 {
00072 case '<':
00073 case '>':
00074 case '!':
00075 cleanup();
00076 return false;
00077 break;
00078 case '/':
00079 m_state = TagClosingSlash;
00080 break;
00081 case '?':
00082 m_state = TagNameCollect;
00083 m_preamble = 1;
00084 break;
00085 default:
00086 m_tag += c;
00087 m_state = TagNameCollect;
00088 break;
00089 }
00090 break;
00091 case TagNameCollect:
00092 if( isWhitespace( c ) )
00093 {
00094 m_state = TagNameComplete;
00095 break;
00096 }
00097
00098 switch( c )
00099 {
00100 case '<':
00101 case '?':
00102 case '!':
00103 cleanup();
00104 return false;
00105 break;
00106 case '/':
00107 m_state = TagOpeningSlash;
00108 break;
00109 case '>':
00110 addTag();
00111 m_state = TagInside;
00112 break;
00113 default:
00114 m_tag += c;
00115 break;
00116 }
00117 break;
00118 case TagInside:
00119 m_tag = "";
00120 switch( c )
00121 {
00122 case '<':
00123 addCData();
00124 m_state = TagOpening;
00125 break;
00126 default:
00127 m_cdata += c;
00128 break;
00129 }
00130 break;
00131 case TagOpeningSlash:
00132 if( isWhitespace( c ) )
00133 break;
00134
00135 if( c == '>' )
00136 {
00137 addTag();
00138 if( !closeTag() )
00139 {
00140 cleanup();
00141 return false;
00142 }
00143
00144 m_state = Initial;
00145 }
00146 else
00147 {
00148 cleanup();
00149 return false;
00150 }
00151 break;
00152 case TagClosingSlash:
00153 if( isWhitespace( c ) )
00154 break;
00155
00156 switch( c )
00157 {
00158 case '>':
00159 case '<':
00160 case '/':
00161 cleanup();
00162 return false;
00163 break;
00164 default:
00165 m_tag += c;
00166 m_state = TagClosing;
00167 break;
00168 }
00169 break;
00170 case TagClosing:
00171 switch( c )
00172 {
00173 case '<':
00174 case '/':
00175 cleanup();
00176 return false;
00177 break;
00178 case '>':
00179 if( !closeTag() )
00180 {
00181 cleanup();
00182 return false;
00183 }
00184
00185 m_state = Initial;
00186 break;
00187 default:
00188 m_tag += c;
00189 break;
00190 }
00191 break;
00192 case TagNameComplete:
00193 if( isWhitespace( c ) )
00194 break;
00195
00196 switch( c )
00197 {
00198 case '<':
00199 cleanup();
00200 return false;
00201 break;
00202 case '/':
00203 m_state = TagOpeningSlash;
00204 break;
00205 case '>':
00206 if( m_preamble == 1 )
00207 {
00208 cleanup();
00209 return false;
00210 }
00211 m_state = TagInside;
00212 addTag();
00213 break;
00214 case '?':
00215 if( m_preamble == 1 )
00216 m_preamble = 2;
00217 else
00218 {
00219 cleanup();
00220 return false;
00221 }
00222 break;
00223 default:
00224 m_attrib += c;
00225 m_state = TagAttribute;
00226 break;
00227 }
00228 break;
00229 case TagAttribute:
00230 if( isWhitespace( c ) )
00231 {
00232 m_state = TagAttributeComplete;
00233 break;
00234 }
00235
00236 switch( c )
00237 {
00238 case '<':
00239 case '/':
00240 case '>':
00241 cleanup();
00242 return false;
00243 break;
00244 case '=':
00245 m_state = TagAttributeEqual;
00246 break;
00247 default:
00248 m_attrib += c;
00249 }
00250 break;
00251 case TagAttributeComplete:
00252 if( isWhitespace( c ) )
00253 break;
00254
00255 switch( c )
00256 {
00257 case '=':
00258 m_state = TagAttributeEqual;
00259 break;
00260 case '<':
00261 case '/':
00262 case '>':
00263 default:
00264 cleanup();
00265 return false;
00266 break;
00267 }
00268 break;
00269 case TagAttributeEqual:
00270 if( isWhitespace( c ) )
00271 break;
00272
00273 switch( c )
00274 {
00275 case '"':
00276 m_quote = true;
00277 case '\'':
00278 m_state = TagValue;
00279 break;
00280 case '=':
00281 case '<':
00282 case '>':
00283 default:
00284 cleanup();
00285 return false;
00286 break;
00287 }
00288 break;
00289 case TagValue:
00290 switch( c )
00291 {
00292 case '<':
00293 cleanup();
00294 return false;
00295 break;
00296 case '\'':
00297 if( m_quote )
00298 {
00299 m_value += c;
00300 break;
00301 }
00302 case '"':
00303 addAttribute();
00304 m_state = TagNameComplete;
00305 m_quote = false;
00306 break;
00307 case '>':
00308 default:
00309 m_value += c;
00310 }
00311 break;
00312 default:
00313
00314 break;
00315 }
00316
00317 }
00318
00319 return true;
00320 }
00321
00322 void Parser::addTag()
00323 {
00324 if( !m_root )
00325 {
00326
00327 m_root = new Tag( m_tag, "", true );
00328 m_current = m_root;
00329 }
00330 else
00331 {
00332
00333 m_current = new Tag( m_current, m_tag, "", true );
00334 }
00335
00336 if( m_attribs.size() )
00337 {
00338 m_current->setAttributes( m_attribs );
00339
00340 m_attribs.clear();
00341 }
00342
00343 if( m_tag == "stream:stream" )
00344 {
00345 streamEvent( m_root );
00346 cleanup();
00347 }
00348
00349
00350
00351 if( m_tag == "xml" && m_preamble == 2 )
00352 cleanup();
00353 }
00354
00355 void Parser::addAttribute()
00356 {
00357
00358 m_attribs.push_back( Tag::Attribute( Tag::relax( m_attrib ), Tag::relax( m_value ) ) );
00359 m_attrib = "";
00360 m_value = "";
00361
00362 }
00363
00364 void Parser::addCData()
00365 {
00366 if( m_current )
00367 {
00368 m_current->setCData( m_cdata );
00369
00370 m_cdata = "";
00371 }
00372 }
00373
00374 bool Parser::closeTag()
00375 {
00376
00377
00378 if( m_tag == "stream:stream" )
00379 return true;
00380
00381 if( !m_current || m_current->name() != m_tag )
00382 return false;
00383
00384
00385
00386
00387 if( m_current->parent() )
00388 m_current = m_current->parent();
00389 else
00390 {
00391
00392 streamEvent( m_root );
00393 cleanup();
00394 }
00395
00396 return true;
00397 }
00398
00399 void Parser::cleanup()
00400 {
00401 delete m_root;
00402 m_root = 0;
00403 m_current = 0;
00404 m_cdata = "";
00405 m_tag = "";
00406 m_attrib = "";
00407 m_value = "";
00408 m_attribs.clear();
00409 m_state = Initial;
00410 m_preamble = 0;
00411 }
00412
00413 bool Parser::isValid( unsigned char c )
00414 {
00415 return ( c != 0xc0 || c != 0xc1 || c < 0xf5 );
00416 }
00417
00418 bool Parser::isWhitespace( unsigned char c )
00419 {
00420 return ( c == 0x09 || c == 0x0a || c == 0x0d || c == 0x20 );
00421 }
00422
00423 void Parser::streamEvent( Tag *tag )
00424 {
00425 if( m_tagHandler )
00426 m_tagHandler->handleTag( tag );
00427 }
00428
00429 }