filters

rtfimport_tokenizer.cpp

00001 /*
00002    This file is part of the KDE project
00003    Copyright (C) 2001 Ewald Snel <ewald@rambo.its.tudelft.nl>
00004    Copyright (C) 2001 Tomasz Grobelny <grotk@poczta.onet.pl>
00005    Copyright (C) 2005 Tommi Rantala <tommi.rantala@cs.helsinki.fi>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Library General Public
00009    License as published by the Free Software Foundation; either
00010    version 2 of the License, or (at your option) any later version.
00011 */
00012 
00013 #include <kdebug.h>
00014 
00015 #include "rtfimport_tokenizer.h"
00016 
00017 
00018 RTFTokenizer::RTFTokenizer()
00019 {
00020     tokenText.resize( 4113 );
00021     fileBuffer.resize( 4096 );
00022     infile = 0L;
00023 }
00024 
00029 void RTFTokenizer::open( QFile *in )
00030 {
00031     fileBufferPtr = 0L;
00032     fileBufferEnd = 0L;
00033     infile = in;
00034     type = RTFTokenizer::PlainText;
00035 }
00036 
00037 int RTFTokenizer::nextChar()
00038 {
00039     if ( fileBufferPtr == fileBufferEnd ) {
00040         int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
00041         fileBufferPtr = ( uchar* ) fileBuffer.data();
00042         fileBufferEnd = fileBufferPtr;
00043 
00044         if ( n <= 0 )
00045             return -1;
00046 
00047         fileBufferEnd = fileBufferPtr + n;
00048     }
00049     return *fileBufferPtr++;
00050 }
00051 
00052 
00056 void RTFTokenizer::next()
00057 {
00058     int ch;
00059     value=0;
00060     if (!infile)
00061     return;
00062 
00063     do {
00064         int n = nextChar();
00065 
00066         if ( n <= 0 ) {
00067             ch = '}';
00068             break;
00069         }
00070 
00071         ch = n;
00072     }
00073     while (ch == '\n' || ch == '\r' && ch != 0);
00074 
00075     // Skip one byte for prepend '@' to destinations
00076     text = (tokenText.data() + 1);
00077     hasParam = false;
00078 
00079     uchar *_text = (uchar *)text;
00080 
00081 
00082     if (ch == '{')
00083     type = RTFTokenizer::OpenGroup;
00084     else if (ch == '}')
00085     type = RTFTokenizer::CloseGroup;
00086     else if (ch == '\\')
00087     {
00088     type = RTFTokenizer::ControlWord;
00089 
00090         int n = nextChar();
00091 
00092         if ( n <= 0 ) {
00093             // Return CloseGroup on EOF
00094             type = RTFTokenizer::CloseGroup;
00095             return;
00096         }
00097     ch = n;
00098 
00099     // Type is either control word or control symbol
00100     if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
00101     {
00102         int v = 0;
00103 
00104         // Read alphabetic string (command)
00105         while (_text < ( uchar* )tokenText.data()+tokenText.size()-3 && 
00106                   ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) )
00107         {
00108         *_text++ = ch;
00109 
00110                 int n = nextChar();
00111                 if ( n <= 0 ) {
00112                     ch = ' ';
00113                     break;
00114                 }
00115                 ch = n;
00116         }
00117 
00118         // Read numeric parameter (param)
00119         bool isneg = (ch == '-');
00120 
00121         if (isneg) {
00122                 int n = nextChar();
00123                 if ( n <= 0 ) {
00124                     type = RTFTokenizer::CloseGroup;
00125                     return;
00126                 }
00127         ch = n;
00128         }
00129 
00130         while (ch >= '0' && ch <= '9') {
00131         v    = (10 * v) + ch - '0';
00132         hasParam = true;
00133 
00134                 int n = nextChar();
00135 
00136                 if ( n <= 0 )
00137                     n = ' ';
00138                 ch = n;
00139             }
00140         value = isneg ? -v : v;
00141 
00142         // If delimiter is a space, it's part of the control word
00143         if (ch != ' ')
00144         {
00145         --fileBufferPtr;
00146         }
00147 
00148             *_text = 0; // Just put an end of string for the test, it can then be over-written again
00149             if ( !memcmp( tokenText.data()+1, "bin", 4 ) )
00150             {   // We have \bin, so we need to read the bytes
00151                 kdDebug(30515) << "Token:" << tokenText << endl;
00152                 if (value > 0)
00153                 {
00154                     kdDebug(30515) << "\\bin" << value << endl;
00155                     type = RTFTokenizer::BinaryData;
00156                     binaryData.resize(value);
00157                     for (int i=0; i<value; i++)
00158                     {
00159                         int n = nextChar();
00160                         if ( n <= 0 ) {
00161                             type = RTFTokenizer::CloseGroup;
00162                             break;
00163                         }
00164 
00165                         binaryData[i] = n;
00166                     }
00167                 }
00168             }
00169 
00170     }
00171     else if (ch=='\'')
00172     {
00173         // Got hex value, for example \'2d
00174 
00175         type = RTFTokenizer::ControlWord;
00176         *_text++ = ch;
00177 
00178         for(int i=0;i<2;i++)
00179         {
00180         int n = nextChar();
00181 
00182         if ( n <= 0 ) {
00183             if ( i == 0 ) {
00184                 type = RTFTokenizer::CloseGroup;
00185                 return;
00186             } else {
00187                         ch = ' ';
00188             break;
00189             }
00190         }
00191 
00192         ch = n;
00193 
00194         hasParam = true;
00195         value<<=4;
00196         value=value|((ch + ((ch & 16) ? 0 : 9)) & 0xf);
00197         }
00198         }
00199     else
00200     {
00201         type = RTFTokenizer::ControlWord;
00202         *_text++ = ch;
00203     }
00204     }
00205     else
00206     {
00207     type = RTFTokenizer::PlainText;
00208 
00209     // Everything until next backslash, opener or closer
00210     while ( ch != '\\' && ch != '{' && ch != '}' && ch != '\n' &&
00211         ch != '\r')
00212     {
00213         *_text++ = ch;
00214             if(fileBufferPtr >= fileBufferEnd)
00215                 break;
00216         ch = *fileBufferPtr++;
00217     }
00218         if(fileBufferPtr < fileBufferEnd)
00219           --fileBufferPtr; // give back the last char
00220     }
00221     *_text++ = 0;
00222 
00223 }
KDE Home | KDE Accessibility Home | Description of Access Keys