kspread

formula.cc

00001 /* This file is part of the KDE project
00002    Copyright (C) 2003,2004 Ariya Hidayat <ariya@kde.org>
00003    Copyright (C) 2005 Tomas Mecir <mecirt@gmail.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public License
00016    along with this library; see the file COPYING.LIB.  If not, write to
00017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include "formula.h"
00022 
00023 #include "kspread_cell.h"
00024 #include "kspread_sheet.h"
00025 #include "kspread_doc.h"
00026 #include "kspread_util.h"
00027 #include "kspread_value.h"
00028 
00029 #include "valuecalc.h"
00030 #include "valueconverter.h"
00031 #include "valueparser.h"
00032 
00033 #include "functions.h"
00034 
00035 #include <limits.h>
00036 
00037 #include <qregexp.h>
00038 #include <qstring.h>
00039 #include <qvaluevector.h>
00040 #include <qvaluestack.h>
00041 
00042 #include <klocale.h>
00043 
00044 /*
00045   To understand how this formula engine works, please refer to the documentation
00046   in file DESIGN.html.
00047 
00048   Useful references:
00049    - "Principles of Compiler Design", A.V.Aho, J.D.Ullman, Addison Wesley, 1978
00050    - "Writing Interactive Compilers and Interpreters", P.J. Brown,
00051      John Wiley and Sons, 1979.
00052    - "The Theory and Practice of Compiler Writing", J.Tremblay, P.G.Sorenson,
00053      McGraw-Hill, 1985.
00054    - "The Java(TM) Virtual Machine Specification", T.Lindholm, F.Yellin,
00055      Addison-Wesley, 1997.
00056    - "Java Virtual Machine", J.Meyer, T.Downing, O'Reilly, 1997.
00057 
00058  */
00059 
00060 
00061  /*
00062 TODO - features:
00063  - handle Intersection
00064  - cell reference is made relative (absolute now)
00065  - shared formula (different owner, same data)
00066  - relative internal representation (independent of owner)
00067  - OASIS support
00068 TODO - optimizations:
00069  - handle initial formula marker = (and +)
00070  - reuse constant already in the pool
00071  - reuse references already in the pool
00072  - expression optimization (e.g. 1+2+A1 becomes 3+A1)
00073  */
00074 
00075 namespace KSpread
00076 {
00077 
00078 class Opcode
00079 {
00080 public:
00081 
00082   enum { Nop = 0, Load, Ref, Cell, Range, Function, Add, Sub, Neg, Mul, Div,
00083     Pow, Concat, Not, Equal, Less, Greater };
00084 
00085   unsigned type;
00086   unsigned index;
00087 
00088   Opcode(): type(Nop), index(0) {};
00089   Opcode( unsigned t ): type(t), index(0) {};
00090   Opcode( unsigned t, unsigned i ): type(t), index(i) {};
00091 };
00092 
00093 class Formula::Private
00094 {
00095 public:
00096   Formula *formula;
00097   Cell *cell;
00098   Sheet *sheet;
00099   bool dirty;
00100   bool valid;
00101   QString expression;
00102   QValueVector<Opcode> codes;
00103   QValueVector<Value> constants;
00104 };
00105 
00106 class TokenStack : public QValueVector<Token>
00107 {
00108 public:
00109   TokenStack();
00110   bool isEmpty() const;
00111   unsigned itemCount() const;
00112   void push( const Token& token );
00113   Token pop();
00114   const Token& top();
00115   const Token& top( unsigned index );
00116 private:
00117   void ensureSpace();
00118   unsigned topIndex;
00119 };
00120 
00121 }
00122 
00123 using namespace KSpread;
00124 
00125 // for null token
00126 const Token Token::null;
00127 
00128 // helper function: return operator of given token text
00129 // e.g. "*" yields Operator::Asterisk, and so on
00130 Token::Op KSpread::matchOperator( const QString& text )
00131 {
00132   Token::Op result = Token::InvalidOp;
00133 
00134   if( text.length() == 1 )
00135   {
00136     QChar p = text[0];
00137     switch( p.unicode() )
00138     {
00139         case '+': result = Token::Plus; break;
00140         case '-': result = Token::Minus; break;
00141         case '*': result = Token::Asterisk; break;
00142         case '/': result = Token::Slash; break;
00143         case '^': result = Token::Caret; break;
00144         case ',': result = Token::Comma; break;
00145         case ';': result = Token::Semicolon; break;
00146         case '(': result = Token::LeftPar; break;
00147         case ')': result = Token::RightPar; break;
00148         case '&': result = Token::Ampersand; break;
00149         case '=': result = Token::Equal; break;
00150         case '<': result = Token::Less; break;
00151         case '>': result = Token::Greater; break;
00152         case '%': result = Token::Percent; break;
00153         default : result = Token::InvalidOp; break;
00154     }
00155   }
00156 
00157   if( text.length() == 2 )
00158   {
00159     if( text == "<>" ) result = Token::NotEqual;
00160     if( text == "<=" ) result = Token::LessEqual;
00161     if( text == ">=" ) result = Token::GreaterEqual;
00162     if( text == "==" ) result = Token::Equal;
00163   }
00164 
00165   return result;
00166 }
00167 
00168 // helper function: give operator precedence
00169 // e.g. "+" is 1 while "*" is 3
00170 static int opPrecedence( Token::Op op )
00171 {
00172   int prec = -1;
00173   switch( op )
00174   {
00175     case Token::Percent      : prec = 8; break;
00176     case Token::Caret        : prec = 7; break;
00177     case Token::Asterisk     : prec = 5; break;
00178     case Token::Slash        : prec = 6; break;
00179     case Token::Plus         : prec = 3; break;
00180     case Token::Minus        : prec = 3; break;
00181     case Token::Ampersand    : prec = 2; break;
00182     case Token::Equal        : prec = 1; break;
00183     case Token::NotEqual     : prec = 1; break;
00184     case Token::Less         : prec = 1; break;
00185     case Token::Greater      : prec = 1; break;
00186     case Token::LessEqual    : prec = 1; break;
00187     case Token::GreaterEqual : prec = 1; break;
00188     case Token::Semicolon    : prec = 0; break;
00189     case Token::RightPar     : prec = 0; break;
00190     case Token::LeftPar      : prec = -1; break;
00191     default: prec = -1; break;
00192   }
00193   return prec;
00194 }
00195 
00196 // helper function
00197 static Value tokenAsValue( const Token& token )
00198 {
00199   Value value;
00200   if( token.isBoolean() ) value = Value( token.asBoolean() );
00201   else if( token.isInteger() ) value = Value( token.asInteger() );
00202   else if( token.isFloat() ) value = Value( token.asFloat() );
00203   else if( token.isString() ) value = Value( token.asString() );
00204   return value;
00205 }
00206 
00207 /**********************
00208     Token
00209  **********************/
00210 
00211 // creates a token
00212 Token::Token( Type type, const QString& text, int pos )
00213 {
00214   m_type = type;
00215   m_text = text;
00216   m_pos = pos;
00217 }
00218 
00219 // copy constructor
00220 Token::Token( const Token& token )
00221 {
00222   m_type = token.m_type;
00223   m_text = token.m_text;
00224   m_pos = token.m_pos;
00225 }
00226 
00227 // assignment operator
00228 Token& Token::operator=( const Token& token )
00229 {
00230   m_type = token.m_type;
00231   m_text = token.m_text;
00232   m_pos = token.m_pos;
00233   return *this;
00234 }
00235 
00236 bool Token::asBoolean() const
00237 {
00238   if( !isBoolean() ) return false;
00239   return m_text.lower() == "true";
00240   // FIXME check also for i18n version
00241 }
00242 
00243 int Token::asInteger() const
00244 {
00245   if( isInteger() ) return m_text.toInt();
00246   else return 0;
00247 }
00248 
00249 double Token::asFloat() const
00250 {
00251   if( isFloat() ) return m_text.toDouble();
00252   else return 0.0;
00253 }
00254 
00255 QString Token::asString() const
00256 {
00257   if( isString() ) return m_text.mid( 1, m_text.length()-2 );
00258   else return QString::null;
00259 }
00260 
00261 Token::Op Token::asOperator() const
00262 {
00263   if( isOperator() ) return matchOperator( m_text );
00264   else return InvalidOp;
00265 }
00266 
00267 QString Token::sheetName() const
00268 {
00269   if( !isCell() && !isRange() ) return QString::null;
00270   int i = m_text.find( '!' );
00271   if( i < 0 ) return QString();
00272   QString sheet = m_text.left( i );
00273   return sheet;
00274 }
00275 
00276 QString Token::description() const
00277 {
00278   QString desc;
00279 
00280   switch (m_type )
00281   {
00282     case  Boolean:    desc = "Boolean"; break;
00283     case  Integer:    desc = "Integer"; break;
00284     case  Float:      desc = "Float"; break;
00285     case  String:     desc = "String"; break;
00286     case  Identifier: desc = "Identifier"; break;
00287     case  Cell:       desc = "Cell"; break;
00288     case  Range:      desc = "Range"; break;
00289     case  Operator:   desc = "Operator"; break;
00290     default:          desc = "Unknown"; break;
00291   }
00292 
00293   while( desc.length() < 10 ) desc.prepend( ' ' );
00294   desc.prepend( "  " );
00295   desc.prepend( QString::number( m_pos ) );
00296   desc.append( " : " ).append( m_text );
00297 
00298   return desc;
00299 }
00300 
00301 
00302 /**********************
00303     TokenStack
00304  **********************/
00305 
00306 TokenStack::TokenStack(): QValueVector<Token>()
00307 {
00308   topIndex = 0;
00309   ensureSpace();
00310 }
00311 
00312 bool TokenStack::isEmpty() const
00313 {
00314   return topIndex == 0;
00315 }
00316 
00317 unsigned TokenStack::itemCount() const
00318 {
00319   return topIndex;
00320 }
00321 
00322 void TokenStack::push( const Token& token )
00323 {
00324   ensureSpace();
00325   at( topIndex++ ) = token;
00326 }
00327 
00328 Token TokenStack::pop()
00329 {
00330   return (topIndex > 0 ) ? Token( at( --topIndex ) ) : Token();
00331 }
00332 
00333 const Token& TokenStack::top()
00334 {
00335   return top( 0 );
00336 }
00337 
00338 const Token& TokenStack::top( unsigned index )
00339 {
00340   if( topIndex > index )
00341     return at( topIndex-index-1 );
00342   return Token::null;
00343 }
00344 
00345 void TokenStack::ensureSpace()
00346 {
00347   while( topIndex >= size() )
00348     resize( size() + 10 );
00349 }
00350 
00351 /**********************
00352     FormulaPrivate
00353  **********************/
00354 
00355 // helper function: return true for valid identifier character
00356 bool KSpread::isIdentifier( QChar ch )
00357 {
00358   return ( ch.unicode() == '_' ) || (ch.unicode() == '$' ) || ( ch.isLetter() );
00359 }
00360 
00361 
00362 
00363 
00364 /**********************
00365     Formula
00366  **********************/
00367 
00368 // Constructor
00369 
00370 Formula::Formula (Sheet *sheet, Cell *cell)
00371 {
00372   d = new Private;
00373   d->cell = cell;
00374   d->sheet = sheet;
00375   clear();
00376 }
00377 
00378 Formula::Formula()
00379 {
00380   d = new Private;
00381   d->cell = 0;
00382   d->sheet = 0;
00383   clear();
00384 }
00385 
00386 // Destructor
00387 
00388 Formula::~Formula()
00389 {
00390   delete d;
00391 }
00392 
00393 Cell* Formula::cell() const
00394 {
00395   return d->cell;
00396 }
00397 
00398 Sheet* Formula::sheet() const
00399 {
00400   return d->sheet;
00401 }
00402 
00403 // Sets a new expression for this formula.
00404 // note that both the real lex and parse processes will happen later on
00405 // when needed (i.e. "lazy parse"), for example during formula evaluation.
00406 
00407 void Formula::setExpression( const QString& expr )
00408 {
00409   d->expression = expr;
00410   d->dirty = true;
00411   d->valid = false;
00412 }
00413 
00414 // Returns the expression associated with this formula.
00415 
00416 QString Formula::expression() const
00417 {
00418   return d->expression;
00419 }
00420 
00421 // Returns the validity of the formula.
00422 // note: empty formula is always invalid.
00423 
00424 bool Formula::isValid() const
00425 {
00426   if( d->dirty )
00427   {
00428     KLocale* locale = d->cell ? d->cell->locale() : 0;
00429     if ((!locale) && d->sheet)
00430       locale = d->sheet->doc()->locale();
00431     Tokens tokens = scan( d->expression, locale );
00432     if( tokens.valid() )
00433       compile( tokens );
00434     else
00435       d->valid = false;
00436   }
00437   return d->valid;
00438 }
00439 
00440 // Clears everything, also mark the formula as invalid.
00441 
00442 void Formula::clear()
00443 {
00444   d->expression = QString::null;
00445   d->dirty = true;
00446   d->valid = false;
00447   d->constants.clear();
00448   d->codes.clear();
00449 }
00450 
00451 // Returns list of token for the expression.
00452 // this triggers again the lexical analysis step. it is however preferable
00453 // (even when there's small performance penalty) because otherwise we need to
00454 // store parsed tokens all the time which serves no good purpose.
00455 
00456 Tokens Formula::tokens() const
00457 {
00458   KLocale* locale = d->cell ? d->cell->locale() : 0;
00459   if ((!locale) && d->sheet)
00460     locale = d->sheet->doc()->locale();
00461   return scan( d->expression, locale );
00462 }
00463 
00464 Tokens Formula::scan( const QString& expr, KLocale* locale ) const
00465 {
00466   // to hold the result
00467   Tokens tokens;
00468 
00469   // parsing state
00470   enum { Start, Finish, Bad, InNumber, InDecimal, InExpIndicator, InExponent,
00471     InString, InIdentifier, InCell, InRange, InSheetOrAreaName } state;
00472 
00473   // use locale settings if specified
00474   QString thousand = locale ? locale->thousandsSeparator() : "";
00475   QString decimal = locale ? locale->decimalSymbol() : ".";
00476 
00477   // initialize variables
00478   state = Start;
00479   unsigned int i = 0;
00480   QString ex = expr;
00481   QString tokenText;
00482   int tokenStart = 0;
00483 
00484   // first character must be equal sign (=)
00485   if( ex[0] != '=' )
00486     return tokens;
00487 
00488   // but the scanner should not see this equal sign
00489   ex.remove( 0, 1 );
00490 
00491   // force a terminator
00492   ex.append( QChar() );
00493 
00494   // main loop
00495   while( (state != Bad) && (state != Finish) && (i < ex.length()) )
00496   {
00497     QChar ch = ex[i];
00498 
00499     switch( state )
00500     {
00501 
00502     case Start:
00503 
00504        tokenStart = i;
00505 
00506        // skip any whitespaces
00507        if( ch.isSpace() ) i++;
00508 
00509        // check for number
00510        else if( ch.isDigit() )
00511        {
00512          state = InNumber;
00513        }
00514 
00515        // a string ?
00516        else if ( ch == '"' )
00517        {
00518          tokenText.append( ex[i++] );
00519          state = InString;
00520        }
00521 
00522        // beginning with alphanumeric ?
00523        // could be identifier, cell, range, or function...
00524        else if( isIdentifier( ch ) )
00525        {
00526          state = InIdentifier;
00527        }
00528 
00529        // aposthrophe (') marks sheet name for 3-d cell, e.g 'Sales Q3'!A4, or a named range
00530        else if ( ch.unicode() == '\'' )
00531        {
00532          i++;
00533          state = InSheetOrAreaName;
00534        }
00535 
00536        // decimal dot ?
00537        else if ( ch == decimal )
00538        {
00539          tokenText.append( ex[i++] );
00540          state = InDecimal;
00541        }
00542 
00543        // terminator character
00544        else if ( ch == QChar::null )
00545           state = Finish;
00546 
00547        // look for operator match
00548        else
00549        {
00550          int op;
00551          QString s;
00552 
00553          // check for two-chars operator, such as '<=', '>=', etc
00554          s.append( ch ).append( ex[i+1] );
00555          op = matchOperator( s );
00556 
00557          // check for one-char operator, such as '+', ';', etc
00558          if( op == Token::InvalidOp )
00559          {
00560            s = QString( ch );
00561            op = matchOperator( s );
00562          }
00563 
00564          // any matched operator ?
00565          if( op != Token::InvalidOp )
00566          {
00567            int len = s.length();
00568            i += len;
00569            tokens.append( Token( Token::Operator, s.left( len ), tokenStart ) );
00570          }
00571          else state = Bad;
00572         }
00573        break;
00574 
00575     case InIdentifier:
00576 
00577        // consume as long as alpha, dollar sign, underscore, or digit
00578        if( isIdentifier( ch )  || ch.isDigit() ) tokenText.append( ex[i++] );
00579 
00580        // a '!' ? then this must be sheet name, e.g "Sheet4!"
00581        else if( ch == '!' )
00582        {
00583           tokenText.append( ex[i++] );
00584           state = InCell;
00585        }
00586 
00587        // a '(' ? then this must be a function identifier
00588        else if( ch == '(' )
00589        {
00590            tokens.append (Token (Token::Identifier, tokenText, tokenStart));
00591            tokenStart = i;
00592            tokenText = "";
00593            state = Start;
00594        }
00595 
00596        // we're done with identifier
00597        else
00598        {
00599          // check for cell reference,  e.g A1, VV123, ...
00600          QRegExp exp("(\\$?)([a-zA-Z]+)(\\$?)([0-9]+)$");
00601          int n = exp.search( tokenText );
00602          if( n >= 0 )
00603            state = InCell;
00604          else
00605          {
00606            if ( isNamedArea( tokenText ) )
00607              tokens.append (Token (Token::Range, tokenText, tokenStart));
00608            else
00609              tokens.append (Token (Token::Identifier, tokenText, tokenStart));
00610            tokenStart = i;
00611            tokenText = "";
00612            state = Start;
00613          }
00614        }
00615        break;
00616 
00617     case InCell:
00618 
00619        // consume as long as alpha, dollar sign, underscore, or digit
00620        if( isIdentifier( ch )  || ch.isDigit() ) tokenText.append( ex[i++] );
00621 
00622        // we're done with cell ref, possibly with sheet name (like "Sheet2!B2")
00623        // note that "Sheet2!TotalSales" is also possible, in which "TotalSales" is a named area
00624        else
00625        {
00626 
00627          // check if it's a cell ref like A32, not named area
00628          QString cell;
00629          for( int j = tokenText.length()-1; j>=0; j-- )
00630            if( tokenText[j] == '!' )
00631                break;
00632            else
00633                cell.prepend( tokenText[j] );
00634          QRegExp exp("(\\$?)([a-zA-Z]+)(\\$?)([0-9]+)$");
00635          if( exp.search( cell ) != 0 )
00636          {
00637 
00638            // we're done with named area
00639            // (Tomas) huh? this doesn't seem to check for named areas ...
00640            tokens.append( Token( Token::Range, tokenText, tokenStart ) );
00641            tokenText = "";
00642            state = Start;
00643          }
00644 
00645          else
00646          {
00647 
00648            // so up to now we've got something like A2 or Sheet2!F4
00649            // check for range reference
00650            if( ch == ':' )
00651            {
00652              tokenText.append( ex[i++] );
00653              state = InRange;
00654            }
00655            else
00656            {
00657              // we're done with cell reference
00658              tokens.append( Token( Token::Cell, tokenText, tokenStart ) );
00659              tokenText = "";
00660              state = Start;
00661            }
00662          }
00663        }
00664        break;
00665 
00666     case InRange:
00667 
00668        // consume as long as alpha, dollar sign, underscore, or digit
00669        if( isIdentifier( ch )  || ch.isDigit() ) tokenText.append( ex[i++] );
00670 
00671        // we're done with range reference
00672        else
00673        {
00674          tokens.append( Token( Token::Range, tokenText, tokenStart ) );
00675          tokenText = "";
00676          state = Start;
00677        }
00678        break;
00679 
00680     case InSheetOrAreaName:
00681 
00682        // consume until '
00683         if ( ch.unicode() != '\'' )
00684           tokenText.append( ex[i++] );
00685 
00686        else
00687        {
00688          // eat the aposthrophe itself
00689          ++i;
00690          // must be followed by '!' to be sheet name
00691          if( ex[i] == '!' )
00692          {
00693            tokenText.append( ex[i++] );
00694            state = InCell;
00695          }
00696          else
00697          {
00698            if ( isNamedArea( tokenText ) )
00699              tokens.append (Token (Token::Range, tokenText, tokenStart));
00700            else
00701              tokens.append (Token (Token::Identifier, tokenText, tokenStart));
00702            tokenStart = i;
00703            tokenText = "";
00704            state = Start;
00705          }
00706        }
00707        break;
00708 
00709     case InNumber:
00710 
00711        // consume as long as it's digit
00712        if( ch.isDigit() ) tokenText.append( ex[i++] );
00713 
00714        // skip thousand separator
00715        else if( !thousand.isEmpty() && ( ch ==thousand[0] ) ) i++;
00716 
00717        // convert decimal separator to '.', also support '.' directly
00718        // we always support '.' because of bug #98455
00719        else if(( !decimal.isEmpty() && ( ch == decimal[0] ) ) || (ch == '.'))
00720        {
00721          tokenText.append( '.' );
00722          i++;
00723          state = InDecimal;
00724        }
00725 
00726        // exponent ?
00727        else if( ch.upper() == 'E' )
00728        {
00729          tokenText.append( 'E' );
00730          i++;
00731          state = InExpIndicator;
00732        }
00733 
00734        // reference sheet delimiter?
00735        else if( ch == '!' )
00736        {
00737          tokenText.append( ex[i++] );
00738          state = InCell;
00739        }
00740 
00741        // identifier?
00742        else if( isIdentifier( ch ) )
00743        {
00744          // has to be a sheet or area name then
00745          state = InIdentifier;
00746        }
00747 
00748        // we're done with integer number
00749        else
00750        {
00751          tokens.append( Token( Token::Integer, tokenText, tokenStart ) );
00752          tokenText = "";
00753          state = Start;
00754        };
00755        break;
00756 
00757     case InDecimal:
00758 
00759        // consume as long as it's digit
00760        if( ch.isDigit() ) tokenText.append( ex[i++] );
00761 
00762        // exponent ?
00763        else if( ch.upper() == 'E' )
00764        {
00765          tokenText.append( 'E' );
00766          i++;
00767          state = InExpIndicator;
00768        }
00769 
00770        // we're done with floating-point number
00771        else
00772        {
00773          tokens.append( Token( Token::Float, tokenText, tokenStart ) );
00774          tokenText = "";
00775          state = Start;
00776        };
00777        break;
00778 
00779     case InExpIndicator:
00780 
00781        // possible + or - right after E, e.g 1.23E+12 or 4.67E-8
00782        if( ( ch == '+' ) || ( ch == '-' ) ) tokenText.append( ex[i++] );
00783 
00784        // consume as long as it's digit
00785        else if( ch.isDigit() ) state = InExponent;
00786 
00787        // invalid thing here
00788        else state = Bad;
00789 
00790        break;
00791 
00792     case InExponent:
00793 
00794        // consume as long as it's digit
00795        if( ch.isDigit() ) tokenText.append( ex[i++] );
00796 
00797        // we're done with floating-point number
00798        else
00799        {
00800          tokens.append( Token( Token::Float, tokenText, tokenStart ) );
00801          tokenText = "";
00802          state = Start;
00803        };
00804        break;
00805 
00806     case InString:
00807 
00808        // consume until "
00809        if( ch != '"' ) tokenText.append( ex[i++] );
00810 
00811        else
00812        {
00813          tokenText.append( ch ); i++;
00814          tokens.append( Token( Token::String, tokenText, tokenStart ) );
00815          tokenText = "";
00816          state = Start;
00817        }
00818        break;
00819 
00820     case Bad:
00821     default:
00822        break;
00823     };
00824   };
00825 
00826   if( state == Bad )
00827     tokens.setValid( false );
00828 
00829   return tokens;
00830 }
00831 
00832 // will affect: dirty, valid, codes, constants
00833 void Formula::compile( const Tokens& tokens ) const
00834 {
00835   // initialize variables
00836   d->dirty = false;
00837   d->valid = false;
00838   d->codes.clear();
00839   d->constants.clear();
00840 
00841   // sanity check
00842   if( tokens.count() == 0 ) return;
00843 
00844   TokenStack syntaxStack;
00845   QValueStack<int> argStack;
00846   unsigned argCount = 1;
00847 
00848   for( unsigned i = 0; i <= tokens.count(); i++ )
00849   {
00850     // helper token: InvalidOp is end-of-formula
00851     Token token =  ( i < tokens.count() ) ? tokens[i] : Token( Token::Operator );
00852     Token::Type tokenType = token.type();
00853 
00854     // unknown token is invalid
00855     if( tokenType == Token::Unknown ) break;
00856 
00857     // for constants, push immediately to stack
00858     // generate code to load from a constant
00859     if ( ( tokenType == Token::Integer ) || ( tokenType == Token::Float ) ||
00860     ( tokenType == Token::String ) || ( tokenType == Token::Boolean ) )
00861     {
00862       syntaxStack.push( token );
00863       d->constants.append( tokenAsValue( token ) );
00864       d->codes.append( Opcode( Opcode::Load, d->constants.count()-1 ) );
00865     }
00866 
00867     // for cell, range, or identifier, push immediately to stack
00868     // generate code to load from reference
00869     if( ( tokenType == Token::Cell ) || ( tokenType == Token::Range ) ||
00870     ( tokenType == Token::Identifier ) )
00871     {
00872       syntaxStack.push( token );
00873       d->constants.append( Value( token.text() ) );
00874       if (tokenType == Token::Cell)
00875         d->codes.append( Opcode( Opcode::Cell, d->constants.count()-1 ) );
00876       else if (tokenType == Token::Range)
00877         d->codes.append( Opcode( Opcode::Range, d->constants.count()-1 ) );
00878       else
00879         d->codes.append( Opcode( Opcode::Ref, d->constants.count()-1 ) );
00880     }
00881 
00882     // are we entering a function ?
00883     // if token is operator, and stack already has: id ( arg
00884     if( tokenType == Token::Operator )
00885     if( syntaxStack.itemCount() >= 3 )
00886     {
00887         Token arg = syntaxStack.top();
00888         Token par = syntaxStack.top( 1 );
00889         Token id = syntaxStack.top( 2 );
00890         if( !arg.isOperator() )
00891         if( par.asOperator() == Token::LeftPar )
00892         if( id.isIdentifier() )
00893         {
00894           argStack.push( argCount );
00895           argCount = 1;
00896         }
00897      }
00898 
00899      // special case for percentage
00900     if( tokenType == Token::Operator )
00901     if( token.asOperator() == Token::Percent )
00902     if( syntaxStack.itemCount() >= 1 )
00903     if( !syntaxStack.top().isOperator() )
00904     {
00905       d->constants.append( Value( 0.01 ) );
00906       d->codes.append( Opcode( Opcode::Load, d->constants.count()-1 ) );
00907       d->codes.append( Opcode( Opcode::Mul ) );
00908     }
00909 
00910     // for any other operator, try to apply all parsing rules
00911     if( tokenType == Token::Operator )
00912     if( token.asOperator() != Token::Percent )
00913     {
00914       // repeat until no more rule applies
00915       for( ; ; )
00916       {
00917         bool ruleFound = false;
00918 
00919         // rule for function arguments, if token is ; or )
00920         // id ( arg1 ; arg2 -> id ( arg
00921         if( !ruleFound )
00922         if( syntaxStack.itemCount() >= 5 )
00923         if( ( token.asOperator() == Token::RightPar ) ||
00924         ( token.asOperator() == Token::Semicolon ) )
00925         {
00926           Token arg2 = syntaxStack.top();
00927           Token sep = syntaxStack.top( 1 );
00928           Token arg1 = syntaxStack.top( 2 );
00929           Token par = syntaxStack.top( 3 );
00930           Token id = syntaxStack.top( 4 );
00931           if( !arg2.isOperator() )
00932           if( sep.asOperator() == Token::Semicolon )
00933           if( !arg1.isOperator() )
00934           if( par.asOperator() == Token::LeftPar )
00935           if( id.isIdentifier() )
00936           {
00937             ruleFound = true;
00938             syntaxStack.pop();
00939             syntaxStack.pop();
00940             argCount++;
00941           }
00942         }
00943 
00944         // rule for function last argument:
00945         //  id ( arg ) -> arg
00946         if( !ruleFound )
00947         if( syntaxStack.itemCount() >= 4 )
00948         {
00949           Token par2 = syntaxStack.top();
00950           Token arg = syntaxStack.top( 1 );
00951           Token par1 = syntaxStack.top( 2 );
00952           Token id = syntaxStack.top( 3 );
00953           if( par2.asOperator() == Token::RightPar )
00954           if( !arg.isOperator() )
00955           if( par1.asOperator() == Token::LeftPar )
00956           if( id.isIdentifier() )
00957           {
00958             ruleFound = true;
00959             syntaxStack.pop();
00960             syntaxStack.pop();
00961             syntaxStack.pop();
00962             syntaxStack.pop();
00963             syntaxStack.push( arg );
00964             d->codes.append( Opcode( Opcode::Function, argCount ) );
00965             argCount = argStack.empty() ? 0 : argStack.pop();
00966           }
00967         }
00968 
00969         // rule for function call with parentheses, but without argument
00970         // e.g. "2*PI()"
00971         if( !ruleFound )
00972         if( syntaxStack.itemCount() >= 3 )
00973         {
00974           Token par2 = syntaxStack.top();
00975           Token par1 = syntaxStack.top( 1 );
00976           Token id = syntaxStack.top( 2 );
00977           if( par2.asOperator() == Token::RightPar )
00978           if( par1.asOperator() == Token::LeftPar )
00979           if( id.isIdentifier() )
00980           {
00981             ruleFound = true;
00982             syntaxStack.pop();
00983             syntaxStack.pop();
00984             syntaxStack.pop();
00985             syntaxStack.push( Token( Token::Integer ) );
00986             d->codes.append( Opcode( Opcode::Function, 0 ) );
00987           }
00988         }
00989 
00990         // rule for parenthesis:  ( Y ) -> Y
00991         if( !ruleFound )
00992         if( syntaxStack.itemCount() >= 3 )
00993         {
00994           Token right = syntaxStack.top();
00995           Token y = syntaxStack.top( 1 );
00996           Token left = syntaxStack.top( 2 );
00997           if( right.isOperator() )
00998           if( !y.isOperator() )
00999           if( left.isOperator() )
01000           if( right.asOperator() == Token::RightPar )
01001           if( left.asOperator() == Token::LeftPar )
01002           {
01003             ruleFound = true;
01004             syntaxStack.pop();
01005             syntaxStack.pop();
01006             syntaxStack.pop();
01007             syntaxStack.push( y );
01008           }
01009         }
01010 
01011         // rule for binary operator:  A (op) B -> A
01012         // conditions: precedence of op >= precedence of token
01013         // action: push (op) to result
01014         // e.g. "A * B" becomes "A" if token is operator "+"
01015         // exception: for caret (power operator), if op is another caret
01016         // then the rule doesn't apply, e.g. "2^3^2" is evaluated as "2^(3^2)"
01017         if( !ruleFound )
01018         if( syntaxStack.itemCount() >= 3 )
01019         {
01020           Token b = syntaxStack.top();
01021           Token op = syntaxStack.top( 1 );
01022           Token a = syntaxStack.top( 2 );
01023           if( !a.isOperator() )
01024           if( !b.isOperator() )
01025           if( op.isOperator() )
01026           if( token.asOperator() != Token::LeftPar )
01027           if( token.asOperator() != Token::Caret )
01028           if( opPrecedence( op.asOperator() ) >= opPrecedence( token.asOperator() ) )
01029           {
01030             ruleFound = true;
01031             syntaxStack.pop();
01032             syntaxStack.pop();
01033             syntaxStack.pop();
01034             syntaxStack.push( b );
01035             switch( op.asOperator() )
01036             {
01037               // simple binary operations
01038               case Token::Plus:         d->codes.append( Opcode::Add ); break;
01039               case Token::Minus:        d->codes.append( Opcode::Sub ); break;
01040               case Token::Asterisk:     d->codes.append( Opcode::Mul ); break;
01041               case Token::Slash:        d->codes.append( Opcode::Div ); break;
01042               case Token::Caret:        d->codes.append( Opcode::Pow ); break;
01043               case Token::Ampersand:    d->codes.append( Opcode::Concat ); break;
01044 
01045               // simple value comparisons
01046               case Token::Equal:        d->codes.append( Opcode::Equal ); break;
01047               case Token::Less:         d->codes.append( Opcode::Less ); break;
01048               case Token::Greater:      d->codes.append( Opcode::Greater ); break;
01049 
01050               // NotEqual is Equal, followed by Not
01051               case Token::NotEqual:
01052                 d->codes.append( Opcode::Equal );
01053                 d->codes.append( Opcode::Not );
01054                 break;
01055 
01056               // LessOrEqual is Greater, followed by Not
01057               case Token::LessEqual:
01058                 d->codes.append( Opcode::Greater );
01059                 d->codes.append( Opcode::Not );
01060                 break;
01061 
01062               // GreaterOrEqual is Less, followed by Not
01063               case Token::GreaterEqual:
01064                 d->codes.append( Opcode::Less );
01065                 d->codes.append( Opcode::Not );
01066                 break;
01067               default: break;
01068             };
01069           }
01070          }
01071 
01072          // rule for unary operator:  (op1) (op2) X -> (op1) X
01073          // conditions: op2 is unary, token is not '('
01074          // action: push (op2) to result
01075          // e.g.  "* - 2" becomes "*"
01076          if( !ruleFound )
01077          if( token.asOperator() != Token::LeftPar )
01078          if( syntaxStack.itemCount() >= 3 )
01079          {
01080             Token x = syntaxStack.top();
01081             Token op2 = syntaxStack.top( 1 );
01082             Token op1 = syntaxStack.top( 2 );
01083             if( !x.isOperator() )
01084             if( op1.isOperator() )
01085             if( op2.isOperator() )
01086             if( ( op2.asOperator() == Token::Plus ) ||
01087                ( op2.asOperator() == Token::Minus ) )
01088             {
01089               ruleFound = true;
01090               syntaxStack.pop();
01091               syntaxStack.pop();
01092               syntaxStack.push( x );
01093               if( op2.asOperator() == Token::Minus )
01094                 d->codes.append( Opcode( Opcode::Neg ) );
01095             }
01096           }
01097 
01098          // auxilary rule for unary operator:  (op) X -> X
01099          // conditions: op is unary, op is first in syntax stack, token is not '('
01100          // action: push (op) to result
01101          if( !ruleFound )
01102          if( token.asOperator() != Token::LeftPar )
01103          if( syntaxStack.itemCount() == 2 )
01104          {
01105             Token x = syntaxStack.top();
01106             Token op = syntaxStack.top( 1 );
01107             if( !x.isOperator() )
01108             if( op.isOperator() )
01109             if( ( op.asOperator() == Token::Plus ) ||
01110                ( op.asOperator() == Token::Minus ) )
01111             {
01112               ruleFound = true;
01113               syntaxStack.pop();
01114               syntaxStack.pop();
01115               syntaxStack.push( x );
01116               if( op.asOperator() == Token::Minus )
01117                 d->codes.append( Opcode( Opcode::Neg ) );
01118             }
01119           }
01120 
01121         if( !ruleFound ) break;
01122       }
01123 
01124       // can't apply rules anymore, push the token
01125       if( token.asOperator() != Token::Percent )
01126         syntaxStack.push( token );
01127     }
01128   }
01129 
01130   // syntaxStack must left only one operand and end-of-formula (i.e. InvalidOp)
01131   d->valid = false;
01132   if( syntaxStack.itemCount() == 2 )
01133   if( syntaxStack.top().isOperator() )
01134   if( syntaxStack.top().asOperator() == Token::InvalidOp )
01135   if( !syntaxStack.top(1).isOperator() )
01136     d->valid = true;
01137 
01138   // bad parsing ? clean-up everything
01139   if( !d->valid )
01140   {
01141     d->constants.clear();
01142     d->codes.clear();
01143   }
01144 }
01145 
01146 bool Formula::isNamedArea( const QString& expr ) const
01147 {
01148     QString tokenText( expr );
01149     // check for named areas ...
01150     if (d->sheet) {
01151         const QValueList<Reference> areas = d->sheet->doc()->listArea();
01152         QValueList<Reference>::const_iterator it;
01153         for (it = areas.begin(); it != areas.end(); ++it) {
01154             if ((*it).ref_name.lower() == tokenText.lower()) {
01155                  // we got a named area
01156                 return true;
01157             }
01158         }
01159     }
01160     return false;
01161 }
01162 
01163 
01164 // Evaluates the formula, returns the result.
01165 
01166 struct stackEntry {
01167   void reset () { row1 = col1 = row2 = col2 = -1; };
01168   Value val;
01169   int row1, col1, row2, col2;
01170 };
01171 
01172 Value Formula::eval() const
01173 {
01174   QValueStack<stackEntry> stack;
01175   stackEntry entry;
01176   unsigned index;
01177   Value val1, val2;
01178   QString c;
01179   QValueVector<Value> args;
01180 
01181   Sheet *sheet = 0;
01182   ValueParser* parser = 0;
01183   ValueConverter* converter = 0;
01184   ValueCalc* calc = 0;
01185 
01186   if (d->sheet)
01187   {
01188     sheet = d->sheet;
01189     converter = sheet->doc()->converter();
01190     calc = sheet->doc()->calc();
01191   }
01192   else
01193   {
01194     parser = new ValueParser( KGlobal::locale() );
01195     converter = new ValueConverter( parser );
01196     calc = new ValueCalc( converter );
01197   }
01198 
01199   Function* function;
01200   FuncExtra fe;
01201   fe.mycol = fe.myrow = 0;
01202   if (d->cell) {
01203     fe.mycol = d->cell->column();
01204     fe.myrow = d->cell->row();
01205   }
01206 
01207   if( d->dirty )
01208   {
01209     Tokens tokens = scan( d->expression );
01210     d->valid = tokens.valid();
01211     if( tokens.valid() )
01212       compile( tokens );
01213   }
01214 
01215   if( !d->valid )
01216     return Value::errorVALUE();
01217 
01218   for( unsigned pc = 0; pc < d->codes.count(); pc++ )
01219   {
01220     Value ret;   // for the function caller
01221     Opcode& opcode = d->codes[pc];
01222     index = opcode.index;
01223     switch( opcode.type )
01224     {
01225       // no operation
01226       case Opcode::Nop:
01227         break;
01228 
01229       // load a constant, push to stack
01230       case Opcode::Load:
01231         entry.reset();
01232         entry.val = d->constants[index];
01233         stack.push (entry);
01234         break;
01235 
01236       // unary operation
01237       case Opcode::Neg:
01238         entry.reset();
01239         entry.val = stack.pop().val;
01240         if (!entry.val.isError()) // do nothing if we got an error
01241           entry.val = calc->mul (entry.val, -1);
01242         stack.push (entry);
01243         break;
01244 
01245       // binary operation: take two values from stack, do the operation,
01246       // push the result to stack
01247       case Opcode::Add:
01248         entry.reset();
01249         val2 = stack.pop().val;
01250         val1 = stack.pop().val;
01251         val2 = calc->add( val1, val2 );
01252         entry.reset();
01253         entry.val = val2;
01254         stack.push (entry);
01255         break;
01256 
01257       case Opcode::Sub:
01258         val2 = stack.pop().val;
01259         val1 = stack.pop().val;
01260         val2 = calc->sub( val1, val2 );
01261         entry.reset();
01262         entry.val = val2;
01263         stack.push (entry);
01264         break;
01265 
01266       case Opcode::Mul:
01267         val2 = stack.pop().val;
01268         val1 = stack.pop().val;
01269         val2 = calc->mul( val1, val2 );
01270         entry.reset();
01271         entry.val = val2;
01272         stack.push (entry);
01273         break;
01274 
01275       case Opcode::Div:
01276         val2 = stack.pop().val;
01277         val1 = stack.pop().val;
01278         val2 = calc->div( val1, val2 );
01279         entry.reset();
01280         entry.val = val2;
01281         stack.push (entry);
01282         break;
01283 
01284       case Opcode::Pow:
01285         val2 = stack.pop().val;
01286         val1 = stack.pop().val;
01287         val2 = calc->pow( val1, val2 );
01288         entry.reset();
01289         entry.val = val2;
01290         stack.push (entry);
01291         break;
01292 
01293       // string concatenation
01294       case Opcode::Concat:
01295         val1 = converter->asString (stack.pop().val);
01296     val2 = converter->asString (stack.pop().val);
01297         if (val1.isError() || val2.isError())
01298       val1 = Value::errorVALUE();
01299     else
01300           val1.setValue( val2.asString().append( val1.asString() ) );
01301         entry.reset();
01302         entry.val = val1;
01303         stack.push (entry);
01304         break;
01305 
01306       // logical not
01307       case Opcode::Not:
01308         val1 = converter->asBoolean (stack.pop().val);
01309         if( val1.isError() )
01310       val1 = Value::errorVALUE();
01311     else
01312       val1.setValue( !val1.asBoolean() );
01313         entry.reset();
01314         entry.val = val1;
01315         stack.push (entry);
01316         break;
01317 
01318       // comparison
01319       case Opcode::Equal:
01320         val1 = stack.pop().val;
01321         val2 = stack.pop().val;
01322         if( !val1.allowComparison( val2 ) )
01323           val1 = Value::errorNA();
01324     else if( val2.compare( val1 ) == 0 )
01325           val1 = Value (true);
01326         else
01327           val1 = Value (false);
01328         entry.reset();
01329         entry.val = val1;
01330         stack.push (entry);
01331         break;
01332 
01333       // less than
01334       case Opcode::Less:
01335         val1 = stack.pop().val;
01336         val2 = stack.pop().val;
01337         if( !val1.allowComparison( val2 ) )
01338           val1 = Value::errorNA();
01339     else if( val2.compare( val1 ) < 0 )
01340           val1 = Value (true);
01341         else
01342           val1 = Value (false);
01343         entry.reset();
01344         entry.val = val1;
01345         stack.push (entry);
01346         break;
01347 
01348       // greater than
01349       case Opcode::Greater:
01350         val1 = stack.pop().val;
01351         val2 = stack.pop().val;
01352         if( !val1.allowComparison( val2 ) )
01353           val1 = Value::errorNA();
01354     else if( val2.compare( val1 ) > 0 )
01355           val1 = Value (true);
01356         else
01357           val1 = Value (false);
01358         entry.reset();
01359         entry.val = val1;
01360         stack.push (entry);
01361         break;
01362 
01363 
01364       case Opcode::Cell:
01365         c = d->constants[index].asString();
01366         val1 = Value::empty();
01367         entry.reset();
01368         if (sheet)
01369         {
01370           Point cell (c, sheet->workbook(), sheet);
01371           if (cell.isValid())
01372           {
01373             val1 = cell.sheet()->value (cell.column(), cell.row());
01374             // store the reference, so we can use it within functions
01375             entry.col1 = entry.col2 = cell.column();
01376             entry.row1 = entry.row2 = cell.row();
01377           }
01378         }
01379         entry.val = val1;
01380         stack.push (entry);
01381         break;
01382 
01383       case Opcode::Range:
01384         c = d->constants[index].asString();
01385         val1 = Value::empty();
01386         entry.reset();
01387         if (sheet)
01388         {
01389           Range range (c, sheet->workbook(), sheet);
01390           if (range.isValid())
01391           {
01392             val1 = range.sheet()->valueRange (range.startCol(), range.startRow(),
01393                 range.endCol(), range.endRow());
01394             // store the reference, so we can use it within functions
01395             entry.col1 = range.startCol();
01396             entry.row1 = range.startRow();
01397             entry.col2 = range.endCol();
01398             entry.row2 = range.endRow();
01399           }
01400         }
01401         entry.val = val1;
01402         stack.push (entry);
01403         break;
01404 
01405       case Opcode::Ref:
01406         val1 = d->constants[index];
01407         entry.reset();
01408         entry.val = val1;
01409         stack.push (entry);
01410         break;
01411 
01412       // calling function
01413       case Opcode::Function:
01414         if( stack.count() < index )
01415           // (Tomas) umm, how could that be ? I mean, the index value
01416           //  is computed from the stack *confused*
01417           return Value::errorVALUE(); // not enough arguments
01418 
01419         args.clear();
01420         fe.ranges.clear ();
01421         fe.ranges.reserve (index);
01422         fe.sheet = sheet;
01423         for( ; index; index-- )
01424         {
01425           stackEntry e = stack.pop();
01426           args.insert (args.begin(), e.val);
01427           // TODO: create and fill a FunctionExtra object, if needed
01428           // problem: we don't know if we need it, as we don't have the
01429           // fuction name yet ...
01430           fe.ranges[index - 1].col1 = e.col1;
01431           fe.ranges[index - 1].row1 = e.row1;
01432           fe.ranges[index - 1].col2 = e.col2;
01433           fe.ranges[index - 1].row2 = e.row2;
01434         }
01435 
01436         // function name as string value
01437         val1 = converter->asString (stack.pop().val);
01438         if( val1.isError() )
01439           return Value::errorVALUE();
01440         function = FunctionRepository::self()->function ( val1.asString() );
01441         if( !function )
01442           return Value::errorVALUE(); // no such function
01443 
01444         ret = function->exec (args, calc, &fe);
01445         entry.reset();
01446         entry.val = ret;
01447         stack.push (entry);
01448 
01449         break;
01450 
01451       default:
01452         break;
01453     }
01454   }
01455 
01456   if (!d->sheet) {
01457     delete parser;
01458     delete converter;
01459     delete calc;
01460   }
01461 
01462   // more than one value in stack ? unsuccesful execution...
01463   if( stack.count() != 1 )
01464     return Value::errorVALUE();
01465 
01466   return stack.pop().val;
01467 
01468 }
01469 
01470 // Debugging aid
01471 
01472 QString Formula::dump() const
01473 {
01474   QString result;
01475 
01476   if( d->dirty )
01477   {
01478     Tokens tokens = scan( d->expression );
01479     compile( tokens );
01480   }
01481 
01482   result = QString("Expression: [%1]\n").arg( d->expression );
01483 #if 0
01484   Value value = eval();
01485   result.append( QString("Result: %1\n").arg(
01486       converter->asString(value).asString() ) );
01487 #endif
01488 
01489   result.append("  Constants:\n");
01490   for( unsigned c = 0; c < d->constants.count(); c++ )
01491   {
01492     QString vtext;
01493     Value val = d->constants[c];
01494     if( val.isString() ) vtext = QString("[%1]").arg( val.asString() );
01495     else if( val.isNumber() ) vtext = QString("%1").arg( val.asFloat() );
01496     else if( val.isBoolean() ) vtext = QString("%1").arg( val.asBoolean() ? "True":"False");
01497     else if( val.isError() ) vtext = "error";
01498     else vtext = "???";
01499     result += QString("    #%1 = %2\n").arg(c).arg( vtext );
01500   }
01501 
01502   result.append("\n");
01503   result.append("  Code:\n");
01504   for( unsigned i = 0; i < d->codes.count(); i++ )
01505   {
01506     QString ctext;
01507     switch( d->codes[i].type )
01508     {
01509       case Opcode::Load:     ctext = QString("Load #%1").arg( d->codes[i].index ); break;
01510       case Opcode::Ref:      ctext = QString("Ref #%1").arg( d->codes[i].index ); break;
01511       case Opcode::Function: ctext = QString("Function (%1)").arg( d->codes[i].index ); break;
01512       case Opcode::Add:      ctext = "Add"; break;
01513       case Opcode::Sub:      ctext = "Sub"; break;
01514       case Opcode::Mul:      ctext = "Mul"; break;
01515       case Opcode::Div:      ctext = "Div"; break;
01516       case Opcode::Neg:      ctext = "Neg"; break;
01517       case Opcode::Concat:   ctext = "Concat"; break;
01518       case Opcode::Pow:      ctext = "Pow"; break;
01519       case Opcode::Equal:    ctext = "Equal"; break;
01520       case Opcode::Not:      ctext = "Not"; break;
01521       case Opcode::Less:     ctext = "Less"; break;
01522       case Opcode::Greater:  ctext = "Greater"; break;
01523       default: ctext = "Unknown"; break;
01524     }
01525     result.append( "   " ).append( ctext ).append("\n");
01526   }
01527 
01528   return result;
01529 }
01530 
01531 QTextStream& operator<<( QTextStream& ts, Formula formula )
01532 {
01533   ts << formula.dump();
01534   return ts;
01535 }
KDE Home | KDE Accessibility Home | Description of Access Keys