Lucene++ - a full-featured C++ search engine
API Documentation


 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
CharTokenizer.h
Go to the documentation of this file.
1 
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef CHARTOKENIZER_H
8 #define CHARTOKENIZER_H
9 
10 #include "Tokenizer.h"
11 
12 namespace Lucene {
13 
15 class LPPAPI CharTokenizer : public Tokenizer {
    /// Abstract Tokenizer subclass: isTokenChar() is pure virtual, so this class cannot be
    /// instantiated directly and concrete tokenizers must derive from it and supply that
    /// predicate.  Only declarations appear here; all member definitions are out of line.
    /// NOTE(review): the name and hooks suggest tokens are built from runs of characters
    /// accepted by isTokenChar() and passed through normalize() - confirm against the
    /// implementation file.
16 public:
    /// Constructors.  Each takes the reader to tokenize; the second and third overloads
    /// additionally take an attribute source or an attribute factory respectively.
    /// NOTE(review): presumably these arguments are forwarded to the Tokenizer base - confirm.
17  CharTokenizer(const ReaderPtr& input);
18  CharTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input);
19  CharTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input);
    /// Virtual destructor (defined out of line).
20  virtual ~CharTokenizer();
21 
23 
24 protected:
    /// Scanning state shared with subclasses.  How these are updated is defined in the
    /// implementation file, not in this header.
    /// NOTE(review): presumably `offset` is a position in the input, `bufferIndex` an index
    /// into `ioBuffer`, and `dataLen` the number of valid characters in `ioBuffer` - confirm.
25  int32_t offset;
26  int32_t bufferIndex;
27  int32_t dataLen;
28 
    /// Class-wide constants.  They are only declared here; their values are set by the
    /// out-of-line definitions and are not visible in this header.
29  static const int32_t MAX_WORD_LEN;
30  static const int32_t IO_BUFFER_SIZE;
31 
    /// Character buffer.
    /// NOTE(review): presumably filled from the input reader and sized by IO_BUFFER_SIZE - confirm.
32  CharArray ioBuffer;
35 
36 public:
    /// Virtual token-stream operations, defined out of line.
    /// NOTE(review): these look like overrides of the Tokenizer base interface (no `override`
    /// specifier is used) - confirm against Tokenizer.h.
    /// incrementToken() reports its result as a bool; end() takes no arguments;
    /// reset() accepts a reader to use as the new input.
37  virtual bool incrementToken();
38  virtual void end();
39  virtual void reset(const ReaderPtr& input);
40 
41 protected:
    /// Pure virtual predicate over a single wide character; every concrete subclass must
    /// implement it.
    /// NOTE(review): presumably returns true when `c` belongs inside a token and false when
    /// it separates tokens - confirm.
45  virtual bool isTokenChar(wchar_t c) = 0;
46 
    /// Virtual hook that maps one wide character to another.  It is not pure, so a default
    /// implementation exists out of line and subclasses may override it.
    /// NOTE(review): presumably applied to each token character before it is stored, with a
    /// default that returns `c` unchanged - confirm.
49  virtual wchar_t normalize(wchar_t c);
50 };
51 
52 }
53 
54 #endif

clucene.sourceforge.net