Lucene++ - a full-featured, C++ search engine
API Documentation
Main Page
Related Pages
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Pages
include
StandardTokenizer.h
Go to the documentation of this file.
1
2
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3
// Distributable under the terms of either the Apache License (Version 2.0)
4
// or the GNU Lesser General Public License.
6
7
#ifndef STANDARDTOKENIZER_H
8
#define STANDARDTOKENIZER_H
9
10
#include "
Tokenizer.h
"
11
12
namespace
Lucene {
13
34
/// Tokenizer subclass exported from the Lucene++ library.
///
/// Only the declarations are visible in this header; the tokenization rules
/// themselves live in the implementation file and in the scanner object this
/// class holds.
class LPPAPI StandardTokenizer : public Tokenizer {
public:
    /// Construct a tokenizer over @p input.
    /// @param matchVersion Library version selector (exact effect is defined
    ///        by the implementation; not visible from this header).
    /// @param input Reader supplying the text to tokenize.
    StandardTokenizer(LuceneVersion::Version matchVersion, const ReaderPtr& input);

    /// Construct a tokenizer over @p input using the given attribute source.
    /// @param matchVersion Library version selector.
    /// @param source Attribute source handed to the tokenizer.
    /// @param input Reader supplying the text to tokenize.
    StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeSourcePtr& source, const ReaderPtr& input);

    /// Construct a tokenizer over @p input using the given attribute factory.
    /// @param matchVersion Library version selector.
    /// @param factory Attribute factory handed to the tokenizer.
    /// @param input Reader supplying the text to tokenize.
    StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeFactoryPtr& factory, const ReaderPtr& input);

    virtual ~StandardTokenizer();

    LUCENE_CLASS(StandardTokenizer);

protected:
    /// Scanner implementation object held by this tokenizer.
    StandardTokenizerImplPtr scanner;

    /// Backing field for isReplaceInvalidAcronym() / setReplaceInvalidAcronym()
    /// (presumably; accessor bodies are not visible here).
    bool replaceInvalidAcronym;

    /// Backing field for getMaxTokenLength() / setMaxTokenLength()
    /// (presumably; accessor bodies are not visible here).
    int32_t maxTokenLength;

    // Attribute handles kept by this tokenizer: term, offset,
    // position increment and type.
    TermAttributePtr termAtt;
    OffsetAttributePtr offsetAtt;
    PositionIncrementAttributePtr posIncrAtt;
    TypeAttributePtr typeAtt;

public:
    // Token type identifiers. Their values are defined out of line;
    // NOTE(review): presumably these index into TOKEN_TYPES() -- confirm
    // against the implementation file.
    static const int32_t ALPHANUM;
    static const int32_t APOSTROPHE;
    static const int32_t ACRONYM;
    static const int32_t COMPANY;
    static const int32_t EMAIL;
    static const int32_t HOST;
    static const int32_t NUM;
    static const int32_t CJ;

    /// NOTE(review): the "_DEP" suffix suggests a deprecated acronym type --
    /// confirm against the implementation file.
    static const int32_t ACRONYM_DEP;

    /// Returns a collection of strings; judging by the name, these are the
    /// token type names (contents are defined in the implementation file).
    static const Collection<String> TOKEN_TYPES();

protected:
    /// Shared initialisation helper taking the same reader and version
    /// arguments as the constructors (body not visible in this header).
    void init(const ReaderPtr& input, LuceneVersion::Version matchVersion);

public:
    /// Set the maximum token length.
    /// @param length New maximum token length.
    void setMaxTokenLength(int32_t length);

    /// @return The maximum token length.
    int32_t getMaxTokenLength();

    /// Virtual token-advance hook.
    /// @return A bool whose meaning is defined by the implementation
    ///         (presumably whether a token was produced -- confirm).
    virtual bool incrementToken();

    /// Virtual end-of-stream hook.
    virtual void end();

    /// Virtual reset hook taking a new reader.
    /// @param input Reader to use from now on.
    virtual void reset(const ReaderPtr& input);

    /// @return The replace-invalid-acronym flag.
    bool isReplaceInvalidAcronym();

    /// Set the replace-invalid-acronym flag.
    /// @param replaceInvalidAcronym New flag value.
    void setReplaceInvalidAcronym(bool replaceInvalidAcronym);
};
104
105
}
106
107
#endif
clucene.sourceforge.net