Lucene++ - a full-featured, C++ search engine
API Documentation


 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
DocumentsWriter.h
Go to the documentation of this file.
1 
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef DOCUMENTSWRITER_H
8 #define DOCUMENTSWRITER_H
9 
10 #include "ByteBlockPool.h"
11 #include "RAMFile.h"
12 
13 namespace Lucene {
14 
// DocumentsWriter: class derived from LuceneObject. Only declarations appear
// here - no member function is defined inline and the static constants carry no
// in-class initializer, so their definitions must live in an implementation file.
// NOTE(review): this is a rendered listing; the embedded line numbers skip values
// (e.g. 55 -> 57, 75 -> 77 -> 80), so original doc comments and/or declarations
// are missing from this view. Check the real header before relying on it.
54 class DocumentsWriter : public LuceneObject {
55 public:
57  virtual ~DocumentsWriter();
58 
60 
61 protected:
62  String docStoreSegment; // Current doc-store segment we are writing
63  int32_t docStoreOffset; // Current starting doc-store offset of current segment
64 
65  int32_t nextDocID; // Next docID to be added
66  int32_t numDocsInRAM; // # docs buffered in RAM
67 
69  static const int32_t MAX_THREAD_STATE; // declared only; the value is defined out of line
71  MapThreadDocumentsWriterThreadState threadBindings; // NOTE(review): type name suggests a thread -> DocumentsWriterThreadState map - confirm
72 
73  int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush)
74  bool aborting; // True if an abort is pending
75 
77 // NOTE(review): most of embedded lines 76-87 are absent from this listing; anything declared there is not shown
80 
83 
86 
88  int64_t ramBufferSize; // NOTE(review): presumably a byte count (the public accessors use MB as a double) - confirm
91 
93  int64_t freeTrigger; // NOTE(review): freeTrigger/freeLevel look like RAM thresholds used when balancing RAM - confirm
94  int64_t freeLevel;
95 
97  int32_t maxBufferedDocs; // NOTE(review): presumably backs setMaxBufferedDocs()/getMaxBufferedDocs() - confirm
98 
101 
102  bool closed; // NOTE(review): presumably set once close() has run - confirm
103 
107 
110 
111 public: // static constants: all are declared here without initializers, so they are defined out of line
113  static const int32_t OBJECT_HEADER_BYTES;
114  static const int32_t POINTER_NUM_BYTE;
115  static const int32_t INT_NUM_BYTE;
116  static const int32_t CHAR_NUM_BYTE;
117 
123  static const int32_t BYTES_PER_DEL_TERM;
124 
127  static const int32_t BYTES_PER_DEL_DOCID;
128 
132  static const int32_t BYTES_PER_DEL_QUERY;
133 
135  static const int32_t BYTE_BLOCK_SHIFT; // NOTE(review): SHIFT/SIZE/MASK naming suggests SIZE == 1 << SHIFT and MASK == SIZE - 1 - confirm in the definitions
136  static const int32_t BYTE_BLOCK_SIZE;
137  static const int32_t BYTE_BLOCK_MASK;
138  static const int32_t BYTE_BLOCK_NOT_MASK;
139 
141  static const int32_t CHAR_BLOCK_SHIFT;
142  static const int32_t CHAR_BLOCK_SIZE;
143  static const int32_t CHAR_BLOCK_MASK;
144 
145  static const int32_t MAX_TERM_LENGTH;
146 
148  static const int32_t INT_BLOCK_SHIFT;
149  static const int32_t INT_BLOCK_SIZE;
150  static const int32_t INT_BLOCK_MASK;
151 
152  static const int32_t PER_DOC_BLOCK_SIZE;
153 
154 INTERNAL: // not a C++ keyword: must be a project macro that expands to an access specifier
158  String segment; // Current segment we are working on
159 
160  int32_t numDocsInStore; // # docs written to doc stores
161 
162  bool flushPending; // True when a thread has decided to flush
163  bool bufferIsFull; // True when it's time to write segment
164 
166  int32_t maxFieldLength;
168 
170 
173 
176 
179 
180  int64_t numBytesAlloc; // NOTE(review): presumably maintained by bytesAllocated() - confirm
181  int64_t numBytesUsed; // NOTE(review): presumably maintained by bytesUsed() - confirm
182 
183  // used only by assert
185 // NOTE(review): the member the comment above refers to (embedded line 184) is not shown in this listing
186 public:
187  virtual void initialize();
188 
191 
193 
194  void updateFlushedDocCount(int32_t n);
195  int32_t getFlushedDocCount();
196  void setFlushedDocCount(int32_t n);
197 
199  bool hasProx();
200 
203 
204  void setMaxFieldLength(int32_t maxFieldLength);
206 
208  void setRAMBufferSizeMB(double mb); // RAM buffer size is exchanged in MB as a double
209  double getRAMBufferSizeMB();
210 
212  void setMaxBufferedDocs(int32_t count);
213  int32_t getMaxBufferedDocs();
214 
216  String getSegment();
217 
219  int32_t getNumDocsInRAM();
220 
222  String getDocStoreSegment();
223 
225  int32_t getDocStoreOffset();
226 
229  String closeDocStore(); // NOTE(review): returns a String - presumably the doc-store segment name - confirm
230 
232 
233  void message(const String& message);
234 
238 
239  void addOpenFile(const String& name);
240  void removeOpenFile(const String& name);
241 
242  void setAborting();
243 
246  void abort();
247 
249  bool pauseAllThreads();
250  void resumeAllThreads();
251 
252  bool anyChanges();
253 
254  void initFlushState(bool onlyDocStore);
255 
257  int32_t flush(bool _closeDocStore); // leading underscore keeps the parameter from hiding the closeDocStore() member declared above
258 
260 
262  void createCompoundFile(const String& segment);
263 
266  bool setFlushPending();
267  void clearFlushPending();
268 
269  void pushDeletes();
270 
271  void close();
272 
273  void initSegmentName(bool onlyDocStore);
274 
279 
281  bool addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer);
282 
283  bool updateDocument(const TermPtr& t, const DocumentPtr& doc, const AnalyzerPtr& analyzer); // overload: term first
284  bool updateDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer, const TermPtr& delTerm); // overload: same three types, term last
285 
286  int32_t getNumBufferedDeleteTerms(); // for testing
287  MapTermNum getBufferedDeleteTerms(); // for testing
288 
290  void remapDeletes(const SegmentInfosPtr& infos, Collection< Collection<int32_t> > docMaps, Collection<int32_t> delCounts, const OneMergePtr& merge, int32_t mergeDocCount); // NOTE(review): Collection arguments are taken by value - confirm Collection has cheap copy semantics
291 
293  bool bufferDeleteTerm(const TermPtr& term);
295  bool bufferDeleteQuery(const QueryPtr& query);
296  bool deletesFull();
297  bool doApplyDeletes();
298 
300  int32_t getMaxBufferedDeleteTerms();
301 
302  bool hasDeletes();
303  bool applyDeletes(const SegmentInfosPtr& infos); // public overload; a protected (IndexReaderPtr, int32_t) overload is declared further down
304  bool doBalanceRAM();
305 
306  void waitForWaitQueue();
307 
308  int64_t getRAMUsed();
309 
310  IntArray getIntBlock(bool trackAllocations);
311  void bytesAllocated(int64_t numBytes);
312  void bytesUsed(int64_t numBytes);
313  void recycleIntBlocks(Collection<IntArray> blocks, int32_t start, int32_t end);
314 
315  CharArray getCharBlock();
316  void recycleCharBlocks(Collection<CharArray> blocks, int32_t numBlocks);
317 
318  String toMB(int64_t v);
319 
328  void balanceRAM();
329 
330 protected:
332  void doAfterFlush();
333 
334  bool allThreadsIdle();
335 
336  void waitReady(const DocumentsWriterThreadStatePtr& state);
337 
338  bool timeToFlushDeletes();
339 
340  // used only by assert
341  bool checkDeleteTerm(const TermPtr& term);
342 
343  bool applyDeletes(const IndexReaderPtr& reader, int32_t docIDStart); // protected overload taking a reader and a starting docID
344  void addDeleteTerm(const TermPtr& term, int32_t docCount);
345 
347  void addDeleteDocID(int32_t docID);
348  void addDeleteQuery(const QueryPtr& query, int32_t docID);
349 
351  void finishDocument(const DocumentsWriterThreadStatePtr& perThread, const DocWriterPtr& docWriter);
352 
353  friend class WaitQueue; // gives WaitQueue access to the protected members of this class
354 };
355 
// DocState: LuceneObject subclass exposing public per-document fields plus
// testPoint() and clear(). Nothing is defined inline in this listing.
// NOTE(review): embedded numbering skips 361, 364-365, 367-368 and 370-371, so
// further members and the original doc comments are not visible here.
356 class DocState : public LuceneObject {
357 public:
358  DocState();
359  virtual ~DocState();
360 
362 
363 public:
366  int32_t maxFieldLength;
369  int32_t docID;
372 
373 public:
375  virtual bool testPoint(const String& name); // virtual, so a subclass can replace it
376 
377  void clear(); // NOTE(review): presumably resets the fields above - confirm in the implementation
378 };
379 
// PerDocBuffer: subclass of RAMFile constructed from a DocumentsWriterPtr.
// Adds a public recycle() and a protected virtual newBuffer().
// NOTE(review): the first protected section shows no members (embedded line 389
// is missing), so whatever state it holds is not visible in this listing.
381 class PerDocBuffer : public RAMFile {
382 public:
383  PerDocBuffer(const DocumentsWriterPtr& docWriter);
384  virtual ~PerDocBuffer();
385 
387 
388 protected:
390 
391 public:
393  void recycle();
394 
395 protected:
397  virtual ByteArray newBuffer(int32_t size); // NOTE(review): presumably overrides a RAMFile hook of the same name - confirm in RAMFile.h
398 };
399 
// DocWriter: abstract LuceneObject subclass. finish(), abort() and sizeInBytes()
// are pure virtual, so the class cannot be instantiated directly; concrete
// subclasses must implement all three. setNext() is virtual but not pure.
// NOTE(review): embedded numbering skips 407 and 410, so members and doc
// comments on those lines are not shown.
402 class DocWriter : public LuceneObject {
403 public:
404  DocWriter();
405  virtual ~DocWriter();
406 
408 
409 public:
411  int32_t docID;
412 
413 public:
414  virtual void finish() = 0; // pure virtual
415  virtual void abort() = 0; // pure virtual
416  virtual int64_t sizeInBytes() = 0; // pure virtual
417 
418  virtual void setNext(const DocWriterPtr& next); // NOTE(review): the field that stores `next` is not visible in this listing
419 };
420 
// IndexingChain: abstract LuceneObject subclass whose single pure virtual,
// getChain(), returns a DocConsumerPtr for a given DocumentsWriterPtr.
423 class IndexingChain : public LuceneObject {
424 public:
425  virtual ~IndexingChain();
426 
428 
429 public:
430  virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter) = 0; // pure virtual: subclasses supply the chain
431 };
432 
// NOTE(review): the class header for this body is missing from the listing
// (embedded numbering jumps from 432 to 448). The destructor name shows these
// members belong to DefaultIndexingChain; its base class is not visible here -
// presumably IndexingChain, given the matching getChain() signature - confirm
// against the real header.
448 public:
449  virtual ~DefaultIndexingChain();
450 
452 
453 public:
454  virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter); // same signature as IndexingChain::getChain, declared without "= 0"
455 };
456 
// SkipDocWriter: DocWriter subclass that declares finish(), abort() and
// sizeInBytes() - the three functions DocWriter leaves pure virtual.
// NOTE(review): what each implementation does is not visible in this header.
457 class SkipDocWriter : public DocWriter {
458 public:
459  virtual ~SkipDocWriter();
460 
462 
463 public:
464  virtual void finish(); // implements DocWriter::finish
465  virtual void abort(); // implements DocWriter::abort
466  virtual int64_t sizeInBytes(); // implements DocWriter::sizeInBytes
467 };
468 
// WaitQueue: LuceneObject subclass constructed from a DocumentsWriterPtr.
// Exposes public counters, queue operations taking DocWriterPtr, and a
// protected writeDocument(). DocumentsWriter declares this class a friend.
// NOTE(review): the first protected section shows no members (embedded line 477
// is missing), so the stored state is not visible in this listing.
469 class WaitQueue : public LuceneObject {
470 public:
471  WaitQueue(const DocumentsWriterPtr& docWriter);
472  virtual ~WaitQueue();
473 
475 
476 protected:
478 
479 public:
481  int32_t nextWriteDocID;
482  int32_t nextWriteLoc;
483  int32_t numWaiting;
484  int64_t waitingBytes;
485 
486 public:
487  void reset();
488  bool doResume();
489  bool doPause();
490  void abort();
491  bool add(const DocWriterPtr& doc); // NOTE(review): meaning of the bool result is not visible here - confirm in the implementation
492 
493 protected:
494  void writeDocument(const DocWriterPtr& doc);
495 };
496 
// NOTE(review): the class header for this body is missing from the listing
// (embedded numbering jumps from 496 to 498). The constructor and destructor
// names show these members belong to ByteBlockAllocator; its base class is not
// visible here - presumably an allocator base declared in ByteBlockPool.h,
// which this file includes - confirm against the real header.
498 public:
499  ByteBlockAllocator(const DocumentsWriterPtr& docWriter, int32_t blockSize);
500  virtual ~ByteBlockAllocator();
501 
503 
504 protected:
506 
507 public:
508  int32_t blockSize; // same name as the constructor's second parameter
510 
511 public:
513  virtual ByteArray getByteBlock(bool trackAllocations);
514 
516  virtual void recycleByteBlocks(Collection<ByteArray> blocks, int32_t start, int32_t end); // overload taking a [start, end) or [start, end] range - NOTE(review): confirm whether end is inclusive
517  virtual void recycleByteBlocks(Collection<ByteArray> blocks); // overload taking the whole collection
518 };
519 
520 }
521 
522 #endif

clucene.sourceforge.net