00001 /* 00002 * Merge different vocabularies together and create the tag and facet indexes 00003 * 00004 * Copyright (C) 2003-2007 Enrico Zini <enrico@debian.org> 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software 00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 */ 00020 00021 #include <tagcoll/diskindex/mmap.h> 00022 #include <tagcoll/input/base.h> 00023 #include <string> 00024 #include <map> 00025 #include <set> 00026 00027 #ifndef EPT_DEBTAGS_VOCABULARYMERGER_H 00028 #define EPT_DEBTAGS_VOCABULARYMERGER_H 00029 00030 namespace ept { 00031 namespace debtags { 00032 00033 class VocabularyMerger 00034 { 00035 protected: 00036 class FacetIndexer : public tagcoll::diskindex::MMapIndexer 00037 { 00038 protected: 00039 VocabularyMerger& vm; 00040 public: 00041 FacetIndexer(VocabularyMerger& vm) : vm(vm) {} 00042 virtual ~FacetIndexer() {} 00043 virtual int encodedSize() const; 00044 virtual void encode(char* buf) const; 00045 }; 00046 class TagIndexer : public tagcoll::diskindex::MMapIndexer 00047 { 00048 protected: 00049 VocabularyMerger& vm; 00050 public: 00051 TagIndexer(VocabularyMerger& vm) : vm(vm) {} 00052 virtual ~TagIndexer() {} 00053 virtual int encodedSize() const; 00054 virtual void encode(char* buf) const; 00055 }; 00056 class TagData : public std::map<std::string, std::string> 00057 { 00058 public: 00059 std::string name; 00060 // Offset in the last written file (used for indexing) 00061 long ofs; 00062 int len; 00063 int id; 00064 00065 TagData() : ofs(0), len(0) {} 00066 }; 00067 class FacetData : public std::map<std::string, std::string> 00068 { 00069 public: 00070 std::string name; 00071 std::map<std::string, TagData> tags; 00072 // Offset in the last written file (used for indexing) 00073 long ofs; 00074 int len; 00075 int id; 00076 00077 FacetData() : ofs(0), len(0) {} 00078 00079 TagData& obtainTag(const std::string& fullname); 00080 }; 00081 std::map<std::string, FacetData> facets; 00082 int tagCount; 00083 FacetIndexer findexer; 00084 TagIndexer tindexer; 00085 00086 FacetData& obtainFacet(const std::string& name); 00087 TagData& obtainTag(const std::string& fullname); 00088 00089 public: 00090 VocabularyMerger() : tagCount(0), findexer(*this), tindexer(*this) {} 00091 00095 bool empty() const { return facets.empty(); } 00096 00101 void read(tagcoll::input::Input& input); 00102 00106 void write(const std::string& fname); 00107 00111 void write(FILE* out); 00112 00119 const tagcoll::diskindex::MMapIndexer& facetIndexer() const { return findexer; } 00120 00127 const tagcoll::diskindex::MMapIndexer& tagIndexer() const { return tindexer; } 00128 00132 bool hasFacet(const std::string& name) const 00133 { 00134 return facets.find(name) != facets.end(); 00135 } 00136 00140 bool hasTag(const std::string& fullname) const; 00141 00145 int tagID(const std::string& fullname) const; 00146 00150 std::set<std::string> tagNames() const; 00151 }; 00152 00153 } 00154 } 00155 00156 // vim:set ts=4 sw=4: 00157 #endif