KDECore
ksycocadict.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "ksycocadict.h"
00020 #include <kservice.h>
00021 #include "ksycocaentry.h"
00022 #include "ksycoca.h"
00023 #include "kdebug.h"
00024
00025 #include <QtCore/QBitArray>
00026
00027 namespace
00028 {
00029 struct string_entry {
00030 string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
00031 : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
00032 {}
00033 uint hash;
00034 const int length;
00035 const QString keyStr;
00036 const QChar * const key;
00037 const KSycocaEntry::Ptr payload;
00038 };
00039 }
00040
00041 class KSycocaDictStringList : public QList<string_entry*>
00042 {
00043 public:
00044 ~KSycocaDictStringList() {
00045 qDeleteAll(*this);
00046 }
00047 };
00048
00049 class KSycocaDict::Private
00050 {
00051 public:
00052 Private()
00053 : stringlist( 0 ),
00054 stream( 0 ),
00055 offset( 0 )
00056 {
00057 }
00058
00059 ~Private()
00060 {
00061 delete stringlist;
00062 }
00063
00064
00065 qint32 offsetForKey(const QString& key) const;
00066
00067
00068 quint32 hashKey(const QString & key) const;
00069
00070 KSycocaDictStringList *stringlist;
00071 QDataStream *stream;
00072 qint32 offset;
00073 quint32 hashTableSize;
00074 QList<qint32> hashList;
00075 };
00076
00077 KSycocaDict::KSycocaDict()
00078 : d( new Private )
00079 {
00080 }
00081
00082 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00083 : d( new Private )
00084 {
00085 d->stream = str;
00086 d->offset = offset;
00087
00088 quint32 test1, test2;
00089 str->device()->seek(offset);
00090 (*str) >> test1 >> test2;
00091 if ((test1 > 0x000fffff) || (test2 > 1024))
00092 {
00093 KSycoca::flagError();
00094 d->hashTableSize = 0;
00095 d->offset = 0;
00096 return;
00097 }
00098
00099 str->device()->seek(offset);
00100 (*str) >> d->hashTableSize;
00101 (*str) >> d->hashList;
00102 d->offset = str->device()->pos();
00103 }
00104
00105 KSycocaDict::~KSycocaDict()
00106 {
00107 delete d;
00108 }
00109
00110 void
00111 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
00112 {
00113 if (key.isEmpty()) return;
00114 if (!payload) return;
00115 if (!d->stringlist)
00116 {
00117 d->stringlist = new KSycocaDictStringList;
00118 }
00119
00120 string_entry *entry = new string_entry(key, payload);
00121 d->stringlist->append(entry);
00122 }
00123
00124 void
00125 KSycocaDict::remove(const QString &key)
00126 {
00127 if (!d || !d->stringlist) {
00128 return;
00129 }
00130
00131 bool found = false;
00132 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00133 string_entry* entry = *it;
00134 if (entry->keyStr == key) {
00135 d->stringlist->erase(it);
00136 delete entry;
00137 found = true;
00138 break;
00139 }
00140 }
00141 if (!found) {
00142 kWarning(7011) << "key not found:" << key;
00143 }
00144 }
00145
00146 int KSycocaDict::find_string(const QString &key ) const
00147 {
00148 Q_ASSERT(d);
00149
00150
00151 qint32 offset = d->offsetForKey(key);
00152
00153
00154 if (offset == 0)
00155 return 0;
00156
00157 if (offset > 0)
00158 return offset;
00159
00160
00161 offset = -offset;
00162
00163 d->stream->device()->seek(offset);
00164
00165
00166 while(true)
00167 {
00168 (*d->stream) >> offset;
00169 if (offset == 0) break;
00170 QString dupkey;
00171 (*d->stream) >> dupkey;
00172
00173 if (dupkey == key) return offset;
00174 }
00175
00176
00177 return 0;
00178 }
00179
00180
00181 QList<int> KSycocaDict::findMultiString(const QString &key ) const
00182 {
00183 qint32 offset = d->offsetForKey(key);
00184 QList<int> offsetList;
00185 if (offset == 0)
00186 return offsetList;
00187
00188 if (offset > 0) {
00189 offsetList.append(offset);
00190 return offsetList;
00191 }
00192
00193
00194 offset = -offset;
00195
00196 d->stream->device()->seek(offset);
00197
00198
00199 while(true)
00200 {
00201 (*d->stream) >> offset;
00202 if (offset == 0) break;
00203 QString dupkey;
00204 (*d->stream) >> dupkey;
00205
00206 if (dupkey == key)
00207 offsetList.append(offset);
00208 }
00209 return offsetList;
00210 }
00211
00212 uint KSycocaDict::count() const
00213 {
00214 if ( !d || !d->stringlist ) return 0;
00215
00216 return d->stringlist->count();
00217 }
00218
00219 void
00220 KSycocaDict::clear()
00221 {
00222 delete d;
00223 d = 0;
00224 }
00225
00226 uint KSycocaDict::Private::hashKey( const QString &key) const
00227 {
00228 int l = key.length();
00229 register uint h = 0;
00230
00231 for(int i = 0; i < hashList.count(); i++)
00232 {
00233 int pos = hashList[i];
00234 if (pos < 0)
00235 {
00236 pos = -pos-1;
00237 if (pos < l)
00238 h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff;
00239 }
00240 else
00241 {
00242 pos = pos-1;
00243 if (pos < l)
00244 h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00245 }
00246 }
00247 return h;
00248 }
00249
00250
00251
00252 static int
00253 calcDiversity(KSycocaDictStringList *stringlist, int pos, int sz)
00254 {
00255 if (pos == 0) return 0;
00256 QBitArray matrix(sz);
00257 uint usz = sz;
00258
00259 if (pos < 0)
00260 {
00261 pos = -pos-1;
00262 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00263 {
00264 string_entry* entry = *it;
00265 register int l = entry->length;
00266 if (pos < l && pos != 0)
00267 {
00268 register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff;
00269 matrix.setBit( hash % usz, true );
00270 }
00271 }
00272 }
00273 else
00274 {
00275 pos = pos-1;
00276 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00277 {
00278 string_entry* entry = *it;
00279 if (pos < entry->length)
00280 {
00281 register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00282 matrix.setBit( hash % usz, true );
00283 }
00284 }
00285 }
00286 int diversity = 0;
00287 for(int i=0;i< sz;i++)
00288 if (matrix.testBit(i)) diversity++;
00289
00290 return diversity;
00291 }
00292
00293
00294
00295 static void
00296 addDiversity(KSycocaDictStringList *stringlist, int pos)
00297 {
00298 if (pos == 0) return;
00299 if (pos < 0)
00300 {
00301 pos = -pos-1;
00302 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00303 {
00304 string_entry* entry = *it;
00305 register int l = entry->length;
00306 if (pos < l)
00307 entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
00308 }
00309 }
00310 else
00311 {
00312 pos = pos - 1;
00313 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00314 {
00315 string_entry* entry = *it;
00316 if (pos < entry->length)
00317 entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00318 }
00319 }
00320 }
00321
00322
00323 void
00324 KSycocaDict::save(QDataStream &str)
00325 {
00326 if (count() == 0)
00327 {
00328 d->hashTableSize = 0;
00329 d->hashList.clear();
00330 str << d->hashTableSize;
00331 str << d->hashList;
00332 return;
00333 }
00334
00335 d->offset = str.device()->pos();
00336
00337
00338
00339
00340
00341 int maxLength = 0;
00342
00343 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00344 {
00345 string_entry* entry = *it;
00346 entry->hash = 0;
00347 if (entry->length > maxLength)
00348 maxLength = entry->length;
00349 }
00350
00351
00352
00353
00354
00355
00356 register unsigned int sz = count()*4 + 1;
00357 while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
00358 sz+=2;
00359
00360 int maxDiv = 0;
00361 int maxPos = 0;
00362 int lastDiv = 0;
00363
00364 d->hashList.clear();
00365
00366
00367
00368 int *oldvec=new int[maxLength*2+1];
00369 for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0;
00370 int mindiv=0;
00371
00372 while(true)
00373 {
00374 int divsum=0,divnum=0;
00375
00376 maxDiv = 0;
00377 maxPos = 0;
00378 for(int pos=-maxLength; pos <= maxLength; pos++)
00379 {
00380
00381 if (oldvec[pos+maxLength]<mindiv)
00382 { oldvec[pos+maxLength]=0; continue; }
00383
00384 int diversity = calcDiversity(d->stringlist, pos, sz);
00385 if (diversity > maxDiv)
00386 {
00387 maxDiv = diversity;
00388 maxPos = pos;
00389 }
00390 oldvec[pos+maxLength]=diversity;
00391 divsum+=diversity; divnum++;
00392 }
00393
00394 if (divnum)
00395 mindiv=(3*divsum)/(4*divnum);
00396
00397 if (maxDiv <= lastDiv)
00398 break;
00399
00400 lastDiv = maxDiv;
00401 addDiversity(d->stringlist, maxPos);
00402 d->hashList.append(maxPos);
00403 }
00404
00405 delete [] oldvec;
00406
00407 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00408 (*it)->hash = d->hashKey((*it)->keyStr);
00409
00410
00411 d->hashTableSize = sz;
00412
00413 struct hashtable_entry {
00414 string_entry *entry;
00415 QList<string_entry*>* duplicates;
00416 int duplicate_offset;
00417 };
00418
00419 hashtable_entry *hashTable = new hashtable_entry[ sz ];
00420
00421
00422 for (unsigned int i=0; i < sz; i++)
00423 {
00424 hashTable[i].entry = 0;
00425 hashTable[i].duplicates = 0;
00426 }
00427
00428
00429 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00430 {
00431 string_entry* entry = *it;
00432
00433 int hash = entry->hash % sz;
00434 if (!hashTable[hash].entry)
00435 {
00436 hashTable[hash].entry = entry;
00437 }
00438 else
00439 {
00440 if (!hashTable[hash].duplicates)
00441 {
00442 hashTable[hash].duplicates = new QList<string_entry*>;
00443 hashTable[hash].duplicates->append(hashTable[hash].entry);
00444 hashTable[hash].duplicate_offset = 0;
00445 }
00446 hashTable[hash].duplicates->append(entry);
00447 }
00448 }
00449
00450 str << d->hashTableSize;
00451 str << d->hashList;
00452
00453 d->offset = str.device()->pos();
00454
00455
00456
00457
00458
00459 for(int pass = 1; pass <= 2; pass++)
00460 {
00461 str.device()->seek(d->offset);
00462
00463 for(uint i=0; i < d->hashTableSize; i++)
00464 {
00465 qint32 tmpid;
00466 if (!hashTable[i].entry)
00467 tmpid = (qint32) 0;
00468 else if (!hashTable[i].duplicates)
00469 tmpid = (qint32) hashTable[i].entry->payload->offset();
00470 else
00471 tmpid = (qint32) -hashTable[i].duplicate_offset;
00472 str << tmpid;
00473
00474 }
00475
00476
00477
00478 for(uint i=0; i < d->hashTableSize; i++)
00479 {
00480 const QList<string_entry*> *dups = hashTable[i].duplicates;
00481 if (dups)
00482 {
00483 hashTable[i].duplicate_offset = str.device()->pos();
00484
00485
00486
00487 for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
00488 {
00489 const qint32 offset = (*dup)->payload->offset();
00490 if (!offset) {
00491 const QString storageId = (*dup)->payload->storageId();
00492 kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data();
00493 if ((*dup)->payload->isType(KST_KService)) {
00494 KService::Ptr service = KService::Ptr::staticCast((*dup)->payload);
00495 kDebug() << service->storageId() << service->entryPath();
00496 }
00497
00498 Q_ASSERT_X( offset, "KSycocaDict::save",
00499 QByteArray("entry offset is 0, save() was not called on ")
00500 + (*dup)->payload->storageId().toLatin1()
00501 + " entryPath="
00502 + (*dup)->payload->entryPath().toLatin1()
00503 );
00504 }
00505 str << offset ;
00506 str << (*dup)->keyStr;
00507 }
00508 str << (qint32) 0;
00509 }
00510 }
00511
00512 }
00513
00514
00515 for(uint i=0; i < d->hashTableSize; i++)
00516 {
00517 delete hashTable[i].duplicates;
00518 }
00519 delete [] hashTable;
00520 }
00521
00522 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
00523 {
00524 if ( !stream || !offset )
00525 {
00526 kError() << "No ksycoca4 database available!" << endl;
00527 return 0;
00528 }
00529
00530 if (hashTableSize == 0)
00531 return 0;
00532
00533
00534 const uint hash = hashKey(key) % hashTableSize;
00535
00536
00537 const uint off = offset+sizeof(qint32)*hash;
00538
00539 stream->device()->seek( off );
00540
00541 qint32 retOffset;
00542 (*stream) >> retOffset;
00543 return retOffset;
00544 }