13 #include <sys/types.h>
21 extern "C" void traceback(const char *fmt,...)
23 FILE *fp = fopen("/tmp/x","a");
25 va_list ap; va_start(ap, fmt);
26 vfprintf(fp, fmt, ap);
28 int nptrs; void *buffer[100];
29 nptrs = backtrace(buffer, lengthof(buffer));
30 struct timeval now; gettimeofday(&now,0);
31 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
32 fprintf(fp,"*** stack %d\n",nptrs);
33 char **strings = backtrace_symbols(buffer, nptrs);
34 if( !strings ) return;
35 for( int i=0; i<nptrs; ++i ) fprintf(fp,"%s\n", strings[i]);
42 // use repeated string byte move for decompress operation.
43 // page table design should be changed to hierachical bit
44 // field, so that only the trace of page table blocks in
45 // the update path are written at commit time. Writing
46 // the entire page table on commit can be expensive.
48 // local memory allocator
49 void *Db::get_mem8_t(int id)
54 void *Db::new_mem8_t(int size, int &id)
56 return new uint8_t[size];
59 int Db::del_mem8_t(const void *vp, int id)
61 delete [] (uint8_t*)vp;
65 volatile int gdb_wait;
74 // shared memory allocator
78 void *vp = shmat(id, NULL, 0);
79 if( vp == (void*)-1 ) { perror("shmat"); wait_gdb(); vp = 0; }
84 new_shm8_t(int size, int &id)
86 id = shmget(IPC_PRIVATE, size, IPC_CREAT+0666);
87 if( id < 0 ) { perror("shmget"); return 0; }
88 uint8_t *addr = (uint8_t *)get_shm8_t(id);
89 if( addr ) shmctl(id, IPC_RMID, 0);
94 del_shm8_t(const void *vp, int id)
99 if( !shmctl(id, IPC_STAT, &ds) ) ret = ds.shm_nattch;
100 else { perror("shmctl"); wait_gdb(); ret = errIoStat; }
109 get_uint8_t(int id, int pg)
111 uint8_t *bp = (uint8_t *) get_mem(id);
116 new_uint8_t(int size, int &id, int pg)
118 void *vp = new_mem(size, id);
119 return (uint8_t *)vp;
123 del_uint8_t(const void *vp, int id, int pg)
125 return del_mem(vp, id);
134 if( err_callback ) err_callback(this, v);
138 //int Db::debug = DBBUG_ERR + DBBUG_FAIL;
139 int Db::debug = DBBUG_ERR;
143 void dmp(unsigned char *bp, int len)
145 unsigned char ch[16], lch[16];
146 int llen = lengthof(lch)+1;
148 unsigned char *adr = 0;
153 for( n=0; n<lengthof(ch) && --len>=0; ch[n++]=*bp++ );
154 if( (i=n) >= llen ) // check for a duplicate
155 while( --i>=0 && ch[i]==lch[i] );
156 if( i >= 0 || len < 0 ) { /* not a duplicate */
157 if( dups > 0 ) fprintf(stderr," ...%d\n",dups);
159 fprintf(stderr,"%p:",adr);
160 for( i=0; i<n; ++i ) fprintf(stderr," %02x",lch[i]=ch[i]);
161 for( i=n; i<lengthof(ch); ++i ) fprintf(stderr," ");
164 fprintf(stderr,"%c", ch[i]>=' ' && ch[i]<='~' ? ch[i] : '.');
165 fprintf(stderr,"\n");
178 "NoCmprFn", "NotFound", "Duplicate", "NoPage", "NoMemory",
179 "IoRead", "IoWrite", "IoSeek", "IoStat", "BadMagic", "Corrupt",
180 "Invalid", "Previous", "ObjEntity", "Limit", "User",
184 dmsg(int msk, const char *msg,...)
186 if( !(msk & debug) ) return;
187 #ifdef DEBUG_TIMESTAMPS
188 struct timeval now; gettimeofday(&now,0);
189 printf("@%ld.03%ld: ",now.tv_sec,now.tv_usec/1000);
195 FILE *fp = fopen("/tmp/x","a"); if( !fp ) return;
196 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
197 va_start(ap, msg); vfprintf(fp, msg, ap); va_end(ap);
202 _err_(int v,const char *fn,int ln)
205 dmsg(DBBUG_ERR,"%s:%d errored %d (%s)\n",fn,ln,v,errMsgs[-v]);
211 _fail_(int v,const char *fn,int ln)
213 dmsg(DBBUG_FAIL,"%s:%d failed %d (%s)\n",fn,ln,v,errMsgs[-v]);
218 Error(int v,const char *msg)
221 dmsg(DBBUG_ERR,"%s\n",msg);
228 printf("freeStoreIndex\n"); fdmp();
229 printf("addrStoreIndex\n"); admp();
230 printf("freeSpaceIndex\n"); edmp();
231 printf("addrSpaceIndex\n"); bdmp();
238 printf("dmp root_info->file_size %016lx\n",
239 root_info->file_size);
240 printf(" rootInfo root_info->transaction_id %d\n",
241 root_info->transaction_id);
242 printf(" root_info->root_info_addr/size %016lx/%08x\n",
243 root_info->root_info_addr,root_info->root_info_size);
244 printf(" root_info->last_info_addr/size %016lx/%08x\n",
245 root_info->last_info_addr,root_info->last_info_size);
246 printf(" root_info->indeciesUsed %d\n",
247 root_info->indeciesUsed);
248 printf(" alloc_cache: "); alloc_cache.dmp();
249 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
250 IndexBase *ib = indecies[idx];
252 printf(" idx %d. %-24s %s pop %5ld"
253 " root %-5d rhs %-5d ky/Dt %2d/%-2d ",
254 idx, &ib->st->name[0], ib->st->type==idxBin ? "bin" :
255 ib->st->type==idxStr ? "str" : "???", ib->st->count,
256 ib->st->rootPageId, ib->st->rightHandSide,
257 ib->st->keySz, ib->st->dataSz);
258 printf(" free %d/",ib->st->freeBlocks);
260 for( pageId id=ib->st->freeBlocks; id>=0; ++n ) {
261 pgRef loc; loc.id = id; loc.offset = 0;
262 keyBlock *kbb; addrRead_(loc,kbb);
263 if( (id=kbb->right_link()) < 0 ) break;
266 printf("%d pages\n",n);
269 printf(" Entities, count %ld\n", entityIdIndex->Count());
270 Entity e(this); EntityLoc &ent = e.ent; int eid;
271 if( !entityIdIndex->First(&eid,&ent.obj) ) do {
272 int nidxs = ent->nidxs;
273 printf(" id %d. %s maxId %d, recdSz %d, count %d, nidxs %d:",
274 eid, &ent->name[0], ent->maxId, ent->recdSz, ent->count, nidxs);
275 printf(" alloc_cache: "); ent->alloc_cache.dmp();
276 for( int i=0; i<nidxs; ++i ) {
277 int idx = ent->indexs[i];
278 printf(" %d(%s),", idx, idx < 0 ? "" :
279 !indecies[idx] ? "(err)" : &indecies[idx]->st->name[0]);
282 } while( !entityIdIndex->Next(&eid,&ent.obj) );
288 printf(" root_info->pageTableUsed %d\n",root_info->pageTableUsed);
289 for( int pid=0; pid<root_info->pageTableUsed; ++pid ) {
290 Page &pg = *get_page(pid);
291 if( !pg.addr && !pg->io_allocated && pg->io_addr==NIL &&
292 pg->trans_id < 0 && pg->shmid < 0 && !pg->flags &&
293 !pg->wr_allocated && pg->wr_io_addr==NIL ) continue;
294 printf(" pid %d. used %d, type %d, link %d, flags %04x addr %p\n",
295 pid, pg->used, pg->type, pg->link, pg->flags, pg.addr);
296 printf(" allocated %d io_alloc/io_addr %d/%016lx, wr_alloc/io_addr %d/%016lx\n",
297 pg->allocated, pg->io_allocated, pg->io_addr,
298 pg->wr_allocated, pg->wr_io_addr);
300 printf(" root_info->freePages %d",root_info->freePages);
302 for( pageId id=root_info->freePages; id>=0; ++n ) id = (*get_page(id))->link;
303 // printf(" %d\n",(id=(*get_page(id))->link));
304 printf(", pages = %d\n",n);
310 freeStoreRecord free;
311 if( !freeStoreIndex->First(&free,0) ) do {
312 printf("free=%04lx/%012lx\n", free.size,free.io_addr);
313 } while( !freeStoreIndex->Next(&free,0) );
319 addrStoreRecord addr;
320 if( !addrStoreIndex->First(&addr,0) ) do {
321 printf("addr=%012lx/%04lx\n", addr.io_addr,addr.size);
322 } while( !addrStoreIndex->Next(&addr,0) );
328 if( !indecies ) return; addrStoreRecord addr;
329 addrStoreRecord last; last.io_addr = 0; last.size = 0;
330 if( !addrStoreIndex->First(&addr,0) ) do {
331 if( last.io_addr > addr.io_addr ||
332 (last.io_addr == addr.io_addr && last.size >= addr.size ) ) {
333 printf("last=%012lx/%04lx, addr=%012lx/%04lx\n",
334 addr.io_addr, addr.size, last.io_addr, last.size);
338 } while( !addrStoreIndex->Next(&addr,0) );
344 if( !indecies ) return; freeStoreRecord free;
345 freeStoreRecord last; last.size = 0; last.io_addr = 0;
346 if( !freeStoreIndex->First(&free,0) ) do {
347 if( last.size > free.size ||
348 (last.size == free.size && last.io_addr >= free.io_addr ) ) {
349 printf("last=%04lx/%012lx, free=%04lx/%012lx\n",
350 free.size, free.io_addr, last.size, last.io_addr);
354 } while( !freeStoreIndex->Next(&free,0) );
360 freeSpaceRecord free;
361 if( !freeSpaceIndex->First(&free,0) ) do {
362 printf("free=%04lx %d/%d\n", free.size,free.id,free.offset);
363 } while( !freeSpaceIndex->Next(&free,0) );
369 addrSpaceRecord addr;
370 if( !addrSpaceIndex->First(&addr,0) ) do {
371 printf("addr=%d/%d %04lx\n", addr.id,addr.offset,addr.size);
372 } while( !addrSpaceIndex->Next(&addr,0) );
378 long store_allocated=0, store_used=0;
379 long loaded_allocated=0, loaded_used=0;
380 long written_allocated=0, written_used=0;
381 int pages_written=0, pages_loaded=0, pages_deleted=0;
382 for( int id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
383 Page &pg = *get_page(id);
384 store_allocated += pg->allocated;
385 store_used += pg->used;
388 loaded_allocated += pg->allocated;
389 loaded_used += pg->used;
391 if( pg->chk_flags(fl_wr) ) {
393 written_allocated += pg->allocated;
394 written_used += pg->used;
395 if( !pg.addr ) ++pages_deleted;
398 #define percent(a,b) (!(a) || !(b) ? 0.0 : (100.0*(a)) / (b))
399 long read_allocated = loaded_allocated - written_allocated;
400 long read_used = loaded_used - written_used;
401 int pages_read = pages_loaded - pages_written;
402 printf("\ncommit %d\n", root_info->transaction_id);
403 printf(" pages %8d/del %-4d alloc:%-12ld used:%-12ld %7.3f%%\n",
404 root_info->pageTableUsed, pages_deleted, store_allocated, store_used,
405 percent(store_used,store_allocated));
406 printf(" loaded %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
407 pages_loaded, percent(pages_loaded, root_info->pageTableUsed),
408 loaded_allocated, loaded_used, percent(loaded_used, loaded_allocated));
409 printf(" read %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
410 pages_read, percent(pages_read, root_info->pageTableUsed),
411 read_allocated, read_used, percent(read_used, read_allocated));
412 printf(" write %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
413 pages_written, percent(pages_written, root_info->pageTableUsed),
414 written_allocated, written_used, percent(written_used, written_allocated));
416 long store_avail=0, space_avail=0;
417 freeStoreRecord store;
418 if( !freeStoreIndex->First(&store,0) ) do {
419 store_avail += store.size;
420 } while( !freeStoreIndex->Next(&store,0) );
421 freeSpaceRecord space;
422 if( !freeSpaceIndex->First(&space,0) ) do {
423 space_avail += space.size;
424 } while( !freeSpaceIndex->Next(&space,0) );
425 printf(" file %-12ld", root_info->file_size);
426 printf(" store %12ld/%-7.3f%% space %12ld/%-7.3f%%\n",
427 store_avail, percent(store_avail, root_info->file_size),
428 space_avail, percent(space_avail, root_info->file_size));
441 return syscall(SYS_sched_yield);
447 return syscall(SYS_gettid);
454 while( (ret=zfutex(FUTEX_WAKE, nwakeups)) < 0 );
459 zwait(int val, timespec *ts)
461 return zfutex(FUTEX_WAIT, val, ts);
467 fprintf(stderr,"unlocking and not locked\n");
474 if( v || zxchg(1,loc) >= 0 ) do {
476 } while ( zxchg(1,loc) >= 0 );
481 zunlock(int nwakeups)
490 zdecr(loc); lk.lock(); lk.unlock(); zincr(loc);
496 if( lk.loc >= 0 ) zwake(1);
502 lk.lock(); timespec ts = { 1, 0 };
503 int v; while( (v=loc) >= 0 ) zwait(v, &ts);
516 if( !db_info ) return -1;
517 if( is_locked() > 0 && db_info->owner == my_pid ) return 0;
519 //traceback(" attach_wr %d/%d/%d\n",my_pid,db_info->owner,db_info->last_owner);
520 db_info->last_owner = db_info->owner;
521 db_info->owner = my_pid;
529 //traceback(" attach_rd %d/%d\n",my_pid,db_info->owner);
536 if( !db_info ) return;
544 //traceback(" detach_rw %d\n",my_pid);
547 // persistent pageTable element initial constructor
548 void Db::PageStorage::
564 // non-persistent pageTable element initial constructor
582 // deletes storage next start_transaction
586 st->used = 0; st->set_flags(fl_wr);
590 // locked pageTable access
594 read_locked by(db_info->pgTblLk);
595 return get_Page(pid);
599 * Db::alloc_pageTable -- alloc page table
602 * sz int input page table size
606 alloc_pageTable(int sz)
608 write_blocked by(db_info->pgTblLk);
609 int n = pageTableHunks(sz) * pageTableHunkSize;
610 Page **pt = new Page*[n];
611 if( !pt ) Err(errNoMemory);
612 int info_id, info_sz = n*sizeof(PageStorage);
613 PageStorage *new_page_info = (PageStorage *)new_uint8_t(info_sz, info_id);
614 if( !new_page_info ) { delete pt; Err(errNoMemory); }
617 for( ; i<root_info->pageTableUsed; ++i ) {
619 new_page_info[i] = *pt[i]->st;
620 pt[i]->st = &new_page_info[i];
623 for( ; i<n; ++i ) pt[i] = 0;
624 db_info->page_info_id = page_info_id = info_id;
625 del_uint8_t(page_info); page_info = new_page_info;
626 delete [] pageTable; pageTable = pt;
627 pageTableAllocated = n;
632 * Db::new_page -- access/construct new page
635 * returns page id on success (>=zero), error code otherwise(< 0)
641 locked by(db_info->pgAlLk);
642 pageId id = root_info->freePages;
644 if( root_info->pageTableUsed >= pageTableAllocated ) {
645 int sz = root_info->pageTableUsed + root_info->pageTableUsed/2 + 1;
646 if_err( alloc_pageTable(sz) );
648 id = root_info->pageTableUsed;
649 Page * pp = new Page(*new(&page_info[id]) PageStorage());
650 if( !pp ) Err(errNoMemory);
652 page_table_sz = ++root_info->pageTableUsed;
655 Page &pg = *get_page(id);
656 root_info->freePages = pg->link;
657 new(&pg) Page(*new(&page_info[id]) PageStorage());
665 Page *pp = get_Page(id);
672 * Db::free_page -- link to root_info->freePages
675 * pid pageId input page id
681 Page &pg = *get_Page(pid);
685 pg->clr_flags(fl_wr | fl_rd);
686 pg->set_flags(fl_free);
687 pageId id = root_info->freePages;
689 Page *lpp = 0; // use sorted order
690 while( id >= 0 && id < pid ) {
698 root_info->freePages = pid;
703 lower_page(pageId mid)
705 locked by(db_info->pgAlLk);
706 pageId id = root_info->freePages;
708 Page *pp = 0, *lpp = 0;
710 if( id < lid ) { lid = id; lpp = pp; }
715 Page &pg = *get_Page(lid);
717 (*lpp)->link = pg->link;
719 root_info->freePages = pg->link;
728 get_index(const char *nm, CmprFn cmpr)
730 int idx = root_info->indeciesUsed;
731 while( --idx >= 0 ) {
732 IndexBase *ib = indecies[idx];
734 if( !strncmp(&ib->st->name[0],nm,nmSz) ) break;
736 if( idx >= 0 && cmpr && indecies[idx]->st->type == idxBin ) {
737 IndexBinary *bidx = (IndexBinary *)indecies[idx];
738 bidx->compare = cmpr;
739 bidx->bst->cmprId = findCmprFn(cmpr);
747 if( r < 0 || r >= root_info->indeciesUsed ) Fail(errInvalid);
748 if( !indecies[r] ) Fail(errNotFound);
749 return indecies[r]->Count();
753 alloc_indecies(int n)
755 IndexBase **it = new IndexBase*[n];
756 if( !it ) Err(errNoMemory);
758 IndexInfo *info = (IndexInfo *)new_uint8_t(n*sizeof(*info), info_id);
759 if( !info ) { delete it; Err(errNoMemory); }
760 memcpy(info, index_info, indeciesAllocated*sizeof(*info));
762 for( ; i<root_info->indeciesUsed; ++i ) {
763 if( !(it[i] = indecies[i]) ) continue;
764 switch( it[i]->st->type ) {
766 IndexBinary *bidx = (IndexBinary *)it[i];
767 bidx->st = info[i].bin;
768 bidx->bst = info[i].bin;
771 IndexString *sidx = (IndexString *)it[i];
772 sidx->st = info[i].str;
773 sidx->sst = info[i].str;
780 for( ; i<n; ++i ) it[i] = 0;
781 db_info->index_info_id = index_info_id = info_id;
782 del_uint8_t(index_info); index_info = info;
783 delete [] indecies; indecies = it;
784 indeciesAllocated = n;
792 while( idx < root_info->indeciesUsed && indecies[idx] )
794 if( idx >= indeciesAllocated ) {
795 int n = indeciesAllocated + indexTableHunkSize;
796 if( alloc_indecies(n) ) Err(errNoMemory);
798 if( idx >= root_info->indeciesUsed ) {
799 if( idx >= max_index_type ) Err(errLimit);
800 root_info->indeciesUsed = idx+1;
801 indecies_sz = root_info->indeciesUsed;
810 delete indecies[idx];
812 for( idx=root_info->indeciesUsed; idx>0 && indecies[idx-1]==0; --idx );
813 indecies_sz = root_info->indeciesUsed = idx;
817 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexBinaryInfo *bi)
819 IndexBaseStorage *st = new(b) IndexBaseStorage();
820 IndexBinaryStorage *bst = new(bi) IndexBinaryStorage();
821 IndexBinary *bidx = new IndexBinary(this, st, bst);
822 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
823 if( bidx ) delete bidx;
831 new_binary_index(const char *nm, int ksz, int dsz, CmprFn cmpr)
833 if( get_index(nm) >= 0 ) Err(errDuplicate);
834 int idx = new_index();
835 if( idx < 0 ) Err(errNoMemory);
836 IndexBinary *bidx = new IndexBinary(this, idx, ksz, dsz, cmpr);
837 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
841 if( nm ) strncpy(&bidx->st->name[0],nm,sizeof(bidx->st->name));
842 indecies[idx] = bidx;
847 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexStringInfo *si)
849 IndexBaseStorage *st = new(b) IndexBaseStorage();
850 IndexStringStorage *sst = new(si) IndexStringStorage();
851 IndexString *sidx = new IndexString(this, st, sst);
852 if( !sidx ) Err(errNoMemory);
858 new_string_index(const char *nm, int dsz)
860 if( get_index(nm) >= 0 ) Err(errDuplicate);
861 int idx = new_index();
862 if( idx < 0 ) Err(errNoMemory);
863 IndexString *sidx = new IndexString(this, idx, dsz);
868 if( nm ) strncpy(&sidx->st->name[0],nm,sizeof(sidx->st->name));
869 indecies[idx] = sidx;
874 * Db::IndexBinary::keyMap - map function on index pages
877 * s pageId input current id
879 * returns 0 on success,
880 * errNotFound if index is empty
881 * error code otherwise
884 int Db::IndexBinary::
885 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
888 keyBlock *sbb; Page *spp; char *sn;
889 if_err( db->indexRead(s,0,sbb,spp,sn) );
890 if( (r=sbb->right_link()) >= 0 ) {
891 int lkdSz = kdSz + sizeof(pageId);
892 int n = spp->iused() / lkdSz;
893 for( int i=0; i<n; ++i ) {
894 pageId l = readPageId(sn);
895 if_ret( keyMap(l,fn) );
898 if_ret( keyMap(r,fn) );
900 if_err( (this->*fn)(s) );
905 * Db::IndexBinary::setLastKey -- conditionally update lastAccess
908 * s pageId input page of last insert
909 * u pageId input rightLink of page
910 * k int input insert offset in block
913 void Db::IndexBinary::
914 setLastKey(pageId s, pageId u, int k)
916 if( keyInterior < 0 ) {
924 lastAccess.offset = sizeof(keyBlock) + k;
929 * Db::IndexBinary::keyLocate -- generalized database key retreival
932 * s pageId input current id
933 * cmpr CmprFn input key compare function
935 * returns 0 on success
936 * errNotFound if not found,
937 * error code otherwise
940 int Db::IndexBinary::
941 keyLocate(pgRef &last, pageId s, int op,void *ky,CmprFn cmpr)
943 int ret = errNotFound;
944 keyBlock *sbb; Page *spp; char *sn;
945 if_err( db->indexRead(s,0,sbb,spp,sn) );
946 int len = spp->iused();
948 if( sbb->right_link() >= 0 )
949 lkdSz += sizeof(pageId);
953 // binary search block for key
954 while( (r - l) > 1 ) {
957 if( sbb->right_link() >= 0 )
960 int n = cmpr((char*)ky,kn);
962 if( op >= keyLE && op <= keyGE ) {
964 last.offset = sizeof(keyBlock) + k;
967 if( op == keyLE || op == keyGT ) n = 1;
969 if( n > 0 ) l = i; else r = i;
973 int k = op < keyEQ ? l*lkdSz : (r < len ? r : -1);
974 if( op != keyEQ && k >= 0 ) {
975 if( sbb->right_link() >= 0 )
978 last.offset = sizeof(keyBlock) + k;
982 if( (s = sbb->right_link()) >= 0 ) {
983 if( r < len ) s = readPageId(sn+r);
985 ret = keyLocate(last,s,op,ky,cmpr);
986 if( k == 0 ) ret = 0;
993 * Db::IndexBinary::Locate -- interface to generalized database key retreival
996 * op int input key relation in keyLT..keyGT
997 * key void * input retreival key image
998 * cmpr CmprFn input retreival key image
999 * rtnKey void * output resulting key value
1000 * rtnData void * output resulting recd value
1002 * returns 0 on success
1003 * errNotFound on not found,
1004 * error code otherwise
1007 int Db::IndexBinary::
1008 refLocate(pgRef &loc, int op, void *key, CmprFn cmpr)
1010 if( st->rootPageId == NIL )
1012 if( op == keyEQ ) op = keyLE;
1013 if( !cmpr ) cmpr = compare;
1014 if_fail( keyLocate(loc,st->rootPageId,op, key,cmpr) );
1020 int Db::IndexBinary::
1021 Locate(int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
1024 if_fail( refLocate(last, op, key, cmpr) );
1026 if_err( db->addrRead_(last,kp) );
1028 memmove(rtnKey,kp,st->keySz);
1030 memmove(rtnData,kp+st->keySz,st->dataSz);
1035 * Db::IndexBinary::chkFind - check lastAccess block for key
1038 * key char * input key image
1039 * last pgRef * input last position loc
1041 * returns 0 if not found
1042 * error code otherwise
1045 int Db::IndexBinary::
1046 chkFind(pgRef &loc, char *key)
1049 if( s < 0 ) return 0; // must be valid block
1050 keyBlock *sbb; Page *spp; char *sn;
1051 if_err( db->indexRead(s,0,sbb,spp,sn) );
1052 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1053 int slen = spp->iused();
1054 int k = loc.offset - sizeof(keyBlock);
1055 if( k < 0 || k > slen ) return 0; // must be inside/end of block
1056 int cmpr0 = k>=slen ? -1 : compare(key,sn+k); // compare last access
1057 if( cmpr0 ) { // not found here
1059 if( cmpr0 < 0 ? (k-=kdSz) < 0 : (k+=kdSz) >= slen ) return 0;
1060 int cmpr1 = compare(key,sn+k);
1061 if( !cmpr1 ) goto xit; // found here
1063 if( l >= slen ) return 0; // no curr
1064 if( cmpr0 < 0 ) Fail(errNotFound); // btwn prev/curr
1066 cmpr1 = compare(key,sn+k);
1067 if( !cmpr1 ) goto xit; // found here
1068 if( cmpr1 > 0 ) return 0; // key after last in block
1071 if( cmpr0 > 0 ) Fail(errNotFound); // btwn curr/next
1073 cmpr1 = compare(key,sn);
1074 if( !cmpr1 ) goto xit; // found here
1075 if( cmpr1 < 0 ) return 0; // key before first in block
1077 return errNotFound; // key in block range, but not found
1080 loc.offset = sizeof(keyBlock) + k;
1085 * Db::IndexBinary::keyFind -- database unique key retreival
1088 * s pageId input current id
1090 * returns 0 on success
1091 * errNotFound on not found,
1092 * error code otherwise
1095 int Db::IndexBinary::
1096 keyFind(pgRef &loc, void *ky, pageId s)
1099 keyBlock *sbb; Page *spp; char *sn;
1100 if_err( db->indexRead(s,0,sbb,spp,sn) );
1102 if( sbb->right_link() >= 0 )
1103 lkdSz += sizeof(pageId);
1104 int len = spp->iused();
1108 // binary search block for key
1109 while( (r - l) > 1 ) {
1112 if( sbb->right_link() >= 0 )
1113 k += sizeof(pageId);
1115 int n = compare((char*)ky,kn);
1118 loc.offset = sizeof(keyBlock) + k;
1121 if( n > 0 ) l = i; else r = i;
1124 if( (s = sbb->right_link()) < 0 ) break;
1125 if( (r*=lkdSz) < len ) s = readPageId(sn+r);
1132 * Db::IndexBinary::Find -- interface to database unique key retreival
1135 * key void * input retreival key image
1136 * rtnData void * output resulting recd value
1138 * returns 0 on success
1139 * errNotFound on not found,
1140 * error code otherwise
1143 int Db::IndexBinary::
1144 refFind(pgRef &loc, void *ky)
1146 if( st->rootPageId == NIL )
1148 pageId r = st->rootPageId;
1152 if( CHK cFindCount > 2 ) ret = 1; }
1153 if( ret ) { // try the easy way
1154 ret = chkFind(loc, (char *)ky);
1155 if( ret == errNotFound ) {
1156 r = loc.id; ret = 0;
1160 if( !ret ) { // try the hard way
1161 if_fail( keyFind(loc,ky,r) );
1168 int Db::IndexBinary::
1169 Find(void *ky, void *rtnData)
1172 if_fail( refFind(last, ky) );
1174 if_err( db->addrRead_(last,kp) );
1176 memmove(rtnData,kp+st->keySz,st->dataSz);
1181 int Db::IndexBinary::
1182 chkInsert(void *key, void *data)
1185 char *ky = (char *)key;
1186 pageId s = lastInsert.id;
1187 if( s < 0 || cInsCount < 2 ) return 0; /* >= 2 in a row */
1188 keyBlock *sbb; Page *spp; char *sn;
1189 if_err( db->indexRead(s,1,sbb,spp,sn) );
1190 if( sbb->right_link() >= 0 ) return 0; /* must be exterior */
1191 int slen = spp->iused();
1192 int k = lastInsert.offset - sizeof(keyBlock);
1194 char *kn = kp + kdSz;
1195 char *rp = sn + slen;
1196 int n = compare(ky,kp);
1197 if( n == 0 ) Fail(errDuplicate);
1198 if( n > 0 ) { /* after last one */
1199 if( kn >= rp ) { /* no next one */
1200 if( st->rightHandSide == s )
1205 if( n == 0 ) Fail(errDuplicate);
1207 rhs = 1; /* before next one */
1210 if( !rhs ) return 0; /* not a hit */
1211 if( spp->iallocated()-slen < kdSz ) return 0; /* doesnt fit */
1212 if( rp > kn ) memmove(kn+kdSz,kn,rp-kn); /* move data up */
1213 memmove(kn,key,st->keySz);
1214 memmove(kn+st->keySz,data,st->dataSz); /* add new key/data */
1215 spp->iused(slen + kdSz);
1218 lastAccess.offset = sizeof(keyBlock) + kn-sn;
1223 * Db::IndexBinary::keyInsert - add unique key to database
1226 * s pageId input current id
1228 * iky char * input assembled insertion key
1230 * returns 0 on success,
1231 * errDuplicate if key already exists in database
1232 * error code otherwise
1235 int Db::IndexBinary::
1236 keyInsert(pageId s, pageId &t)
1239 /* mark no continued insertion, and check end of search */
1240 /* if not end, read current pageId, search for key */
1241 /* return error if found. */
1242 keyBlock *sbb; Page *spp; char *sn;
1243 if_err( db->indexRead(s,1,sbb,spp,sn) );
1245 pageId u = sbb->right_link();
1247 lkdSz += sizeof(pageId);
1248 int slen = spp->iused();
1249 int keyCount = slen / lkdSz;
1250 int maxKeysInBlock = spp->iallocated() / lkdSz;
1254 /* binary search block for key */
1255 while( (r - l) > 1 ) {
1258 if( sbb->right_link() >= 0 )
1259 k += sizeof(pageId);
1261 int n = compare(this->akey,kn);
1264 lastAccess.offset = sizeof(keyBlock) + k;
1267 if( n > 0 ) l = i; else r = i;
1270 /* not found in this pageId, continue search at lower levels. */
1271 /* process insertion if lower level splits ( t>=0 ). */
1272 int i = sizeof(pageId);
1276 u = readPageId(sn+k);
1277 if_ret( keyInsert(u, t) );
1278 if( t < 0 ) return 0;
1280 writePageId(sn+k,t);
1286 /* if current pageId is not full, insert into this pageId. */
1287 if( keyCount < maxKeysInBlock ) {
1290 memmove(kn+lkdSz,kn,slen-k);
1291 spp->iused(slen + lkdSz);
1292 memmove(kn,&iky[i],lkdSz);
1293 setLastKey(s,u,k); /* save insert loc */
1297 /* current pageId is full, split pageId and promote new parent key */
1298 keyBlock *tbb; Page *tpp; char *tn;
1299 if_err( blockAllocate(t,tbb,tpp,tn) );
1300 /* split point is middle, or end if inserting consecutive on rhs */
1301 int promoted = maxKeysInBlock/2;
1302 if( cInsCount > 2 && s == st->rightHandSide )
1303 promoted = maxKeysInBlock-1;
1305 int tlen = slen - promoted;
1306 if( st->rightHandSide == s ) st->rightHandSide = t;
1308 if( k <= promoted ) { /* at or left of promoted key */
1309 if( k != promoted ) { /* not promoting current key */
1310 kn = sn+promoted-lkdSz;
1311 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1313 memmove(kn+lkdSz,kn,promoted-lkdSz-k);
1314 memmove(kn,&iky[i],lkdSz); /* insert new key */
1315 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1316 setLastKey(s,u,k); /* save insert loc */
1318 memmove(tn,sn+promoted,tlen);
1320 else { /* key is > center */
1322 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1323 int j = k - promoted - lkdSz;
1324 memmove(tn,kn+=lkdSz,j);
1325 memmove(kn=tn+j,&iky[i],lkdSz); /* insert new key */
1326 setLastKey(t,u,j); /* save insert loc */
1327 memmove(kn+=lkdSz,sn+k,slen-k);
1328 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1331 /* set newly split blocks half full, and set new links. */
1335 tbb->right_link(sbb->right_link());
1336 sbb->right_link(readPageId(&iky[0]));
1337 writePageId(&iky[0],s);
1338 /* if root is splitting, create new left */
1339 if( s == st->rootPageId ) {
1340 keyBlock *ubb; Page *upp; char *un;
1341 if_err( blockAllocate(u,ubb,upp,un) );
1342 memmove(un,sn,slen);
1344 ubb->right_link(sbb->right_link());
1345 writePageId(&iky[0],u);
1346 k = st->keySz + st->dataSz + sizeof(pageId);
1347 memmove(sn,&iky[0],k);
1350 setLastKey(s,t,sizeof(pageId));
1352 /* t >= 0 indicates continued insertion, return success */
1356 void Db::IndexBinary::
1357 makeKey(char *cp,char *key,int l,char *recd,int n)
1359 writePageId(cp,NIL);
1360 memmove(cp+=sizeof(pageId),key,l);
1361 if( recd ) memmove(cp+=l,recd,n);
1365 * Db::Insert - interface to database unique key insertion
1368 * key void * input key image
1369 * data void * input recd value
1371 * returns 0 on success,
1372 * errDuplicate if key already exists in database
1373 * error code otherwise
1376 int Db::IndexBinary::
1377 Insert(void *key, void *data)
1379 if_err( MakeRoot() );
1382 if( CHK cInsCount > 2 ) { // try the easy way
1383 ret = chkInsert(key,data);
1386 if( !ret ) { // try the hard way
1387 makeKey(&iky[0],this->akey=(char *)key,st->keySz,(char *)data,st->dataSz);
1388 pageId t = NIL; lastAccess.id = NIL;
1389 if_ret( keyInsert(st->rootPageId, t) );
1391 if( keyInterior > 0 ) lastAccess.id = NIL;
1398 * Db::keyBlockUnderflow -- balences/merges blocks after a deletion
1401 * t int output continuation flag
1402 * lbb keyBlock * input left sibling keyBlock
1403 * p pageId input parent keyBlock id
1404 * pbb keyBlock * input parent keyBlock
1405 * pi int input deletion key offset parent
1408 * returns 0 on success, errorcode otherwise
1411 int Db::IndexBinary::
1412 keyBlockUnderflow(int &t,keyBlock *lbb,pageId p,keyBlock *pbb,int pi)
1418 int psiz = kdSz + sizeof(pageId);
1420 * if deletion was at right end of block
1421 * back up one key otherwise use this key
1423 char *pn = (char *)(pbb+1);
1424 Page *ppp = db->get_page(p);
1425 int plen = ppp->iused();
1426 if( pi >= plen ) { /* read lt side */
1427 r = pbb->right_link();
1430 l = readPageId(pn+pi);
1431 if_err( db->indexRead(l,1,lbb) );
1433 else { /* read rt side */
1434 l = readPageId(pn+pi);
1436 r = i >= plen ? pbb->right_link() : readPageId(pn+i);
1437 if_err( db->indexRead(r,1,rbb) );
1440 int lsz = lbb->right_link() >= 0 ? sizeof(pageId) : 0;
1441 int lkdSz = kdSz + lsz;
1442 char *ln = (char *)(lbb+1);
1443 Page *lpp = db->get_page(l);
1444 int llen = lpp->iused();
1445 char *rn = (char *)(rbb+1);
1446 Page *rpp = db->get_page(r);
1447 int rlen = rpp->iused();
1450 * if both lt&rt blocks+parent key all fit in one block, merge them
1452 int n = llen + rlen;
1453 if( n <= rpp->iallocated()-lkdSz ) { /* merge */
1454 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1455 memmove(ln+llen,pn+pi+sizeof(pageId)-lsz,lkdSz);
1457 memmove(ln+llen,rn,rlen); /* move right to left */
1459 lbb->right_link(rbb->right_link());
1461 memmove(pn+pi,pn+i,plen-i); /* remove parent key */
1463 if( plen == 0 && p == st->rootPageId ) { /* totally mashed root */
1464 if( st->rightHandSide == r ) st->rightHandSide = p;
1465 if_err( blockFree(r) );
1466 memmove(pn,ln,llen); /* copy to parent page */
1467 pbb->right_link(lbb->right_link());
1469 if_err( blockFree(l) );
1472 if( r != pbb->right_link() ) /* not rightLink */
1473 writePageId(pn+pi,l); /* must be next key */
1476 if( st->rightHandSide == r ) st->rightHandSide = l;
1477 if_err( blockFree(r) );
1481 lastAccess.id = NIL;
1482 return 0; /* continue underflow */
1486 if( tsiz < lkdSz ) tsiz = lkdSz; /* slosh threshold */
1487 if( plen > psiz && plen > tsiz ) t = 0; /* discontinue underflow */
1488 if( (i=(n/=2)-llen) >= tsiz || !llen ) { /* slosh left */
1489 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1490 k = pi+sizeof(pageId);
1491 memmove(kn=ln+llen,pn+k-lsz,lkdSz); /* move parent to left */
1493 memmove(kn+=lkdSz,rn,i*=lkdSz); /* migrate keys left */
1494 llen += i+lkdSz; kn = rn+i;
1495 if( lbb->right_link() >=0 )
1496 lbb->right_link(readPageId(kn));
1497 writePageId(pn+pi,l); /* change parent key */
1498 memmove(pn+k,kn+lsz,kdSz);
1499 kn += lkdSz; i += lkdSz;
1500 memmove(rn,kn,rlen-=i); /* migrate keys left */
1502 else if( (i=n-rlen) >= tsiz || !rlen ) { /* slosh right */
1503 i /= lkdSz; i *= lkdSz;
1504 memmove(kn=rn+i,rn,rlen);
1505 rlen += i; /* migrate keys right */
1506 writePageId(pn+pi,lbb->right_link());
1507 k = pi+sizeof(pageId);
1508 memmove(kn-=lkdSz,pn+k-lsz,lkdSz); /* move parent key */
1509 i -= lkdSz; n = llen-i;
1510 memmove(rn,kn=ln+n,i);
1511 kn -= lkdSz; /* migrate keys right */
1512 if( lbb->right_link() >=0 )
1513 lbb->right_link(readPageId(kn));
1514 memmove(pn+k,kn+lsz,kdSz); /* change parent key */
1515 writePageId(pn+pi,l);
1520 lastAccess.id = NIL;
1521 lpp->iused(llen); /* update lengths */
1528 * Db::IndexBinary::keyDelete - remove unique key from database
1531 * t int input/output check underflow flag
1532 * kp void * input key image to be removed
1533 * s pageId input current id
1534 * p pageId input parent id
1535 * pbb keyBlock * input parent
1536 * pi int input deletion key offset parent
1538 * returns 0 on success,
1539 * errNotFound if key does not exists in database
1540 * error code otherwise
1543 int Db::IndexBinary::
1544 keyDelete(int &t,void *kp,pageId s,pageId p,keyBlock *pbb,int pi)
1547 keyBlock *sbb; Page *spp; char *sn;
1548 if_err( db->indexRead(s,1,sbb,spp,sn) );
1549 int slen = spp->iused();
1550 t = 1; /* check underflow */
1551 if( idf == 0 ) { /* not interior deletion */
1553 if( sbb->right_link() >= 0 )
1554 lkdSz += sizeof(pageId);
1556 int r = slen / lkdSz;
1557 while( (r - l) > 1 ) { /* binary search for key */
1560 if( sbb->right_link() >= 0 )
1561 k += sizeof(pageId);
1563 int n = compare(this->akey,kn);
1565 if( sbb->right_link() < 0 ) { /* terminal key */
1567 memmove(kn,kn+lkdSz,slen-k);
1568 spp->iused(slen); /* delete key */
1569 lastAccess.id = s; /* lastAccess = key */
1570 lastAccess.offset = sizeof(keyBlock) + k;
1572 else { /* interior key */
1573 k -= sizeof(pageId);
1576 idf = 1; /* interior delete */
1577 if_ret( keyDelete(t,(void *)kn,u,s,sbb,k) );
1581 if( n > 0 ) l = i; else r = i;
1583 if( (u=sbb->right_link()) < 0 ) { /* could not find it */
1587 if( (r *= lkdSz) < slen )
1588 u = readPageId(sn+r);
1589 if_ret( keyDelete(t,kp,u,s,sbb,r) ); /* continue search */
1591 else { /* interior deletion */
1592 if( (u=sbb->right_link()) < 0 ) { /* equivalent exterior key */
1593 int i = slen - kdSz;
1594 char *kn = sn + i; /* translate to interior */
1595 memmove((char *)kp+sizeof(pageId),kn,kdSz);
1596 spp->iused(i); /* remove key */
1598 else { /* continue decending */
1599 if_ret( keyDelete(t,kp,u,s,sbb,slen) );
1603 if( t != 0 ) { /* underflow possible */
1605 t = 0; /* no parent, root */
1607 if_err( keyBlockUnderflow(t,sbb,p,pbb,pi) );
1612 int Db::IndexBinary::
1613 chkDelete(pgRef &loc, void *kp)
1617 ret = chkFind(loc, (char*)kp); // try last delete
1618 if( !ret && lastFind.id != loc.id ) {
1620 ret = chkFind(loc, (char*)kp); // try last find
1622 if( !ret ) return 0;
1623 if( ret == errNotFound ) ret = 0;
1626 keyBlock *sbb; Page *spp; char *sn;
1627 if_err( db->indexRead(s,1,sbb,spp,sn) );
1628 int dlen = spp->iused() - kdSz;
1629 if( dlen < kdSz ) return 0; // at least 1 key must remain
1630 if( !ret ) return errNotFound;
1631 spp->iused(dlen); // delete
1632 int k = loc.offset - sizeof(keyBlock);
1635 memmove(kp,kp+kdSz,dlen-k);
1641 * Db::IndexBinary::Delete - interface to remove unique key
1644 * key void * input key image to be removed
1646 * returns 0 on success,
1647 * errNotFound key does not exists in database
1648 * error code otherwise
1651 int Db::IndexBinary::
1654 if( st->rootPageId == NIL ) Fail(errNotFound);
1655 this->akey = (char *)key;
1657 pageId r = st->rootPageId;
1659 if( CHK cDelCount > 2 ) { // try the easy way
1661 ret = chkDelete(loc, key);
1662 if( ret == errNotFound ) { // in exterior block
1663 r = loc.id; ret = 0;
1666 if( !ret ) { // try the hard way
1667 makeKey(&iky[0],this->akey=(char *)key,st->keySz,0,0);
1668 lastAccess.id = NIL; int t = 1;
1669 (void)r; // use full search, r works but is not traditional
1670 if_fail( keyDelete(t,(void *)&iky[0],/*r*/st->rootPageId,0,0,0) );
1671 if_err( UnmakeRoot() );
1679 * Db::IndexBinary::keyFirst - access leftmost key in tree
1682 * s pageId input current id
1684 * returns 0 on success,
1685 * errNotFound if index is empty
1686 * error code otherwise
1689 int Db::IndexBinary::
1690 keyFirst(pgRef &loc, pageId s)
1694 if_err( db->indexRead(s,0,sbb) );
1695 if( sbb->right_link() < 0 ) break;
1696 char *sn = (char *)(sbb+1);
1701 loc.offset = sizeof(keyBlock);
1706 * Db::IndexBinary::First -- interface to database access leftmost key in tree
1709 * rtnKey void * output resulting key value
1710 * rtnData void * output resulting recd value
1712 * returns 0 on success
1713 * errNotFound on not found,
1714 * error code otherwise
1717 int Db::IndexBinary::
1718 First(void *rtnKey,void *rtnData)
1720 if( st->rootPageId == NIL ) Fail(errNotFound);
1722 if_fail( keyFirst(first, st->rootPageId) );
1724 if_err( db->addrRead_(first,kp) );
1726 memmove(rtnKey,kp,st->keySz);
1728 memmove(rtnData,kp+st->keySz,st->dataSz);
1730 lastNext = lastAccess = first; }
1735 * Db::IndexBinary::keyLast - access rightmost key in tree
1738 * s pageId input current id
1740 * returns 0 on success,
1741 * errNotFound if index is empty
1742 * error code otherwise
1745 int Db::IndexBinary::
1746 keyLast(pgRef &loc, pageId s)
1750 if_err( db->indexRead(s,0,sbb) );
1751 pageId u = sbb->right_link();
1756 Page *spp = db->get_page(s);
1758 int k = spp->iused() - kdSz;
1759 loc.offset = sizeof(keyBlock) + k;
1764 * Db::IndexBinary::Last -- interface to database access rightmost key in tree
1767 * rtnKey void * output resulting key value
1768 * rtnData void * output resulting recd value
1770 * returns 0 on success
1771 * errNotFound on not found,
1772 * error code otherwise
1775 int Db::IndexBinary::
1776 Last(void *rtnKey,void *rtnData)
1778 if( st->rootPageId == NIL ) Fail(errNotFound);
1780 if_fail( keyLast(last, st->rootPageId) );
1782 if_err( db->addrRead_(last,kp) );
1784 memmove(rtnKey,kp,st->keySz);
1786 memmove(rtnData,kp+st->keySz,st->dataSz);
1788 lastNext = lastAccess = last; }
1793 * Db::IndexBinary::Modify -- interface to database record modify
1796 * keyDef index input key definition block
1797 * key void * input retreival key image
1798 * recd void * input new recd value
1800 * returns 0 on success
1801 * errNotFound on not found,
1802 * error code otherwise
1805 int Db::IndexBinary::
1806 Modify(void *key,void *recd)
1808 if_fail( Find(key,0) );
1810 if_err( db->addrWrite_(lastAccess,bp) );
1811 memmove(bp+st->keySz,recd,st->dataSz);
1815 int Db::IndexBinary::
1816 chkNext(pgRef &loc, char *&kp)
1819 if( s < 0 ) return 0; // must be valid
1820 keyBlock *sbb; Page *spp; char *sn;
1821 if_err( db->indexRead(s,0,sbb,spp,sn) );
1822 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1823 int k = loc.offset - sizeof(keyBlock);
1824 if( k < 0 || k >= spp->iused() ) return 0; // curr must be in block
1825 if( (k+=kdSz) >= spp->iused() ) return 0; // next must be in block
1827 loc.offset = sizeof(keyBlock) + k;
1831 int Db::IndexBinary::
1832 keyNext(pgRef &loc, char *kp)
1834 if_fail( keyLocate(loc,st->rootPageId, keyGT,kp,compare) );
1839 * Db::IndexBinary::Next -- interface to sequential database key retreival
1842 * loc pgRef & input last known retreival key
1843 * rtnKey void * output resulting key value
1844 * rtnData void * output resulting recd value
1846 * returns 0 on success
1847 * error code otherwise
1850 int Db::IndexBinary::
1851 Next(pgRef &loc,void *rtnKey,void *rtnData)
1853 if( st->rootPageId == NIL ) Fail(errNotFound);
1855 int ret = CHK chkNext(loc,kp); // try the easy way
1859 if( !st->keySz && rtnKey ) // rtnKey is rKey class
1860 ky = (char *)rtnKey;
1862 if_err( db->addrRead_(loc,ky) );
1863 if_ret( keyNext(loc, ky) ); // try the hard way
1865 if_err( db->addrRead_(loc,kp) );
1867 memmove(rtnKey,kp,st->keySz);
1869 memmove(rtnData,kp+st->keySz,st->dataSz);
1875 void Db::IndexBinary::
1883 IndexBinary(Db *zdb, int zidx, int ksz, int dsz, CmprFn cmpr)
1884 : IndexBase(zdb, idxBin, zidx, ksz, dsz)
1886 compare = !cmpr && !ksz ? cmprKey : cmpr;
1887 bst = new(st+1) IndexBinaryStorage(zdb->findCmprFn(compare));
1888 iky = new char[st->blockSize/2+1];
1889 tky = new char[st->blockSize/2+1];
1894 IndexBinary(Db *zdb, IndexBaseStorage *b, IndexBinaryStorage *d)
1895 : IndexBase(zdb, *b)
1897 bst = new(d) IndexBinaryStorage();
1898 compare = !bst->cmprId && !b->keySz ? cmprKey : cmprFns[bst->cmprId];
1899 iky = new char[st->blockSize/2+1];
1900 tky = new char[st->blockSize/2+1];
1905 IndexBinary(IndexBase *ib, IndexBaseStorage *b, IndexBinaryStorage *d)
1906 : IndexBase(ib->db, *b)
1908 bst = new(d) IndexBinaryStorage();
1909 compare = !bst->cmprId && !ib->st->keySz ? cmprKey : cmprFns[bst->cmprId];
1921 int Db::IndexString::
1922 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
1925 keyBlock *sbb; Page *spp; char *sn;
1926 if_err( db->indexRead(s,0,sbb,spp,sn) );
1927 unsigned char *lp = (unsigned char *)sn;
1928 unsigned char *rp = lp + spp->iused();
1929 if( (r=sbb->right_link()) >= 0 ) {
1931 pageId l = getPageId(lp);
1932 lp += st->dataSz; ++lp;
1933 while( *lp++ != 0 );
1934 if_ret( keyMap(l,fn) );
1936 if_ret( keyMap(r,fn) );
1938 if_err( (this->*fn)(s) );
1943 * Db::IndexString::kpress -- compresses kp with prefix
1946 * kp unsigned char * input key string to compress
1947 * lp unsigned char * input left prefix
1948 * cp unsigned char * output compressed string
1950 * returns length of compressed string cp
1951 * safe to kpress buffer in place over cp or lp
1954 int Db::IndexString::
1955 kpress(unsigned char *kp, unsigned char *lp, unsigned char *cp)
1959 while( *kp == *lp ) {
1963 ch = *kp++; *cp++ = n;
1971 * divide tbfr length n into sectors l and s with rlink r
1972 * if i >= 0 then the insert point is i
1974 * promoted key in tky, return left sector
1977 int Db::IndexString::
1978 split(int n, int i, pageId s, pageId &l, pageId r)
1980 unsigned char lky[keysz];
1981 unsigned char *bp = &tbfr[0];
1982 unsigned char *lp = bp;
1983 unsigned char *rp = bp + n;
1984 unsigned char *kp = lp;
1985 unsigned char *tp = 0;
1986 pageId t = NIL, u = NIL;
1987 keyBlock *lbb; Page *lpp; char *ln;
1989 db->indexRead(l,1,lbb,lpp,ln) :
1990 blockAllocate(l,lbb,lpp,ln) );
1992 int blen = lpp->iallocated()-1 - st->dataSz;
1993 if( r >= 0 ) blen -= sizeof(pageId);
1994 if( n > blen ) n = blen;
1995 /* split point is middle, or end if inserting consecutive on rhs */
1997 if( i >= 0 && cInsCount > 2 && s == st->rightHandSide )
1999 unsigned char *mp = lp + promoted;
2000 // get promoted key in lky
2003 // expand key to lky
2004 if( r >= 0 ) u = getPageId(lp);
2005 tp = lp; lp += st->dataSz;
2006 for( i=*lp++; (lky[i++]=*lp++) != 0; );
2007 // stop copy if past mid pt and
2008 // remaining bytes + expanded key bytes fit in block
2010 if( kp != &tbfr[0] && lp >= mp && rlen+i <= blen )
2012 // copy promoted data/key
2013 unsigned char *tkp = tky;
2014 umemmove(tkp, tp, st->dataSz);
2016 // save end of left block, move to next key
2017 kp = bp; bp = lp; t = u;
2021 // must be at least 3 keys in tbfr (left,parent,right)
2022 if( !n || bp >= rp ) Err(errCorrupt);
2023 memmove(ln,&tbfr[0],n);
2026 // add first key in next block, order of moves important
2027 // memmove may be move up since key may expand past split
2029 if( r >= 0 ) putPageId(mp,u);
2030 umemmove(mp, tp, st->dataSz); // data recd
2031 *mp++ = 0; // first prefix is zero length
2032 memmove(mp+i, lp, rlen); // rest of block
2033 memmove(mp, &lky[0], i); // expanded key
2035 int slen = mp - &tbfr[0];
2037 * if root is splitting, allocate new right sibling
2038 * and set root to contain only new parent key.
2040 keyBlock *sbb; Page *spp; char *sn;
2041 if_err( db->indexRead(s,1,sbb,spp,sn) );
2042 if( s == st->rootPageId ) {
2043 keyBlock *rbb; Page *rpp; char *rn;
2044 if_err( blockAllocate(u,rbb,rpp,rn) );
2047 memmove(rn,&tbfr[0],slen);
2049 if( st->rightHandSide == s ) st->rightHandSide = r;
2050 mp = (unsigned char *)sn;
2052 umemmove(mp, kp=&tky[0], st->dataSz);
2054 for( *mp++=0; (*mp++=*kp++)!=0; );
2055 slen = mp - (unsigned char *)sn;
2058 memmove(sn, &tbfr[0], slen);
2064 int Db::IndexString::
2065 chkInsert(void *key, void *data)
2067 unsigned char *ky = (unsigned char *)key;
2068 pageId s = lastInsert.id;
2069 if( s < 0 ) return 0; // must be valid
2070 int n = ustrcmp(&lastInsKey[0],ky);
2071 if( !n ) Fail(errDuplicate);
2072 keyBlock *sbb; Page *spp; char *sn;
2073 if_err( db->indexRead(s,0,sbb,spp,sn) );
2074 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2075 int slen = spp->iused();
2076 int k = lastInsert.offset - sizeof(keyBlock);
2077 if( k < 0 || k >= slen ) return 0; // must be inside block
2078 unsigned char *bp = (unsigned char *)sn;
2079 unsigned char *lp = bp;
2080 unsigned char *rp = bp + k; // beginning to current
2081 unsigned char *tp = bp + slen;
2082 unsigned char nky[keysz]; nky[0] = 0;
2083 if( n < 0 ) { // past here
2084 ustrcpy(&nky[0],&lastInsKey[0]);
2087 else { // before here
2088 n = ustrcmp(bp+st->dataSz+1,ky);
2089 if( !n ) Fail(errDuplicate);
2090 if( n > 0 ) return 0; // before first
2091 unsigned char rb; rp += st->dataSz; // move past curr data
2092 for( int i=*rp++; (rb=*rp++) == lastInsKey[i] && rb != 0; ++i );
2093 if( rb ) return 0; // must match curr
2095 unsigned char lky[keysz]; lky[0] = 0;
2096 unsigned char *kp; n = 0;
2097 while( (kp=lp) < rp ) {
2098 lp += st->dataSz; // scan next
2099 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2100 n = ustrcmp(ky,&nky[0]);
2101 if( !n ) Fail(errDuplicate);
2102 if( n < 0 ) break; // btwn last,next
2103 ustrcpy(&lky[0],&nky[0]);
2105 if( lp >= tp && s != st->rightHandSide ) return 0;// not found, not rhs
2106 // recompress and compute storage demand
2107 int llen = kp - (unsigned char *)sn; // left
2108 int lsz = kpress(ky, &lky[0], &lky[0]); // lky = kpress new key
2109 int mlen = st->dataSz + lsz;
2110 int klen = mlen; // new key demand
2112 if( kp < rp ) { // if next key exists
2113 nsz = kpress(&nky[0], ky, &nky[0]);
2114 mlen += st->dataSz + nsz; // new+next key demand
2116 int rlen = tp - lp; // right
2117 int blen = llen + mlen + rlen; // left + demand + right
2119 if( blen > spp->iallocated() ) return 0; // if insertion wont fit
2121 spg->set_flags(fl_wr);
2122 if( kp < rp ) { // insert not at end of block
2123 memmove(kp+mlen, lp, rlen); // move right up
2124 memmove(kp+klen, kp, st->dataSz); // move next data up
2125 memmove(kp+klen+st->dataSz,&nky[0],nsz); // kpress next key
2127 k = kp - (unsigned char *)sn;
2128 lastAccess.id = lastInsert.id;
2129 lastAccess.offset = sizeof(keyBlock) + k;
2130 ustrcpy(&lastAccKey[0],ky);
2131 umemmove(kp,(unsigned char *)data,st->dataSz); // insert new key
2132 umemmove(kp,&lky[0],lsz);
2138 * insert - add node to compressed b-tree.
2139 * entry - tky - data/key to add
2140 * s - root of tree/subtree
2141 * t - NIL/discontinue or tky->left_link
2143 int Db::IndexString::
2144 keyInsert(pageId &t, pageId s)
2146 unsigned char lky[keysz]; // last key
2147 unsigned char nky[keysz]; // next key
2148 keyBlock *sbb; Page *spp; char *sn;
2149 if_err( db->indexRead(s,1,sbb,spp,sn) );
2150 t = NIL; // set discontinue insertion
2152 pageId r = sbb->right_link();
2154 int n = spp->iused();
2155 unsigned char *lp = (unsigned char *)sn; // rest of block
2156 unsigned char *rp = lp + n; // end of block
2157 unsigned char *kp = 0; // insertion point
2158 unsigned char *tkp = &tky[st->dataSz]; // tky = data/key
2160 while( (kp=lp) < rp ) { // search
2161 if( r >= 0 ) l = getPageId(lp);
2163 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2164 n = ustrcmp(&tkp[0], &nky[0]);
2166 if( n == 0 ) Fail(errDuplicate);
2169 // if non-terminal block, continue search at lower levels
2170 // if lower level splits( t>=0 ), process insertion
2172 if_ret( keyInsert(t, kp>=rp ? r : l) );
2173 if( t < 0 ) return 0; // discontinue
2176 // recompress and compute storage demand
2177 int llen = kp - (unsigned char *)sn; // left
2178 int lsz = kpress(tkp, &lky[0], &lky[0]); // lky = kpress new key
2179 int dsz = st->dataSz;
2180 if( r >= 0 ) dsz += sizeof(pageId); // link+data size
2181 int mlen = dsz + lsz;
2182 int klen = mlen; // new key demand
2184 if( kp < rp ) { // if next key exists
2185 nsz = kpress(&nky[0], &tkp[0], &nky[0]);
2186 mlen += dsz + nsz; // new+next key demand
2188 int rlen = rp - lp; // right
2189 int blen = llen + mlen + rlen; // left + demand + right
2191 if( blen <= spp->iallocated() ) { // if insertion will fit
2192 if( kp < rp ) { // insert not at end of block
2193 memmove(kp+mlen, lp, rlen); // move right up
2194 memmove(kp+klen, kp, dsz); // move next link/data up
2195 memmove(kp+klen+dsz,&nky[0],nsz); // kpress next key
2197 if( r >= 0 ) putPageId(kp, t);
2198 if( lastAccess.id < 0 ) { // update lastAccess
2200 int k = kp - (unsigned char *)sn;
2201 lastAccess.offset = sizeof(keyBlock) + k;
2202 ustrcpy(&lastAccKey[0],&tkp[0]);
2204 umemmove(kp,&tky[0],st->dataSz); // insert new key
2205 memmove(kp,&lky[0],lsz);
2210 unsigned char *bp = &tbfr[0]; // overflowed, insert to tbfr
2211 umemmove(bp,(unsigned char *)sn,llen);
2212 if( r >= 0 ) putPageId(bp, t);
2213 umemmove(bp,&tky[0],st->dataSz); // insert new key
2214 umemmove(bp,&lky[0],lsz);
2215 if( kp < rp ) { // insert not at end of block
2216 umemmove(bp,kp,dsz);
2217 umemmove(bp,&nky[0],nsz); // kpress next key
2218 umemmove(bp,lp,rlen); // copy rest of block
2221 if_err( split(blen,llen,s,t,r) ); // split block, and continue
2226 int Db::IndexString::
2227 Insert(void *key,void *data)
2229 if_err( MakeRoot() );
2231 if( CHK cInsCount > 2 ) { // try the easy way
2232 ret = chkInsert(key,data);
2235 if( !ret ) { // try the hard way
2236 unsigned char *tp = &tky[0];
2237 umemmove(tp,(unsigned char *)data,st->dataSz);
2238 ustrcpy(tp,(unsigned char*)key);
2239 pageId t = NIL; lastAccess.id = NIL;
2240 if_ret( keyInsert(t, st->rootPageId) );
2242 ustrcpy(&lastInsKey[0],&lastAccKey[0]);
2249 int Db::IndexString::
2253 if_err( db->indexRead(s,0,sbb) );
2254 char *sn = (char *)(sbb+1);
2256 while( sbb->right_link() >= 0 ) {
2258 if_err( db->indexRead(s,0,sbb) );
2259 sn = (char *)(sbb+1);
2263 lastAccess.offset = sizeof(keyBlock);
2264 unsigned char *kp = (unsigned char *)sn;
2265 ustrcpy(&lastAccKey[0],kp+st->dataSz+1);
2269 int Db::IndexString::
2270 First(void *rtnKey,void *rtnData)
2272 if( st->rootPageId == NIL ) Fail(errNotFound);
2273 if_ret( keyFirst(st->rootPageId) );
2275 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2278 if_err( db->addrRead_(lastAccess,kp) );
2279 memmove(rtnData,kp,st->dataSz);
2281 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2282 lastNext = lastAccess;
2286 int Db::IndexString::
2290 if_err( db->indexRead(s,0,sbb) );
2293 while( (u=sbb->right_link()) >= 0 ) {
2294 if_err( db->indexRead(s=u,0,sbb) );
2297 unsigned char lky[keysz];
2298 Page *spp = db->get_page(s);
2299 char *sn = (char *)(sbb+1);
2300 unsigned char *lp = (unsigned char *)sn;
2301 unsigned char *rp = lp + spp->iused();
2302 unsigned char *kp = 0;
2306 kp = lp; lp += st->dataSz;
2307 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2310 if( !kp ) Fail(errNotFound);
2311 int k = kp - (unsigned char *)sn;
2313 lastAccess.offset = sizeof(keyBlock) + k;
2314 ustrcpy(&lastAccKey[0],&lky[0]);
2318 int Db::IndexString::
2319 Last(void *rtnKey,void *rtnData)
2321 if( st->rootPageId == NIL ) Fail(errNotFound);
2322 if_ret( keyLast(st->rootPageId) );
2324 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2327 if_err( db->addrRead_(lastAccess,kp) );
2328 memmove(rtnData,kp,st->dataSz);
2330 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2331 lastNext = lastAccess;
2335 int Db::IndexString::
2336 chkFind(pgRef &loc, char *key, unsigned char *lkey, unsigned char *lkp)
2339 if( s < 0 ) return 0; // must be valid block
2340 keyBlock *sbb; Page *spp; char *sn;
2341 if_err( db->indexRead(s,0,sbb,spp,sn) );
2342 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2343 int slen = spp->iused();
2344 int k = loc.offset - sizeof(keyBlock);
2345 if( k < 0 || k >= slen ) return 0; // must be inside/end of block
2346 unsigned char *ky = (unsigned char *)key;
2347 unsigned char *bp = (unsigned char *)sn;
2348 unsigned char *kp = bp + k;
2349 unsigned char *rp = bp + slen;
2350 unsigned char *lp = kp; kp += st->dataSz;
2351 unsigned char *ip = &lkey[*kp++];
2352 while( kp<rp && *kp!=0 && *ip==*kp ) { ++ip; ++kp; }
2353 if( *ip || *kp++ ) return 0; // must match curr
2354 int n = ustrcmp(&lkey[0], ky);
2355 if( !n && !lkp ) goto xit; // found here, and no last key
2356 unsigned char lky[keysz];
2358 if( n > 0 ) { // before here
2359 rp = lp; lp = kp = bp;
2360 ip = lp + st->dataSz;
2361 if( *ip++ ) Err(errCorrupt);
2362 if( (n=ustrcmp(ip, ky)) > 0 ) return 0; // before first
2363 if( !n ) { lky[0] = 0; goto xit; } // found here, first
2365 ustrcpy(&lky[0], ip);
2367 lp = kp; kp += st->dataSz;
2368 unsigned char nky[keysz];
2369 ustrcpy(&nky[0], &lky[0]);
2370 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2371 n = ustrcmp(ky, &nky[0]);
2372 if( !n ) goto xit; // found here
2373 if( n < 0 ) Fail(errNotFound); // btwn prev,next
2374 ustrcpy(&lky[0], &nky[0]);
2376 return 0; // not in block
2378 if( lkp ) ustrcpy(lkp, &lky[0]);
2380 loc.offset = sizeof(keyBlock) + k;
2384 int Db::IndexString::
2385 keyFind(pgRef &loc,unsigned char *ky)
2387 unsigned char nky[keysz];
2389 for( pageId s=st->rootPageId; s>=0; ) {
2390 keyBlock *sbb; Page *spp; char *sn;
2391 if_err( db->indexRead(s,0,sbb,spp,sn) );
2392 unsigned char *lp = (unsigned char *)sn;
2393 unsigned char *rp = lp + spp->iused();
2395 pageId r = sbb->right_link();
2397 if( r >= 0 ) l = getPageId(lp);
2398 unsigned char *kp = lp;
2400 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2401 int n = ustrcmp(ky, &nky[0]);
2404 int k = kp - (unsigned char *)sn;
2405 loc.offset = sizeof(keyBlock) + k;
2408 if( n < 0 ) { r = l; break; }
2416 int Db::IndexString::
2417 refFind(pgRef &loc, void *key)
2419 if( st->rootPageId == NIL ) Fail(errNotFound);
2423 if( CHK cFindCount > 2 ) ret = 1; }
2424 if( ret ) { // try the easy way
2425 ret = chkFind(loc,(char *)key,&lastFndKey[0]);
2428 if( !ret ) { // try the hard way
2429 if_fail( keyFind(loc, (unsigned char *)key) );
2433 ustrcpy(&lastAccKey[0],(unsigned char *)key);
2434 ustrcpy(&lastFndKey[0],&lastAccKey[0]);
2435 ustrcpy(&lastNxtKey[0],&lastAccKey[0]); }
2439 int Db::IndexString::
2440 Find(void *key, void *rtnData)
2443 if_fail( refFind(last, key) );
2445 if( !db->addrRead_(last,kp) ) {
2447 memmove(rtnData,kp,st->dataSz);
2454 * remap sectors on underflow
2455 * s - parent sector, t - pageId if overflowed
2456 * k - parent key offset
2457 * updates tky with parent data/key
2460 int Db::IndexString::
2461 keyUnderflow(pageId s, pageId &t, int k)
2463 unsigned char lky[keysz], nky[keysz];
2464 keyBlock *sbb; Page *spp; char *sn;
2465 if_err( db->indexRead(s,1,sbb,spp,sn) );
2466 unsigned char *lp = (unsigned char *)sn;
2467 unsigned char *rp = lp + spp->iused();
2468 unsigned char *kp = lp + k;
2469 unsigned char *mp = &tbfr[0];
2470 // mark continue underlow
2472 pageId r = sbb->right_link();
2473 lky[0] = nky[0] = 0;
2474 // if underflow, copy to parent key offset
2476 putPageId(mp,getPageId(lp));
2477 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2478 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2480 // read l, key, r -- remove key from parent block
2481 unsigned char *tp = &tky[0];
2482 pageId l = getPageId(lp);
2483 umemmove(tp,lp,st->dataSz); lp += st->dataSz;
2484 ustrcpy(tp,&lky[0]);
2485 for( int i=*lp++; (tp[i]=*lp++) != 0; ++i );
2487 putPageId(mp, r=getPageId(lp));
2488 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2489 ustrcpy(&nky[0],tp);
2490 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2491 mp += kpress(&nky[0],&lky[0],mp);
2492 umemmove(mp,lp,rp-lp);
2494 int n = mp-&tbfr[0];
2495 memmove(sn,&tbfr[0],n);
2498 keyBlock *lbb; Page *lpp; char *ln;
2499 if_err( db->indexRead(l,1,lbb,lpp,ln) );
2500 lp = (unsigned char *)ln;
2501 rp = lp + lpp->iused();
2502 pageId m = lbb->right_link();
2503 mp = &tbfr[0]; nky[0] = 0;
2505 if( m >= 0 ) putPageId(mp,getPageId(lp));
2506 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2507 for( int i=*mp++=*lp++; (nky[i]=*mp++=*lp++) != 0; ++i );
2509 // parent key to tbfr
2510 if( m >= 0 ) putPageId(mp,m);
2511 umemmove(mp,&tky[0],st->dataSz);
2512 mp += kpress(tp,&nky[0],mp);
2514 keyBlock *rbb; Page *rpp; char *rn;
2515 if_err( db->indexRead(r,1,rbb,rpp,rn) );
2516 lp = (unsigned char *)rn;
2517 rp = lp + rpp->iused();
2518 m = rbb->right_link();
2520 if( m >= 0 ) putPageId(mp,getPageId(lp));
2521 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2522 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2523 mp += kpress(&nky[0],tp,mp);
2524 umemmove(mp,lp,rp-lp);
2527 if( n > rpp->iallocated() ) {
2528 // tbfr too big for r
2529 if_err( split(n, -1, r, l, m) );
2530 t = l; // start overflow
2533 if( s == st->rootPageId && !spp->iused() ) {
2534 // if root, delete r
2535 memmove(sn,&tbfr[0],n);
2538 if_err( blockFree(r) );
2539 st->rightHandSide = s;
2542 // update r with tbfr
2543 memmove(rn,&tbfr[0],n);
2547 if_err( blockFree(l) );
2553 * remap sectors on overflow
2554 * s - parent sector, k - parent key offset, o - insertion offset
2555 * t parent link on input,
2556 * t == DDONE on output, end overflow
2557 * tky with parent data/key
2559 int Db::IndexString::
2560 keyOverflow(pageId s, pageId &t, int k, int o)
2562 unsigned char lky[keysz], nky[keysz];
2563 keyBlock *sbb; Page *spp; char *sn;
2564 if_err( db->indexRead(s,1,sbb,spp,sn) );
2565 unsigned char *lp = (unsigned char *)sn;
2566 unsigned char *rp = lp + spp->iused();
2567 unsigned char *kp = lp + k;
2568 unsigned char *mp = &tbfr[0];
2569 pageId r = sbb->right_link();
2570 lky[0] = nky[0] = 0;
2571 // copy parent up to parent key to tbfr
2573 putPageId(mp,getPageId(lp));
2574 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2575 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2577 // if at insertion point, add new key
2580 unsigned char *tp = &tky[0];
2581 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2582 mp += kpress(kp=tp, &lky[0], mp);
2586 // copy parent key, if insertion - add tky, copy rest
2588 ustrcpy(&nky[0],kp);
2589 putPageId(mp,getPageId(lp));
2590 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2591 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2592 mp += kpress(&lky[0],&nky[0],mp);
2595 unsigned char *tp = &tky[0];
2596 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2597 mp += kpress(tp, &lky[0], mp);
2598 ustrcpy(&lky[0],tp);
2601 putPageId(mp,getPageId(lp));
2602 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2603 ustrcpy(&nky[0],&lky[0]);
2604 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2605 mp += kpress(&lky[0],&nky[0],mp);
2607 umemmove(mp,lp,rp-lp);
2609 int n = mp - &tbfr[0];
2610 // if overflow, split sector and promote new parent key
2611 if( n > spp->iallocated() ) {
2612 t = NIL; lastAccess.id = NIL;
2613 if_ret( split(n, -1, s, t, r) );
2619 memmove(sn,&tbfr[0],n);
2624 int Db::IndexString::
2625 keyRemap(pageId s, pageId &t, int k, int o)
2628 if_err( keyUnderflow(s,t,k) );
2632 if_err( keyOverflow(s,t,k,o) );
2637 * delete or replace key
2638 * s - starting sector, nky - replacement key if != 0
2639 * t - returned state -2/done,-1/underflow,pageid/overflow
2642 int Db::IndexString::
2643 keyDelete(pageId s, pageId &t)
2645 unsigned char lky[keysz], nky[keysz];
2646 unsigned char *ky = &akey[0];
2648 keyBlock *sbb; Page *spp; char *sn;
2649 if_err( db->indexRead(s,1,sbb,spp,sn) );
2650 unsigned char *bp = (unsigned char *)sn;
2651 unsigned char *lp = bp;
2652 unsigned char *rp = lp + spp->iused();
2653 unsigned char *kp = lp;
2654 unsigned char *mp = 0;
2657 pageId r = sbb->right_link();
2658 lky[0] = nky[0] = 0;
2659 // mark delete done and search
2661 while( (kp=lp) < rp ) {
2663 if( r >= 0 ) l = getPageId(lp);
2665 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2666 if( idf <= 0 && (n=ustrcmp(ky,&nky[0])) <= 0)
2668 ustrcpy(&lky[0],&nky[0]);
2672 if( !idf ) Fail(errNotFound);
2673 // found greatest node less than ky, delete and return with underflow
2674 // deleted key must be on rt side of block to be next to interior key
2675 unsigned char *dp = &dky[0];
2676 umemmove(dp,mp,st->dataSz);
2677 ustrcpy(dp,&nky[0]);
2679 t = NIL; // start underflow
2682 // not found in block, try next level
2683 int status = keyDelete(kp<rp ? l : r,t);
2686 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2689 else if( r < 0 || idf < 0 ) { // found here
2692 int dsz = st->dataSz;
2693 if( r >= 0 ) dsz += sizeof(pageId);
2694 unsigned char *tp = &lky[0];
2696 if( idf < 0 ) { // translating data/key
2697 lsz = kpress(&dky[st->dataSz],tp,tp); // kpress dky to lky
2699 tp = &dky[st->dataSz];
2702 unsigned char *np = lp;
2705 lp += dsz; // get next key
2706 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2707 if( r < 0 ) ustrcpy(&lastAccKey[0],&nky[0]);// new curr key
2708 nsz = kpress(&nky[0],tp,&nky[0]);
2712 int slen = llen + mlen + rlen;
2713 unsigned char *sp = &tbfr[0];
2715 if( (llen > 0 || rlen > 0) && // at least 3 data/keys
2716 slen <= spp->iallocated() ) { // and fits in block
2717 sp = bp; mp = kp; // edit in place
2720 umemmove(mp,bp,llen); // use tbfr, copy left
2721 if( np < rp ) { // next exists
2723 unsigned char *ip = mp;
2724 if( idf < 0 ) ip += dsz + lsz;
2725 if( sp == bp && tp > lp ) {
2726 memmove(tp,lp,rlen); // up copy/move right
2727 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2728 memmove(ip,&nky[0],nsz);
2731 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2732 memmove(ip,&nky[0],nsz);
2734 memmove(tp,lp,rlen); // down copy/move right
2737 if( idf < 0 ) { // move exterior key
2738 if(r >= 0) putPageId(mp,getPageId(kp));
2739 umemmove(mp,&dky[0],st->dataSz); // add dky data/key
2740 umemmove(mp,&lky[0],lsz);
2742 if( sp == bp ) { // in place
2746 lastAccess.id = s; // position to curr
2747 lastAccess.offset = sizeof(keyBlock) + llen;
2748 ustrcpy(&lastAccKey[0],ky);
2753 if( slen > spp->iallocated() ) {
2754 if_ret( split(slen, -1, s, t, r) ); // continue with overflow
2757 memmove(sn,&tbfr[0],slen); // continue with underflow
2763 // deleting nonterminal key, translate to greatest node less than ky
2765 int status = keyDelete(l,t);
2768 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2774 int Db::IndexString::
2777 if( st->rootPageId == NIL ) Fail(errNotFound);
2778 unsigned char lky[keysz]; lky[0] = 0;
2779 pgRef *last = &lastDelete;
2780 unsigned char *lkey = &lastDelKey[0];
2782 if( CHK cDelCount > 2 ) { // try the easy way
2783 if( lastFind.id >= 0 ) { // chk find/delete
2784 if( !ustrcmp((unsigned char *)key,&lastFndKey[0]) ) {
2785 last = &lastFind; lkey = &lastFndKey[0];
2788 ret = chkFind(*last,(char *)key,lkey,&lky[0]);
2792 pageId s = lastAccess.id;
2793 keyBlock *sbb; Page *spp; char *sn;
2794 if_err( db->indexRead(s,1,sbb,spp,sn) );
2795 unsigned char *bp = (unsigned char *)sn;
2796 int slen = spp->iused();
2797 int llen = lastAccess.offset - sizeof(keyBlock);
2798 unsigned char *kp = bp + llen; // curr data/key
2799 unsigned char *lp = kp;
2800 unsigned char *rp = bp + slen;
2802 lp += st->dataSz; ++lp; while( *lp++ ); // skip curr
2806 unsigned char *np = lp;
2807 unsigned char nky[keysz]; nky[0] = 0;
2809 lp += st->dataSz; // get next key
2810 ustrcpy(&nky[0],(unsigned char *)key);
2811 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2812 nsz = kpress(&nky[0],&lky[0],&lky[0]);
2813 mlen += st->dataSz + nsz;
2816 slen = llen + mlen + rlen;
2817 if( (llen > 0 || rlen > 0 ) && // at least 3 data/keys
2818 slen <= spp->iallocated() ) { // and fits in block
2819 if( np < rp ) { // right exists
2820 unsigned char *tp = kp + mlen;
2821 umemmove(kp,np,st->dataSz); // next data/key
2822 memmove(kp,&lky[0],nsz);
2823 if( tp != lp && rlen > 0 )
2824 memmove(tp,lp,rlen); // move right down
2827 ustrcpy(&lastAccKey[0],&nky[0]); // new curr key
2832 if( !ret ) { // try the hard way
2833 ustrcpy(&this->akey[0],(unsigned char*)key);
2834 lastAccess.id = NIL;
2835 pageId t = NIL; idf = 0;
2836 if_ret( keyDelete(st->rootPageId,t) );
2839 if_ret( keyDelete(st->rootPageId,t) );
2841 if_err( UnmakeRoot() );
2843 ustrcpy(&lastDelKey[0],&lastAccKey[0]);
2849 int Db::IndexString::
2850 keyLocate(pgRef &last,pageId s, int &t, int op,
2851 unsigned char *ky, CmprFn cmpr, unsigned char *rky)
2853 unsigned char lky[keysz], nky[keysz];
2854 keyBlock *sbb; Page *spp; char *sn;
2855 if_err( db->indexRead(s,0,sbb,spp,sn) );
2857 pageId r = sbb->right_link();
2858 unsigned char *lp = (unsigned char *)sn;
2859 unsigned char *rp = lp + spp->iused();
2860 unsigned char *kp = 0, *mp = 0;
2861 lky[0] = nky[0] = 0;
2864 while( (kp=lp) < rp ) {
2865 if( r >= 0 ) l = getPageId(lp);
2866 kp = lp; lp += st->dataSz;
2867 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2868 int n = cmpr((char *)ky,(char *)&nky[0]);
2869 if( op <= keyEQ ? n <= 0 : n < 0 ) break;
2870 ustrcpy(&lky[0],&nky[0]);
2875 int status = keyLocate(last, (kp<rp ? l : r), t, op, ky, cmpr, rky);
2876 if( !t && status == errNotFound ) status = 0;
2881 if( op == keyLT || op == keyGE ) {
2882 if( !mp ) Fail(errNotFound);
2884 ustrcpy(rky,&lky[0]);
2887 if( kp >= rp ) Fail(errNotFound);
2888 ustrcpy(rky,&nky[0]);
2891 int k = kp - (unsigned char *)sn;
2892 last.offset = sizeof(keyBlock) + k;
2899 int Db::IndexString::
2900 refLocate(pgRef &loc, int op,void *key,CmprFn cmpr, unsigned char *rkey)
2902 if( st->rootPageId == NIL ) Fail(errNotFound);
2903 if( op == keyEQ ) op = keyLE;
2904 if( !cmpr ) cmpr = cmprStr;
2906 if_fail( keyLocate(loc,st->rootPageId,t,op,(unsigned char*)key,cmpr, rkey) );
2909 ustrcpy(&lastAccKey[0], rkey);
2910 ustrcpy(&lastNxtKey[0],&lastAccKey[0]); }
2914 int Db::IndexString::
2915 Locate(int op,void *key,CmprFn cmpr,void *rtnKey,void *rtnData)
2917 pgRef last; uint8_t rkey[keysz];
2918 if_fail( refLocate(last, op, key, cmpr, rkey) );
2920 ustrcpy((unsigned char *)rtnKey, rkey);
2923 if_err( db->addrRead_(last,kp) );
2924 memmove(rtnData,kp,st->dataSz);
2930 int Db::IndexString::
2931 Modify(void *key,void *recd)
2933 if_fail( Find(key,0) );
2935 if_err( db->addrWrite_(lastAccess,bp) );
2936 memmove(bp,recd,st->dataSz);
2940 int Db::IndexString::
2941 keyNext(pgRef &loc, unsigned char *rky)
2943 unsigned char lky[keysz]; lky[0] = 0;
2945 keyBlock *sbb; Page *spp; char *sn;
2946 if_err( db->indexRead(s,0,sbb,spp,sn) );
2947 unsigned char *bp = (unsigned char *)sn;
2948 int k = loc.offset - sizeof(keyBlock);
2949 unsigned char *lp = bp;
2950 unsigned char *kp = bp + k;
2951 unsigned char *rp = bp + spp->iused();
2952 if( kp >= rp ) Err(errInvalid);
2953 pageId r = sbb->right_link();
2954 if( &loc != &lastNext ) { // find last key
2955 while( lp <= kp ) { // until past last
2956 if( r >= 0 ) lp += sizeof(pageId);
2957 bp = lp; lp += st->dataSz;
2958 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2962 ustrcpy(&lky[0],&lastNxtKey[0]);
2963 lp = kp; lp += st->dataSz; // verify lastNext
2964 unsigned char *ip = &lky[*lp++];
2965 while( lp<rp && *lp!=0 && *ip==*lp ) { ++ip; ++lp; }
2966 if( *ip || *lp++ ) Fail(errInvalid); // bad lastNxtKey
2968 if( r < 0 && lp < rp ) { // exterior and more data
2970 bp = lp; lp += st->dataSz;
2971 for( int i=*lp++; lp<rp && (rky[i]=*lp) != 0; ++i,++lp );
2972 if( *lp ) Err(errCorrupt);
2973 loc.offset = (bp-(unsigned char *)sn) + sizeof(keyBlock);
2977 if_fail( keyLocate(loc,st->rootPageId,t,keyGT,lky,cmprStr, rky) );
2981 int Db::IndexString::
2982 Next(pgRef &loc,void *rtnKey,void *rtnData)
2984 if( st->rootPageId == NIL ) Fail(errNotFound);
2985 unsigned char rky[keysz];
2986 if_ret( keyNext(loc, rky) );
2988 ustrcpy(&lastAccKey[0], rky);
2990 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2993 if_err( db->addrRead_(loc,kp) );
2994 memmove(rtnData,kp,st->dataSz);
2996 if( &loc == &lastNext )
2997 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2998 lastAccess = lastNext = loc; }
3002 void Db::IndexString::
3008 IndexString(Db *zdb, int zidx, int dsz)
3009 : IndexBase(zdb, idxStr, zidx, 0, dsz)
3011 sst = new(st+1) IndexStringStorage();
3012 dky = new unsigned char[st->dataSz+keysz+1];
3013 tky = new unsigned char[st->dataSz+keysz+1];
3014 tbfr = new unsigned char[3*st->blockSize];
3019 IndexString(Db *zdb, IndexBaseStorage *b, IndexStringStorage *d)
3020 : IndexBase(zdb, *b)
3023 dky = new unsigned char[st->dataSz+keysz+1];
3024 tky = new unsigned char[st->dataSz+keysz+1];
3025 tbfr = new unsigned char[3*st->blockSize];
3030 IndexString(IndexBase *ib, IndexBaseStorage *b, IndexStringStorage *d)
3031 : IndexBase(ib->db, *b)
3033 sst = new(d) IndexStringStorage();
3048 * Db::IndexBase::Clear - clear index
3052 * returns 0 on success,
3053 * error code otherwise
3059 while( st->freeBlocks >= 0 ) {
3060 keyBlock *kbb; pgRef loc;
3061 pageId id = st->freeBlocks;
3062 loc.id = id; loc.offset = 0;
3063 if_err( db->addrWrite_(loc,kbb) );
3064 st->freeBlocks = kbb->right_link();
3067 if( st->rootPageId >= 0 ) {
3068 if_err( keyMap(st->rootPageId, &Db::IndexBase::blockRelease) );
3069 st->rootPageId = st->rightHandSide = NIL;
3078 if( st->rootPageId == NIL ) {
3079 pageId u; keyBlock *ubb; Page *upp; char *un;
3080 if_err( blockAllocate(u,ubb,upp,un) );
3082 ubb->right_link(NIL);
3083 st->rootPageId = st->rightHandSide = u;
3091 pageId u = st->rootPageId;
3092 Page *upp = db->get_page(u);
3093 if( !upp->iused() ) {
3094 if_err( blockFree(u) );
3095 st->rootPageId = st->rightHandSide = NIL;
3100 void Db::IndexBase::
3103 if( lastAccess.id >= 0 && lastAccess.id == lastInsert.id )
3107 lastInsert = lastAccess;
3108 cDelCount = 0; lastDelete.id = NIL;
3109 cFindCount = 0; lastFind.id = NIL;
3110 lastOp = opInsert; lastNext.id = NIL;
3113 void Db::IndexBase::
3116 if( lastAccess.id >= 0 && lastAccess.id == lastDelete.id )
3120 lastDelete = lastAccess;
3121 cInsCount = 0; lastInsert.id = NIL;
3122 cFindCount = 0; lastFind.id = NIL;
3123 lastOp = opDelete; lastNext.id = NIL;
3126 void Db::IndexBase::
3127 chkLastFind(pgRef &last)
3129 if( last.id >= 0 && last.id == lastFind.id )
3140 IndexBaseType(int typ)
3143 memset(&name[0],0,sizeof(name));
3148 defaultBlockSizes[] = {
3150 defaultBinaryBlockSize, // idxBin
3151 defaultStringBlockSize, // idxStr
3155 IndexBaseRecd(int typ, int zidx, int ksz, int dsz)
3161 rightHandSide = NIL;
3163 count = 0; pad1 = 0;
3164 blockSize = defaultBlockSizes[typ];
3167 Db::IndexBaseStorage::
3168 IndexBaseStorage(int typ, int zidx, int ksz, int dsz)
3170 new((IndexTypeInfo *)this) IndexBaseType(typ);
3171 new((IndexRecdInfo *)this) IndexBaseRecd(typ, zidx, ksz, dsz);
3174 void Db::IndexBase::
3178 kdSz = st->keySz + st->dataSz;
3179 lastAccess.id = lastDelete.id = lastInsert.id =
3180 lastFind.id = lastNext.id = NIL;
3181 lastAccess.offset = lastDelete.offset = lastInsert.offset =
3182 lastFind.offset = lastNext.offset = 0;
3183 cFindCount = cDelCount = cInsCount = 0;
3187 IndexBase(Db *zdb, IndexBaseStorage &d)
3194 IndexBase(Db *db, int typ, int zidx, int ksz, int dsz)
3196 st = new(&db->index_info[zidx]) IndexBaseStorage(typ, zidx, ksz, dsz);
3207 /*** int Db::objectHeapInsert(int sz,int pg,int off)
3209 * insert a free space record into the entity heap
3212 * sz int record size
3214 * offset int record offset
3216 * returns zero on success, error code on failure
3220 objectHeapInsert(int sz,int id,int offset)
3222 freeSpaceRecord free;
3223 free.size = sz; free.id = id; free.offset = offset;
3224 if_err( freeSpaceIndex->Insert(&free,0) );
3225 addrSpaceRecord addr;
3226 addr.id = id; addr.offset = offset; addr.size = sz;
3227 if_err( addrSpaceIndex->Insert(&addr,0) );
3231 /*** int Db::objectHeapDelete(int sz,int pg,int off)
3233 * delete a free space record from the entity heap
3236 * sz int record size
3238 * offset int record offset
3240 * returns zero on success, error code on failure
3244 objectHeapDelete(int sz,int id,int offset)
3246 freeSpaceRecord free;
3247 free.size = sz; free.id = id; free.offset = offset;
3248 if_err( freeSpaceIndex->Delete(&free) );
3249 addrSpaceRecord addr;
3250 addr.id = id; addr.offset = offset; addr.size = sz;
3251 if_err( addrSpaceIndex->Delete(&addr) );
3255 /*** int Db::pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3257 * find existing persistent free storage.
3260 * size int input/output requested/allocated size
3261 * loc pgRef & output locator returned for new storage
3262 * cache AllocCache& output allocator cache to operate
3264 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3268 pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3270 freeSpaceRecord look, find;
3271 look.size = size; look.id = 0; look.offset = 0;
3272 int status = freeSpaceIndex->Locate(keyGE, &look, 0, &find, 0);
3273 if( status == errNotFound ) return 1;
3275 // if record is at offset 0, need extra space for prefix
3276 while( !find.offset && look.size+(int)sizeof(pagePrefix) > find.size ) {
3277 status = freeSpaceIndex->Next(&find, 0);
3278 if( status == errNotFound ) return 1;
3281 if_err( objectHeapDelete(find.size,find.id,find.offset) );
3283 loc.offset = find.offset ? find.offset : sizeof(pagePrefix);
3284 Page &pg = *get_page(loc.id);
3285 unsigned int ofs = loc.offset + look.size;
3286 if( ofs > pg->used ) pg->used = ofs;
3287 int sz = find.offset+find.size - ofs;
3288 if( sz >= min_heap_allocation ) {
3289 //if_err( objectHeapInsert(sz,find.id,ofs) );
3290 if_err( cache.Load(this, find.id, ofs, sz) );
3293 size = look.size + sz;
3297 /*** int Db::pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3299 * allocate new persistent free storage.
3302 * size int input/output requested/allocated size
3303 * loc pgRef & output locator returned for new storage
3304 * cache AllocCache& output allocator cache to operate
3306 * returns: zero on success, error code.
3310 pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3312 pageId id = new_page();
3314 unsigned int sz = size + sizeof(pagePrefix);
3315 sz = entityPages(sz) * entityPageSize;
3316 Page &pg = *get_page(id);
3317 if( pg->allocated != sz ) {
3323 loc.offset = sizeof(pagePrefix);
3324 int used = loc.offset + size;
3325 int free = pg->allocated - used;
3326 if( free >= min_heap_allocation ) {
3327 //if_err( objectHeapInsert(free,id,used) );
3328 if_err( cache.Load(this, id, used, free) );
3331 size = pg->allocated - loc.offset;
3335 /*** int Db::pgRefAllocate(int &size, pgRef &loc)
3337 * find persistent free storage. existing/new
3338 * does not allocate virtual memory storage
3341 * size int input/output requested/allocated size
3342 * loc pgRef & output locator returned for new storage
3343 * cache AllocCache& output allocator cache to operate
3345 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3349 pgRefAllocate(int &size, pgRef &loc, AllocCache &cache)
3351 int status = pgRefGet(size, loc, cache);
3354 if_err( pgRefNew(size, loc, cache) );
3358 /*** int Db::objectAllocate(int typ, int &size, pgRef &loc)
3360 * find persistent free storage. existing/new
3361 * does allocate virtual memory storage
3362 * inits pagePrefix, inits allocPrefix
3365 * type int input entity type id
3366 * size int input/output requested/allocated size
3367 * loc pgRef & output locator returned for new storage
3368 * cache AllocCache& output allocator cache to operate
3370 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3374 objectAllocate(int typ, int &size, pgRef &loc, AllocCache &cache)
3376 int status = cache.Get(this, size, loc);
3379 if_err( pgRefAllocate(size, loc, cache) );
3380 Page &pg = *get_page(loc.id);
3382 pagePrefix *bpp; pgRef ref;
3383 ref.id = loc.id; ref.offset = 0;
3384 if_err( addrWrite_(ref,bpp) );
3385 bpp->magic = page_magic;
3386 bpp->type = pg_entity + typ;
3387 pg->type = bpp->type;
3389 unsigned int sz = loc.offset + size;
3396 /*** int Db::objectFree(pgRef &loc)
3398 * Purpose: Immediate release of entity memory
3401 * loc pgRef & Page/offset
3403 * returns zero on success, error code on failure
3406 int Db::objectFree(pgRef &loc)
3409 if_err( addrRead_(loc,mp) );
3410 if( mp->magic != entity_magic ) Err(errBadMagic);
3411 addrSpaceRecord addr, prev, next;
3412 /* get prev, next addrSpace free heap items near this addr */
3413 addr.size = mp->size;
3415 addr.offset = loc.offset;
3416 int status = addrSpaceIndex->Locate(keyLT,&addr,0,&prev,0);
3417 if( status == errNotFound ) {
3419 status = addrSpaceIndex->Locate(keyGT,&addr,0,&next,0);
3422 status = addrSpaceIndex->Next(&next,0);
3423 if( status == errNotFound ) {
3428 /* merge with prev if possible */
3429 if( prev.id == addr.id &&
3430 prev.offset + prev.size == addr.offset ) {
3431 if_err( objectHeapDelete(prev.size,prev.id,prev.offset) );
3432 addr.offset = prev.offset;
3433 addr.size += prev.size;
3435 /* merge with next if possible */
3436 if( addr.id == next.id &&
3437 addr.offset + addr.size == next.offset ) {
3438 if_err( objectHeapDelete(next.size,next.id,next.offset) );
3439 addr.size += next.size;
3441 /* reduce used block bytes if possible */
3442 Page &pg = *get_page(loc.id);
3443 if( pg->used <= addr.offset + addr.size )
3444 pg->used = addr.offset;
3445 // if only page prefix, release page, otherwise save new fragment
3446 if( pg->used == sizeof(pagePrefix) )
3449 if_err( objectHeapInsert(addr.size,addr.id,addr.offset) );
3453 /*** int Db::blockAllocate(pageId *pid, keyBlock *&pkbb)
3455 * allocate persistent storage.
3458 * pageId & pid page allocated
3459 * keyBlock *& pkbb keyblock* pointer
3463 blockAllocate(pageId &pid, keyBlock *&pkbb)
3465 locked by(db->db_info->blkAlLk);
3466 pageId id; Page *kpp;
3467 pgRef loc; keyBlock *kbb;
3468 if( st->freeBlocks != NIL ) {
3470 id = st->freeBlocks;
3471 loc.id = id; loc.offset = 0;
3472 if_err( db->addrWrite_(loc,kbb) );
3473 st->freeBlocks = kbb->right_link();
3474 Page &kpg = *db->get_page(id);
3475 if( kpg->allocated != st->blockSize ) {
3476 db->pageDealloc(kpg);
3477 kpg->allocated = st->blockSize;
3478 if_err( db->addrWrite_(loc, kbb) );
3483 id = db->new_page();
3485 loc.id = id; loc.offset = 0;
3486 Page &kpg = *db->get_page(id);
3487 kpg->allocated = st->blockSize;
3488 if_err( db->addrWrite_(loc, kbb) );
3491 kbb->magic = page_magic;
3493 kbb->type = pg_index + st->idx;
3496 kpg->type = kbb->type;
3502 /*** int Db::blockFree(pageId pid)
3504 * Purpose: delayed release database memory
3507 * pid int input Page
3509 * returns zero on success, error code on failure
3513 blockFree(pageId pid)
3515 locked by(db->db_info->blkAlLk);
3517 pageId id = st->freeBlocks; pgRef loc;
3518 loc.id = NIL; loc.offset = 0;
3521 while( id >= 0 && id < pid ) {
3523 if_err( db->addrRead_(loc,kbb) );
3524 id = kbb->right_link();
3528 if_err( db->addrWrite_(loc,kbb) );
3529 kbb->right_link(pid);
3532 st->freeBlocks = pid;
3534 if_err( db->addrWrite_(loc,kbb) );
3535 kbb->right_link(id);
3536 Page *kpp = db->get_page(pid);
3542 blockRelease(pageId pid)
3544 Page *pp = db->get_page(pid);
3545 return pp->release();
3548 /*** int Db::deleteFreeBlock()
3550 * Purpose: release database memory/storage
3552 * returns zero on success, error code on failure
3559 while( (id=st->freeBlocks) != NIL ) {
3560 keyBlock *kbb; pgRef loc;
3561 loc.id = id; loc.offset = 0;
3562 if_err( db->addrRead_(loc,kbb) );
3563 st->freeBlocks = kbb->right_link();
3564 Page &pg = *db->get_Page(id);
3565 if_err( db->storeFree(pg->wr_allocated, pg->wr_io_addr) );
3566 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
3567 if_err( db->storeFree(pg->io_allocated, pg->io_addr) );
3568 pg->io_allocated = 0; pg->io_addr = NIL;
3574 /*** int Db::storeInsert(int sz, ioAddr io_addr)
3576 * insert a storage record into the storage heap
3580 * io_addr ioAddr blob addr
3582 * returns zero on success, error code on failure
3586 storeInsert(long sz, ioAddr io_addr)
3588 //dmsg(-1, "storeInsert %08lx/%012lx\n",sz,io_addr);
3589 freeStoreRecord free;
3590 free.size = sz; free.io_addr = io_addr;
3591 if_err( freeStoreIndex->Insert(&free,0) );
3593 addrStoreRecord addr;
3594 addr.io_addr = io_addr; addr.size = sz;
3595 if_err( addrStoreIndex->Insert(&addr,0) );
3600 /*** int Db::storeDelete(int sz, ioAddr io_addr)
3602 * delete a storage record from the storage heap
3606 * io_addr ioAddr blob addr
3608 * returns zero on success, error code on failure
3612 storeDelete(long sz, ioAddr io_addr)
3614 //dmsg(-1, "storeDelete %08lx/%012lx\n",sz,io_addr);
3615 freeStoreRecord free;
3616 free.size = sz; free.io_addr = io_addr;
3617 if_err( freeStoreIndex->Delete(&free) );
3619 addrStoreRecord addr;
3620 addr.io_addr = io_addr; addr.size = sz;
3621 if_err( addrStoreIndex->Delete(&addr) );
3626 /*** int Db::storeGet(int &size, ioAddr &io_addr)
3628 * allocate storage record from the storage heap
3630 * size int input/output requested/allocated blob size
3631 * io_addr ioAddr output blob addr
3633 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3637 storeGet(int &size, ioAddr &io_addr)
3639 freeStoreRecord look, find;
3640 look.size = size; look.io_addr = 0;
3641 int status = freeStoreIndex->Locate(keyGE, &look,0, &find,0);
3642 if( status == errNotFound ) return 1;
3644 if_err( storeDelete(find.size,find.io_addr) );
3645 io_addr = find.io_addr;
3646 long sz = find.size - look.size;
3647 if( sz >= min_heap_allocation ) {
3648 /* put back extra */
3649 find.size = sz; find.io_addr += look.size;
3650 if_err( storeInsert(find.size,find.io_addr) );
3653 look.size = find.size;
3658 /*** int Db::storeNew(int &size, ioAddr &io_addr)
3660 * allocate storage by extending file
3662 * size int input/output requested/allocated blob size
3663 * io_addr ioAddr output blob addr
3665 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3669 storeNew(int &size, ioAddr &io_addr)
3671 if_err( seek_data(root_info->file_size) );
3672 io_addr = root_info->file_size;
3673 size = storeBlocks(size) * storeBlockSize;
3674 /* make sure file storage exists */
3675 if_err( write_padding(root_info->file_size + size) );
3679 /*** int Db::storeAllocate(int &size, ioAddr &io_addr)
3681 * find persistent free storage. existing/new
3682 * does not allocate virtual memory storage
3685 * size int input/output requested/allocated size
3686 * io_addr ioAddr & output returned storage address
3688 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3692 storeAllocate(int &size, ioAddr &io_addr)
3694 int status = storeGet(size, io_addr);
3696 status = storeNew(size, io_addr);
3701 /*** int Db::storeFree(int size, ioAddr io_addr)
3703 * Immediate release of store heap
3705 * size int input blob size
3706 * io_addr ioAddr input blob addr
3708 * returns zero on success, error code on failure
3712 storeFree(int size, ioAddr io_addr)
3714 if( io_addr < 0 || size == 0 ) return 0;
3715 /* get prev, next addrStore heap items near this io_addr */
3716 addrStoreRecord addr, prev, next;
3717 addr.io_addr = io_addr; addr.size = size;
3718 int status = addrStoreIndex->Locate(keyLT,&addr,0, &prev,0);
3719 if( status == errNotFound ) {
3721 status = addrStoreIndex->Locate(keyGT,&addr,0, &next,0);
3724 status = addrStoreIndex->Next(&next,0);
3725 if( status == errNotFound ) {
3730 /* merge with prev if possible */
3731 if( prev.io_addr >= 0 && prev.io_addr + prev.size == addr.io_addr ) {
3732 if_err( storeDelete(prev.size,prev.io_addr) );
3733 addr.io_addr = prev.io_addr;
3734 addr.size += prev.size;
3736 /* merge with next if possible */
3737 if( next.io_addr >= 0 && addr.io_addr + addr.size == next.io_addr ) {
3738 if_err( storeDelete(next.size,next.io_addr) );
3739 addr.size += next.size;
3742 return storeInsert(addr.size,addr.io_addr);
3745 /*** int Db::pageLoad(pageId id)
3750 * id pageId pageTable element to load
3755 pageLoad(pageId id, Page &pg)
3758 uint8_t *bp = (uint8_t*)pg.addr;
3759 int rd = pg->chk_flags(fl_rd);
3760 int shmid = pg->shmid, used = pg->used;
3762 if( no_shm || shmid < 0 ) {
3763 bp = new_uint8_t(pg->allocated, shmid, id);
3764 if( used ) rd = fl_rd;
3767 bp = get_uint8_t(shmid, id);
3770 if( !bp ) Err(errNoMemory);
3772 if( likely(rd && used > 0) ) {
3773 if_err( pageRead(pg->io_addr, bp, used) );
3775 pg.addr = (pagePrefix*)bp;
3777 pg->clr_flags(fl_rd);
3781 /*** int Db::pageRead(ioAddr io_adr, uint8_t *bp, int len)
3783 * read data from the database file
3786 * pg Page input Page to read
3791 pageRead(ioAddr io_adr, uint8_t *bp, int len)
3793 //locked by(db_info->pgLdLk);
3794 //if_err( seek_data(io_adr) );
3795 //if_err( read_data((char*)bp, len) );
3796 long sz = pread(fd, bp, len, io_adr);
3797 if( len != sz ) Err(errIoRead);
3798 file_position = io_adr+len;
3799 pagePrefix *bpp = (pagePrefix *)bp;
3800 if( bpp->magic != page_magic ) Err(errBadMagic);
3804 /*** int Db::pageWrite(Page *pp)
3806 * writes data to the database file
3809 * pg Page input Page to write
3816 pagePrefix *bpp = pg.addr;
3817 // when io is block aligned and not disk block sized, but
3818 // the allocated areas exceed disk block size, increase
3819 // write to whole blocks to prevent read/modify/write.
3820 int len = bpp->used = pg->used;
3821 unsigned int blen = (len+0xfff) & ~0xfff;
3822 if( blen > pg->allocated ) blen = pg->allocated;
3823 if_err( seek_data(pg->wr_io_addr) );
3824 if_err( write_data((char *)bpp, blen) );
3825 pg->trans_id = active_transaction;
3829 // deallocate page data buffer
3830 // mode<0: delete without shm deallocate
3831 // mode=0: delete and deallocate written page
3832 // mode>0: delete and deallocate page (default)
3835 pageDealloc(Page &pg, int mode)
3837 uint8_t *bp = (uint8_t *)pg.addr;
3838 //int pg=page_table_sz; while( --pg>=0 && pp!=pageTable[pg] );
3839 if( del_uint8_t(bp, pg.shm_id /*, pg*/) <= 1 ||
3840 mode > 0 || (!mode && pg->chk_flags(fl_wr)) )
3846 int Db::AllocCache::
3850 if_ret( db->objectHeapInsert(avail,loc.id,loc.offset) );
3856 int Db::AllocCache::
3857 Get(Db *db,int &size, pgRef &ref)
3860 int isz = avail - size;
3862 unsigned int sz = isz;
3863 if( sz < min_heap_allocation ) {
3866 ref = loc; avail = sz;
3867 if( sz == 0 ) loc.id = NIL;
3868 sz = (loc.offset += size);
3869 Page &pg = *db->get_page(ref.id);
3870 if( pg->used < sz ) pg->used = sz;
3873 if_ret( cacheFlush(db) );
3878 int Db::AllocCache::
3879 Load(Db *db, pageId id, int ofs, int sz)
3883 if_ret( db->objectHeapInsert(sz,id,ofs) );
3892 int Db::cache_all_flush()
3894 if_err( cacheFlush() );
3895 Entity e(this); EntityLoc &ent = e.ent; int ret;
3896 if( !(ret=entityIdIndex->First(0,&ent.obj)) ) do {
3897 if_err( ent.cacheFlush() );
3898 } while( !(ret=entityIdIndex->Next(0,&ent.obj)) );
3899 if( ret == errNotFound ) ret = 0;
3905 allocate(int typ, int size, pgRef &loc, AllocCache &cache)
3907 locked by(db_info->objAlLk);
3909 int sz = size + sizeof(allocPrefix);
3910 // granularity is sizeof pagePrefix
3911 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3912 if_err( objectAllocate(typ, sz, loc, cache) );
3914 if_err( addrWrite_(loc,mp) );
3915 mp->magic = entity_magic;
3922 deallocate(pgRef &loc, AllocCache &cache)
3924 locked by(db_info->objAlLk);
3925 cache.cacheFlush(this);
3926 if( loc.id < 0 ) return 0;
3927 if_fail( objectFree(loc) );
3928 loc.id = NIL; loc.offset = 0;
3933 reallocate(int size, pgRef &loc, AllocCache &cache)
3935 int sz = size + sizeof(allocPrefix);
3936 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3937 int typ = 0, msz = 0;
3940 if_err( addrRead_(loc,mp) );
3946 ref.id = NIL; ref.offset = 0;
3948 if_err( allocate(typ, size, ref, cache) );
3949 if( (msz-=sizeof(allocPrefix)) > 0 ) {
3950 char *bp = 0, *cp = 0;
3951 if_err( addrRead(loc,bp) );
3952 if_err( addrWrite(ref,cp) );
3953 if( msz > size ) msz = size;
3957 if_err( deallocate(loc, cache) );
3965 int Db::pack::aligned = 0;
3967 void Db::pack::put(uint64_t v, int n)
3969 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
3972 int k = 8-n-(idx&7), m = (1<<n)-1;
3973 bits[i] = (bits[i] & ~(m<<k)) | (v<<k);
3977 case 8: *((uint8_t *)&bits[i]) = v; break;
3978 case 16: *((uint16_t*)&bits[i]) = htobe16(v); break;
3979 case 32: *((uint32_t*)&bits[i]) = htobe32(v); break;
3980 case 64: *((uint64_t*)&bits[i]) = htobe64(v); break;
3987 if( !aligned && n <= 32 ) {
3988 int i = idx/32, k = 64-n-idx%32;
3989 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
3990 *bp = htobe64(((be64toh(*bp) & ~(m<<k)) | (v<<k)));
3996 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
3997 uint64_t *bp = (uint64_t*)&bits[8*i];
3998 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1; v &= m;
3999 b = l>=0 ? (b & ~(m<<l)) | (v<<l) : (b & ~(m>>-l)) | (v>>-l);
4007 uint64_t Db::pack::get(int n)
4010 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
4013 int k = 8-n-(idx&7), m = (1<<n)-1;
4014 v = (bits[i]>>k) & m;
4018 case 8: v = *((uint8_t *)&bits[i]); break;
4019 case 16: v = be16toh(*(uint16_t*)&bits[i]); break;
4020 case 32: v = be32toh(*(uint32_t*)&bits[i]); break;
4021 case 64: v = be64toh(*(uint64_t*)&bits[i]); break;
4028 if( !aligned && n <= 32 ) {
4029 int i = idx/32, k = 64-n-idx%32;
4030 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
4031 v = (be64toh(*bp) >> k) & m;
4037 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
4038 uint64_t *bp = (uint64_t*)&bits[8*i];
4039 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1;
4040 uint64_t vv = l>=0 ? (b>>l) & m : b & (m>>-l);
4049 int64_t Db::mediaKey::bit_size(int len, int w)
4052 for( int i=0, m=len; m>1; ++i ) {
4054 int b = m * (i+w+1);
4055 b = (b+pack::alignBits-1) & ~(pack::alignBits-1);
4061 int64_t Db::mediaKey::byte_size(int len, int w)
4063 int64_t sz = bit_size(len, w);
4064 sz = (sz+(8*sizeof(uint64_t)-1)) & ~(8*sizeof(uint64_t)-1);
4065 return sz/8 + 2*sizeof(int)+sizeof(int64_t);
4068 int64_t Db::mediaKey::count1(uint8_t *kp)
4071 int w = pk.iget(), len = pk.iget();
4072 pk.seek(pk.pos()+sizeof(int64_t)*8);
4073 int k = high_bit_no(len-1);
4075 int64_t z = (uint64_t)1<<sz++;
4076 return pk.get(sz) - z;
4080 mediaKey(uint8_t *kp, uint8_t *bp, int w, int len)
4083 pk.iput(this->w = w);
4084 pk.iput(this->len = len);
4088 pk.lput(this->cnt = pb.get(w));
4092 // allocate memory, every level length m requires (m+1)/2 differences
4093 // need an extra one when length is power of 2
4094 int k = high_bit_no(len-1) + 1;
4095 struct { int64_t cnt, sz, *wgt; } lt[k+1], rt[k+1];
4096 for( int i=0, m=len; i<=k; ++i ) {
4098 lt[i].wgt = new int64_t[m];
4099 rt[i].wgt = new int64_t[m];
4100 lt[i].sz = rt[i].sz = 0;
4101 lt[i].cnt = rt[i].cnt = 0;
4105 for( int i=0,l=0; ++i<=len; l=i ) {
4107 int m = i&1 ? 0 : high_bit_no(i ^ l); // highest flipped bit
4108 rt[m].cnt = n; // 0->1, begin right
4109 for( int j=0; j<m; ++j ) {
4110 rt[j].wgt[rt[j].sz++] = n-rt[j].cnt;// 1->0, end right
4111 lt[j].cnt = n; // 1->0, begin left
4113 lt[m].wgt[lt[m].sz++] = n-lt[m].cnt; // 0->1, end left
4116 for( int i=0, m=len; m>1; ++i ) {
4118 if( lt[i].sz < m ) { // finish left
4119 lt[i].wgt[lt[i].sz++] = n-lt[i].cnt;
4122 if( lt[i].sz != rt[i].sz ) // finish right
4123 rt[i].wgt[rt[i].sz++] = n-rt[i].cnt;
4128 for( int i=k; --i>=0; ) {
4129 n = (uint64_t)1<<(i+w); // offset for signed difference
4131 for( int j=0; j<sz; ++j ) {
4132 uint64_t v = (lt[i].wgt[j]-rt[i].wgt[j]) + n;
4133 pk.put(v, i+w+1); // output pair differences
4135 if( (n=pk.pos()%pack::alignBits) != 0 ) // align next level
4136 pk.put(0, pack::alignBits-n);
4139 for( int i=0; i<=k; ++i ) {
4140 delete [] lt[i].wgt;
4141 delete [] rt[i].wgt;
4152 mediaLoad(uint8_t *bp)
4155 w = pb.iget(); len = pb.iget(); cnt = pb.lget();
4157 psz = high_bit_no(len-1)+1;
4158 if( psz < 1 ) psz = 1;
4167 void Db::mediaLoad::
4168 get_fields(int n, int k)
4172 dp[k] = dat; dat += m;
4175 int64_t *ap = dp[k];
4177 int64_t z = (uint64_t)1<<sz++;
4179 int64_t *avp = dp[k-1];
4180 for( int j=n/2; --j>=0; ) {
4181 int64_t av = pb.get(sz)-z, a = *ap++;
4182 *avp++ = (a+av)/2; *avp++ = (a-av)/2;
4185 int64_t av = pb.get(sz)-z, a = *ap;
4190 int64_t *ap = dp[0];
4191 for( int j=n/2; --j>=0; ) {
4192 int64_t av = pb.get(sz)-z, a = *ap++;
4193 pk.putc((a+av)/2, w); pk.putc((a-av)/2, w);
4196 int64_t av = pb.get(sz)-z, a = *ap;
4197 pk.putc((a+av)/2, w);
4203 void Db::mediaLoad::
4206 pb.seek(spos); pk.init(kp);
4207 pk.set_max((1<<w)-1);
4208 int64_t d[dsz]; dat = d;
4209 int64_t *p[psz]; dp = p;
4211 for( int i=psz-1; --i>=0; ) dp[i] = 0;
4220 mediaCmpr(uint8_t *bp)
4222 this->err = 0; this->lmt = ~0;
4223 pb.init(bp); w = pb.iget(); len = pb.iget();
4225 psz = high_bit_no(len-1)+1;
4226 if( psz < 1 ) psz = 1;
4235 uint64_t Db::mediaCmpr::
4236 chk_fields(int n, int k)
4240 adp[k] = adat; adat += m;
4241 bdp[k] = bdat; bdat += m;
4242 if( chk_fields(m, k+1) > lmt ) return err;
4244 int64_t *ap = adp[k], *bp = bdp[k];
4245 //uint64_t serr = 0;
4246 err = 0; int sz = k+w;
4247 int64_t z = (uint64_t)1<<sz++;
4249 int64_t *avp = adp[k-1], *bvp = bdp[k-1];
4250 for( int j=n/2; --j>=0; ) {
4251 int64_t a = *ap++, b = *bp++;
4252 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4253 int64_t alt = (a+av)/2, art = (a-av)/2;
4254 int64_t blt = (b+bv)/2, brt = (b-bv)/2;
4255 //serr += sqr(alt-blt) + sqr(art-brt);
4256 *avp++ = alt; *avp++ = art;
4257 *bvp++ = blt; *bvp++ = brt;
4258 err += labs(alt-blt) + labs(art-brt);
4261 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4262 int64_t a = *ap, b = *bp;
4263 int64_t alt = (a+av)/2, blt = (b+bv)/2;
4264 //serr += sqr(alt-blt);
4265 *avp++ = alt; *bvp++ = blt;
4266 err += labs(alt-blt);
4270 int64_t *ap = adp[0], *bp = bdp[0];
4271 for( int j=n/2; --j>=0; ) {
4272 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4273 int64_t a = *ap++, b = *bp++;
4274 int64_t v = a-b, dv = av-bv;
4275 //serr += sqr(v) + sqr(dv);
4276 err += labs(v+dv)/2 + labs(v-dv)/2;
4280 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4281 int64_t a = *ap, b = *bp;
4282 int64_t v = a-b, dv = av-bv, d = v + dv;
4287 pb.align(); pk.align();
4288 //printf("n=%d,k=%d,lerr=%lu,err=%lu\n",n,k,lerr,(int64_t)sqrt((double)serr));
4292 uint64_t Db::mediaCmpr::
4293 chk(uint8_t *kp, uint64_t lmt)
4295 pb.seek(spos); pk.init(kp); err = 0;
4296 if( pk.iget() != w || len != pk.iget() ) return ~0;
4297 acnt = pb.lget(); bcnt = pk.lget();
4298 //if( (err=sqr(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4299 if( (err=labs(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4300 int64_t a[dsz], b[dsz]; adat = a; bdat = b;
4301 int64_t *ap[psz], *bp[psz]; adp = ap; bdp = bp;
4302 adp[psz-1] = &acnt; bdp[psz-1] = &bcnt;
4303 for( int i=psz-1; --i>=0; ) adp[i] = bdp[i] = 0;
4304 return len > 1 ? chk_fields(len, 0) : err;
4308 cmpr(uint8_t *kp, uint64_t lmt)
4310 pb.seek(spos); pk.init(kp);
4311 if( pk.iget() != w || len != pk.iget() ) return ~0;
4312 acnt = pb.lget(); bcnt = pk.lget();
4313 if( acnt < bcnt ) return -1;
4314 if( acnt > bcnt ) return 1;
4315 if( len == 1 ) return 0;
4316 int bsz = Db::mediaKey::bit_size(len, w);
4317 int i = bsz / (8*sizeof(uint64_t));
4318 uint64_t *ap = (uint64_t *)pb.addr(), *bp = (uint64_t *)pk.addr();
4320 if( *ap != *bp ) return be64toh(*ap)<be64toh(*bp) ? -1 : 1;
4323 if( (i=bsz % (8*sizeof(uint64_t))) > 0 ) {
4324 int l = (8*sizeof(uint64_t)) - i;
4325 uint64_t av = be64toh(*ap)>>l, bv = be64toh(*bp)>>l;
4326 if( av != bv ) return av<bv ? -1 : 1;
4335 start_transaction(int undo_save)
4337 //traceback(" start_transaction %d\n",my_pid);
4339 // activate written pages
4340 for( id=0; id<root_info->pageTableUsed; ++id ) {
4341 Page &pg = *get_Page(id);
4342 if( !pg.addr && pg->used ) pg->set_flags(fl_rd);
4343 if( !pg->chk_flags(fl_new) ) continue;
4344 int io_allocated = pg->io_allocated;
4345 pg->io_allocated = pg->wr_allocated;
4346 pg->wr_allocated = io_allocated;
4347 ioAddr io_addr = pg->io_addr;
4348 pg->io_addr = pg->wr_io_addr;
4349 pg->wr_io_addr = io_addr;
4351 // release inactivate page storage
4352 for( id=0; id<root_info->pageTableUsed; ++id ) {
4353 Page &pg = *get_Page(id);
4354 if( !pg->chk_flags(fl_new) ) continue;
4355 pg->clr_flags(fl_new);
4356 if_err( storeFree(pg->wr_allocated, pg->wr_io_addr) );
4357 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
4358 if( pg->used ) continue;
4359 if_err( storeFree(pg->io_allocated, pg->io_addr) );
4360 pg->io_allocated = 0; pg->io_addr = NIL;
4365 addrStoreRecord addr;
4366 int status = addrStoreIndex->Last(&addr,0);
4368 if( addr.io_addr+addr.size == root_info->file_size ) {
4369 if_err( storeDelete(addr.size, addr.io_addr) );
4370 ioAddr fsz = storeBlocks(addr.io_addr) * storeBlockSize;
4371 if( root_info->file_size > fsz ) {
4372 status = ftruncate(fd, fsz);
4373 if( status ) Err(errIoWrite);
4374 addr.size = fsz - addr.io_addr;
4376 if_err( storeInsert(addr.size, addr.io_addr) );
4377 root_info->file_size = fsz;
4382 if( status != errNotFound ) Err(status);
4384 for( int idx=0; idx<root_info->indeciesUsed; ++idx )
4385 if( indecies[idx] ) indecies[idx]->deleteFreeBlocks();
4387 // move root pages lower if possible
4388 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
4389 IndexBase *bip = indecies[idx];
4390 if( !bip ) continue;
4391 pageId r = bip->st->rootPageId;
4392 if( r < 0 ) continue;
4393 if( r != bip->st->rightHandSide ) continue;
4394 bip->st->rootPageId = bip->st->rightHandSide = lower_page(r);
4397 // truncate pageTable
4398 for( id=root_info->pageTableUsed; id>0; --id ) {
4399 Page &pg = *get_Page(id-1);
4400 if( pg->used ) break;
4402 page_table_sz = root_info->pageTableUsed = id;
4404 // remove released pages from free list
4406 id = root_info->freePages;
4408 Page &pg = *get_Page(id);
4409 pageId next_id = pg->link;
4410 if( id < root_info->pageTableUsed ) {
4411 if( prev ) prev->st->link = id;
4412 else root_info->freePages = id;
4419 if( prev ) prev->st->link = NIL;
4420 else root_info->freePages = NIL;
4422 if( pageTableHunks(root_info->pageTableUsed)*pageTableHunkSize < pageTableAllocated )
4423 alloc_pageTable(root_info->pageTableUsed);
4425 if( undo_save ) undata.save(this);
4426 active_transaction = ++root_info->transaction_id;
4432 #define bfr_sz 0x40000
4437 bfr = inp = new char[bfr_sz];
4438 memset(bfr, 0, bfr_sz);
4446 int ret = flush_bfr();
4448 bfr = inp = lmt = 0;
4457 return n > 0 ? write_data(bfr, n) : 0;
4461 write_bfr(char *dp, int sz)
4465 if( n > sz ) n = sz;
4467 if( (inp+=n) >= lmt && flush_bfr() < 0 )
4479 //traceback(" commit %d\n",my_pid);
4480 int v = attach_wr();
4481 int ret = icommit(force);
4482 if( !ret && force < 0 ) ret = icommit(1);
4484 if( v ) detach_rw();
4492 //traceback(" icommit %d\n",my_pid);
4499 // check for written pages
4500 for( id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
4501 Page &pg = *get_Page(id);
4502 if( pg->chk_flags(fl_wr) ) break;
4504 // if no pages are written
4505 if( n < 0 ) return 0;
4509 // release last root info, allows storage to move down
4510 if_err( storeFree(root_info->last_info_size, root_info->last_info_addr) );
4511 root_info->last_info_addr = NIL; root_info->last_info_size = 0;
4514 ioAddr ri_addr = root_info->last_info_addr;
4515 int ri_size = root_info->last_info_size;
4516 int k = root_info_extra_pages;
4519 // allocate new storage
4522 for( id=0; id<root_info->pageTableUsed; ++id ) {
4523 Page &pg = *get_Page(id);
4524 if( !pg->chk_flags(fl_wr) ) continue;
4525 if( pg->chk_flags(fl_new) ) continue;
4526 pg->set_flags(fl_new);
4528 pg->wr_allocated = pg->allocated;
4529 if_err( storeAllocate(pg->wr_allocated, pg->wr_io_addr) );
4534 // compute rootInfo size
4535 int rsz = writeRootInfo(&Db::size_data);
4536 int rsz0 = entityPages(rsz) * entityPageSize;
4537 int rsz1 = rsz0 + k * entityPageSize;
4538 // retry while no storage, too little, or too big
4539 if( !(ri_addr < 0 || ri_size < rsz0 || ri_size > rsz1) ) break;
4540 // delete/allocate can change pageTable;
4541 if_err( storeFree(ri_size, ri_addr) );
4542 ri_addr = NIL; ri_size = 0;
4543 rsz1 = rsz0 + k/2 * entityPageSize;
4544 if_err( storeAllocate(rsz1, ri_addr) );
4545 ri_size = rsz1; k += 2;
4548 // delete free index pages
4549 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
4550 IndexBase *ib = indecies[idx];
4552 while( (id=ib->st->freeBlocks) != NIL ) {
4553 keyBlock *kbb; pgRef loc;
4554 loc.id = id; loc.offset = 0;
4555 if_err( addrWrite_(loc,kbb) );
4556 ib->st->freeBlocks = kbb->right_link();
4557 Page &pg = *get_Page(id);
4558 pg->used = 0; pg->set_flags(fl_new);
4562 root_info->last_info_addr = root_info->root_info_addr;
4563 root_info->last_info_size = root_info->root_info_size;
4564 root_info->root_info_addr = ri_addr;
4565 root_info->root_info_size = ri_size;
4567 int npages = root_info->pageTableUsed;
4568 pageId *pages = new pageId[npages];
4569 for( int i=0 ; i<npages; ++i ) pages[i] = i;
4570 qsort_r(pages, npages, sizeof(*pages), ioCmpr, this);
4572 // write page storage
4573 for( id=0; id<npages; ++id ) {
4574 Page &pg = *get_Page(pages[id]);
4575 if( pg->chk_flags(fl_wr) ) {
4576 pg->clr_flags(fl_wr);
4578 if_err( pageWrite(pg) );
4582 pg->set_flags(fl_rd);
4588 // write rootInfo storage
4589 if_err( seek_data(ri_addr) );
4591 if_err( open_bfr() );
4592 if_err( writeRootInfo(&Db::write_bfr) );
4593 if_err( close_bfr() );
4595 if_err( writeRootInfo(&Db::write_data) );
4597 if_err( write_zeros(ri_addr+ri_size) );
4599 // update rootInfo pointer
4600 if_err( seek_data(root_info_offset) );
4601 if_err( write_data((char*)&ri_addr,sizeof(ri_addr)) );
4603 // commit is finished
4604 return start_transaction();
4610 // evict unwritten page storage
4611 for( int id=0; id<root_info->pageTableUsed; ++id ) {
4612 Page &pg = *get_page(id);
4613 if( !pg.addr ) continue;
4614 if( pg->chk_flags(fl_wr) ) continue;
4616 pg->set_flags(fl_rd);
4622 seek_data(ioAddr io_addr)
4624 if( io_addr < 0 ) Err(errIoSeek);
4625 if( file_position != io_addr ) {
4626 long offset = lseek(fd,io_addr,SEEK_SET);
4627 if( offset != io_addr ) Err(errIoSeek);
4628 file_position = io_addr;
4634 size_data(char *dp, int sz)
4636 return sz > 0 ? sz : 0;
4640 read_data(char *dp, int sz)
4643 long len = read(fd,dp,sz);
4644 if( len != sz ) Err(errIoRead);
4645 file_position += sz;
4651 write_data(char *dp, int sz)
4654 long len = write(fd,dp,sz);
4655 if( len != sz ) Err(errIoWrite);
4656 file_position += sz;
4657 if( file_position > root_info->file_size )
4658 root_info->file_size = file_position;
4664 write_zeros(ioAddr io_addr)
4666 ioAddr len = io_addr - file_position;
4667 if( len < 0 ) Err(errIoWrite);
4668 int sz = len > entityPageSize ? entityPageSize : len;
4669 char bfr[sz]; memset(&bfr[0],0,sz);
4671 sz = len > entityPageSize ? entityPageSize : len;
4672 if_err( write_data(&bfr[0], sz) );
4673 len = io_addr - file_position;
4679 write_padding(ioAddr io_addr)
4681 if( root_info->file_size > io_addr ) Err(errIoWrite);
4683 if_err( write_zeros(io_addr) );
4685 int status = ftruncate(fd, io_addr);
4686 if( status ) Err(errIoSeek);
4687 root_info->file_size = io_addr;
4692 #define Read(n) do { \
4693 if( (count+=sizeof(n)) > root_info->root_info_size ) Err(errCorrupt); \
4694 if_err( (this->*fn)((char*)&(n),sizeof(n)) ); \
4698 readRootInfo(int(Db::*fn)(char *dp,int sz))
4704 root_info->root_info_size = sizeof(RootInfo);
4706 if( root_info->root_magic != root_magic ) Err(errBadMagic);
4709 sz = indeciesHunks(root_info->indeciesUsed) * indexTableHunkSize;
4710 indecies = new IndexBase*[sz];
4711 if( !indecies ) Err(errNoMemory);
4712 index_info = (IndexInfo *)new_uint8_t(sz*sizeof(IndexInfo),id);
4713 if( !index_info ) Err(errNoMemory);
4714 db_info->index_info_id = index_info_id = id;
4715 indeciesAllocated = sz;
4717 indecies_sz = root_info->indeciesUsed;
4718 for( int idx=0; idx<indecies_sz; ++idx ) {
4719 IndexBaseInfo *b = &index_info[idx].base;
4720 Read(*(IndexTypeInfo*)b);
4721 if( b->magic != idx_magic ) Err(errBadMagic);
4722 if( b->type != idxNil )
4723 Read(*(IndexRecdInfo*)b);
4726 IndexBinaryInfo *bi = &index_info[idx].bin;
4728 if_err( new_index(indecies[idx], b, bi) );
4731 IndexStringInfo *si = &index_info[idx].str;
4733 if_err( new_index(indecies[idx], b, si) );
4744 page_table_sz = root_info->pageTableUsed;
4745 sz = pageTableHunks(page_table_sz) * pageTableHunkSize;
4746 pageTable = (Page **)new Page*[sz];
4747 if( !pageTable ) Err(errNoMemory);
4748 pageTableAllocated = sz;
4749 sz = pageTableAllocated*sizeof(PageStorage);
4750 page_info = (PageStorage *)new_uint8_t(sz, id);
4751 if( !page_info ) Err(errNoMemory);
4752 db_info->page_info_id = page_info_id = id;
4753 for( id=0; id<root_info->pageTableUsed; ++id ) {
4754 Read(page_info[id]);
4755 Page *pp = new Page(page_info[id]);
4756 if( !pp ) Err(errNoMemory);
4759 if( !pg->chk_flags(fl_free) ) pg->shmid = -1;
4760 if( pg->used ) pg->set_flags(fl_rd);
4762 while( id < pageTableAllocated )
4770 #define Write(n) do { \
4771 if_err( (sz=(this->*fn)((char*)&(n),sizeof(n))) ); \
4776 writeRootInfo(int(Db::*fn)(char *data, int sz))
4785 for( id=0; id<root_info->indeciesUsed; ++id ) {
4786 IndexBase *ib = indecies[id];
4788 switch( ib->st->type ) {
4790 IndexBinary *b = (IndexBinary*)ib;
4795 IndexString *b = (IndexString*)ib;
4802 IndexBaseType b(idxNil);
4808 for( id=0; id<root_info->pageTableUsed; ++id ) {
4809 Page *pp = get_Page(id);
4821 int n = db->indeciesAllocated - usrIdx + 1;
4822 if( cfnAllocated != n ) {
4824 cfn = new cfnData[n];
4827 cfnData *cdp = &cfn[0];
4828 cfnUsed = db->root_info->indeciesUsed;
4829 for( int idx=usrIdx; idx<cfnUsed; ++idx,++cdp ) {
4830 IndexBase *ib = db->indecies[idx];
4832 switch( ib->st->type ) {
4834 IndexBinary *bidx = (IndexBinary *)ib;
4835 cdp->cmprId = bidx->bst->cmprId;
4836 cdp->compare = bidx->compare;
4849 cfnData *cdp = &cfn[0];
4850 int n = db->root_info->indeciesUsed<cfnUsed ? db->root_info->indeciesUsed : cfnUsed;
4851 for( int idx=usrIdx; idx<n; ++idx,++cdp ) {
4852 IndexBase *ib = db->indecies[idx];
4854 switch( ib->st->type ) {
4856 IndexBinary *bidx = (IndexBinary *)ib;
4857 int cid = cdp->cmprId;
4858 bidx->bst->cmprId = cid;
4859 bidx->compare = cid>0 ? db->cmprFns[cid] : cdp->compare;
4872 int v = attach_wr();
4875 if( v ) detach_rw();
4885 undata.restore(this);
4892 DbInfo(int pid, int key, int id)
4907 int id = -1, flk = 0, ret = 0;
4910 if( !shm_init ) init_shm();
4911 get_mem = &get_shm8_t;
4912 new_mem = &new_shm8_t;
4913 del_mem = &del_shm8_t;
4914 if( flock(fd, LOCK_EX) ) ret = errInvalid;
4917 id = shmget(key, sizeof(DbInfo), IPC_CREAT+IPC_EXCL+0666);
4918 if( id < 0 ) ret = errno==EEXIST ? errDuplicate : errNoMemory;
4921 vp = shmat(id, NULL, 0);
4922 if( vp == (void*)-1 ) ret = errNoMemory;
4926 get_mem = &get_mem8_t;
4927 new_mem = &new_mem8_t;
4928 del_mem = &del_mem8_t;
4929 vp = (void *)new uint8_t[sizeof(DbInfo)];
4930 if( !vp ) Err(errNoMemory);
4933 db_info = new(vp) DbInfo(my_pid, key, id);
4937 if( flk ) flock(fd, LOCK_UN);
4938 //traceback("new_info %s\n",ret ? "failed" : "success");
4946 if( no_shm ) Err(errInvalid);
4947 get_mem = &get_shm8_t;
4948 new_mem = &new_shm8_t;
4949 del_mem = &del_shm8_t;
4950 int id = shmget(key, sizeof(DbInfo), 0666);
4951 if( id < 0 ) Fail(errNotFound);
4952 void *vp = shmat(id, NULL, 0);
4953 if( vp == (void*)-1 ) Err(errNoMemory);
4954 if( del_uint8_t(0, id) <= 1 ) {
4955 shmctl(id, IPC_RMID, 0);
4957 //traceback("get_info failed\n");
4960 DbInfo *dip = (DbInfo *)vp;
4961 if( dip->magic != info_magic ) Err(errBadMagic);
4962 if( dip->info_key != key || dip->info_id != id ) Err(errCorrupt);
4964 //traceback("get_info success\n");
4972 db_info->index_info_id = -1;
4973 db_info->page_info_id = -1;
4974 db_info->root_info_id = -1;
4975 db_info->owner = -1;
4976 db_info->last_owner = my_pid;
4978 int flk = !flock(fd, LOCK_EX) ? 1 : 0;
4980 int ret = del_uint8_t(0, db_info->info_id);
4982 shmctl(db_info->info_id, IPC_RMID, 0);
4986 delete [] (uint8_t*)db_info;
4987 if( flk ) flock(fd, LOCK_UN);
4988 //traceback("del_info %d, refs=%d\n",my_pid,ret);
4995 open(int zfd, int zkey)
4997 //traceback("open %d\n",my_pid);
4999 if( fd >= 0 ) Err(errDuplicate);
5002 if( fd < 0 ) Err(errNotFound);
5004 if( fstat(fd,&st) < 0 ) Err(errIoStat);
5005 if( zkey >= 0 ) use_shm(1);
5007 if_err( new_info(zkey) );
5015 iopen(int undo_save)
5017 //traceback(" iopen %d\n",my_pid);
5019 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
5020 int ret = !root_info ? errNoMemory : 0;
5023 db_info->root_info_id = root_info_id = info_id;
5026 if( !ret ) ret = seek_data(db_magic_offset);
5027 if( !ret ) ret = read_data((char*)&magic,sizeof(magic));
5028 if( !ret && magic != db_magic ) ret = errBadMagic;
5029 if( !ret ) ret = seek_data(root_info_offset);
5030 if( !ret ) ret = read_data((char*)&root_info->root_info_addr,sizeof(root_info->root_info_addr));
5031 if( !ret ) ret = seek_data(root_info->root_info_addr);
5032 if( !ret ) ret = readRootInfo(&Db::read_data) >= 0 ? 0 : ret;
5033 if( !ret ) ret = start_transaction(undo_save);
5035 active_transaction = root_info->transaction_id;
5044 //traceback("close %d\n",my_pid);
5045 if( !db_info || fd < 0 ) return 0;
5053 //traceback(" iclose %d\n",my_pid);
5063 //traceback(" ireopen %d\n",my_pid);
5064 Page **old_page_table = pageTable; pageTable = 0;
5065 PageStorage *old_page_info = page_info; page_info = 0;
5066 int old_page_table_sz = page_table_sz; page_table_sz = 0;
5069 for( pageId id=0; id<old_page_table_sz; ++id ) {
5070 Page *opp = old_page_table[id], &opg = *opp;
5071 if( id < page_table_sz && opg.addr && !opg->chk_flags(fl_wr | fl_rd) ) {
5072 Page &pg = *get_Page(id);
5073 if( !pg.addr && pg->shmid < 0 && opg.shm_id == opg->shmid &&
5074 pg->io_addr == opg->io_addr && pg->trans_id == opg->trans_id ) {
5075 pg.addr = opg.addr; pg->shmid = pg.shm_id = opg.shm_id;
5076 pg->clr_flags(fl_rd);
5084 delete [] old_page_table;
5085 del_uint8_t(old_page_info);
5092 //traceback(" iattach %d\n",my_pid);
5093 RootInfo *new_root_info = 0;
5094 IndexInfo *new_index_info = 0;
5095 PageStorage *new_page_info = 0;
5096 int new_indecies_alloc = 0;
5097 int new_indecies_sz = 0;
5098 IndexBase **new_indecies = 0;
5099 int new_page_table_alloc = 0;
5100 int new_page_table_sz = 0;
5101 Page **new_page_table = 0;
5104 if( !ret && root_info_id != db_info->root_info_id ) {
5105 new_root_info = (RootInfo *)get_uint8_t(db_info->root_info_id);
5106 if( !new_root_info ) ret = errCorrupt;
5109 new_root_info = root_info;
5114 new_indecies_sz = new_root_info->indeciesUsed;
5115 new_indecies_alloc = indeciesHunks(new_indecies_sz) * indexTableHunkSize;
5116 new_indecies = new IndexBase*[new_indecies_alloc];
5117 if( !new_indecies ) ret = errNoMemory;
5121 if( index_info_id != db_info->index_info_id ) {
5122 new_index_info = (IndexInfo *)get_uint8_t(db_info->index_info_id);
5123 if( !new_index_info ) ret = errCorrupt;
5126 new_index_info = index_info;
5131 for( int idx=0; !ret && idx<new_indecies_sz; ++idx ) {
5132 IndexBaseInfo *b = &new_index_info[idx].base;
5133 if( b->magic == idx_magic ) {
5134 IndexBase *ib = idx<indecies_sz ? indecies[idx] : 0;
5135 if( ib && ib->st->type != b->type ) ib = 0;
5139 new_indecies[idx] = new(ib) IndexBinary(ib,*b,new_index_info[idx].bin);
5143 ret = new_index(new_indecies[idx], b, &new_index_info[idx].bin);
5147 new_indecies[idx] = new(ib) IndexString(ib,*b,new_index_info[idx].str);
5151 ret = new_index(new_indecies[idx], b, &new_index_info[idx].str);
5154 new_indecies[idx] = 0;
5164 for( int idx=0; idx<indecies_sz; ++idx ) {
5165 if( !indecies[idx] ) continue;
5166 delete indecies[idx]; indecies[idx] = 0;
5171 new_page_table_sz = new_root_info->pageTableUsed;
5172 new_page_table_alloc = pageTableHunks(new_page_table_sz) * pageTableHunkSize;
5173 new_page_table = (Page **)new Page*[new_page_table_alloc];
5174 if( !new_page_table ) ret = errNoMemory;
5178 if( page_info_id != db_info->page_info_id ) {
5179 new_page_info = (PageStorage *)get_uint8_t(db_info->page_info_id);
5180 if( !new_page_info ) ret = errCorrupt;
5183 new_page_info = page_info;
5189 for( id=0; !ret && id<new_page_table_sz; ++id ) {
5190 Page *pp = id<page_table_sz ? pageTable[id] : 0;
5191 PageStorage *st = &new_page_info[id];
5193 pageTable[id] = 0; pp->st = st; Page &pg = *pp;
5194 if( pg->chk_flags(fl_rd | fl_free) )
5196 else if( pg->shmid >= 0 && pp->shm_id != pg->shmid ) {
5197 del_uint8_t(pp->addr);
5198 pp->addr = (pagePrefix *)get_uint8_t(pp->shm_id=pg->shmid);
5203 if( !pp ) ret = errNoMemory;
5205 new_page_table[id] = pp;
5207 while( id<page_table_sz ) del_page(id++);
5210 delete [] new_indecies;
5211 delete [] new_page_table;
5212 if( !root_info ) root_info = new_root_info;
5213 else del_uint8_t(new_root_info);
5214 if( !index_info ) index_info = new_index_info;
5215 else del_uint8_t(new_index_info);
5216 if( !page_info ) page_info = new_page_info;
5217 else del_uint8_t(new_page_info);
5223 indecies = new_indecies;
5224 indeciesAllocated = new_indecies_alloc;
5225 indecies_sz = new_indecies_sz;
5226 delete [] pageTable;
5227 pageTable = new_page_table;
5228 pageTableAllocated = new_page_table_alloc;
5229 page_table_sz = new_page_table_sz;
5230 root_info_id = db_info->root_info_id;
5231 del_uint8_t(root_info);
5232 root_info = new_root_info;
5233 index_info_id = db_info->index_info_id;
5234 del_uint8_t(index_info);
5235 index_info = new_index_info;
5236 page_info_id = db_info->page_info_id;
5237 del_uint8_t(page_info);
5238 page_info = new_page_info;
5239 active_transaction = root_info->transaction_id;
5244 attach(int zfd, int zkey, int zrw)
5246 //traceback("attach %s %d\n",zrw ? "wr" : "rd", my_pid);
5248 if_ret( get_info(zkey) );
5249 fd = zfd; key = zkey;
5252 else if( zfd != fd || zkey != key )
5256 ( root_info && active_transaction == root_info->transaction_id &&
5257 db_info->root_info_id >= 0 && root_info_id == db_info->root_info_id &&
5258 db_info->index_info_id >= 0 && index_info_id == db_info->index_info_id &&
5259 db_info->page_info_id >= 0 && page_info_id == db_info->page_info_id ) )
5261 int ret = db_info->root_info_id < 0 ? ireopen() : iattach();
5279 if( fd >= 0 ) Err(errDuplicate);
5283 if( new_info(key) ) Err(errNotFound);
5285 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
5286 if( !root_info ) { Err(errNoMemory); }
5288 db_info->root_info_id = root_info_id = info_id;
5289 if_err( init_idx() );
5290 if_err( seek_data(db_magic_offset) );
5291 int magic = db_magic;
5292 if_err( write_data((char *)&magic,sizeof(magic)) );
5293 write_zeros(entityPageSize);
5298 // in-order traversal copying IndexBinary
5299 int Db::IndexBinary::
5300 keyCopy(pageId s, IndexBase *ib)
5302 IndexBinary *idx = (IndexBinary *)ib;
5304 keyBlock *sbb; Page *spp; char *sn;
5305 if_err( db->indexRead(s,0,sbb,spp,sn) );
5306 if( (r=sbb->right_link()) >= 0 ) {
5307 int lkdSz = kdSz + sizeof(pageId);
5308 int n = spp->iused() / lkdSz;
5309 for( int i=0; i<n; ++i ) {
5310 pageId l = readPageId(sn);
5311 sn += sizeof(pageId);
5312 if_ret( keyCopy(l,idx) );
5313 if_ret( idx->Insert(sn,sn+st->keySz) );
5316 if_ret( keyCopy(r,idx) );
5319 int n = spp->iused() / kdSz;
5320 for( int i=0; i<n; ++i ) {
5321 if_ret( idx->Insert(sn,sn+st->keySz) );
5328 // in-order traversal copying IndexString
5329 int Db::IndexString::
5330 keyCopy(pageId s, IndexBase *ib)
5332 IndexString *idx = (IndexString *)ib;
5333 pageId r; unsigned char lky[keysz];
5334 keyBlock *sbb; Page *spp; char *sn;
5335 if_err( db->indexRead(s,0,sbb,spp,sn) );
5336 unsigned char *lp = (unsigned char *)sn;
5337 unsigned char *rp = lp + spp->iused();
5339 if( (r=sbb->right_link()) >= 0 ) {
5341 pageId l = getPageId(lp);
5342 if_ret( keyCopy(l,idx) );
5343 char *dp = (char *)lp; lp += st->dataSz;
5344 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5345 if_ret( idx->Insert(&lky[0],dp) );
5347 if_ret( keyCopy(r,idx) );
5351 char *dp = (char *)lp; lp += st->dataSz;
5352 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5353 if_ret( idx->Insert(&lky[0],dp) );
5360 EntityObj(EntityObj &eobj, int eid)
5363 memmove(&name[0],&eobj.name[0],nmSz);
5364 recdSz = eobj.recdSz;
5366 indexs[idxId] = eobj.indexs[idxId];
5367 for( int i=idxId+1; i<nidxs; ++i )
5368 indexs[i] = eobj.indexs[i];
5374 copy(ObjectLoc &dobj)
5377 Obj *dp = dobj.addr();
5378 if( !dp ) Err(errNoMemory);
5379 allocPrefix *mp = ((allocPrefix *)dp) - 1;
5380 int sz = mp->size - sizeof(*mp);
5381 if_err( allocate(sz - dobj.entity->ent->recdSz) );
5383 if( !np ) Err(errNoMemory);
5385 // copy varObj data using ref data, if any
5386 for( varObjs vp=dobj.entity->vobjs; vp!=0; vp=vp->next ) {
5388 varObj &vd = dp->*ref;
5389 varObj &vn = np->*ref;
5391 if( (sz=vd.size()) > 0 ) {
5393 memcpy(addr(vn),dobj.addr(vd), sz);
5400 copy(Db *db, Objects objs)
5402 int id, n = db->root_info->indeciesUsed;
5403 for( id=usrIdx; id<n; ++id ) {
5404 IndexBase *ib = db->indecies[id];
5405 if( ib && ib->st->type != idxNil ) {
5407 switch( ib->st->type ) {
5408 // copy binary index
5410 IndexBinary *bidx = (IndexBinary *)ib;
5411 int kySz = bidx->st->keySz, dtSz = bidx->st->dataSz;
5412 int idx = get_index(&bidx->st->name[0]);
5414 idx = new_binary_index(&bidx->st->name[0], kySz, dtSz, bidx->compare);
5416 // ignore empty indecies
5417 if( bidx->st->rootPageId >= 0 ) {
5418 // entity id indecies are processed below
5419 if( db->entityNmIndex->Find(&ib->st->name[0],0) != 0 ) {
5420 IndexBinary *bip = (IndexBinary *)indecies[idx];
5421 // use cmprLast since index is in-order. Avoids using
5422 // user defined class key cmprs and compare functions.
5423 bip->compare = cmprLast;
5424 ret = bidx->keyCopy(bidx->st->rootPageId, indecies[idx]);
5425 bip->compare = cmprFns[bip->bst->cmprId];
5429 // copy string index
5431 IndexString *sidx = (IndexString *)ib;
5432 int dtSz = sidx->st->dataSz;
5433 int idx = get_index(&sidx->st->name[0]);
5435 idx = new_string_index(&sidx->st->name[0], dtSz);
5438 if( sidx->st->rootPageId >= 0 )
5439 ret = sidx->keyCopy(sidx->st->rootPageId, indecies[idx]);
5443 if_err( db->flush() );
5444 if_err( commit(-1) );
5449 // copy entity indecies/data
5450 IndexBinary *eidx = (IndexBinary *)db->entityIdIndex;
5451 int eid, status; pgRef loc;
5452 if( !(status=eidx->First(&eid,&loc)) ) do {
5455 Entity *ep = op->obj->entity;
5456 if( ep->ent->id == eid ) break;
5461 EntityLoc &dent = db_ent.ent;
5463 ObjectLoc objd(&db_ent), *dobj = &objd;
5464 // if old db copy fn, use ObjectLoc::copy
5465 if( op->obj->entity->db == db ) dobj = op->obj;
5466 char name[nmSz]; memset(name,0,sizeof(name));
5467 strncpy(&name[0],&dent->name[0],sizeof(name));
5469 Entity entity(this);
5470 EntityLoc &nent = entity.ent;
5472 if( entityNmIndex->Find(&name[0],&nid) != 0 ) {
5473 int nidx1 = dent->nidxs-1;
5474 int sz = sizeof(EntityObj) + sizeof(dent->indexs[0])*nidx1;
5476 if_err( allocate(dent->id, sz, nent.obj, alloc_cache) );
5477 if( !nent.addr_wr() ) Err(errNoMemory);
5479 new((EntityObj *)nent.addr())
5480 EntityObj(*(EntityObj*)dent.addr(),eid);
5481 if_err( entityIdIndex->Insert(&eid,&nent.obj) );
5482 if_err( entityNmIndex->Insert(&name[0],&eid) );
5484 else if( nid == eid )
5485 if_err( entityIdIndex->Find(&eid,&nent.obj) );
5488 ObjectLoc objn(&entity), *nobj = &objn;
5489 // if new db copy fn, use it instead of ObjectLoc::copy
5490 if( op->obj->entity->db == this ) nobj = op->obj;
5492 if( !(status = dobj->FirstId(cur)) ) do {
5494 if_err( nobj->copy(*dobj) );
5496 if_err( entity.construct_(*nobj, dobj->id()) );
5497 } while( !(status=dobj->NextId(cur)) );
5498 if( status == errNotFound ) status = 0;
5500 if_err( db->flush() );
5501 if_err( commit(-1) );
5503 } while( !(status=eidx->Next(&eid,&loc)) );
5504 if( status == errNotFound ) status = 0;
5506 if_err( db->flush() );
5507 if_err( commit(-1) );
5512 cmprFrSt(char *a, char *b)
5514 freeStoreRecord *ap = (freeStoreRecord *)a;
5515 freeStoreRecord *bp = (freeStoreRecord *)b;
5516 if( ap->size > bp->size ) return 1;
5517 if( ap->size < bp->size ) return -1;
5518 if( ap->io_addr > bp->io_addr ) return 1;
5519 if( ap->io_addr < bp->io_addr ) return -1;
5524 cmprAdSt(char *a, char *b)
5526 addrStoreRecord *ap = (addrStoreRecord *)a;
5527 addrStoreRecord *bp = (addrStoreRecord *)b;
5528 if( ap->io_addr > bp->io_addr ) return 1;
5529 if( ap->io_addr < bp->io_addr ) return -1;
5530 if( ap->size > bp->size ) return 1;
5531 if( ap->size < bp->size ) return -1;
5536 cmprFrSp(char *a, char *b)
5538 freeSpaceRecord *ap = (freeSpaceRecord *)a;
5539 freeSpaceRecord *bp = (freeSpaceRecord *)b;
5540 if( ap->size > bp->size ) return 1;
5541 if( ap->size < bp->size ) return -1;
5542 if( ap->id > bp->id ) return 1;
5543 if( ap->id < bp->id ) return -1;
5544 if( ap->offset > bp->offset ) return 1;
5545 if( ap->offset < bp->offset ) return -1;
5550 cmprAdSp(char *a, char *b)
5552 addrSpaceRecord *ap = (addrSpaceRecord *)a;
5553 addrSpaceRecord *bp = (addrSpaceRecord *)b;
5554 if( ap->id > bp->id ) return 1;
5555 if( ap->id < bp->id ) return -1;
5556 if( ap->offset > bp->offset ) return 1;
5557 if( ap->offset < bp->offset ) return -1;
5558 if( ap->size > bp->size ) return 1;
5559 if( ap->size < bp->size ) return -1;
5564 cmprOIds(char *a, char *b)
5568 if( ap->id > bp->id ) return 1;
5569 if( ap->id < bp->id ) return -1;
5574 cmprStr(char *a, char *b)
5576 return strncmp(a,b,keysz);
5580 cmprKey(char *a, char *b)
5583 return kp->cmpr(a,b);
5587 cmprLast(char *a, char *b)
5592 Db::CmprFn Db::cmprFns[] = {
5601 findCmprFn(CmprFn fn)
5604 for( i=lengthof(cmprFns); --i>0; )
5605 if( fn == cmprFns[i] ) return i;
5612 if_err( new_binary_index("freeStoreIndex", sizeof(freeStoreRecord), 0, cmprFrSt) );
5613 if_err( new_binary_index("addrStoreIndex", sizeof(addrStoreRecord), 0, cmprAdSt) );
5614 if_err( new_binary_index("freeSpaceIndex", sizeof(freeSpaceRecord), 0, cmprFrSp) );
5615 if_err( new_binary_index("addrSpaceIndex", sizeof(addrSpaceRecord), 0, cmprAdSp) );
5616 if_err( new_binary_index("entityIdIndex", sizeof(int), sizeof(pgRef), cmprOIds) );
5617 if_err( new_binary_index("entityNmIndex", sizeof(char[nmSz]), sizeof(int), cmprStr) );
5626 FILE *fp = fopen("/proc/sys/kernel/shmall","w");
5627 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5628 fp = fopen("/proc/sys/kernel/shmmax","w");
5629 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5630 fp = fopen("/proc/sys/kernel/shmmni","w");
5631 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5637 root_magic = Db::root_magic;
5638 root_info_size = sizeof(RootInfo);
5641 root_info_addr = NIL;
5642 last_info_addr = NIL;
5658 storeBlockSize = defaultStoreBlockSize;
5659 entityPageSize = defaultEntityPageSize;
5660 pageTableHunkSize = defaultPageTableHunkSize;
5661 indexTableHunkSize = defaultIndexTableHunkSize;
5663 root_info_id = -1; root_info = 0;
5664 index_info_id = -1; index_info = 0;
5665 indecies = 0; indeciesAllocated = 0; indecies_sz = 0;
5666 page_info_id = -1; page_info = 0;
5667 pageTable = 0; pageTableAllocated = 0; page_table_sz = 0;
5671 bfr = lmt = inp = 0;
5672 active_transaction = -1;
5679 for( int idx=indecies_sz; --idx>=0; ) delete indecies[idx];
5680 delete [] indecies; indecies = 0;
5682 del_uint8_t(index_info); index_info = 0;
5683 indeciesAllocated = 0; indecies_sz = 0;
5687 for( pageId id=0; id<page_table_sz; ++id ) {
5688 Page *pp = get_Page(id); pageDealloc(*pp); delete pp;
5690 delete [] pageTable; pageTable = 0;
5692 del_uint8_t(page_info); page_info = 0;
5693 pageTableAllocated = 0; page_table_sz = 0;
5696 del_uint8_t(root_info); root_info = 0;
5703 no_shm = defaultNoShm;
5704 get_mem = !no_shm ? &get_shm8_t : &get_mem8_t;
5705 new_mem = !no_shm ? &new_shm8_t : &new_mem8_t;
5706 del_mem = !no_shm ? &del_shm8_t : &del_mem8_t;
5707 root_info_id = index_info_id = page_info_id = -1;
5729 if( error() < 0 ) Fail(errPrevious); \
5730 if( r < 0 || r >= root_info->indeciesUsed || !indecies[r] ) Err(errInvalid); \
5731 return indecies[r]->fn;
5734 ins(int r, void *key, void *data)
5736 Run(r,Insert(key,data));
5740 del(int r, void *key)
5746 find(int r, void *key, void *rtnData)
5748 Run(r,Find(key,rtnData));
5752 locate(int r, int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
5754 Run(r,Locate(op,key,cmpr,rtnKey,rtnData));
5758 first(int r, void *key, void *rtnData)
5760 Run(r,First(key,rtnData));
5764 last(int r, void *key, void *rtnData)
5766 Run(r,Last(key,rtnData));
5770 next(int r, void *key, void *rtnData)
5772 Run(r,Next(key,rtnData));
5776 nextloc(int r, pgRef &loc)
5778 Run(r,NextLoc(loc));
5785 allocate(ObjectLoc &loc, int sz)
5787 if( loc.entity != this ) Err(errObjEntity);
5789 int n = ent->recdSz + sz;
5790 if_err( db->allocate(id, n, loc.obj, ent->alloc_cache) );
5791 Obj *op = loc.addr();
5793 ent._wr(); loc._wr();
5795 op->id = ent->maxId;
5801 construct_(ObjectLoc &loc, int id)
5803 int idx = ent->indexs[idxId];
5804 loc._wr(); loc->id = id;
5805 if_err( db->indecies[idx]->Insert(&id,&loc.obj) );
5806 ent._wr(); ++ent->count;
5807 if( id >= ent->maxId ) ent->maxId = id+1;
5813 destruct_(ObjectLoc &loc, int id)
5815 if_err( index(idxId)->Delete(&id) );
5816 ent._wr(); --ent->count;
5817 if( id+1 == ent->maxId ) {
5818 if( ent->count > 0 ) {
5819 if_err( index(idxId)->Last(&id,0) );
5831 new_entity_(Entity &entity, const char *nm, int sz)
5834 EntityLoc &ent = entity.ent;
5835 // construct EntityObj
5836 if( root_info->entity_max_id >= max_entity_type ) Err(errLimit);
5837 if_err( allocate(root_info->entity_max_id+1,
5838 sizeof(EntityObj), ent.obj, alloc_cache) );
5839 int id = root_info->entity_max_id;
5840 ent._wr(); ent->id = id;
5841 char name[nmSz]; memset(&name[0],0,sizeof(name));
5842 strncpy(name,nm,sizeof(name)-1);
5843 memmove(&ent->name[0],name,sizeof(name));
5844 ent->alloc_cache.init();
5848 // add to entity indecies
5849 if_err( entityIdIndex->Insert(&id,&ent.obj) );
5850 if_err( entityNmIndex->Insert(&name[0],&id) );
5851 // create entity id/loc
5852 int idx = new_binary_index(nm, sizeof(int),sizeof(pgRef),cmprOIds);
5854 ent->indexs[idxId] = idx;
5856 ++root_info->entity_count;
5857 ++root_info->entity_max_id;
5858 entity.rw_lock = &db_info->rw_locks[id];
5863 del_entity(Entity &entity)
5865 EntityLoc &ent = entity.ent;
5866 if( ent.obj.id >= 0 ) {
5868 ObjectLoc loc(&entity);
5869 int status = loc.FirstId();
5872 entity.deallocate(loc.obj);
5873 } while( !(status=loc.NextId()) );
5874 if( status != errNotFound )
5876 for( int i=ent->nidxs; --i>=0; ) entity.del_index_(i);
5878 entityIdIndex->Delete(&id);
5879 entityNmIndex->Delete(&ent->name[0]);
5880 if_err( deallocate(ent.obj, alloc_cache) );
5882 --root_info->entity_count;
5883 if( id+1 == root_info->entity_max_id ) {
5884 if( root_info->entity_count > 0 ) {
5885 if_err( entityIdIndex->Last(&id,0) );
5890 root_info->entity_max_id = id;
5897 new_entity(Entity &entity, const char *nm, int sz)
5899 int ret = new_entity_(entity, nm, sz);
5900 if( ret ) del_entity(entity);
5905 get_entity(Entity &entity, const char *nm)
5907 EntityLoc &ent = entity.ent;
5908 char name[nmSz]; memset(&name[0],0,sizeof(name));
5909 strncpy(name,nm,sizeof(name)-1); int id;
5910 if_fail( entityNmIndex->Find(&name[0], &id) );
5911 if_err( entityIdIndex->Find(&id, &ent.obj) );
5912 entity.rw_lock = &db_info->rw_locks[id];
5917 get_index(const char *nm, CmprFn cmpr)
5919 int idx = ent->nidxs;
5920 while( --idx >= 0 ) {
5921 int i = ent->indexs[idx];
5922 if( i < 0 ) continue;
5923 IndexBase *ib = db->indecies[i];
5924 if( ib && !strncmp(&ib->st->name[0],nm,nmSz) ) {
5925 if( cmpr && ib->st->type == idxBin ) {
5926 IndexBinary *bidx = (IndexBinary *)ib;
5927 bidx->compare = cmpr;
5928 bidx->bst->cmprId = db->findCmprFn(cmpr);
5933 if( idx < 0 ) Fail(errNotFound);
5940 EntityLoc nent(this);
5941 // construct EntityObj
5942 int nidx = ent->nidxs;
5943 int sz = sizeof(EntityObj) + sizeof(ent->indexs[0])*nidx;
5944 if_err( db->allocate(ent->id, sz, nent.obj, db->alloc_cache) );
5945 nent._wr(); nent->id = ent->id;
5946 memmove(&nent->name[0],&ent->name[0],nmSz);
5947 nent->alloc_cache = ent->alloc_cache;
5948 nent->maxId = ent->maxId;
5949 nent->recdSz = ent->recdSz;
5950 nent->count = ent->count;
5951 // add to entity indecies
5952 if_err( db->entityIdIndex->Modify(&ent->id,&nent.obj) );
5953 for( int i=0; i<nidx; ++i )
5954 nent->indexs[i] = ent->indexs[i];
5956 nent->indexs[nidx] = idx;
5957 nent->nidxs = nidx+1;
5958 if_err( db->deallocate(ent.obj, db->alloc_cache) );
5964 del_index(const char *nm)
5966 int idx; if_ret( idx = get_index(nm) );
5967 return del_index(idx);
5973 if( i <= idxId ) Fail(errInvalid);
5974 if( i >= ent->nidxs ) Fail(errInvalid);
5975 return del_index_(i);
5981 int idx = ent->indexs[i];
5982 if( idx < 0 ) Fail(errNotFound);
5983 if( idx >= db->root_info->indeciesUsed ) Fail(errNotFound);
5984 if( !db->indecies[idx] ) Fail(errNotFound);
5985 ent._wr(); ent->indexs[i] = -1;
5986 db->indecies[idx]->Clear();
5992 finit(Objects objects)
5995 Objects op = objects; objects = op->next;
5996 Entity *ep = op->obj->entity;
5997 for( varObjs nxt=0, vp=ep->vobjs; vp!=0; vp=nxt ) {
5998 nxt = vp->next; delete vp;
6004 void Db::ObjectLoc::
6007 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
6009 (op->*(vp->ref)).init();
6013 void Db::ObjectLoc::
6016 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
6018 (op->*(vp->ref)).del(entity);
6024 size(varObj &vobj, int sz)
6026 if( vobj.len != sz ) {
6027 AllocCache &cache = entity->ent->alloc_cache;
6028 if_ret( entity->db->reallocate(sz,vobj.loc,cache) );
6029 vobj.len = sz; entity->ent._wr(); _wr();
6034 // get last index id on member accessed with ip
6036 last(const char *nm,int (ObjectLoc::*ip)())
6038 int idx; if_ret( idx = entity->get_index(nm) );
6039 return last(idx, ip);
6043 last(int idx,int (ObjectLoc::*ip)())
6045 ObjectLoc last_loc(*this);
6046 int id, ret = entity->index(idx)->Last(0,&id);
6047 if( ret < 0 ) return ret == errNotFound ? 0 : ret;
6048 if_ret( entity->index(idxId)->Find((void*)&id, &last_loc.obj) );
6049 return (last_loc.*ip)();
6052 // get last index unsigned id on member accessed with ip
6053 unsigned int Db::ObjectLoc::
6054 last(const char *nm,unsigned int (ObjectLoc::*ip)())
6056 int idx; if_ret( idx = entity->get_index(nm) );
6057 return last(idx, ip);
6060 unsigned int Db::ObjectLoc::
6061 last(int idx,unsigned int (ObjectLoc::*ip)())
6063 ObjectLoc last_loc(*this);
6064 int id, ret = entity->index(idx)->Last(0,&id);
6065 if( ret < 0 ) return ret == errNotFound ? 0 : ret;
6066 if_ret( entity->index(idxId)->Find((void*)&id, &last_loc.obj) );
6067 return (last_loc.*ip)();
6070 #define cmpr_type(nm,ty) int Db::ObjectLoc:: \
6071 nm(const ty *ap, int asz, const ty *bp, int bsz) { \
6072 int n = asz < bsz ? asz : bsz; \
6074 if( *ap > *bp ) return 1; \
6075 if( *ap < *bp ) return -1; \
6076 ++ap; ++bp; n -= sizeof(ty); \
6078 if( asz > bsz ) return 1; \
6079 if( asz < bsz ) return -1; \
6083 cmpr_type(cmpr_char, char)
6084 cmpr_type(cmpr_uchar, unsigned char)
6085 cmpr_type(cmpr_short, short)
6086 cmpr_type(cmpr_ushort, unsigned short)
6087 cmpr_type(cmpr_int, int)
6088 cmpr_type(cmpr_uint, unsigned int)
6089 cmpr_type(cmpr_long, long)
6090 cmpr_type(cmpr_ulong, unsigned long)
6091 cmpr_type(cmpr_float, float)
6092 cmpr_type(cmpr_double, double)
6096 cmpr_media(const unsigned char *ap, int asz, const unsigned char *bp, int bsz)
6098 return mediaCmpr((uint8_t *)ap).cmpr((uint8_t *)bp);
6105 if( !idx ) Err(errInvalid);
6106 int id; if_fail( idx->Find(*this, &id) );
6107 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6114 if( !idx ) Err(errInvalid);
6115 int id; if_fail( idx->Locate(op, *this,0, 0,&id) );
6116 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6123 if( !idx ) Err(errInvalid);
6124 int id; if_fail( idx->First(0,&id) );
6125 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6132 if( !idx ) Err(errInvalid);
6133 int id; if_fail( idx->Last(0,&id) );
6134 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6141 if( !idx ) Err(errInvalid);
6142 int id; if_fail( idx->Next(this,&id) );
6143 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6150 if( !idx ) Err(errInvalid);
6151 int id; if_fail( idx->First(pos,0,&id) );
6152 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6159 if( !idx ) Err(errInvalid);
6160 int id; if_fail( idx->Next(pos,this,&id) );
6161 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6168 if( !idx ) Err(errInvalid);
6169 int id; if_fail( idx->Locate(op, *this,0, 0,&id) );
6170 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6174 int Db::ioCmpr(const void *a, const void *b, void *c)
6177 Page &pa = *db->get_page(*(pageId*)a);
6178 Page &pb = *db->get_page(*(pageId*)b);
6179 int64_t v = pa->io_addr - pb->io_addr;
6180 return v < 0 ? -1 : v > 0 ? 1 : 0;
6185 int npages = root_info->pageTableUsed;
6186 pageId *pages = new pageId[npages];
6187 for( int i=0 ; i<npages; ++i ) pages[i] = i;
6188 qsort_r(pages, npages, sizeof(*pages), ioCmpr, this);
6189 for( int i=0 ; i<npages; ++i )
6190 pageLoad(pages[i], *get_page(pages[i]));