13 #include <sys/types.h>
21 extern "C" void traceback(const char *fmt,...)
23 FILE *fp = fopen("/tmp/x","a");
25 va_list ap; va_start(ap, fmt);
26 vfprintf(fp, fmt, ap);
28 int nptrs; void *buffer[100];
29 nptrs = backtrace(buffer, lengthof(buffer));
30 struct timeval now; gettimeofday(&now,0);
31 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
32 fprintf(fp,"*** stack %d\n",nptrs);
33 char **strings = backtrace_symbols(buffer, nptrs);
34 if( !strings ) return;
35 for( int i=0; i<nptrs; ++i ) fprintf(fp,"%s\n", strings[i]);
42 // use repeated string byte move for decompress operation.
43 // page table design should be changed to hierachical bit
44 // field, so that only the trace of page table blocks in
45 // the update path are written at commit time. Writing
46 // the entire page table on commit can be expensive.
48 // local memory allocator
49 void *Db::get_mem8_t(int id)
54 void *Db::new_mem8_t(int size, int &id)
56 return new uint8_t[size];
59 int Db::del_mem8_t(const void *vp, int id)
61 delete [] (uint8_t*)vp;
65 volatile int gdb_wait;
74 // shared memory allocator
78 void *vp = shmat(id, NULL, 0);
79 if( vp == (void*)-1 ) { perror("shmat"); wait_gdb(); vp = 0; }
84 new_shm8_t(int size, int &id)
86 id = shmget(IPC_PRIVATE, size, IPC_CREAT+0666);
87 if( id < 0 ) { perror("shmget"); return 0; }
88 uint8_t *addr = (uint8_t *)get_shm8_t(id);
89 if( addr ) shmctl(id, IPC_RMID, 0);
94 del_shm8_t(const void *vp, int id)
99 if( !shmctl(id, IPC_STAT, &ds) ) ret = ds.shm_nattch;
100 else { perror("shmctl"); wait_gdb(); ret = errIoStat; }
109 get_uint8_t(int id, int pg)
111 uint8_t *bp = (uint8_t *) get_mem(id);
116 new_uint8_t(int size, int &id, int pg)
118 void *vp = new_mem(size, id);
119 return (uint8_t *)vp;
123 del_uint8_t(const void *vp, int id, int pg)
125 return del_mem(vp, id);
134 if( err_callback ) err_callback(this, v);
138 //int Db::debug = DBBUG_ERR + DBBUG_FAIL;
139 int Db::debug = DBBUG_ERR;
143 void dmp(unsigned char *bp, int len)
145 unsigned char ch[16], lch[16];
146 int llen = lengthof(lch)+1;
148 unsigned char *adr = 0;
153 for( n=0; n<lengthof(ch) && --len>=0; ch[n++]=*bp++ );
154 if( (i=n) >= llen ) // check for a duplicate
155 while( --i>=0 && ch[i]==lch[i] );
156 if( i >= 0 || len < 0 ) { /* not a duplicate */
157 if( dups > 0 ) fprintf(stderr," ...%d\n",dups);
159 fprintf(stderr,"%p:",adr);
160 for( i=0; i<n; ++i ) fprintf(stderr," %02x",lch[i]=ch[i]);
161 for( i=n; i<lengthof(ch); ++i ) fprintf(stderr," ");
164 fprintf(stderr,"%c", ch[i]>=' ' && ch[i]<='~' ? ch[i] : '.');
165 fprintf(stderr,"\n");
178 "NoCmprFn", "NotFound", "Duplicate", "NoPage", "NoMemory",
179 "IoRead", "IoWrite", "IoSeek", "IoStat", "BadMagic", "Corrupt",
180 "Invalid", "Previous", "ObjEntity", "Limit", "User",
184 dmsg(int msk, const char *msg,...)
186 if( !(msk & debug) ) return;
187 #ifdef DEBUG_TIMESTAMPS
188 struct timeval now; gettimeofday(&now,0);
189 printf("@%ld.03%ld: ",now.tv_sec,now.tv_usec/1000);
195 FILE *fp = fopen("/tmp/x","a"); if( !fp ) return;
196 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
197 va_start(ap, msg); vfprintf(fp, msg, ap); va_end(ap);
202 _err_(int v,const char *fn,int ln)
205 dmsg(DBBUG_ERR,"%s:%d errored %d (%s)\n",fn,ln,v,errMsgs[-v]);
211 _fail_(int v,const char *fn,int ln)
213 dmsg(DBBUG_FAIL,"%s:%d failed %d (%s)\n",fn,ln,v,errMsgs[-v]);
218 Error(int v,const char *msg)
221 dmsg(DBBUG_ERR,"%s\n",msg);
234 printf("dmp root_info->file_size %016lx\n",
235 root_info->file_size);
236 printf(" rootInfo root_info->transaction_id %d\n",
237 root_info->transaction_id);
238 printf(" root_info->root_info_addr/size %016lx/%08x\n",
239 root_info->root_info_addr,root_info->root_info_size);
240 printf(" root_info->last_info_addr/size %016lx/%08x\n",
241 root_info->last_info_addr,root_info->last_info_size);
242 printf(" root_info->indeciesUsed %d\n",
243 root_info->indeciesUsed);
244 printf(" alloc_cache: "); alloc_cache.dmp();
245 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
246 IndexBase *ib = indecies[idx];
248 printf(" idx %d. %-24s %s%c pop %5ld"
249 " root %-5d rhs %-5d ky/Dt %2d/%-2d ",
250 idx, &ib->st->name[0], ib->st->type==idxBin ? "bin" :
251 ib->st->type==idxStr ? "str" : "???",
252 ib->st->key_type >= ktyBin && ib->st->key_type <= ktyDir ?
253 " *="[ib->st->key_type] : '?', ib->st->count,
254 ib->st->rootPageId, ib->st->rightHandSide,
255 ib->st->keySz, ib->st->dataSz);
256 printf(" free %d/",ib->st->freeBlocks);
258 for( pageId id=ib->st->freeBlocks; id>=0; ++n ) {
259 pgRef loc; loc.id = id; loc.offset = 0;
260 keyBlock *kbb; addrRead_(loc,kbb);
261 if( (id=kbb->right_link()) < 0 ) break;
264 printf("%d pages\n",n);
267 printf(" Entities, count %ld\n", entityIdIndex->Count());
268 Entity e(this); EntityLoc &ent = e.ent; int eid;
269 if( !entityIdIndex->First(&eid,&ent.obj) ) do {
270 int nidxs = ent->nidxs;
271 printf(" id %d. %s maxId %d, recdSz %d, count %d, nidxs %d:",
272 eid, &ent->name[0], ent->maxId, ent->recdSz, ent->count, nidxs);
273 printf(" alloc_cache: "); ent->alloc_cache.dmp();
274 for( int i=0; i<nidxs; ++i ) {
275 int idx = ent->indexs[i];
276 printf(" %d(%s),", idx, idx < 0 ? "" :
277 !indecies[idx] ? "(err)" : &indecies[idx]->st->name[0]);
280 } while( !entityIdIndex->Next(&eid,&ent.obj) );
286 printf(" root_info->pageTableUsed %d\n",root_info->pageTableUsed);
287 for( int pid=0; pid<root_info->pageTableUsed; ++pid ) {
288 Page &pg = *get_page(pid);
289 if( !pg.addr && !pg->io_allocated && pg->io_addr==NIL &&
290 pg->trans_id < 0 && pg->shmid < 0 && !pg->flags &&
291 !pg->wr_allocated && pg->wr_io_addr==NIL ) continue;
292 printf(" pid %d. used %d, type %d, link %d, flags %04x addr %p\n",
293 pid, pg->used, pg->type, pg->link, pg->flags, pg.addr);
294 printf(" allocated %d io_alloc/io_addr %d/%016lx, wr_alloc/io_addr %d/%016lx\n",
295 pg->allocated, pg->io_allocated, pg->io_addr,
296 pg->wr_allocated, pg->wr_io_addr);
298 printf(" root_info->freePages %d",root_info->freePages);
300 for( pageId id=root_info->freePages; id>=0; ++n ) id = (*get_page(id))->link;
301 // printf(" %d\n",(id=(*get_page(id))->link));
302 printf(", pages = %d\n",n);
308 freeStoreRecord free;
309 if( !freeStoreIndex->First(&free,0) ) do {
310 printf("free=%04lx/%012lx\n", free.size,free.io_addr);
311 } while( !freeStoreIndex->Next(&free,0) );
317 addrStoreRecord addr;
318 if( !addrStoreIndex->First(&addr,0) ) do {
319 printf("addr=%012lx/%04lx\n", addr.io_addr,addr.size);
320 } while( !addrStoreIndex->Next(&addr,0) );
326 Entity e(this); EntityLoc &ent = e.ent; int ret, eid;
327 if( !(ret=entityIdIndex->First(&eid,&ent.obj)) ) do {
328 printf(" %d. %-32s %5d/%-5d %d\n",eid,ent->name,
329 ent->alloc_cache.loc.id, ent->alloc_cache.loc.offset,
330 ent->alloc_cache.avail);
331 } while( !(ret=entityIdIndex->Next(&eid,&ent.obj)) );
337 if( !indecies ) return; addrStoreRecord addr;
338 addrStoreRecord last; last.io_addr = 0; last.size = 0;
339 if( !addrStoreIndex->First(&addr,0) ) do {
340 if( last.io_addr > addr.io_addr ||
341 (last.io_addr == addr.io_addr && last.size >= addr.size ) ) {
342 printf("last=%012lx/%04lx, addr=%012lx/%04lx\n",
343 addr.io_addr, addr.size, last.io_addr, last.size);
347 } while( !addrStoreIndex->Next(&addr,0) );
353 if( !indecies ) return; freeStoreRecord free;
354 freeStoreRecord last; last.size = 0; last.io_addr = 0;
355 if( !freeStoreIndex->First(&free,0) ) do {
356 if( last.size > free.size ||
357 (last.size == free.size && last.io_addr >= free.io_addr ) ) {
358 printf("last=%04lx/%012lx, free=%04lx/%012lx\n",
359 free.size, free.io_addr, last.size, last.io_addr);
363 } while( !freeStoreIndex->Next(&free,0) );
367 edmp(AllocCache &cache)
369 freeSpaceRecord free;
370 if( !freeSpaceIndex->First(&free,0) ) do {
371 printf("free=%04lx %d/%d\n", free.size,free.id,free.offset);
372 } while( !freeSpaceIndex->Next(&free,0) );
376 bdmp(AllocCache &cache)
378 addrSpaceRecord addr;
379 if( !addrSpaceIndex->First(&addr,0) ) do {
380 printf("addr=%d/%d %04lx\n", addr.id,addr.offset,addr.size);
381 } while( !addrSpaceIndex->Next(&addr,0) );
387 long store_allocated=0, store_used=0;
388 long loaded_allocated=0, loaded_used=0;
389 long written_allocated=0, written_used=0;
390 int pages_written=0, pages_loaded=0, pages_deleted=0;
391 for( int id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
392 Page &pg = *get_page(id);
393 store_allocated += pg->allocated;
394 store_used += pg->used;
397 loaded_allocated += pg->allocated;
398 loaded_used += pg->used;
400 if( pg->chk_flags(fl_wr) ) {
402 written_allocated += pg->allocated;
403 written_used += pg->used;
404 if( !pg.addr ) ++pages_deleted;
407 #define percent(a,b) (!(a) || !(b) ? 0.0 : (100.0*(a)) / (b))
408 long read_allocated = loaded_allocated - written_allocated;
409 long read_used = loaded_used - written_used;
410 int pages_read = pages_loaded - pages_written;
411 printf("\ncommit %d\n", root_info->transaction_id);
412 printf(" pages %8d/del %-4d alloc:%-12ld used:%-12ld %7.3f%%\n",
413 root_info->pageTableUsed, pages_deleted, store_allocated, store_used,
414 percent(store_used,store_allocated));
415 printf(" loaded %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
416 pages_loaded, percent(pages_loaded, root_info->pageTableUsed),
417 loaded_allocated, loaded_used, percent(loaded_used, loaded_allocated));
418 printf(" read %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
419 pages_read, percent(pages_read, root_info->pageTableUsed),
420 read_allocated, read_used, percent(read_used, read_allocated));
421 printf(" write %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
422 pages_written, percent(pages_written, root_info->pageTableUsed),
423 written_allocated, written_used, percent(written_used, written_allocated));
435 return syscall(SYS_sched_yield);
441 return syscall(SYS_gettid);
448 while( (ret=zfutex(FUTEX_WAKE, nwakeups)) < 0 );
453 zwait(int val, timespec *ts)
455 return zfutex(FUTEX_WAIT, val, ts);
461 fprintf(stderr,"unlocking and not locked\n");
468 if( v || zxchg(1,loc) >= 0 ) do {
470 } while ( zxchg(1,loc) >= 0 );
475 zunlock(int nwakeups)
484 zdecr(loc); lk.lock(); lk.unlock(); zincr(loc);
490 if( lk.loc >= 0 ) zwake(1);
496 lk.lock(); timespec ts = { 1, 0 };
497 int v; while( (v=loc) >= 0 ) zwait(v, &ts);
510 if( !db_info ) return -1;
511 if( is_locked() > 0 && db_info->owner == my_pid ) return 0;
513 //traceback(" attach_wr %d/%d/%d\n",my_pid,db_info->owner,db_info->last_owner);
514 db_info->last_owner = db_info->owner;
515 db_info->owner = my_pid;
523 //traceback(" attach_rd %d/%d\n",my_pid,db_info->owner);
530 if( !db_info ) return;
538 //traceback(" detach_rw %d\n",my_pid);
541 // persistent pageTable element initial constructor
542 void Db::PageStorage::
558 // non-persistent pageTable element initial constructor
576 // deletes storage next start_transaction
580 st->used = 0; st->set_flags(fl_wr);
584 // locked pageTable access
588 read_locked by(db_info->pgTblLk);
589 return get_Page(pid);
593 * Db::alloc_pageTable -- alloc page table
596 * sz int input page table size
600 alloc_pageTable(int sz)
602 write_blocked by(db_info->pgTblLk);
603 int n = pageTableHunks(sz) * pageTableHunkSize;
604 Page **pt = new Page*[n];
605 if( !pt ) Err(errNoMemory);
606 int info_id, info_sz = n*sizeof(PageStorage);
607 PageStorage *new_page_info = (PageStorage *)new_uint8_t(info_sz, info_id);
608 if( !new_page_info ) { delete pt; Err(errNoMemory); }
611 for( ; i<root_info->pageTableUsed; ++i ) {
613 new_page_info[i] = *pt[i]->st;
614 pt[i]->st = &new_page_info[i];
617 for( ; i<n; ++i ) pt[i] = 0;
618 db_info->page_info_id = page_info_id = info_id;
619 del_uint8_t(page_info); page_info = new_page_info;
620 delete [] pageTable; pageTable = pt;
621 pageTableAllocated = n;
626 * Db::new_page -- access/construct new page
629 * returns page id on success (>=zero), error code otherwise(< 0)
635 locked by(db_info->pgAlLk);
636 pageId id = root_info->freePages;
638 if( root_info->pageTableUsed >= pageTableAllocated ) {
639 int sz = root_info->pageTableUsed + root_info->pageTableUsed/2 + 1;
640 if_err( alloc_pageTable(sz) );
642 id = root_info->pageTableUsed;
643 Page * pp = new Page(*new(&page_info[id]) PageStorage());
644 if( !pp ) Err(errNoMemory);
646 page_table_sz = ++root_info->pageTableUsed;
649 Page &pg = *get_page(id);
650 root_info->freePages = pg->link;
651 new(&pg) Page(*new(&page_info[id]) PageStorage());
659 Page *pp = get_Page(id);
666 * Db::free_page -- link to root_info->freePages
669 * pid pageId input page id
675 Page &pg = *get_Page(pid);
679 pg->clr_flags(fl_wr | fl_rd);
680 pg->set_flags(fl_free);
681 pageId id = root_info->freePages;
683 Page *lpp = 0; // use sorted order
684 while( id >= 0 && id < pid ) {
692 root_info->freePages = pid;
697 lower_page(pageId mid)
699 locked by(db_info->pgAlLk);
700 pageId id = root_info->freePages;
702 Page *pp = 0, *lpp = 0;
704 if( id < lid ) { lid = id; lpp = pp; }
709 Page &pg = *get_Page(lid);
711 (*lpp)->link = pg->link;
713 root_info->freePages = pg->link;
722 get_index(const char *nm, CmprFn cmpr)
724 int idx = root_info->indeciesUsed;
725 while( --idx >= 0 ) {
726 IndexBase *ib = indecies[idx];
728 if( !strncmp(&ib->st->name[0],nm,nmSz) ) break;
730 if( idx >= 0 && cmpr && indecies[idx]->st->type == idxBin ) {
731 IndexBinary *bidx = (IndexBinary *)indecies[idx];
732 bidx->compare = cmpr;
733 bidx->bst->cmprId = findCmprFn(cmpr);
741 if( r < 0 || r >= root_info->indeciesUsed ) Fail(errInvalid);
742 if( !indecies[r] ) Fail(errNotFound);
743 return indecies[r]->Count();
747 alloc_indecies(int n)
749 IndexBase **it = new IndexBase*[n];
750 if( !it ) Err(errNoMemory);
752 IndexInfo *info = (IndexInfo *)new_uint8_t(n*sizeof(*info), info_id);
753 if( !info ) { delete it; Err(errNoMemory); }
754 memcpy(info, index_info, indeciesAllocated*sizeof(*info));
756 for( ; i<root_info->indeciesUsed; ++i ) {
757 if( !(it[i] = indecies[i]) ) continue;
758 switch( it[i]->st->type ) {
760 IndexBinary *bidx = (IndexBinary *)it[i];
761 bidx->st = info[i].bin;
762 bidx->bst = info[i].bin;
765 IndexString *sidx = (IndexString *)it[i];
766 sidx->st = info[i].str;
767 sidx->sst = info[i].str;
774 for( ; i<n; ++i ) it[i] = 0;
775 db_info->index_info_id = index_info_id = info_id;
776 del_uint8_t(index_info); index_info = info;
777 delete [] indecies; indecies = it;
778 indeciesAllocated = n;
786 while( idx < root_info->indeciesUsed && indecies[idx] )
788 if( idx >= indeciesAllocated ) {
789 int n = indeciesAllocated + indexTableHunkSize;
790 if( alloc_indecies(n) ) Err(errNoMemory);
792 if( idx >= root_info->indeciesUsed ) {
793 if( idx >= max_index_type ) Err(errLimit);
794 root_info->indeciesUsed = idx+1;
795 indecies_sz = root_info->indeciesUsed;
804 delete indecies[idx];
806 for( idx=root_info->indeciesUsed; idx>0 && indecies[idx-1]==0; --idx );
807 indecies_sz = root_info->indeciesUsed = idx;
811 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexBinaryInfo *bi)
813 IndexBaseStorage *st = new(b) IndexBaseStorage();
814 IndexBinaryStorage *bst = new(bi) IndexBinaryStorage();
815 IndexBinary *bidx = new IndexBinary(this, st, bst);
816 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
817 if( bidx ) delete bidx;
825 new_binary_index(const char *nm, int ksz, int dsz, CmprFn cmpr)
827 if( get_index(nm) >= 0 ) Err(errDuplicate);
828 int idx = new_index();
829 if( idx < 0 ) Err(errNoMemory);
830 IndexBinary *bidx = new IndexBinary(this, idx, ksz, dsz, cmpr);
831 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
835 if( nm ) strncpy(&bidx->st->name[0],nm,sizeof(bidx->st->name));
836 indecies[idx] = bidx;
841 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexStringInfo *si)
843 IndexBaseStorage *st = new(b) IndexBaseStorage();
844 IndexStringStorage *sst = new(si) IndexStringStorage();
845 IndexString *sidx = new IndexString(this, st, sst);
846 if( !sidx ) Err(errNoMemory);
852 new_string_index(const char *nm, int dsz)
854 if( get_index(nm) >= 0 ) Err(errDuplicate);
855 int idx = new_index();
856 if( idx < 0 ) Err(errNoMemory);
857 IndexString *sidx = new IndexString(this, idx, dsz);
862 if( nm ) strncpy(&sidx->st->name[0],nm,sizeof(sidx->st->name));
863 indecies[idx] = sidx;
868 * Db::IndexBinary::keyMap - map function on index pages
871 * s pageId input current id
873 * returns 0 on success,
874 * errNotFound if index is empty
875 * error code otherwise
878 int Db::IndexBinary::
879 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
882 keyBlock *sbb; Page *spp; char *sn;
883 if_err( db->indexRead(s,0,sbb,spp,sn) );
884 if( (r=sbb->right_link()) >= 0 ) {
885 int lkdSz = kdSz + sizeof(pageId);
886 int n = spp->iused() / lkdSz;
887 for( int i=0; i<n; ++i ) {
888 pageId l = readPageId(sn);
889 if_ret( keyMap(l,fn) );
892 if_ret( keyMap(r,fn) );
894 if_err( (this->*fn)(s) );
899 * Db::IndexBinary::setLastKey -- conditionally update lastAccess
902 * s pageId input page of last insert
903 * u pageId input rightLink of page
904 * k int input insert offset in block
907 void Db::IndexBinary::
908 setLastKey(pageId s, pageId u, int k)
910 if( keyInterior < 0 ) {
918 lastAccess.offset = sizeof(keyBlock) + k;
923 * Db::IndexBinary::keyLocate -- generalized database key retreival
926 * s pageId input current id
927 * cmpr CmprFn input key compare function
929 * returns 0 on success
930 * errNotFound if not found,
931 * error code otherwise
934 int Db::IndexBinary::
935 keyLocate(pgRef &last, pageId s, int op,void *ky,CmprFn cmpr)
937 int ret = errNotFound;
938 keyBlock *sbb; Page *spp; char *sn;
939 if_err( db->indexRead(s,0,sbb,spp,sn) );
940 int len = spp->iused();
942 if( sbb->right_link() >= 0 )
943 lkdSz += sizeof(pageId);
947 // binary search block for key
948 while( (r - l) > 1 ) {
951 if( sbb->right_link() >= 0 )
954 int n = cmpr((char*)ky,kn);
956 if( op >= keyLE && op <= keyGE ) {
958 last.offset = sizeof(keyBlock) + k;
961 if( op == keyLE || op == keyGT ) n = 1;
963 if( n > 0 ) l = i; else r = i;
967 int k = op < keyEQ ? l*lkdSz : (r < len ? r : -1);
968 if( op != keyEQ && k >= 0 ) {
969 if( sbb->right_link() >= 0 )
972 last.offset = sizeof(keyBlock) + k;
976 if( (s = sbb->right_link()) >= 0 ) {
977 if( r < len ) s = readPageId(sn+r);
979 ret = keyLocate(last,s,op,ky,cmpr);
980 if( k == 0 ) ret = 0;
987 * Db::IndexBinary::Locate -- interface to generalized database key retreival
990 * op int input key relation in keyLT..keyGT
991 * key void * input retreival key image
992 * cmpr CmprFn input retreival key image
993 * rtnKey void * output resulting key value
994 * rtnData void * output resulting recd value
996 * returns 0 on success
997 * errNotFound on not found,
998 * error code otherwise
1001 int Db::IndexBinary::
1002 refLocate(pgRef &loc, int op, void *key, CmprFn cmpr)
1004 if( st->rootPageId == NIL )
1006 if( op == keyEQ ) op = keyLE;
1007 if( !cmpr ) cmpr = compare;
1008 if_fail( keyLocate(loc,st->rootPageId,op, key,cmpr) );
1014 int Db::IndexBinary::
1015 Locate(int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
1018 if_fail( refLocate(last, op, key, cmpr) );
1020 if_err( db->addrRead_(last,kp) );
1021 if( rtnKey && st->keySz > 0 )
1022 wr_key(kp, (char*)rtnKey,st->keySz);
1023 if( rtnData && st->dataSz > 0 )
1024 memmove(rtnData,kp+st->keySz,st->dataSz);
1029 * Db::IndexBinary::chkFind - check lastAccess block for key
1032 * key char * input key image
1033 * last pgRef * input last position loc
1035 * returns 0 if not found
1036 * error code otherwise
1039 int Db::IndexBinary::
1040 chkFind(pgRef &loc, char *key)
1043 if( s < 0 ) return 0; // must be valid block
1044 keyBlock *sbb; Page *spp; char *sn;
1045 if_err( db->indexRead(s,0,sbb,spp,sn) );
1046 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1047 int slen = spp->iused();
1048 int k = loc.offset - sizeof(keyBlock);
1049 if( k < 0 || k > slen ) return 0; // must be inside/end of block
1050 int cmpr0 = k>=slen ? -1 : compare(key,sn+k); // compare last access
1051 if( cmpr0 ) { // not found here
1053 if( cmpr0 < 0 ? (k-=kdSz) < 0 : (k+=kdSz) >= slen ) return 0;
1054 int cmpr1 = compare(key,sn+k);
1055 if( !cmpr1 ) goto xit; // found here
1057 if( l >= slen ) return 0; // no curr
1058 if( cmpr0 < 0 ) Fail(errNotFound); // btwn prev/curr
1060 cmpr1 = compare(key,sn+k);
1061 if( !cmpr1 ) goto xit; // found here
1062 if( cmpr1 > 0 ) return 0; // key after last in block
1065 if( cmpr0 > 0 ) Fail(errNotFound); // btwn curr/next
1067 cmpr1 = compare(key,sn);
1068 if( !cmpr1 ) goto xit; // found here
1069 if( cmpr1 < 0 ) return 0; // key before first in block
1071 return errNotFound; // key in block range, but not found
1074 loc.offset = sizeof(keyBlock) + k;
1079 * Db::IndexBinary::keyFind -- database unique key retreival
1082 * s pageId input current id
1084 * returns 0 on success
1085 * errNotFound on not found,
1086 * error code otherwise
1089 int Db::IndexBinary::
1090 keyFind(pgRef &loc, void *ky, pageId s)
1093 keyBlock *sbb; Page *spp; char *sn;
1094 if_err( db->indexRead(s,0,sbb,spp,sn) );
1096 if( sbb->right_link() >= 0 )
1097 lkdSz += sizeof(pageId);
1098 int len = spp->iused();
1102 // binary search block for key
1103 while( (r - l) > 1 ) {
1106 if( sbb->right_link() >= 0 )
1107 k += sizeof(pageId);
1109 int n = compare((char*)ky,kn);
1112 loc.offset = sizeof(keyBlock) + k;
1115 if( n > 0 ) l = i; else r = i;
1118 if( (s = sbb->right_link()) < 0 ) break;
1119 if( (r*=lkdSz) < len ) s = readPageId(sn+r);
1126 * Db::IndexBinary::Find -- interface to database unique key retreival
1129 * key void * input retreival key image
1130 * rtnData void * output resulting recd value
1132 * returns 0 on success
1133 * errNotFound on not found,
1134 * error code otherwise
1137 int Db::IndexBinary::
1138 refFind(pgRef &loc, void *ky)
1140 if( st->rootPageId == NIL )
1142 pageId r = st->rootPageId;
1146 if( CHK cFindCount > 2 ) ret = 1; }
1147 if( ret ) { // try the easy way
1148 ret = chkFind(loc, (char *)ky);
1149 if( ret == errNotFound ) {
1150 r = loc.id; ret = 0;
1154 if( !ret ) { // try the hard way
1155 if_fail( keyFind(loc,ky,r) );
1162 int Db::IndexBinary::
1163 Find(void *ky, void *rtnData)
1166 if_fail( refFind(last, ky) );
1168 if_err( db->addrRead_(last,kp) );
1170 memmove(rtnData,kp+st->keySz,st->dataSz);
1175 int Db::IndexBinary::
1176 chkInsert(void *key, void *data)
1179 char *ky = (char *)key;
1180 pageId s = lastInsert.id;
1181 if( s < 0 || cInsCount < 2 ) return 0; /* >= 2 in a row */
1182 keyBlock *sbb; Page *spp; char *sn;
1183 if_err( db->indexRead(s,1,sbb,spp,sn) );
1184 if( sbb->right_link() >= 0 ) return 0; /* must be exterior */
1185 int slen = spp->iused();
1186 int k = lastInsert.offset - sizeof(keyBlock);
1188 char *kn = kp + kdSz;
1189 char *rp = sn + slen;
1190 int n = compare(ky,kp);
1191 if( n == 0 ) Fail(errDuplicate);
1192 if( n > 0 ) { /* after last one */
1193 if( kn >= rp ) { /* no next one */
1194 if( st->rightHandSide == s )
1199 if( n == 0 ) Fail(errDuplicate);
1201 rhs = 1; /* before next one */
1204 if( !rhs ) return 0; /* not a hit */
1205 if( spp->iallocated()-slen < kdSz ) return 0; /* doesnt fit */
1206 if( rp > kn ) memmove(kn+kdSz,kn,rp-kn); /* move data up */
1207 if( key && st->keySz > 0 )
1208 wr_key(key, kn,st->keySz);
1209 if( data && st->dataSz > 0 )
1210 memmove(kn+st->keySz,data,st->dataSz); /* add new key/data */
1211 spp->iused(slen + kdSz);
1214 lastAccess.offset = sizeof(keyBlock) + kn-sn;
1219 * Db::IndexBinary::keyInsert - add unique key to database
1222 * s pageId input current id
1224 * iky char * input assembled insertion key
1226 * returns 0 on success,
1227 * errDuplicate if key already exists in database
1228 * error code otherwise
1231 int Db::IndexBinary::
1232 keyInsert(pageId s, pageId &t)
1235 /* mark no continued insertion, and check end of search */
1236 /* if not end, read current pageId, search for key */
1237 /* return error if found. */
1238 keyBlock *sbb; Page *spp; char *sn;
1239 if_err( db->indexRead(s,1,sbb,spp,sn) );
1241 pageId u = sbb->right_link();
1243 lkdSz += sizeof(pageId);
1244 int slen = spp->iused();
1245 int keyCount = slen / lkdSz;
1246 int maxKeysInBlock = spp->iallocated() / lkdSz;
1250 /* binary search block for key */
1251 while( (r - l) > 1 ) {
1254 if( sbb->right_link() >= 0 )
1255 k += sizeof(pageId);
1257 int n = compare(this->akey,kn);
1260 lastAccess.offset = sizeof(keyBlock) + k;
1263 if( n > 0 ) l = i; else r = i;
1266 /* not found in this pageId, continue search at lower levels. */
1267 /* process insertion if lower level splits ( t>=0 ). */
1268 int i = sizeof(pageId);
1272 u = readPageId(sn+k);
1273 if_ret( keyInsert(u, t) );
1274 if( t < 0 ) return 0;
1276 writePageId(sn+k,t);
1282 /* if current pageId is not full, insert into this pageId. */
1283 if( keyCount < maxKeysInBlock ) {
1286 memmove(kn+lkdSz,kn,slen-k);
1287 spp->iused(slen + lkdSz);
1288 memmove(kn,&iky[i],lkdSz);
1289 setLastKey(s,u,k); /* save insert loc */
1293 /* current pageId is full, split pageId and promote new parent key */
1294 keyBlock *tbb; Page *tpp; char *tn;
1295 if_err( blockAllocate(t,tbb,tpp,tn) );
1296 /* split point is middle, or end if inserting consecutive on rhs */
1297 int promoted = maxKeysInBlock/2;
1298 if( cInsCount > 2 && s == st->rightHandSide )
1299 promoted = maxKeysInBlock-1;
1301 int tlen = slen - promoted;
1302 if( st->rightHandSide == s ) st->rightHandSide = t;
1304 if( k <= promoted ) { /* at or left of promoted key */
1305 if( k != promoted ) { /* not promoting current key */
1306 kn = sn+promoted-lkdSz;
1307 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1309 memmove(kn+lkdSz,kn,promoted-lkdSz-k);
1310 memmove(kn,&iky[i],lkdSz); /* insert new key */
1311 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1312 setLastKey(s,u,k); /* save insert loc */
1314 memmove(tn,sn+promoted,tlen);
1316 else { /* key is > center */
1318 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1319 int j = k - promoted - lkdSz;
1320 memmove(tn,kn+=lkdSz,j);
1321 memmove(kn=tn+j,&iky[i],lkdSz); /* insert new key */
1322 setLastKey(t,u,j); /* save insert loc */
1323 memmove(kn+=lkdSz,sn+k,slen-k);
1324 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1327 /* set newly split blocks half full, and set new links. */
1331 tbb->right_link(sbb->right_link());
1332 sbb->right_link(readPageId(&iky[0]));
1333 writePageId(&iky[0],s);
1334 /* if root is splitting, create new left */
1335 if( s == st->rootPageId ) {
1336 keyBlock *ubb; Page *upp; char *un;
1337 if_err( blockAllocate(u,ubb,upp,un) );
1338 memmove(un,sn,slen);
1340 ubb->right_link(sbb->right_link());
1341 writePageId(&iky[0],u);
1342 k = st->keySz + st->dataSz + sizeof(pageId);
1343 memmove(sn,&iky[0],k);
1346 setLastKey(s,t,sizeof(pageId));
1348 /* t >= 0 indicates continued insertion, return success */
1352 void Db::IndexBinary::
1353 makeKey(char *cp,char *key,int l,char *recd,int n)
1355 writePageId(cp,NIL); cp += sizeof(pageId);
1356 if( l > 0 ) { wr_key(key, cp,l); cp += l; }
1357 if( n > 0 && recd ) memmove(cp,recd,n);
1361 * Db::Insert - interface to database unique key insertion
1364 * key void * input key image
1365 * data void * input recd value
1367 * returns 0 on success,
1368 * errDuplicate if key already exists in database
1369 * error code otherwise
1372 int Db::IndexBinary::
1373 Insert(void *key, void *data)
1375 if_err( MakeRoot() );
1378 if( CHK cInsCount > 2 ) { // try the easy way
1379 ret = chkInsert(key,data);
1382 if( !ret ) { // try the hard way
1383 makeKey(&iky[0],this->akey=(char *)key,st->keySz,(char *)data,st->dataSz);
1384 pageId t = NIL; lastAccess.id = NIL;
1385 if_ret( keyInsert(st->rootPageId, t) );
1387 if( keyInterior > 0 ) lastAccess.id = NIL;
1394 * Db::keyBlockUnderflow -- balences/merges blocks after a deletion
1397 * t int output continuation flag
1398 * lbb keyBlock * input left sibling keyBlock
1399 * p pageId input parent keyBlock id
1400 * pbb keyBlock * input parent keyBlock
1401 * pi int input deletion key offset parent
1404 * returns 0 on success, errorcode otherwise
1407 int Db::IndexBinary::
1408 keyBlockUnderflow(int &t,keyBlock *lbb,pageId p,keyBlock *pbb,int pi)
1414 int psiz = kdSz + sizeof(pageId);
1416 * if deletion was at right end of block
1417 * back up one key otherwise use this key
1419 char *pn = (char *)(pbb+1);
1420 Page *ppp = db->get_page(p);
1421 int plen = ppp->iused();
1422 if( pi >= plen ) { /* read lt side */
1423 r = pbb->right_link();
1426 l = readPageId(pn+pi);
1427 if_err( db->indexRead(l,1,lbb) );
1429 else { /* read rt side */
1430 l = readPageId(pn+pi);
1432 r = i >= plen ? pbb->right_link() : readPageId(pn+i);
1433 if_err( db->indexRead(r,1,rbb) );
1436 int lsz = lbb->right_link() >= 0 ? sizeof(pageId) : 0;
1437 int lkdSz = kdSz + lsz;
1438 char *ln = (char *)(lbb+1);
1439 Page *lpp = db->get_page(l);
1440 int llen = lpp->iused();
1441 char *rn = (char *)(rbb+1);
1442 Page *rpp = db->get_page(r);
1443 int rlen = rpp->iused();
1446 * if both lt&rt blocks+parent key all fit in one block, merge them
1448 int n = llen + rlen;
1449 if( n <= rpp->iallocated()-lkdSz ) { /* merge */
1450 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1451 memmove(ln+llen,pn+pi+sizeof(pageId)-lsz,lkdSz);
1453 memmove(ln+llen,rn,rlen); /* move right to left */
1455 lbb->right_link(rbb->right_link());
1457 memmove(pn+pi,pn+i,plen-i); /* remove parent key */
1459 if( plen == 0 && p == st->rootPageId ) { /* totally mashed root */
1460 if( st->rightHandSide == r ) st->rightHandSide = p;
1461 if_err( blockFree(r) );
1462 memmove(pn,ln,llen); /* copy to parent page */
1463 pbb->right_link(lbb->right_link());
1465 if_err( blockFree(l) );
1468 if( r != pbb->right_link() ) /* not rightLink */
1469 writePageId(pn+pi,l); /* must be next key */
1472 if( st->rightHandSide == r ) st->rightHandSide = l;
1473 if_err( blockFree(r) );
1477 lastAccess.id = NIL;
1478 return 0; /* continue underflow */
1482 if( tsiz < lkdSz ) tsiz = lkdSz; /* slosh threshold */
1483 if( plen > psiz && plen > tsiz ) t = 0; /* discontinue underflow */
1484 if( (i=(n/=2)-llen) >= tsiz || !llen ) { /* slosh left */
1485 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1486 k = pi+sizeof(pageId);
1487 memmove(kn=ln+llen,pn+k-lsz,lkdSz); /* move parent to left */
1489 memmove(kn+=lkdSz,rn,i*=lkdSz); /* migrate keys left */
1490 llen += i+lkdSz; kn = rn+i;
1491 if( lbb->right_link() >=0 )
1492 lbb->right_link(readPageId(kn));
1493 writePageId(pn+pi,l); /* change parent key */
1494 memmove(pn+k,kn+lsz,kdSz);
1495 kn += lkdSz; i += lkdSz;
1496 memmove(rn,kn,rlen-=i); /* migrate keys left */
1498 else if( (i=n-rlen) >= tsiz || !rlen ) { /* slosh right */
1499 i /= lkdSz; i *= lkdSz;
1500 memmove(kn=rn+i,rn,rlen);
1501 rlen += i; /* migrate keys right */
1502 writePageId(pn+pi,lbb->right_link());
1503 k = pi+sizeof(pageId);
1504 memmove(kn-=lkdSz,pn+k-lsz,lkdSz); /* move parent key */
1505 i -= lkdSz; n = llen-i;
1506 memmove(rn,kn=ln+n,i);
1507 kn -= lkdSz; /* migrate keys right */
1508 if( lbb->right_link() >=0 )
1509 lbb->right_link(readPageId(kn));
1510 memmove(pn+k,kn+lsz,kdSz); /* change parent key */
1511 writePageId(pn+pi,l);
1516 lastAccess.id = NIL;
1517 lpp->iused(llen); /* update lengths */
1524 * Db::IndexBinary::keyDelete - remove unique key from database
1527 * t int input/output check underflow flag
1528 * kp void * input key image to be removed
1529 * s pageId input current id
1530 * p pageId input parent id
1531 * pbb keyBlock * input parent
1532 * pi int input deletion key offset parent
1534 * returns 0 on success,
1535 * errNotFound if key does not exists in database
1536 * error code otherwise
1539 int Db::IndexBinary::
1540 keyDelete(int &t,void *kp,pageId s,pageId p,keyBlock *pbb,int pi)
1543 keyBlock *sbb; Page *spp; char *sn;
1544 if_err( db->indexRead(s,1,sbb,spp,sn) );
1545 int slen = spp->iused();
1546 t = 1; /* check underflow */
1547 if( idf == 0 ) { /* not interior deletion */
1549 if( sbb->right_link() >= 0 )
1550 lkdSz += sizeof(pageId);
1552 int r = slen / lkdSz;
1553 while( (r - l) > 1 ) { /* binary search for key */
1556 if( sbb->right_link() >= 0 )
1557 k += sizeof(pageId);
1559 int n = compare(this->akey,kn);
1561 if( sbb->right_link() < 0 ) { /* terminal key */
1563 memmove(kn,kn+lkdSz,slen-k);
1564 spp->iused(slen); /* delete key */
1565 lastAccess.id = s; /* lastAccess = key */
1566 lastAccess.offset = sizeof(keyBlock) + k;
1568 else { /* interior key */
1569 k -= sizeof(pageId);
1572 idf = 1; /* interior delete */
1573 if_ret( keyDelete(t,(void *)kn,u,s,sbb,k) );
1577 if( n > 0 ) l = i; else r = i;
1579 if( (u=sbb->right_link()) < 0 ) { /* could not find it */
1583 if( (r *= lkdSz) < slen )
1584 u = readPageId(sn+r);
1585 if_ret( keyDelete(t,kp,u,s,sbb,r) ); /* continue search */
1587 else { /* interior deletion */
1588 if( (u=sbb->right_link()) < 0 ) { /* equivalent exterior key */
1589 int i = slen - kdSz;
1590 char *kn = sn + i; /* translate to interior */
1591 memmove((char *)kp+sizeof(pageId),kn,kdSz);
1592 spp->iused(i); /* remove key */
1594 else { /* continue decending */
1595 if_ret( keyDelete(t,kp,u,s,sbb,slen) );
1599 if( t != 0 ) { /* underflow possible */
1601 t = 0; /* no parent, root */
1603 if_err( keyBlockUnderflow(t,sbb,p,pbb,pi) );
1608 int Db::IndexBinary::
1609 chkDelete(pgRef &loc, void *kp)
1613 ret = chkFind(loc, (char*)kp); // try last delete
1614 if( !ret && lastFind.id != loc.id ) {
1616 ret = chkFind(loc, (char*)kp); // try last find
1618 if( !ret ) return 0;
1619 if( ret == errNotFound ) ret = 0;
1622 keyBlock *sbb; Page *spp; char *sn;
1623 if_err( db->indexRead(s,1,sbb,spp,sn) );
1624 int dlen = spp->iused() - kdSz;
1625 if( dlen < kdSz ) return 0; // at least 1 key must remain
1626 if( !ret ) return errNotFound;
1627 spp->iused(dlen); // delete
1628 int k = loc.offset - sizeof(keyBlock);
1631 memmove(kp,kp+kdSz,dlen-k);
1637 * Db::IndexBinary::Delete - interface to remove unique key
1640 * key void * input key image to be removed
1642 * returns 0 on success,
1643 * errNotFound key does not exists in database
1644 * error code otherwise
1647 int Db::IndexBinary::
1650 if( st->rootPageId == NIL ) Fail(errNotFound);
1651 this->akey = (char *)key;
1653 pageId r = st->rootPageId;
1655 if( CHK cDelCount > 2 ) { // try the easy way
1657 ret = chkDelete(loc, key);
1658 if( ret == errNotFound ) { // in exterior block
1659 r = loc.id; ret = 0;
1662 if( !ret ) { // try the hard way
1663 makeKey(&iky[0],this->akey=(char *)key,st->keySz,0,0);
1664 lastAccess.id = NIL; int t = 1;
1665 (void)r; // use full search, r works but is not traditional
1666 if_fail( keyDelete(t,(void *)&iky[0],/*r*/st->rootPageId,0,0,0) );
1667 if_err( UnmakeRoot() );
1675 * Db::IndexBinary::keyFirst - access leftmost key in tree
1678 * s pageId input current id
1680 * returns 0 on success,
1681 * errNotFound if index is empty
1682 * error code otherwise
1685 int Db::IndexBinary::
1686 keyFirst(pgRef &loc, pageId s)
1690 if_err( db->indexRead(s,0,sbb) );
1691 if( sbb->right_link() < 0 ) break;
1692 char *sn = (char *)(sbb+1);
1697 loc.offset = sizeof(keyBlock);
1702 * Db::IndexBinary::First -- interface to database access leftmost key in tree
1705 * rtnKey void * output resulting key value
1706 * rtnData void * output resulting recd value
1708 * returns 0 on success
1709 * errNotFound on not found,
1710 * error code otherwise
1713 int Db::IndexBinary::
1714 First(void *rtnKey,void *rtnData)
1716 if( st->rootPageId == NIL ) Fail(errNotFound);
1718 if_fail( keyFirst(first, st->rootPageId) );
1720 if_err( db->addrRead_(first,kp) );
1721 if( rtnKey && st->keySz > 0 )
1722 wr_key(kp, (char*)rtnKey,st->keySz);
1723 if( rtnData && st->dataSz > 0 )
1724 memmove(rtnData,kp+st->keySz,st->dataSz);
1726 lastNext = lastAccess = first; }
1731 * Db::IndexBinary::keyLast - access rightmost key in tree
1734 * s pageId input current id
1736 * returns 0 on success,
1737 * errNotFound if index is empty
1738 * error code otherwise
1741 int Db::IndexBinary::
1742 keyLast(pgRef &loc, pageId s)
1746 if_err( db->indexRead(s,0,sbb) );
1747 pageId u = sbb->right_link();
1752 Page *spp = db->get_page(s);
1754 int k = spp->iused() - kdSz;
1755 loc.offset = sizeof(keyBlock) + k;
1760 * Db::IndexBinary::Last -- interface to database access rightmost key in tree
1763 * rtnKey void * output resulting key value
1764 * rtnData void * output resulting recd value
1766 * returns 0 on success
1767 * errNotFound on not found,
1768 * error code otherwise
1771 int Db::IndexBinary::
1772 Last(void *rtnKey,void *rtnData)
1774 if( st->rootPageId == NIL ) Fail(errNotFound);
1776 if_fail( keyLast(last, st->rootPageId) );
1778 if_err( db->addrRead_(last,kp) );
1779 if( rtnKey && st->keySz > 0 )
1780 wr_key(kp, (char*)rtnKey,st->keySz);
1781 if( rtnData && st->dataSz > 0 )
1782 memmove(rtnData,kp+st->keySz,st->dataSz);
1784 lastNext = lastAccess = last; }
1789 * Db::IndexBinary::Modify -- interface to database record modify
1792 * keyDef index input key definition block
1793 * key void * input retreival key image
1794 * recd void * input new recd value
1796 * returns 0 on success
1797 * errNotFound on not found,
1798 * error code otherwise
1801 int Db::IndexBinary::
1802 Modify(void *key,void *recd)
1804 if_fail( Find(key,0) );
1806 if_err( db->addrWrite_(lastAccess,bp) );
1807 memmove(bp+st->keySz,recd,st->dataSz);
1811 int Db::IndexBinary::
1812 chkNext(pgRef &loc, char *&kp)
1815 if( s < 0 ) return 0; // must be valid
1816 keyBlock *sbb; Page *spp; char *sn;
1817 if_err( db->indexRead(s,0,sbb,spp,sn) );
1818 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1819 int k = loc.offset - sizeof(keyBlock);
1820 if( k < 0 || k >= spp->iused() ) return 0; // curr must be in block
1821 if( (k+=kdSz) >= spp->iused() ) return 0; // next must be in block
1823 loc.offset = sizeof(keyBlock) + k;
1827 int Db::IndexBinary::
1828 keyNext(pgRef &loc, char *kp)
1830 if_fail( keyLocate(loc,st->rootPageId, keyGT,kp,compare) );
1835 * Db::IndexBinary::Next -- interface to sequential database key retreival
1838 * loc pgRef & input last known retreival key
1839 * rtnKey void * output resulting key value
1840 * rtnData void * output resulting recd value
1842 * returns 0 on success
1843 * error code otherwise
1846 int Db::IndexBinary::
1847 Next(pgRef &loc,void *rtnKey,void *rtnData)
1849 if( st->rootPageId == NIL ) Fail(errNotFound);
1851 int ret = CHK chkNext(loc,kp); // try the easy way
1855 switch( st->key_type ) {
1857 ky = (char *)rtnKey;
1859 case ktyBin: case ktyDir:
1860 if_err( db->addrRead_(loc,ky) );
1863 if_ret( keyNext(loc, ky) ); // try the hard way
1865 if_err( db->addrRead_(loc,kp) );
1866 if( rtnKey && st->keySz > 0 )
1867 wr_key(kp, (char*)rtnKey,st->keySz);
1868 if( rtnData && st->dataSz > 0 )
1869 memmove(rtnData,kp+st->keySz,st->dataSz);
1875 void Db::IndexBinary::
1883 IndexBinary(Db *zdb, int zidx, int ksz, int dsz, CmprFn cmpr)
1884 : IndexBase(zdb, idxBin, zidx, ksz, dsz)
1887 bst = new(st+1) IndexBinaryStorage(zdb->findCmprFn(compare));
1888 iky = new char[st->blockSize/2+1];
1889 tky = new char[st->blockSize/2+1];
1894 IndexBinary(Db *zdb, IndexBaseStorage *b, IndexBinaryStorage *d)
1895 : IndexBase(zdb, *b)
1897 bst = new(d) IndexBinaryStorage();
1898 compare = cmprFns[bst->cmprId];
1899 iky = new char[st->blockSize/2+1];
1900 tky = new char[st->blockSize/2+1];
1905 IndexBinary(IndexBase *ib, IndexBaseStorage *b, IndexBinaryStorage *d)
1906 : IndexBase(ib->db, *b)
1908 bst = new(d) IndexBinaryStorage();
1909 compare = cmprFns[bst->cmprId];
1921 int Db::IndexString::
1922 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
1925 keyBlock *sbb; Page *spp; char *sn;
1926 if_err( db->indexRead(s,0,sbb,spp,sn) );
1927 unsigned char *lp = (unsigned char *)sn;
1928 unsigned char *rp = lp + spp->iused();
1929 if( (r=sbb->right_link()) >= 0 ) {
1931 pageId l = getPageId(lp);
1932 lp += st->dataSz; ++lp;
1933 while( *lp++ != 0 );
1934 if_ret( keyMap(l,fn) );
1936 if_ret( keyMap(r,fn) );
1938 if_err( (this->*fn)(s) );
1943 * Db::IndexString::kpress -- compresses kp with prefix
1946 * kp unsigned char * input key string to compress
1947 * lp unsigned char * input left prefix
1948 * cp unsigned char * output compressed string
1950 * returns length of compressed string cp
1951 * safe to kpress buffer in place over cp or lp
1954 int Db::IndexString::
1955 kpress(unsigned char *kp, unsigned char *lp, unsigned char *cp)
1959 while( *kp == *lp ) {
1963 ch = *kp++; *cp++ = n;
1971 * divide tbfr length n into sectors l and s with rlink r
1972 * if i >= 0 then the insert point is i
1974 * promoted key in tky, return left sector
1977 int Db::IndexString::
1978 split(int n, int i, pageId s, pageId &l, pageId r)
1980 unsigned char lky[keysz];
1981 unsigned char *bp = &tbfr[0];
1982 unsigned char *lp = bp;
1983 unsigned char *rp = bp + n;
1984 unsigned char *kp = lp;
1985 unsigned char *tp = 0;
1986 pageId t = NIL, u = NIL;
1987 keyBlock *lbb; Page *lpp; char *ln;
1989 db->indexRead(l,1,lbb,lpp,ln) :
1990 blockAllocate(l,lbb,lpp,ln) );
1992 int blen = lpp->iallocated()-1 - st->dataSz;
1993 if( r >= 0 ) blen -= sizeof(pageId);
1994 if( n > blen ) n = blen;
1995 /* split point is middle, or end if inserting consecutive on rhs */
1997 if( i >= 0 && cInsCount > 2 && s == st->rightHandSide )
1999 unsigned char *mp = lp + promoted;
2000 // get promoted key in lky
2003 // expand key to lky
2004 if( r >= 0 ) u = getPageId(lp);
2005 tp = lp; lp += st->dataSz;
2006 for( i=*lp++; (lky[i++]=*lp++) != 0; );
2007 // stop copy if past mid pt and
2008 // remaining bytes + expanded key bytes fit in block
2010 if( kp != &tbfr[0] && lp >= mp && rlen+i <= blen )
2012 // copy promoted data/key
2013 unsigned char *tkp = tky;
2014 umemmove(tkp, tp, st->dataSz);
2016 // save end of left block, move to next key
2017 kp = bp; bp = lp; t = u;
2021 // must be at least 3 keys in tbfr (left,parent,right)
2022 if( !n || bp >= rp ) Err(errCorrupt);
2023 memmove(ln,&tbfr[0],n);
2026 // add first key in next block, order of moves important
2027 // memmove may be move up since key may expand past split
2029 if( r >= 0 ) putPageId(mp,u);
2030 umemmove(mp, tp, st->dataSz); // data recd
2031 *mp++ = 0; // first prefix is zero length
2032 memmove(mp+i, lp, rlen); // rest of block
2033 memmove(mp, &lky[0], i); // expanded key
2035 int slen = mp - &tbfr[0];
2037 * if root is splitting, allocate new right sibling
2038 * and set root to contain only new parent key.
2040 keyBlock *sbb; Page *spp; char *sn;
2041 if_err( db->indexRead(s,1,sbb,spp,sn) );
2042 if( s == st->rootPageId ) {
2043 keyBlock *rbb; Page *rpp; char *rn;
2044 if_err( blockAllocate(u,rbb,rpp,rn) );
2047 memmove(rn,&tbfr[0],slen);
2049 if( st->rightHandSide == s ) st->rightHandSide = r;
2050 mp = (unsigned char *)sn;
2052 umemmove(mp, kp=&tky[0], st->dataSz);
2054 for( *mp++=0; (*mp++=*kp++)!=0; );
2055 slen = mp - (unsigned char *)sn;
2058 memmove(sn, &tbfr[0], slen);
2064 int Db::IndexString::
2065 chkInsert(void *key, void *data)
2067 unsigned char *ky = (unsigned char *)key;
2068 pageId s = lastInsert.id;
2069 if( s < 0 ) return 0; // must be valid
2070 int n = ustrcmp(&lastInsKey[0],ky);
2071 if( !n ) Fail(errDuplicate);
2072 keyBlock *sbb; Page *spp; char *sn;
2073 if_err( db->indexRead(s,0,sbb,spp,sn) );
2074 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2075 int slen = spp->iused();
2076 int k = lastInsert.offset - sizeof(keyBlock);
2077 if( k < 0 || k >= slen ) return 0; // must be inside block
2078 unsigned char *bp = (unsigned char *)sn;
2079 unsigned char *lp = bp;
2080 unsigned char *rp = bp + k; // beginning to current
2081 unsigned char *tp = bp + slen;
2082 unsigned char nky[keysz]; nky[0] = 0;
2083 if( n < 0 ) { // past here
2084 ustrcpy(&nky[0],&lastInsKey[0]);
2087 else { // before here
2088 n = ustrcmp(bp+st->dataSz+1,ky);
2089 if( !n ) Fail(errDuplicate);
2090 if( n > 0 ) return 0; // before first
2091 unsigned char rb; rp += st->dataSz; // move past curr data
2092 for( int i=*rp++; (rb=*rp++) == lastInsKey[i] && rb != 0; ++i );
2093 if( rb ) return 0; // must match curr
2095 unsigned char lky[keysz]; lky[0] = 0;
2096 unsigned char *kp; n = 0;
2097 while( (kp=lp) < rp ) {
2098 lp += st->dataSz; // scan next
2099 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2100 n = ustrcmp(ky,&nky[0]);
2101 if( !n ) Fail(errDuplicate);
2102 if( n < 0 ) break; // btwn last,next
2103 ustrcpy(&lky[0],&nky[0]);
2105 if( lp >= tp && s != st->rightHandSide ) return 0;// not found, not rhs
2106 // recompress and compute storage demand
2107 int llen = kp - (unsigned char *)sn; // left
2108 int lsz = kpress(ky, &lky[0], &lky[0]); // lky = kpress new key
2109 int mlen = st->dataSz + lsz;
2110 int klen = mlen; // new key demand
2112 if( kp < rp ) { // if next key exists
2113 nsz = kpress(&nky[0], ky, &nky[0]);
2114 mlen += st->dataSz + nsz; // new+next key demand
2116 int rlen = tp - lp; // right
2117 int blen = llen + mlen + rlen; // left + demand + right
2119 if( blen > spp->iallocated() ) return 0; // if insertion wont fit
2121 spg->set_flags(fl_wr);
2122 if( kp < rp ) { // insert not at end of block
2123 memmove(kp+mlen, lp, rlen); // move right up
2124 memmove(kp+klen, kp, st->dataSz); // move next data up
2125 memmove(kp+klen+st->dataSz,&nky[0],nsz); // kpress next key
2127 k = kp - (unsigned char *)sn;
2128 lastAccess.id = lastInsert.id;
2129 lastAccess.offset = sizeof(keyBlock) + k;
2130 ustrcpy(&lastAccKey[0],ky);
2131 umemmove(kp,(unsigned char *)data,st->dataSz); // insert new key
2132 umemmove(kp,&lky[0],lsz);
2138 * insert - add node to compressed b-tree.
2139 * entry - tky - data/key to add
2140 * s - root of tree/subtree
2141 * t - NIL/discontinue or tky->left_link
2143 int Db::IndexString::
2144 keyInsert(pageId &t, pageId s)
2146 unsigned char lky[keysz]; // last key
2147 unsigned char nky[keysz]; // next key
2148 keyBlock *sbb; Page *spp; char *sn;
2149 if_err( db->indexRead(s,1,sbb,spp,sn) );
2150 t = NIL; // set discontinue insertion
2152 pageId r = sbb->right_link();
2154 int n = spp->iused();
2155 unsigned char *lp = (unsigned char *)sn; // rest of block
2156 unsigned char *rp = lp + n; // end of block
2157 unsigned char *kp = 0; // insertion point
2158 unsigned char *tkp = &tky[st->dataSz]; // tky = data/key
2160 while( (kp=lp) < rp ) { // search
2161 if( r >= 0 ) l = getPageId(lp);
2163 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2164 n = ustrcmp(&tkp[0], &nky[0]);
2166 if( n == 0 ) Fail(errDuplicate);
2169 // if non-terminal block, continue search at lower levels
2170 // if lower level splits( t>=0 ), process insertion
2172 if_ret( keyInsert(t, kp>=rp ? r : l) );
2173 if( t < 0 ) return 0; // discontinue
2176 // recompress and compute storage demand
2177 int llen = kp - (unsigned char *)sn; // left
2178 int lsz = kpress(tkp, &lky[0], &lky[0]); // lky = kpress new key
2179 int dsz = st->dataSz;
2180 if( r >= 0 ) dsz += sizeof(pageId); // link+data size
2181 int mlen = dsz + lsz;
2182 int klen = mlen; // new key demand
2184 if( kp < rp ) { // if next key exists
2185 nsz = kpress(&nky[0], &tkp[0], &nky[0]);
2186 mlen += dsz + nsz; // new+next key demand
2188 int rlen = rp - lp; // right
2189 int blen = llen + mlen + rlen; // left + demand + right
2191 if( blen <= spp->iallocated() ) { // if insertion will fit
2192 if( kp < rp ) { // insert not at end of block
2193 memmove(kp+mlen, lp, rlen); // move right up
2194 memmove(kp+klen, kp, dsz); // move next link/data up
2195 memmove(kp+klen+dsz,&nky[0],nsz); // kpress next key
2197 if( r >= 0 ) putPageId(kp, t);
2198 if( lastAccess.id < 0 ) { // update lastAccess
2200 int k = kp - (unsigned char *)sn;
2201 lastAccess.offset = sizeof(keyBlock) + k;
2202 ustrcpy(&lastAccKey[0],&tkp[0]);
2204 umemmove(kp,&tky[0],st->dataSz); // insert new key
2205 memmove(kp,&lky[0],lsz);
2210 unsigned char *bp = &tbfr[0]; // overflowed, insert to tbfr
2211 umemmove(bp,(unsigned char *)sn,llen);
2212 if( r >= 0 ) putPageId(bp, t);
2213 umemmove(bp,&tky[0],st->dataSz); // insert new key
2214 umemmove(bp,&lky[0],lsz);
2215 if( kp < rp ) { // insert not at end of block
2216 umemmove(bp,kp,dsz);
2217 umemmove(bp,&nky[0],nsz); // kpress next key
2218 umemmove(bp,lp,rlen); // copy rest of block
2221 if_err( split(blen,llen,s,t,r) ); // split block, and continue
2226 int Db::IndexString::
2227 Insert(void *key,void *data)
2229 if_err( MakeRoot() );
2231 if( CHK cInsCount > 2 ) { // try the easy way
2232 ret = chkInsert(key,data);
2235 if( !ret ) { // try the hard way
2236 unsigned char *tp = &tky[0];
2237 umemmove(tp,(unsigned char *)data,st->dataSz);
2238 ustrcpy(tp,(unsigned char*)key);
2239 pageId t = NIL; lastAccess.id = NIL;
2240 if_ret( keyInsert(t, st->rootPageId) );
2242 ustrcpy(&lastInsKey[0],&lastAccKey[0]);
2249 int Db::IndexString::
2253 if_err( db->indexRead(s,0,sbb) );
2254 char *sn = (char *)(sbb+1);
2256 while( sbb->right_link() >= 0 ) {
2258 if_err( db->indexRead(s,0,sbb) );
2259 sn = (char *)(sbb+1);
2263 lastAccess.offset = sizeof(keyBlock);
2264 unsigned char *kp = (unsigned char *)sn;
2265 ustrcpy(&lastAccKey[0],kp+st->dataSz+1);
2269 int Db::IndexString::
2270 First(void *rtnKey,void *rtnData)
2272 if( st->rootPageId == NIL ) Fail(errNotFound);
2273 if_ret( keyFirst(st->rootPageId) );
2275 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2278 if_err( db->addrRead_(lastAccess,kp) );
2279 memmove(rtnData,kp,st->dataSz);
2281 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2282 lastNext = lastAccess;
2286 int Db::IndexString::
2290 if_err( db->indexRead(s,0,sbb) );
2293 while( (u=sbb->right_link()) >= 0 ) {
2294 if_err( db->indexRead(s=u,0,sbb) );
2297 unsigned char lky[keysz];
2298 Page *spp = db->get_page(s);
2299 char *sn = (char *)(sbb+1);
2300 unsigned char *lp = (unsigned char *)sn;
2301 unsigned char *rp = lp + spp->iused();
2302 unsigned char *kp = 0;
2306 kp = lp; lp += st->dataSz;
2307 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2310 if( !kp ) Fail(errNotFound);
2311 int k = kp - (unsigned char *)sn;
2313 lastAccess.offset = sizeof(keyBlock) + k;
2314 ustrcpy(&lastAccKey[0],&lky[0]);
2318 int Db::IndexString::
2319 Last(void *rtnKey,void *rtnData)
2321 if( st->rootPageId == NIL ) Fail(errNotFound);
2322 if_ret( keyLast(st->rootPageId) );
2324 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2327 if_err( db->addrRead_(lastAccess,kp) );
2328 memmove(rtnData,kp,st->dataSz);
2330 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2331 lastNext = lastAccess;
2335 int Db::IndexString::
2336 chkFind(pgRef &loc, char *key, unsigned char *lkey, unsigned char *lkp)
2339 if( s < 0 ) return 0; // must be valid block
2340 keyBlock *sbb; Page *spp; char *sn;
2341 if_err( db->indexRead(s,0,sbb,spp,sn) );
2342 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2343 int slen = spp->iused();
2344 int k = loc.offset - sizeof(keyBlock);
2345 if( k < 0 || k >= slen ) return 0; // must be inside/end of block
2346 unsigned char *ky = (unsigned char *)key;
2347 unsigned char *bp = (unsigned char *)sn;
2348 unsigned char *kp = bp + k;
2349 unsigned char *rp = bp + slen;
2350 unsigned char *lp = kp; kp += st->dataSz;
2351 unsigned char *ip = &lkey[*kp++];
2352 while( kp<rp && *kp!=0 && *ip==*kp ) { ++ip; ++kp; }
2353 if( *ip || *kp++ ) return 0; // must match curr
2354 int n = ustrcmp(&lkey[0], ky);
2355 if( !n && !lkp ) goto xit; // found here, and no last key
2356 unsigned char lky[keysz];
2358 if( n > 0 ) { // before here
2359 rp = lp; lp = kp = bp;
2360 ip = lp + st->dataSz;
2361 if( *ip++ ) Err(errCorrupt);
2362 if( (n=ustrcmp(ip, ky)) > 0 ) return 0; // before first
2363 if( !n ) { lky[0] = 0; goto xit; } // found here, first
2365 ustrcpy(&lky[0], ip);
2367 lp = kp; kp += st->dataSz;
2368 unsigned char nky[keysz];
2369 ustrcpy(&nky[0], &lky[0]);
2370 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2371 n = ustrcmp(ky, &nky[0]);
2372 if( !n ) goto xit; // found here
2373 if( n < 0 ) Fail(errNotFound); // btwn prev,next
2374 ustrcpy(&lky[0], &nky[0]);
2376 return 0; // not in block
2378 if( lkp ) ustrcpy(lkp, &lky[0]);
2380 loc.offset = sizeof(keyBlock) + k;
2384 int Db::IndexString::
2385 keyFind(pgRef &loc,unsigned char *ky)
2387 unsigned char nky[keysz];
2389 for( pageId s=st->rootPageId; s>=0; ) {
2390 keyBlock *sbb; Page *spp; char *sn;
2391 if_err( db->indexRead(s,0,sbb,spp,sn) );
2392 unsigned char *lp = (unsigned char *)sn;
2393 unsigned char *rp = lp + spp->iused();
2395 pageId r = sbb->right_link();
2397 if( r >= 0 ) l = getPageId(lp);
2398 unsigned char *kp = lp;
2400 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2401 int n = ustrcmp(ky, &nky[0]);
2404 int k = kp - (unsigned char *)sn;
2405 loc.offset = sizeof(keyBlock) + k;
2408 if( n < 0 ) { r = l; break; }
2416 int Db::IndexString::
2417 refFind(pgRef &loc, void *key)
2419 if( st->rootPageId == NIL ) Fail(errNotFound);
2423 if( CHK cFindCount > 2 ) ret = 1; }
2424 if( ret ) { // try the easy way
2425 ret = chkFind(loc,(char *)key,&lastFndKey[0]);
2428 if( !ret ) { // try the hard way
2429 if_fail( keyFind(loc, (unsigned char *)key) );
2433 ustrcpy(&lastAccKey[0],(unsigned char *)key);
2434 ustrcpy(&lastFndKey[0],&lastAccKey[0]);
2435 ustrcpy(&lastNxtKey[0],&lastAccKey[0]); }
2439 int Db::IndexString::
2440 Find(void *key, void *rtnData)
2443 if_fail( refFind(last, key) );
2445 if( !db->addrRead_(last,kp) ) {
2447 memmove(rtnData,kp,st->dataSz);
2454 * remap sectors on underflow
2455 * s - parent sector, t - pageId if overflowed
2456 * k - parent key offset
2457 * updates tky with parent data/key
2460 int Db::IndexString::
2461 keyUnderflow(pageId s, pageId &t, int k)
2463 unsigned char lky[keysz], nky[keysz];
2464 keyBlock *sbb; Page *spp; char *sn;
2465 if_err( db->indexRead(s,1,sbb,spp,sn) );
2466 unsigned char *lp = (unsigned char *)sn;
2467 unsigned char *rp = lp + spp->iused();
2468 unsigned char *kp = lp + k;
2469 unsigned char *mp = &tbfr[0];
2470 // mark continue underlow
2472 pageId r = sbb->right_link();
2473 lky[0] = nky[0] = 0;
2474 // if underflow, copy to parent key offset
2476 putPageId(mp,getPageId(lp));
2477 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2478 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2480 // read l, key, r -- remove key from parent block
2481 unsigned char *tp = &tky[0];
2482 pageId l = getPageId(lp);
2483 umemmove(tp,lp,st->dataSz); lp += st->dataSz;
2484 ustrcpy(tp,&lky[0]);
2485 for( int i=*lp++; (tp[i]=*lp++) != 0; ++i );
2487 putPageId(mp, r=getPageId(lp));
2488 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2489 ustrcpy(&nky[0],tp);
2490 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2491 mp += kpress(&nky[0],&lky[0],mp);
2492 umemmove(mp,lp,rp-lp);
2494 int n = mp-&tbfr[0];
2495 memmove(sn,&tbfr[0],n);
2498 keyBlock *lbb; Page *lpp; char *ln;
2499 if_err( db->indexRead(l,1,lbb,lpp,ln) );
2500 lp = (unsigned char *)ln;
2501 rp = lp + lpp->iused();
2502 pageId m = lbb->right_link();
2503 mp = &tbfr[0]; nky[0] = 0;
2505 if( m >= 0 ) putPageId(mp,getPageId(lp));
2506 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2507 for( int i=*mp++=*lp++; (nky[i]=*mp++=*lp++) != 0; ++i );
2509 // parent key to tbfr
2510 if( m >= 0 ) putPageId(mp,m);
2511 umemmove(mp,&tky[0],st->dataSz);
2512 mp += kpress(tp,&nky[0],mp);
2514 keyBlock *rbb; Page *rpp; char *rn;
2515 if_err( db->indexRead(r,1,rbb,rpp,rn) );
2516 lp = (unsigned char *)rn;
2517 rp = lp + rpp->iused();
2518 m = rbb->right_link();
2520 if( m >= 0 ) putPageId(mp,getPageId(lp));
2521 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2522 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2523 mp += kpress(&nky[0],tp,mp);
2524 umemmove(mp,lp,rp-lp);
2527 if( n > rpp->iallocated() ) {
2528 // tbfr too big for r
2529 if_err( split(n, -1, r, l, m) );
2530 t = l; // start overflow
2533 if( s == st->rootPageId && !spp->iused() ) {
2534 // if root, delete r
2535 memmove(sn,&tbfr[0],n);
2538 if_err( blockFree(r) );
2539 st->rightHandSide = s;
2542 // update r with tbfr
2543 memmove(rn,&tbfr[0],n);
2547 if_err( blockFree(l) );
2553 * remap sectors on overflow
2554 * s - parent sector, k - parent key offset, o - insertion offset
2555 * t parent link on input,
2556 * t == DDONE on output, end overflow
2557 * tky with parent data/key
2559 int Db::IndexString::
2560 keyOverflow(pageId s, pageId &t, int k, int o)
2562 unsigned char lky[keysz], nky[keysz];
2563 keyBlock *sbb; Page *spp; char *sn;
2564 if_err( db->indexRead(s,1,sbb,spp,sn) );
2565 unsigned char *lp = (unsigned char *)sn;
2566 unsigned char *rp = lp + spp->iused();
2567 unsigned char *kp = lp + k;
2568 unsigned char *mp = &tbfr[0];
2569 pageId r = sbb->right_link();
2570 lky[0] = nky[0] = 0;
2571 // copy parent up to parent key to tbfr
2573 putPageId(mp,getPageId(lp));
2574 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2575 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2577 // if at insertion point, add new key
2580 unsigned char *tp = &tky[0];
2581 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2582 mp += kpress(kp=tp, &lky[0], mp);
2586 // copy parent key, if insertion - add tky, copy rest
2588 ustrcpy(&nky[0],kp);
2589 putPageId(mp,getPageId(lp));
2590 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2591 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2592 mp += kpress(&lky[0],&nky[0],mp);
2595 unsigned char *tp = &tky[0];
2596 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2597 mp += kpress(tp, &lky[0], mp);
2598 ustrcpy(&lky[0],tp);
2601 putPageId(mp,getPageId(lp));
2602 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2603 ustrcpy(&nky[0],&lky[0]);
2604 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2605 mp += kpress(&lky[0],&nky[0],mp);
2607 umemmove(mp,lp,rp-lp);
2609 int n = mp - &tbfr[0];
2610 // if overflow, split sector and promote new parent key
2611 if( n > spp->iallocated() ) {
2612 t = NIL; lastAccess.id = NIL;
2613 if_ret( split(n, -1, s, t, r) );
2619 memmove(sn,&tbfr[0],n);
2624 int Db::IndexString::
2625 keyRemap(pageId s, pageId &t, int k, int o)
2628 if_err( keyUnderflow(s,t,k) );
2632 if_err( keyOverflow(s,t,k,o) );
2637 * delete or replace key
2638 * s - starting sector, nky - replacement key if != 0
2639 * t - returned state -2/done,-1/underflow,pageid/overflow
2642 int Db::IndexString::
2643 keyDelete(pageId s, pageId &t)
2645 unsigned char lky[keysz], nky[keysz];
2646 unsigned char *ky = &akey[0];
2648 keyBlock *sbb; Page *spp; char *sn;
2649 if_err( db->indexRead(s,1,sbb,spp,sn) );
2650 unsigned char *bp = (unsigned char *)sn;
2651 unsigned char *lp = bp;
2652 unsigned char *rp = lp + spp->iused();
2653 unsigned char *kp = lp;
2654 unsigned char *mp = 0;
2657 pageId r = sbb->right_link();
2658 lky[0] = nky[0] = 0;
2659 // mark delete done and search
2661 while( (kp=lp) < rp ) {
2663 if( r >= 0 ) l = getPageId(lp);
2665 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2666 if( idf <= 0 && (n=ustrcmp(ky,&nky[0])) <= 0)
2668 ustrcpy(&lky[0],&nky[0]);
2672 if( !idf ) Fail(errNotFound);
2673 // found greatest node less than ky, delete and return with underflow
2674 // deleted key must be on rt side of block to be next to interior key
2675 unsigned char *dp = &dky[0];
2676 umemmove(dp,mp,st->dataSz);
2677 ustrcpy(dp,&nky[0]);
2679 t = NIL; // start underflow
2682 // not found in block, try next level
2683 int status = keyDelete(kp<rp ? l : r,t);
2686 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2689 else if( r < 0 || idf < 0 ) { // found here
2692 int dsz = st->dataSz;
2693 if( r >= 0 ) dsz += sizeof(pageId);
2694 unsigned char *tp = &lky[0];
2696 if( idf < 0 ) { // translating data/key
2697 lsz = kpress(&dky[st->dataSz],tp,tp); // kpress dky to lky
2699 tp = &dky[st->dataSz];
2702 unsigned char *np = lp;
2705 lp += dsz; // get next key
2706 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2707 if( r < 0 ) ustrcpy(&lastAccKey[0],&nky[0]);// new curr key
2708 nsz = kpress(&nky[0],tp,&nky[0]);
2712 int slen = llen + mlen + rlen;
2713 unsigned char *sp = &tbfr[0];
2715 if( (llen > 0 || rlen > 0) && // at least 3 data/keys
2716 slen <= spp->iallocated() ) { // and fits in block
2717 sp = bp; mp = kp; // edit in place
2720 umemmove(mp,bp,llen); // use tbfr, copy left
2721 if( np < rp ) { // next exists
2723 unsigned char *ip = mp;
2724 if( idf < 0 ) ip += dsz + lsz;
2725 if( sp == bp && tp > lp ) {
2726 memmove(tp,lp,rlen); // up copy/move right
2727 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2728 memmove(ip,&nky[0],nsz);
2731 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2732 memmove(ip,&nky[0],nsz);
2734 memmove(tp,lp,rlen); // down copy/move right
2737 if( idf < 0 ) { // move exterior key
2738 if(r >= 0) putPageId(mp,getPageId(kp));
2739 umemmove(mp,&dky[0],st->dataSz); // add dky data/key
2740 umemmove(mp,&lky[0],lsz);
2742 if( sp == bp ) { // in place
2746 lastAccess.id = s; // position to curr
2747 lastAccess.offset = sizeof(keyBlock) + llen;
2748 ustrcpy(&lastAccKey[0],ky);
2753 if( slen > spp->iallocated() ) {
2754 if_ret( split(slen, -1, s, t, r) ); // continue with overflow
2757 memmove(sn,&tbfr[0],slen); // continue with underflow
2763 // deleting nonterminal key, translate to greatest node less than ky
2765 int status = keyDelete(l,t);
2768 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2774 int Db::IndexString::
2777 if( st->rootPageId == NIL ) Fail(errNotFound);
2778 unsigned char lky[keysz]; lky[0] = 0;
2779 pgRef *last = &lastDelete;
2780 unsigned char *lkey = &lastDelKey[0];
2782 if( CHK cDelCount > 2 ) { // try the easy way
2783 if( lastFind.id >= 0 ) { // chk find/delete
2784 if( !ustrcmp((unsigned char *)key,&lastFndKey[0]) ) {
2785 last = &lastFind; lkey = &lastFndKey[0];
2788 ret = chkFind(*last,(char *)key,lkey,&lky[0]);
2792 pageId s = lastAccess.id;
2793 keyBlock *sbb; Page *spp; char *sn;
2794 if_err( db->indexRead(s,1,sbb,spp,sn) );
2795 unsigned char *bp = (unsigned char *)sn;
2796 int slen = spp->iused();
2797 int llen = lastAccess.offset - sizeof(keyBlock);
2798 unsigned char *kp = bp + llen; // curr data/key
2799 unsigned char *lp = kp;
2800 unsigned char *rp = bp + slen;
2802 lp += st->dataSz; ++lp; while( *lp++ ); // skip curr
2806 unsigned char *np = lp;
2807 unsigned char nky[keysz]; nky[0] = 0;
2809 lp += st->dataSz; // get next key
2810 ustrcpy(&nky[0],(unsigned char *)key);
2811 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2812 nsz = kpress(&nky[0],&lky[0],&lky[0]);
2813 mlen += st->dataSz + nsz;
2816 slen = llen + mlen + rlen;
2817 if( (llen > 0 || rlen > 0 ) && // at least 3 data/keys
2818 slen <= spp->iallocated() ) { // and fits in block
2819 if( np < rp ) { // right exists
2820 unsigned char *tp = kp + mlen;
2821 umemmove(kp,np,st->dataSz); // next data/key
2822 memmove(kp,&lky[0],nsz);
2823 if( tp != lp && rlen > 0 )
2824 memmove(tp,lp,rlen); // move right down
2827 ustrcpy(&lastAccKey[0],&nky[0]); // new curr key
2832 if( !ret ) { // try the hard way
2833 ustrcpy(&this->akey[0],(unsigned char*)key);
2834 lastAccess.id = NIL;
2835 pageId t = NIL; idf = 0;
2836 if_ret( keyDelete(st->rootPageId,t) );
2839 if_ret( keyDelete(st->rootPageId,t) );
2841 if_err( UnmakeRoot() );
2843 ustrcpy(&lastDelKey[0],&lastAccKey[0]);
2849 int Db::IndexString::
2850 keyLocate(pgRef &last,pageId s, int &t, int op,
2851 unsigned char *ky, CmprFn cmpr, unsigned char *rky)
2853 unsigned char lky[keysz], nky[keysz];
2854 keyBlock *sbb; Page *spp; char *sn;
2855 if_err( db->indexRead(s,0,sbb,spp,sn) );
2857 pageId r = sbb->right_link();
2858 unsigned char *lp = (unsigned char *)sn;
2859 unsigned char *rp = lp + spp->iused();
2860 unsigned char *kp = 0, *mp = 0;
2861 lky[0] = nky[0] = 0;
2864 while( (kp=lp) < rp ) {
2865 if( r >= 0 ) l = getPageId(lp);
2866 kp = lp; lp += st->dataSz;
2867 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2868 int n = cmpr((char *)ky,(char *)&nky[0]);
2869 if( op <= keyEQ ? n <= 0 : n < 0 ) break;
2870 ustrcpy(&lky[0],&nky[0]);
2875 int status = keyLocate(last, (kp<rp ? l : r), t, op, ky, cmpr, rky);
2876 if( !t && status == errNotFound ) status = 0;
2881 if( op == keyLT || op == keyGE ) {
2882 if( !mp ) Fail(errNotFound);
2884 ustrcpy(rky,&lky[0]);
2887 if( kp >= rp ) Fail(errNotFound);
2888 ustrcpy(rky,&nky[0]);
2891 int k = kp - (unsigned char *)sn;
2892 last.offset = sizeof(keyBlock) + k;
2899 int Db::IndexString::
2900 refLocate(pgRef &loc, int op,void *key,CmprFn cmpr, unsigned char *rkey)
2902 if( st->rootPageId == NIL ) Fail(errNotFound);
2903 if( op == keyEQ ) op = keyLE;
2904 if( !cmpr ) cmpr = cmprStr;
2906 if_fail( keyLocate(loc,st->rootPageId,t,op,(unsigned char*)key,cmpr, rkey) );
2909 ustrcpy(&lastAccKey[0], rkey);
2910 ustrcpy(&lastNxtKey[0],&lastAccKey[0]); }
2914 int Db::IndexString::
2915 Locate(int op,void *key,CmprFn cmpr,void *rtnKey,void *rtnData)
2917 pgRef last; uint8_t rkey[keysz];
2918 if_fail( refLocate(last, op, key, cmpr, rkey) );
2920 ustrcpy((unsigned char *)rtnKey, rkey);
2923 if_err( db->addrRead_(last,kp) );
2924 memmove(rtnData,kp,st->dataSz);
2930 int Db::IndexString::
2931 Modify(void *key,void *recd)
2933 if_fail( Find(key,0) );
2935 if_err( db->addrWrite_(lastAccess,bp) );
2936 memmove(bp,recd,st->dataSz);
2940 int Db::IndexString::
2941 keyNext(pgRef &loc, unsigned char *rky)
2943 unsigned char lky[keysz]; lky[0] = 0;
2945 keyBlock *sbb; Page *spp; char *sn;
2946 if_err( db->indexRead(s,0,sbb,spp,sn) );
2947 unsigned char *bp = (unsigned char *)sn;
2948 int k = loc.offset - sizeof(keyBlock);
2949 unsigned char *lp = bp;
2950 unsigned char *kp = bp + k;
2951 unsigned char *rp = bp + spp->iused();
2952 if( kp >= rp ) Err(errInvalid);
2953 pageId r = sbb->right_link();
2954 if( &loc != &lastNext ) { // find last key
2955 while( lp <= kp ) { // until past last
2956 if( r >= 0 ) lp += sizeof(pageId);
2957 bp = lp; lp += st->dataSz;
2958 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2962 ustrcpy(&lky[0],&lastNxtKey[0]);
2963 lp = kp; lp += st->dataSz; // verify lastNext
2964 unsigned char *ip = &lky[*lp++];
2965 while( lp<rp && *lp!=0 && *ip==*lp ) { ++ip; ++lp; }
2966 if( *ip || *lp++ ) Fail(errInvalid); // bad lastNxtKey
2968 if( r < 0 && lp < rp ) { // exterior and more data
2970 bp = lp; lp += st->dataSz;
2971 for( int i=*lp++; lp<rp && (rky[i]=*lp) != 0; ++i,++lp );
2972 if( *lp ) Err(errCorrupt);
2973 loc.offset = (bp-(unsigned char *)sn) + sizeof(keyBlock);
2977 if_fail( keyLocate(loc,st->rootPageId,t,keyGT,lky,cmprStr, rky) );
2981 int Db::IndexString::
2982 Next(pgRef &loc,void *rtnKey,void *rtnData)
2984 if( st->rootPageId == NIL ) Fail(errNotFound);
2985 unsigned char rky[keysz];
2986 if_ret( keyNext(loc, rky) );
2988 ustrcpy(&lastAccKey[0], rky);
2990 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2993 if_err( db->addrRead_(loc,kp) );
2994 memmove(rtnData,kp,st->dataSz);
2996 if( &loc == &lastNext )
2997 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2998 lastAccess = lastNext = loc; }
3002 void Db::IndexString::
3008 IndexString(Db *zdb, int zidx, int dsz)
3009 : IndexBase(zdb, idxStr, zidx, 0, dsz)
3011 sst = new(st+1) IndexStringStorage();
3012 dky = new unsigned char[st->dataSz+keysz+1];
3013 tky = new unsigned char[st->dataSz+keysz+1];
3014 tbfr = new unsigned char[3*st->blockSize];
3019 IndexString(Db *zdb, IndexBaseStorage *b, IndexStringStorage *d)
3020 : IndexBase(zdb, *b)
3023 dky = new unsigned char[st->dataSz+keysz+1];
3024 tky = new unsigned char[st->dataSz+keysz+1];
3025 tbfr = new unsigned char[3*st->blockSize];
3030 IndexString(IndexBase *ib, IndexBaseStorage *b, IndexStringStorage *d)
3031 : IndexBase(ib->db, *b)
3033 sst = new(d) IndexStringStorage();
3048 * Db::IndexBase::Clear - clear index
3052 * returns 0 on success,
3053 * error code otherwise
3059 while( st->freeBlocks >= 0 ) {
3060 keyBlock *kbb; pgRef loc;
3061 pageId id = st->freeBlocks;
3062 loc.id = id; loc.offset = 0;
3063 if_err( db->addrWrite_(loc,kbb) );
3064 st->freeBlocks = kbb->right_link();
3067 if( st->rootPageId >= 0 ) {
3068 if_err( keyMap(st->rootPageId, &Db::IndexBase::blockRelease) );
3069 st->rootPageId = st->rightHandSide = NIL;
3078 if( st->rootPageId == NIL ) {
3079 pageId u; keyBlock *ubb; Page *upp; char *un;
3080 if_err( blockAllocate(u,ubb,upp,un) );
3082 ubb->right_link(NIL);
3083 st->rootPageId = st->rightHandSide = u;
3091 pageId u = st->rootPageId;
3092 Page *upp = db->get_page(u);
3093 if( !upp->iused() ) {
3094 if_err( blockFree(u) );
3095 st->rootPageId = st->rightHandSide = NIL;
3100 void Db::IndexBase::
3104 lastAccess.id = lastDelete.id = lastInsert.id =
3105 lastFind.id = lastNext.id = NIL;
3106 lastAccess.offset = lastDelete.offset = lastInsert.offset =
3107 lastFind.offset = lastNext.offset = 0;
3108 cFindCount = cDelCount = cInsCount = 0;
3111 void Db::IndexBase::
3114 if( lastAccess.id >= 0 && lastAccess.id == lastInsert.id )
3118 lastInsert = lastAccess;
3119 cDelCount = 0; lastDelete.id = NIL;
3120 cFindCount = 0; lastFind.id = NIL;
3121 lastOp = opInsert; lastNext.id = NIL;
3124 void Db::IndexBase::
3127 if( lastAccess.id >= 0 && lastAccess.id == lastDelete.id )
3131 lastDelete = lastAccess;
3132 cInsCount = 0; lastInsert.id = NIL;
3133 cFindCount = 0; lastFind.id = NIL;
3134 lastOp = opDelete; lastNext.id = NIL;
3137 void Db::IndexBase::
3138 chkLastFind(pgRef &last)
3140 if( last.id >= 0 && last.id == lastFind.id )
3151 IndexBaseType(int typ)
3154 memset(&name[0],0,sizeof(name));
3159 defaultBlockSizes[] = {
3161 defaultBinaryBlockSize, // idxBin
3162 defaultStringBlockSize, // idxStr
3166 IndexBaseRecd(int typ, int zidx, int ksz, int dsz)
3172 rightHandSide = NIL;
3174 count = 0; pad1 = 0;
3175 blockSize = defaultBlockSizes[typ];
3178 Db::IndexBaseStorage::
3179 IndexBaseStorage(int typ, int zidx, int ksz, int dsz)
3181 new((IndexTypeInfo *)this) IndexBaseType(typ);
3182 new((IndexRecdInfo *)this) IndexBaseRecd(typ, zidx, ksz, dsz);
3185 void Db::IndexBase::
3189 kdSz = st->keySz + st->dataSz;
3190 lastAccess.id = lastDelete.id = lastInsert.id =
3191 lastFind.id = lastNext.id = NIL;
3192 lastAccess.offset = lastDelete.offset = lastInsert.offset =
3193 lastFind.offset = lastNext.offset = 0;
3194 cFindCount = cDelCount = cInsCount = 0;
3198 IndexBase(Db *zdb, IndexBaseStorage &d)
3205 IndexBase(Db *db, int typ, int zidx, int ksz, int dsz)
3207 st = new(&db->index_info[zidx]) IndexBaseStorage(typ, zidx, ksz, dsz);
3218 /*** int Db::objectHeapInsert(int sz,int pg,int off)
3220 * insert a free space record into the entity heap
3223 * sz int record size
3225 * offset int record offset
3227 * returns zero on success, error code on failure
3231 objectHeapInsert(int sz,int id,int offset,AllocCache &cache)
3233 freeSpaceRecord free;
3234 free.size = sz; free.id = id; free.offset = offset;
3235 if_err( freeSpaceIndex->Insert(&free,0) );
3236 addrSpaceRecord addr;
3237 addr.id = id; addr.offset = offset; addr.size = sz;
3238 if_err( addrSpaceIndex->Insert(&addr,0) );
3242 /*** int Db::objectHeapDelete(int sz,int pg,int off)
3244 * delete a free space record from the entity heap
3247 * sz int record size
3249 * offset int record offset
3251 * returns zero on success, error code on failure
3255 objectHeapDelete(int sz,int id,int offset,AllocCache &cache)
3257 freeSpaceRecord free;
3258 free.size = sz; free.id = id; free.offset = offset;
3259 if_err( freeSpaceIndex->Delete(&free) );
3260 addrSpaceRecord addr;
3261 addr.id = id; addr.offset = offset; addr.size = sz;
3262 if_err( addrSpaceIndex->Delete(&addr) );
3266 /*** int Db::pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3268 * find existing persistent free storage.
3271 * size int input/output requested/allocated size
3272 * loc pgRef & output locator returned for new storage
3273 * cache AllocCache& output allocator cache to operate
3275 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3279 pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3281 freeSpaceRecord look, find;
3282 look.size = size; look.id = 0; look.offset = 0;
3283 int status = freeSpaceIndex->Locate(keyGE, &look, 0, &find, 0);
3284 if( status == errNotFound ) return 1;
3286 // if record is at offset 0, need extra space for prefix
3287 while( !find.offset && look.size+(int)sizeof(pagePrefix) > find.size ) {
3288 status = freeSpaceIndex->Next(&find, 0);
3289 if( status == errNotFound ) return 1;
3292 if_err( objectHeapDelete(find.size,find.id,find.offset,cache) );
3294 loc.offset = find.offset ? find.offset : sizeof(pagePrefix);
3295 Page &pg = *get_page(loc.id);
3296 unsigned int ofs = loc.offset + look.size;
3297 if( ofs > pg->used ) pg->used = ofs;
3298 int sz = find.offset+find.size - ofs;
3299 if( sz >= min_heap_allocation ) {
3300 //if_err( objectHeapInsert(sz,find.id,ofs,cache) );
3301 if_err( cache.Load(this, find.id, ofs, sz) );
3304 size = look.size + sz;
3308 /*** int Db::pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3310 * allocate new persistent free storage.
3313 * size int input/output requested/allocated size
3314 * loc pgRef & output locator returned for new storage
3315 * cache AllocCache& output allocator cache to operate
3317 * returns: zero on success, error code.
3321 pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3323 pageId id = new_page();
3325 unsigned int sz = size + sizeof(pagePrefix);
3326 sz = entityPages(sz) * entityPageSize;
3327 Page &pg = *get_page(id);
3328 if( pg->allocated != sz ) {
3334 loc.offset = sizeof(pagePrefix);
3335 int used = loc.offset + size;
3336 int free = pg->allocated - used;
3337 if( free >= min_heap_allocation ) {
3338 //if_err( objectHeapInsert(free,id,used,cache) );
3339 if_err( cache.Load(this, id, used, free) );
3342 size = pg->allocated - loc.offset;
3346 /*** int Db::pgRefAllocate(int &size, pgRef &loc)
3348 * find persistent free storage. existing/new
3349 * does not allocate virtual memory storage
3352 * size int input/output requested/allocated size
3353 * loc pgRef & output locator returned for new storage
3354 * cache AllocCache& output allocator cache to operate
3356 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3360 pgRefAllocate(int &size, pgRef &loc, AllocCache &cache)
3362 int status = pgRefGet(size, loc, cache);
3365 if_err( pgRefNew(size, loc, cache) );
3369 /*** int Db::objectAllocate(int typ, int &size, pgRef &loc)
3371 * find persistent free storage. existing/new
3372 * does allocate virtual memory storage
3373 * inits pagePrefix, inits allocPrefix
3376 * type int input entity type id
3377 * size int input/output requested/allocated size
3378 * loc pgRef & output locator returned for new storage
3379 * cache AllocCache& output allocator cache to operate
3381 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3385 objectAllocate(int typ, int &size, pgRef &loc, AllocCache &cache)
3387 int status = cache.Get(this, size, loc);
3390 if_err( pgRefAllocate(size, loc, cache) );
3391 Page &pg = *get_page(loc.id);
3393 pagePrefix *bpp; pgRef ref;
3394 ref.id = loc.id; ref.offset = 0;
3395 if_err( addrWrite_(ref,bpp) );
3396 bpp->magic = page_magic;
3397 bpp->type = pg_entity + typ;
3398 pg->type = bpp->type;
3400 unsigned int sz = loc.offset + size;
3407 /*** int Db::objectFree(pgRef &loc)
3409 * Purpose: Immediate release of entity memory
3412 * loc pgRef & Page/offset
3414 * returns zero on success, error code on failure
3417 int Db::objectFree(pgRef &loc,AllocCache &cache)
3420 if_err( addrRead_(loc,mp) );
3421 if( mp->magic != entity_magic ) Err(errBadMagic);
3422 addrSpaceRecord addr, prev, next;
3423 /* get prev, next addrSpace free heap items near this addr */
3424 addr.size = mp->size;
3426 addr.offset = loc.offset;
3427 int status = addrSpaceIndex->Locate(keyLT,&addr,0,&prev,0);
3428 if( status == errNotFound ) {
3430 status = addrSpaceIndex->Locate(keyGT,&addr,0,&next,0);
3433 status = addrSpaceIndex->Next(&next,0);
3434 if( status == errNotFound ) {
3439 /* merge with prev if possible */
3440 if( prev.id == addr.id &&
3441 prev.offset + prev.size == addr.offset ) {
3442 if_err( objectHeapDelete(prev.size,prev.id,prev.offset,cache) );
3443 addr.offset = prev.offset;
3444 addr.size += prev.size;
3446 /* merge with next if possible */
3447 if( addr.id == next.id &&
3448 addr.offset + addr.size == next.offset ) {
3449 if_err( objectHeapDelete(next.size,next.id,next.offset,cache) );
3450 addr.size += next.size;
3452 /* reduce used block bytes if possible */
3453 Page &pg = *get_page(loc.id);
3454 if( pg->used <= addr.offset + addr.size )
3455 pg->used = addr.offset;
3456 // if only page prefix, release page, otherwise save new fragment
3457 if( pg->used == sizeof(pagePrefix) )
3460 if_err( objectHeapInsert(addr.size,addr.id,addr.offset,cache) );
3464 /*** int Db::blockAllocate(pageId *pid, keyBlock *&pkbb)
3466 * allocate persistent storage.
3469 * pageId & pid page allocated
3470 * keyBlock *& pkbb keyblock* pointer
3474 blockAllocate(pageId &pid, keyBlock *&pkbb)
3476 locked by(db->db_info->blkAlLk);
3477 pageId id; Page *kpp;
3478 pgRef loc; keyBlock *kbb;
3479 if( st->freeBlocks != NIL ) {
3481 id = st->freeBlocks;
3482 loc.id = id; loc.offset = 0;
3483 if_err( db->addrWrite_(loc,kbb) );
3484 st->freeBlocks = kbb->right_link();
3485 Page &kpg = *db->get_page(id);
3486 if( kpg->allocated != st->blockSize ) {
3487 db->pageDealloc(kpg);
3488 kpg->allocated = st->blockSize;
3489 if_err( db->addrWrite_(loc, kbb) );
3494 id = db->new_page();
3496 loc.id = id; loc.offset = 0;
3497 Page &kpg = *db->get_page(id);
3498 kpg->allocated = st->blockSize;
3499 if_err( db->addrWrite_(loc, kbb) );
3502 kbb->magic = page_magic;
3504 kbb->type = pg_index + st->idx;
3507 kpg->type = kbb->type;
3513 /*** int Db::blockFree(pageId pid)
3515 * Purpose: delayed release database memory
3518 * pid int input Page
3520 * returns zero on success, error code on failure
3524 blockFree(pageId pid)
3526 locked by(db->db_info->blkAlLk);
3528 pageId id = st->freeBlocks; pgRef loc;
3529 loc.id = NIL; loc.offset = 0;
3532 while( id >= 0 && id < pid ) {
3534 if_err( db->addrRead_(loc,kbb) );
3535 id = kbb->right_link();
3539 if_err( db->addrWrite_(loc,kbb) );
3540 kbb->right_link(pid);
3543 st->freeBlocks = pid;
3545 if_err( db->addrWrite_(loc,kbb) );
3546 kbb->right_link(id);
3547 Page *kpp = db->get_page(pid);
3553 blockRelease(pageId pid)
3555 Page *pp = db->get_page(pid);
3556 return pp->release();
3560 blockLoad(pageId pid)
3562 pgRef loc; char *op = 0;
3563 loc.id = pid; loc.offset = 0;
3564 return db->addrRead(loc, op);
3567 /*** int Db::deleteFreeBlock()
3569 * Purpose: release database memory/storage
3571 * returns zero on success, error code on failure
3578 while( (id=st->freeBlocks) != NIL ) {
3579 keyBlock *kbb; pgRef loc;
3580 loc.id = id; loc.offset = 0;
3581 if_err( db->addrRead_(loc,kbb) );
3582 st->freeBlocks = kbb->right_link();
3583 Page &pg = *db->get_Page(id);
3584 if_err( db->storeFree(pg->wr_allocated, pg->wr_io_addr) );
3585 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
3586 if_err( db->storeFree(pg->io_allocated, pg->io_addr) );
3587 pg->io_allocated = 0; pg->io_addr = NIL;
3593 /*** int Db::storeInsert(int sz, ioAddr io_addr)
3595 * insert a storage record into the storage heap
3599 * io_addr ioAddr blob addr
3601 * returns zero on success, error code on failure
3605 storeInsert(long sz, ioAddr io_addr)
3607 //dmsg(-1, "storeInsert %08lx/%012lx\n",sz,io_addr);
3608 freeStoreRecord free;
3609 free.size = sz; free.io_addr = io_addr;
3610 if_err( freeStoreIndex->Insert(&free,0) );
3612 addrStoreRecord addr;
3613 addr.io_addr = io_addr; addr.size = sz;
3614 if_err( addrStoreIndex->Insert(&addr,0) );
3619 /*** int Db::storeDelete(int sz, ioAddr io_addr)
3621 * delete a storage record from the storage heap
3625 * io_addr ioAddr blob addr
3627 * returns zero on success, error code on failure
3631 storeDelete(long sz, ioAddr io_addr)
3633 //dmsg(-1, "storeDelete %08lx/%012lx\n",sz,io_addr);
3634 freeStoreRecord free;
3635 free.size = sz; free.io_addr = io_addr;
3636 if_err( freeStoreIndex->Delete(&free) );
3638 addrStoreRecord addr;
3639 addr.io_addr = io_addr; addr.size = sz;
3640 if_err( addrStoreIndex->Delete(&addr) );
3645 /*** int Db::storeGet(int &size, ioAddr &io_addr)
3647 * allocate storage record from the storage heap
3649 * size int input/output requested/allocated blob size
3650 * io_addr ioAddr output blob addr
3652 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3656 storeGet(int &size, ioAddr &io_addr)
3658 freeStoreRecord look, find;
3659 look.size = size; look.io_addr = 0;
3660 int status = freeStoreIndex->Locate(keyGE, &look,0, &find,0);
3661 if( status == errNotFound ) return 1;
3663 if_err( storeDelete(find.size,find.io_addr) );
3664 io_addr = find.io_addr;
3665 long sz = find.size - look.size;
3666 if( sz >= min_heap_allocation ) {
3667 /* put back extra */
3668 find.size = sz; find.io_addr += look.size;
3669 if_err( storeInsert(find.size,find.io_addr) );
3672 look.size = find.size;
3677 /*** int Db::storeNew(int &size, ioAddr &io_addr)
3679 * allocate storage by extending file
3681 * size int input/output requested/allocated blob size
3682 * io_addr ioAddr output blob addr
3684 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3688 storeNew(int &size, ioAddr &io_addr)
3690 if_err( seek_data(root_info->file_size) );
3691 io_addr = root_info->file_size;
3692 size = storeBlocks(size) * storeBlockSize;
3693 /* make sure file storage exists */
3694 if_err( write_padding(root_info->file_size + size) );
3698 /*** int Db::storeAllocate(int &size, ioAddr &io_addr)
3700 * find persistent free storage. existing/new
3701 * does not allocate virtual memory storage
3704 * size int input/output requested/allocated size
3705 * io_addr ioAddr & output returned storage address
3707 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3711 storeAllocate(int &size, ioAddr &io_addr)
3713 int status = storeGet(size, io_addr);
3715 status = storeNew(size, io_addr);
3720 /*** int Db::storeFree(int size, ioAddr io_addr)
3722 * Immediate release of store heap
3724 * size int input blob size
3725 * io_addr ioAddr input blob addr
3727 * returns zero on success, error code on failure
3731 storeFree(int size, ioAddr io_addr)
3733 if( io_addr < 0 || size == 0 ) return 0;
3734 /* get prev, next addrStore heap items near this io_addr */
3735 addrStoreRecord addr, prev, next;
3736 addr.io_addr = io_addr; addr.size = size;
3737 int status = addrStoreIndex->Locate(keyLT,&addr,0, &prev,0);
3738 if( status == errNotFound ) {
3740 status = addrStoreIndex->Locate(keyGT,&addr,0, &next,0);
3743 status = addrStoreIndex->Next(&next,0);
3744 if( status == errNotFound ) {
3749 /* merge with prev if possible */
3750 if( prev.io_addr >= 0 && prev.io_addr + prev.size == addr.io_addr ) {
3751 if_err( storeDelete(prev.size,prev.io_addr) );
3752 addr.io_addr = prev.io_addr;
3753 addr.size += prev.size;
3755 /* merge with next if possible */
3756 if( next.io_addr >= 0 && addr.io_addr + addr.size == next.io_addr ) {
3757 if_err( storeDelete(next.size,next.io_addr) );
3758 addr.size += next.size;
3761 return storeInsert(addr.size,addr.io_addr);
3764 /*** int Db::pageLoad(pageId id)
3769 * id pageId pageTable element to load
3774 pageLoad(pageId id, Page &pg)
3777 uint8_t *bp = (uint8_t*)pg.addr;
3778 int rd = pg->chk_flags(fl_rd);
3779 int shmid = pg->shmid, used = pg->used;
3781 if( no_shm || shmid < 0 ) {
3782 bp = new_uint8_t(pg->allocated, shmid, id);
3783 if( used ) rd = fl_rd;
3786 bp = get_uint8_t(shmid, id);
3789 if( !bp ) Err(errNoMemory);
3791 if( likely(rd && used > 0) ) {
3792 if_err( pageRead(pg->io_addr, bp, used) );
3794 pg.addr = (pagePrefix*)bp;
3796 pg->clr_flags(fl_rd);
3800 /*** int Db::pageRead(ioAddr io_adr, uint8_t *bp, int len)
3802 * read data from the database file
3805 * pg Page input Page to read
3810 pageRead(ioAddr io_adr, uint8_t *bp, int len)
3812 //locked by(db_info->pgLdLk);
3813 //if_err( seek_data(io_adr) );
3814 //if_err( read_data((char*)bp, len) );
3815 long sz = pread(fd, bp, len, io_adr);
3816 if( len != sz ) Err(errIoRead);
3817 file_position = io_adr+len;
3818 pagePrefix *bpp = (pagePrefix *)bp;
3819 if( bpp->magic != page_magic ) Err(errBadMagic);
3823 /*** int Db::pageWrite(Page *pp)
3825 * writes data to the database file
3828 * pg Page input Page to write
3835 pagePrefix *bpp = pg.addr;
3836 // when io is block aligned and not disk block sized, but
3837 // the allocated areas exceed disk block size, increase
3838 // write to whole blocks to prevent read/modify/write.
3839 int len = bpp->used = pg->used;
3840 unsigned int blen = (len+0xfff) & ~0xfff;
3841 if( blen > pg->allocated ) blen = pg->allocated;
3842 if_err( seek_data(pg->wr_io_addr) );
3843 if_err( write_data((char *)bpp, blen) );
3844 pg->trans_id = active_transaction;
3848 // deallocate page data buffer
3849 // mode<0: delete without shm deallocate
3850 // mode=0: delete and deallocate written page
3851 // mode>0: delete and deallocate page (default)
3854 pageDealloc(Page &pg, int mode)
3856 uint8_t *bp = (uint8_t *)pg.addr;
3857 //int pg=page_table_sz; while( --pg>=0 && pp!=pageTable[pg] );
3858 if( del_uint8_t(bp, pg.shm_id /*, pg*/) <= 1 ||
3859 mode > 0 || (!mode && pg->chk_flags(fl_wr)) )
3865 int Db::AllocCache::
3869 if_ret( db->objectHeapInsert(avail,loc.id,loc.offset,*this) );
3875 int Db::AllocCache::
3876 Get(Db *db,int &size, pgRef &ref)
3879 int isz = avail - size;
3881 unsigned int sz = isz;
3882 if( sz < min_heap_allocation ) {
3885 ref = loc; avail = sz;
3886 if( sz == 0 ) loc.id = NIL;
3887 sz = (loc.offset += size);
3888 Page &pg = *db->get_page(ref.id);
3889 if( pg->used < sz ) pg->used = sz;
3892 if_ret( cacheFlush(db) );
3897 int Db::AllocCache::
3898 Load(Db *db, pageId id, int ofs, int sz)
3902 if_ret( db->objectHeapInsert(sz,id,ofs,*this) );
3912 cacheDelete(AllocCache &cache)
3914 freeSpaceRecord free;
3915 if( !freeSpaceIndex->First(&free,0) ) do {
3916 // printf("free=%04lx %d/%d\n", free.size,free.id,free.offset);
3917 objectHeapInsert(free.size, free.id, free.offset,alloc_cache);
3918 } while( !freeSpaceIndex->Next(&free,0) );
3919 indecies[cache.freeIdx]->Clear();
3920 indecies[cache.addrIdx]->Clear();
3921 del_index(cache.freeIdx);
3922 del_index(cache.addrIdx);
3927 int Db::cache_all_flush()
3929 Entity e(this); EntityLoc &ent = e.ent; int ret;
3930 if( !(ret=entityIdIndex->First(0,&ent.obj)) ) do {
3931 if_err( ent.cacheFlush() );
3932 } while( !(ret=entityIdIndex->Next(0,&ent.obj)) );
3933 if( ret == errNotFound ) ret = 0;
3935 if_err( cacheFlush() );
3940 allocate(int typ, int size, pgRef &loc, AllocCache &cache)
3942 locked by(db_info->objAlLk);
3944 int sz = size + sizeof(allocPrefix);
3945 // granularity is sizeof pagePrefix
3946 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3947 if_err( objectAllocate(typ, sz, loc, cache) );
3949 if_err( addrWrite_(loc,mp) );
3950 mp->magic = entity_magic;
3957 deallocate(pgRef &loc, AllocCache &cache)
3959 locked by(db_info->objAlLk);
3960 cache.cacheFlush(this);
3961 if( loc.id < 0 ) return 0;
3962 if_fail( objectFree(loc, cache) );
3963 loc.id = NIL; loc.offset = 0;
3968 reallocate(int size, pgRef &loc, AllocCache &cache)
3970 int sz = size + sizeof(allocPrefix);
3971 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3972 int typ = 0, msz = 0;
3975 if_err( addrRead_(loc,mp) );
3981 ref.id = NIL; ref.offset = 0;
3983 if_err( allocate(typ, size, ref, cache) );
3984 if( (msz-=sizeof(allocPrefix)) > 0 ) {
3985 char *bp = 0, *cp = 0;
3986 if_err( addrRead(loc,bp) );
3987 if_err( addrWrite(ref,cp) );
3988 if( msz > size ) msz = size;
3992 if_err( deallocate(loc, cache) );
4000 int Db::pack::aligned = 0;
4002 void Db::pack::put(uint64_t v, int n)
4004 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
4007 int k = 8-n-(idx&7), m = (1<<n)-1;
4008 bits[i] = (bits[i] & ~(m<<k)) | (v<<k);
4012 case 8: *((uint8_t *)&bits[i]) = v; break;
4013 case 16: *((uint16_t*)&bits[i]) = htobe16(v); break;
4014 case 32: *((uint32_t*)&bits[i]) = htobe32(v); break;
4015 case 64: *((uint64_t*)&bits[i]) = htobe64(v); break;
4022 if( !aligned && n <= 32 ) {
4023 int i = idx/32, k = 64-n-idx%32;
4024 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
4025 *bp = htobe64(((be64toh(*bp) & ~(m<<k)) | (v<<k)));
4031 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
4032 uint64_t *bp = (uint64_t*)&bits[8*i];
4033 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1; v &= m;
4034 b = l>=0 ? (b & ~(m<<l)) | (v<<l) : (b & ~(m>>-l)) | (v>>-l);
4042 uint64_t Db::pack::get(int n)
4045 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
4048 int k = 8-n-(idx&7), m = (1<<n)-1;
4049 v = (bits[i]>>k) & m;
4053 case 8: v = *((uint8_t *)&bits[i]); break;
4054 case 16: v = be16toh(*(uint16_t*)&bits[i]); break;
4055 case 32: v = be32toh(*(uint32_t*)&bits[i]); break;
4056 case 64: v = be64toh(*(uint64_t*)&bits[i]); break;
4063 if( !aligned && n <= 32 ) {
4064 int i = idx/32, k = 64-n-idx%32;
4065 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
4066 v = (be64toh(*bp) >> k) & m;
4072 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
4073 uint64_t *bp = (uint64_t*)&bits[8*i];
4074 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1;
4075 uint64_t vv = l>=0 ? (b>>l) & m : b & (m>>-l);
4084 int64_t Db::mediaKey::bit_size(int len, int w)
4087 for( int i=0, m=len; m>1; ++i ) {
4089 int b = m * (i+w+1);
4090 b = (b+pack::alignBits-1) & ~(pack::alignBits-1);
4096 int64_t Db::mediaKey::byte_size(int len, int w)
4098 int64_t sz = bit_size(len, w);
4099 sz = (sz+(8*sizeof(uint64_t)-1)) & ~(8*sizeof(uint64_t)-1);
4100 return sz/8 + 2*sizeof(int)+sizeof(int64_t);
4103 int64_t Db::mediaKey::count1(uint8_t *kp)
4106 int w = pk.iget(), len = pk.iget();
4107 pk.seek(pk.pos()+sizeof(int64_t)*8);
4108 int k = high_bit_no(len-1);
4110 int64_t z = (uint64_t)1<<sz++;
4111 return pk.get(sz) - z;
4115 mediaKey(uint8_t *kp, uint8_t *bp, int w, int len)
4118 pk.iput(this->w = w);
4119 pk.iput(this->len = len);
4123 pk.lput(this->cnt = pb.get(w));
4127 // allocate memory, every level length m requires (m+1)/2 differences
4128 // need an extra one when length is power of 2
4129 int k = high_bit_no(len-1) + 1;
4130 struct { int64_t cnt, sz, *wgt; } lt[k+1], rt[k+1];
4131 for( int i=0, m=len; i<=k; ++i ) {
4133 lt[i].wgt = new int64_t[m];
4134 rt[i].wgt = new int64_t[m];
4135 lt[i].sz = rt[i].sz = 0;
4136 lt[i].cnt = rt[i].cnt = 0;
4140 for( int i=0,l=0; ++i<=len; l=i ) {
4142 int m = i&1 ? 0 : high_bit_no(i ^ l); // highest flipped bit
4143 rt[m].cnt = n; // 0->1, begin right
4144 for( int j=0; j<m; ++j ) {
4145 rt[j].wgt[rt[j].sz++] = n-rt[j].cnt;// 1->0, end right
4146 lt[j].cnt = n; // 1->0, begin left
4148 lt[m].wgt[lt[m].sz++] = n-lt[m].cnt; // 0->1, end left
4151 for( int i=0, m=len; m>1; ++i ) {
4153 if( lt[i].sz < m ) { // finish left
4154 lt[i].wgt[lt[i].sz++] = n-lt[i].cnt;
4157 if( lt[i].sz != rt[i].sz ) // finish right
4158 rt[i].wgt[rt[i].sz++] = n-rt[i].cnt;
4163 for( int i=k; --i>=0; ) {
4164 n = (uint64_t)1<<(i+w); // offset for signed difference
4166 for( int j=0; j<sz; ++j ) {
4167 uint64_t v = (lt[i].wgt[j]-rt[i].wgt[j]) + n;
4168 pk.put(v, i+w+1); // output pair differences
4170 if( (n=pk.pos()%pack::alignBits) != 0 ) // align next level
4171 pk.put(0, pack::alignBits-n);
4174 for( int i=0; i<=k; ++i ) {
4175 delete [] lt[i].wgt;
4176 delete [] rt[i].wgt;
4187 mediaLoad(uint8_t *bp)
4190 w = pb.iget(); len = pb.iget(); cnt = pb.lget();
4192 psz = high_bit_no(len-1)+1;
4193 if( psz < 1 ) psz = 1;
4202 void Db::mediaLoad::
4203 get_fields(int n, int k)
4207 dp[k] = dat; dat += m;
4210 int64_t *ap = dp[k];
4212 int64_t z = (uint64_t)1<<sz++;
4214 int64_t *avp = dp[k-1];
4215 for( int j=n/2; --j>=0; ) {
4216 int64_t av = pb.get(sz)-z, a = *ap++;
4217 *avp++ = (a+av)/2; *avp++ = (a-av)/2;
4220 int64_t av = pb.get(sz)-z, a = *ap;
4225 int64_t *ap = dp[0];
4226 for( int j=n/2; --j>=0; ) {
4227 int64_t av = pb.get(sz)-z, a = *ap++;
4228 pk.putc((a+av)/2, w); pk.putc((a-av)/2, w);
4231 int64_t av = pb.get(sz)-z, a = *ap;
4232 pk.putc((a+av)/2, w);
4238 void Db::mediaLoad::
4241 pb.seek(spos); pk.init(kp);
4242 pk.set_max((1<<w)-1);
4243 int64_t d[dsz]; dat = d;
4244 int64_t *p[psz]; dp = p;
4246 for( int i=psz-1; --i>=0; ) dp[i] = 0;
4255 mediaCmpr(uint8_t *bp)
4257 this->err = 0; this->lmt = ~0;
4258 pb.init(bp); w = pb.iget(); len = pb.iget();
4260 psz = high_bit_no(len-1)+1;
4261 if( psz < 1 ) psz = 1;
4270 uint64_t Db::mediaCmpr::
4271 chk_fields(int n, int k)
4275 adp[k] = adat; adat += m;
4276 bdp[k] = bdat; bdat += m;
4277 if( chk_fields(m, k+1) > lmt ) return err;
4279 int64_t *ap = adp[k], *bp = bdp[k];
4280 //uint64_t serr = 0;
4281 err = 0; int sz = k+w;
4282 int64_t z = (uint64_t)1<<sz++;
4284 int64_t *avp = adp[k-1], *bvp = bdp[k-1];
4285 for( int j=n/2; --j>=0; ) {
4286 int64_t a = *ap++, b = *bp++;
4287 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4288 int64_t alt = (a+av)/2, art = (a-av)/2;
4289 int64_t blt = (b+bv)/2, brt = (b-bv)/2;
4290 //serr += sqr(alt-blt) + sqr(art-brt);
4291 *avp++ = alt; *avp++ = art;
4292 *bvp++ = blt; *bvp++ = brt;
4293 err += labs(alt-blt) + labs(art-brt);
4296 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4297 int64_t a = *ap, b = *bp;
4298 int64_t alt = (a+av)/2, blt = (b+bv)/2;
4299 //serr += sqr(alt-blt);
4300 *avp++ = alt; *bvp++ = blt;
4301 err += labs(alt-blt);
4305 int64_t *ap = adp[0], *bp = bdp[0];
4306 for( int j=n/2; --j>=0; ) {
4307 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4308 int64_t a = *ap++, b = *bp++;
4309 int64_t v = a-b, dv = av-bv;
4310 //serr += sqr(v) + sqr(dv);
4311 err += labs(v+dv)/2 + labs(v-dv)/2;
4315 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4316 int64_t a = *ap, b = *bp;
4317 int64_t v = a-b, dv = av-bv, d = v + dv;
4322 pb.align(); pk.align();
4323 //printf("n=%d,k=%d,lerr=%lu,err=%lu\n",n,k,lerr,(int64_t)sqrt((double)serr));
4327 uint64_t Db::mediaCmpr::
4328 chk(uint8_t *kp, uint64_t lmt)
4330 pb.seek(spos); pk.init(kp); err = 0;
4331 if( pk.iget() != w || len != pk.iget() ) return ~0;
4332 acnt = pb.lget(); bcnt = pk.lget();
4333 //if( (err=sqr(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4334 if( (err=labs(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4335 int64_t a[dsz], b[dsz]; adat = a; bdat = b;
4336 int64_t *ap[psz], *bp[psz]; adp = ap; bdp = bp;
4337 adp[psz-1] = &acnt; bdp[psz-1] = &bcnt;
4338 for( int i=psz-1; --i>=0; ) adp[i] = bdp[i] = 0;
4339 return len > 1 ? chk_fields(len, 0) : err;
4343 cmpr(uint8_t *kp, uint64_t lmt)
4345 pb.seek(spos); pk.init(kp);
4346 if( pk.iget() != w || len != pk.iget() ) return ~0;
4347 acnt = pb.lget(); bcnt = pk.lget();
4348 if( acnt < bcnt ) return -1;
4349 if( acnt > bcnt ) return 1;
4350 if( len == 1 ) return 0;
4351 int bsz = Db::mediaKey::bit_size(len, w);
4352 int i = bsz / (8*sizeof(uint64_t));
4353 uint64_t *ap = (uint64_t *)pb.addr(), *bp = (uint64_t *)pk.addr();
4355 if( *ap != *bp ) return be64toh(*ap)<be64toh(*bp) ? -1 : 1;
4358 if( (i=bsz % (8*sizeof(uint64_t))) > 0 ) {
4359 int l = (8*sizeof(uint64_t)) - i;
4360 uint64_t av = be64toh(*ap)>>l, bv = be64toh(*bp)>>l;
4361 if( av != bv ) return av<bv ? -1 : 1;
4370 start_transaction(int undo_save)
4372 //traceback(" start_transaction %d\n",my_pid);
4374 // activate written pages
4375 for( id=0; id<root_info->pageTableUsed; ++id ) {
4376 Page &pg = *get_Page(id);
4377 if( !pg.addr && pg->used ) pg->set_flags(fl_rd);
4378 if( !pg->chk_flags(fl_new) ) continue;
4379 int io_allocated = pg->io_allocated;
4380 pg->io_allocated = pg->wr_allocated;
4381 pg->wr_allocated = io_allocated;
4382 ioAddr io_addr = pg->io_addr;
4383 pg->io_addr = pg->wr_io_addr;
4384 pg->wr_io_addr = io_addr;
4386 // release inactivate page storage
4387 for( id=0; id<root_info->pageTableUsed; ++id ) {
4388 Page &pg = *get_Page(id);
4389 if( !pg->chk_flags(fl_new) ) continue;
4390 pg->clr_flags(fl_new);
4391 if_err( storeFree(pg->wr_allocated, pg->wr_io_addr) );
4392 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
4393 if( pg->used ) continue;
4394 if_err( storeFree(pg->io_allocated, pg->io_addr) );
4395 pg->io_allocated = 0; pg->io_addr = NIL;
4400 addrStoreRecord addr;
4401 int status = addrStoreIndex->Last(&addr,0);
4403 if( addr.io_addr+addr.size == root_info->file_size ) {
4404 if_err( storeDelete(addr.size, addr.io_addr) );
4405 ioAddr fsz = storeBlocks(addr.io_addr) * storeBlockSize;
4406 if( root_info->file_size > fsz ) {
4407 status = ftruncate(fd, fsz);
4408 if( status ) Err(errIoWrite);
4409 addr.size = fsz - addr.io_addr;
4411 if_err( storeInsert(addr.size, addr.io_addr) );
4412 root_info->file_size = fsz;
4417 if( status != errNotFound ) Err(status);
4419 for( int idx=0; idx<root_info->indeciesUsed; ++idx )
4420 if( indecies[idx] ) indecies[idx]->deleteFreeBlocks();
4422 // move root pages lower if possible
4423 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
4424 IndexBase *bip = indecies[idx];
4425 if( !bip ) continue;
4426 bip->chkLastReset();
4427 pageId r = bip->st->rootPageId;
4428 if( r < 0 ) continue;
4429 if( r != bip->st->rightHandSide ) continue;
4430 bip->st->rootPageId = bip->st->rightHandSide = lower_page(r);
4433 // truncate pageTable
4434 for( id=root_info->pageTableUsed; id>0; --id ) {
4435 Page &pg = *get_Page(id-1);
4436 if( pg->used ) break;
4438 page_table_sz = root_info->pageTableUsed = id;
4440 // remove released pages from free list
4442 id = root_info->freePages;
4444 Page &pg = *get_Page(id);
4445 pageId next_id = pg->link;
4446 if( id < root_info->pageTableUsed ) {
4447 if( prev ) prev->st->link = id;
4448 else root_info->freePages = id;
4455 if( prev ) prev->st->link = NIL;
4456 else root_info->freePages = NIL;
4458 if( pageTableHunks(root_info->pageTableUsed)*pageTableHunkSize < pageTableAllocated )
4459 alloc_pageTable(root_info->pageTableUsed);
4461 if( undo_save ) undata.save(this);
4462 active_transaction = ++root_info->transaction_id;
4468 #define bfr_sz 0x40000
4473 bfr = inp = new char[bfr_sz];
4474 memset(bfr, 0, bfr_sz);
4482 int ret = flush_bfr();
4484 bfr = inp = lmt = 0;
4493 return n > 0 ? write_data(bfr, n) : 0;
4497 write_bfr(char *dp, int sz)
4501 if( n > sz ) n = sz;
4503 if( (inp+=n) >= lmt && flush_bfr() < 0 )
4515 //traceback(" commit %d\n",my_pid);
4516 int v = attach_wr();
4517 int ret = icommit(force);
4518 if( !ret && force < 0 ) ret = icommit(1);
4520 if( v ) detach_rw();
4528 //traceback(" icommit %d\n",my_pid);
4535 // check for written pages
4536 for( id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
4537 Page &pg = *get_Page(id);
4538 if( pg->chk_flags(fl_wr) ) break;
4540 // if no pages are written
4541 if( n < 0 ) return 0;
4545 // release last root info, allows storage to move down
4546 if_err( storeFree(root_info->last_info_size, root_info->last_info_addr) );
4547 root_info->last_info_addr = NIL; root_info->last_info_size = 0;
4550 ioAddr ri_addr = root_info->last_info_addr;
4551 int ri_size = root_info->last_info_size;
4552 int k = root_info_extra_pages;
4555 // allocate new storage
4558 for( id=0; id<root_info->pageTableUsed; ++id ) {
4559 Page &pg = *get_Page(id);
4560 if( !pg->chk_flags(fl_wr) ) continue;
4561 if( pg->chk_flags(fl_new) ) continue;
4562 pg->set_flags(fl_new);
4564 pg->wr_allocated = pg->allocated;
4565 if_err( storeAllocate(pg->wr_allocated, pg->wr_io_addr) );
4570 // compute rootInfo size
4571 int rsz = writeRootInfo(&Db::size_data);
4572 int rsz0 = entityPages(rsz) * entityPageSize;
4573 int rsz1 = rsz0 + k * entityPageSize;
4574 // retry while no storage, too little, or too big
4575 if( !(ri_addr < 0 || ri_size < rsz0 || ri_size > rsz1) ) break;
4576 // delete/allocate can change pageTable;
4577 if_err( storeFree(ri_size, ri_addr) );
4578 ri_addr = NIL; ri_size = 0;
4579 rsz1 = rsz0 + k/2 * entityPageSize;
4580 if_err( storeAllocate(rsz1, ri_addr) );
4581 ri_size = rsz1; k += 2;
4584 // delete free index pages
4585 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
4586 IndexBase *ib = indecies[idx];
4588 while( (id=ib->st->freeBlocks) != NIL ) {
4589 keyBlock *kbb; pgRef loc;
4590 loc.id = id; loc.offset = 0;
4591 if_err( addrWrite_(loc,kbb) );
4592 ib->st->freeBlocks = kbb->right_link();
4593 Page &pg = *get_Page(id);
4594 pg->used = 0; pg->set_flags(fl_new);
4598 root_info->last_info_addr = root_info->root_info_addr;
4599 root_info->last_info_size = root_info->root_info_size;
4600 root_info->root_info_addr = ri_addr;
4601 root_info->root_info_size = ri_size;
4603 int npages = root_info->pageTableUsed;
4604 pageId *pages = new pageId[npages];
4605 for( int i=0 ; i<npages; ++i ) pages[i] = i;
4606 qsort_r(pages, npages, sizeof(*pages), ioCmpr, this);
4608 // write page storage
4609 for( id=0; id<npages; ++id ) {
4610 Page &pg = *get_Page(pages[id]);
4611 if( pg->chk_flags(fl_wr) ) {
4612 pg->clr_flags(fl_wr);
4614 if_err( pageWrite(pg) );
4617 if( force < 0 || pg->type < pg_index || pg->type > max_index_type ) {
4619 pg->set_flags(fl_rd);
4626 // write rootInfo storage
4627 if_err( seek_data(ri_addr) );
4629 if_err( open_bfr() );
4630 if_err( writeRootInfo(&Db::write_bfr) );
4631 if_err( close_bfr() );
4633 if_err( writeRootInfo(&Db::write_data) );
4635 if_err( write_zeros(ri_addr+ri_size) );
4637 // update rootInfo pointer
4638 if_err( seek_data(root_info_offset) );
4639 if_err( write_data((char*)&ri_addr,sizeof(ri_addr)) );
4641 // commit is finished
4642 return start_transaction();
4648 // evict unwritten page storage
4649 for( int id=0; id<root_info->pageTableUsed; ++id ) {
4650 Page &pg = *get_page(id);
4651 if( !pg.addr ) continue;
4652 if( pg->chk_flags(fl_wr) ) continue;
4654 pg->set_flags(fl_rd);
4660 seek_data(ioAddr io_addr)
4662 if( io_addr < 0 ) Err(errIoSeek);
4663 if( file_position != io_addr ) {
4664 long offset = lseek(fd,io_addr,SEEK_SET);
4665 if( offset != io_addr ) Err(errIoSeek);
4666 file_position = io_addr;
4672 size_data(char *dp, int sz)
4674 return sz > 0 ? sz : 0;
4678 read_data(char *dp, int sz)
4681 long len = read(fd,dp,sz);
4682 if( len != sz ) Err(errIoRead);
4683 file_position += sz;
4689 write_data(char *dp, int sz)
4692 long len = write(fd,dp,sz);
4693 if( len != sz ) Err(errIoWrite);
4694 file_position += sz;
4695 if( file_position > root_info->file_size )
4696 root_info->file_size = file_position;
4702 write_zeros(ioAddr io_addr)
4704 ioAddr len = io_addr - file_position;
4705 if( len < 0 ) Err(errIoWrite);
4706 int sz = len > entityPageSize ? entityPageSize : len;
4707 char bfr[sz]; memset(&bfr[0],0,sz);
4709 sz = len > entityPageSize ? entityPageSize : len;
4710 if_err( write_data(&bfr[0], sz) );
4711 len = io_addr - file_position;
4717 write_padding(ioAddr io_addr)
4719 if( root_info->file_size > io_addr ) Err(errIoWrite);
4721 if_err( write_zeros(io_addr) );
4723 int status = ftruncate(fd, io_addr);
4724 if( status ) Err(errIoSeek);
4725 root_info->file_size = io_addr;
4730 #define Read(n) do { \
4731 if( (count+=sizeof(n)) > root_info->root_info_size ) Err(errCorrupt); \
4732 if_err( (this->*fn)((char*)&(n),sizeof(n)) ); \
4736 readRootInfo(int(Db::*fn)(char *dp,int sz))
4742 root_info->root_info_size = sizeof(RootInfo);
4744 if( root_info->root_magic != root_magic ) Err(errBadMagic);
4747 sz = indeciesHunks(root_info->indeciesUsed) * indexTableHunkSize;
4748 indecies = new IndexBase*[sz];
4749 if( !indecies ) Err(errNoMemory);
4750 index_info = (IndexInfo *)new_uint8_t(sz*sizeof(IndexInfo),id);
4751 if( !index_info ) Err(errNoMemory);
4752 db_info->index_info_id = index_info_id = id;
4753 indeciesAllocated = sz;
4755 indecies_sz = root_info->indeciesUsed;
4756 for( int idx=0; idx<indecies_sz; ++idx ) {
4757 IndexBaseInfo *b = &index_info[idx].base;
4758 Read(*(IndexTypeInfo*)b);
4759 if( b->magic != idx_magic ) Err(errBadMagic);
4760 if( b->type != idxNil )
4761 Read(*(IndexRecdInfo*)b);
4764 IndexBinaryInfo *bi = &index_info[idx].bin;
4766 if_err( new_index(indecies[idx], b, bi) );
4769 IndexStringInfo *si = &index_info[idx].str;
4771 if_err( new_index(indecies[idx], b, si) );
4782 int fidx = get_index(".free");
4783 if( fidx < 0 ) Err(errCorrupt);
4784 int aidx = get_index(".addr");
4785 if( aidx < 0 ) Err(errCorrupt);
4786 alloc_cache.freeIdx = fidx;
4787 alloc_cache.addrIdx = aidx;
4790 page_table_sz = root_info->pageTableUsed;
4791 sz = pageTableHunks(page_table_sz) * pageTableHunkSize;
4792 pageTable = (Page **)new Page*[sz];
4793 if( !pageTable ) Err(errNoMemory);
4794 pageTableAllocated = sz;
4795 sz = pageTableAllocated*sizeof(PageStorage);
4796 page_info = (PageStorage *)new_uint8_t(sz, id);
4797 if( !page_info ) Err(errNoMemory);
4798 db_info->page_info_id = page_info_id = id;
4799 for( id=0; id<root_info->pageTableUsed; ++id ) {
4800 Read(page_info[id]);
4801 Page *pp = new Page(page_info[id]);
4802 if( !pp ) Err(errNoMemory);
4805 if( !pg->chk_flags(fl_free) ) pg->shmid = -1;
4806 if( pg->used ) pg->set_flags(fl_rd);
4808 while( id < pageTableAllocated )
4816 #define Write(n) do { \
4817 if_err( (sz=(this->*fn)((char*)&(n),sizeof(n))) ); \
4822 writeRootInfo(int(Db::*fn)(char *data, int sz))
4831 for( id=0; id<root_info->indeciesUsed; ++id ) {
4832 IndexBase *ib = indecies[id];
4834 switch( ib->st->type ) {
4836 IndexBinary *b = (IndexBinary*)ib;
4841 IndexString *b = (IndexString*)ib;
4848 IndexBaseType b(idxNil);
4854 for( id=0; id<root_info->pageTableUsed; ++id ) {
4855 Page *pp = get_Page(id);
4867 int n = db->indeciesAllocated - usrIdx + 1;
4868 if( cfnAllocated != n ) {
4870 cfn = new cfnData[n];
4873 cfnData *cdp = &cfn[0];
4874 cfnUsed = db->root_info->indeciesUsed;
4875 for( int idx=usrIdx; idx<cfnUsed; ++idx,++cdp ) {
4876 IndexBase *ib = db->indecies[idx];
4878 switch( ib->st->type ) {
4880 IndexBinary *bidx = (IndexBinary *)ib;
4881 cdp->cmprId = bidx->bst->cmprId;
4882 cdp->compare = bidx->compare;
4895 cfnData *cdp = &cfn[0];
4896 int n = db->root_info->indeciesUsed<cfnUsed ? db->root_info->indeciesUsed : cfnUsed;
4897 for( int idx=usrIdx; idx<n; ++idx,++cdp ) {
4898 IndexBase *ib = db->indecies[idx];
4900 switch( ib->st->type ) {
4902 IndexBinary *bidx = (IndexBinary *)ib;
4903 int cid = cdp->cmprId;
4904 bidx->bst->cmprId = cid;
4905 bidx->compare = cid>0 ? db->cmprFns[cid] : cdp->compare;
4918 int v = attach_wr();
4921 if( v ) detach_rw();
4931 undata.restore(this);
4938 DbInfo(int pid, int key, int id)
4953 int id = -1, flk = 0, ret = 0;
4956 if( !shm_init ) init_shm();
4957 get_mem = &get_shm8_t;
4958 new_mem = &new_shm8_t;
4959 del_mem = &del_shm8_t;
4960 if( flock(fd, LOCK_EX) ) ret = errInvalid;
4963 id = shmget(key, sizeof(DbInfo), IPC_CREAT+IPC_EXCL+0666);
4964 if( id < 0 ) ret = errno==EEXIST ? errDuplicate : errNoMemory;
4967 vp = shmat(id, NULL, 0);
4968 if( vp == (void*)-1 ) ret = errNoMemory;
4972 get_mem = &get_mem8_t;
4973 new_mem = &new_mem8_t;
4974 del_mem = &del_mem8_t;
4975 vp = (void *)new uint8_t[sizeof(DbInfo)];
4976 if( !vp ) Err(errNoMemory);
4979 db_info = new(vp) DbInfo(my_pid, key, id);
4983 if( flk ) flock(fd, LOCK_UN);
4984 //traceback("new_info %s\n",ret ? "failed" : "success");
4992 if( no_shm ) Err(errInvalid);
4993 get_mem = &get_shm8_t;
4994 new_mem = &new_shm8_t;
4995 del_mem = &del_shm8_t;
4996 int id = shmget(key, sizeof(DbInfo), 0666);
4997 if( id < 0 ) Fail(errNotFound);
4998 void *vp = shmat(id, NULL, 0);
4999 if( vp == (void*)-1 ) Err(errNoMemory);
5000 if( del_uint8_t(0, id) <= 1 ) {
5001 shmctl(id, IPC_RMID, 0);
5003 //traceback("get_info failed\n");
5006 DbInfo *dip = (DbInfo *)vp;
5007 if( dip->magic != info_magic ) Err(errBadMagic);
5008 if( dip->info_key != key || dip->info_id != id ) Err(errCorrupt);
5010 //traceback("get_info success\n");
5018 db_info->index_info_id = -1;
5019 db_info->page_info_id = -1;
5020 db_info->root_info_id = -1;
5021 db_info->owner = -1;
5022 db_info->last_owner = my_pid;
5024 int flk = !flock(fd, LOCK_EX) ? 1 : 0;
5026 int ret = del_uint8_t(0, db_info->info_id);
5028 shmctl(db_info->info_id, IPC_RMID, 0);
5032 delete [] (uint8_t*)db_info;
5033 if( flk ) flock(fd, LOCK_UN);
5034 //traceback("del_info %d, refs=%d\n",my_pid,ret);
5041 open(int zfd, int zkey)
5043 //traceback("open %d\n",my_pid);
5045 if( fd >= 0 ) Err(errDuplicate);
5048 if( fd < 0 ) Err(errNotFound);
5050 if( fstat(fd,&st) < 0 ) Err(errIoStat);
5051 if( zkey >= 0 ) use_shm(1);
5053 if_err( new_info(zkey) );
5061 iopen(int undo_save)
5063 //traceback(" iopen %d\n",my_pid);
5065 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
5066 int ret = !root_info ? errNoMemory : 0;
5069 db_info->root_info_id = root_info_id = info_id;
5072 if( !ret ) ret = seek_data(db_magic_offset);
5073 if( !ret ) ret = read_data((char*)&magic,sizeof(magic));
5074 if( !ret && magic != db_magic ) ret = errBadMagic;
5075 if( !ret ) ret = seek_data(root_info_offset);
5076 if( !ret ) ret = read_data((char*)&root_info->root_info_addr,sizeof(root_info->root_info_addr));
5077 if( !ret ) ret = seek_data(root_info->root_info_addr);
5078 if( !ret ) ret = readRootInfo(&Db::read_data) >= 0 ? 0 : ret;
5079 if( !ret ) ret = start_transaction(undo_save);
5081 active_transaction = root_info->transaction_id;
5090 //traceback("close %d\n",my_pid);
5091 if( !db_info || fd < 0 ) return 0;
5099 //traceback(" iclose %d\n",my_pid);
5109 //traceback(" ireopen %d\n",my_pid);
5110 Page **old_page_table = pageTable; pageTable = 0;
5111 PageStorage *old_page_info = page_info; page_info = 0;
5112 int old_page_table_sz = page_table_sz; page_table_sz = 0;
5115 for( pageId id=0; id<old_page_table_sz; ++id ) {
5116 Page *opp = old_page_table[id], &opg = *opp;
5117 if( id < page_table_sz && opg.addr && !opg->chk_flags(fl_wr | fl_rd) ) {
5118 Page &pg = *get_Page(id);
5119 if( !pg.addr && pg->shmid < 0 && opg.shm_id == opg->shmid &&
5120 pg->io_addr == opg->io_addr && pg->trans_id == opg->trans_id ) {
5121 pg.addr = opg.addr; pg->shmid = pg.shm_id = opg.shm_id;
5122 pg->clr_flags(fl_rd);
5130 delete [] old_page_table;
5131 del_uint8_t(old_page_info);
5138 //traceback(" iattach %d\n",my_pid);
5139 RootInfo *new_root_info = 0;
5140 IndexInfo *new_index_info = 0;
5141 PageStorage *new_page_info = 0;
5142 int new_indecies_alloc = 0;
5143 int new_indecies_sz = 0;
5144 IndexBase **new_indecies = 0;
5145 int new_page_table_alloc = 0;
5146 int new_page_table_sz = 0;
5147 Page **new_page_table = 0;
5150 if( !ret && root_info_id != db_info->root_info_id ) {
5151 new_root_info = (RootInfo *)get_uint8_t(db_info->root_info_id);
5152 if( !new_root_info ) ret = errCorrupt;
5155 new_root_info = root_info;
5160 new_indecies_sz = new_root_info->indeciesUsed;
5161 new_indecies_alloc = indeciesHunks(new_indecies_sz) * indexTableHunkSize;
5162 new_indecies = new IndexBase*[new_indecies_alloc];
5163 if( !new_indecies ) ret = errNoMemory;
5167 if( index_info_id != db_info->index_info_id ) {
5168 new_index_info = (IndexInfo *)get_uint8_t(db_info->index_info_id);
5169 if( !new_index_info ) ret = errCorrupt;
5172 new_index_info = index_info;
5177 for( int idx=0; !ret && idx<new_indecies_sz; ++idx ) {
5178 IndexBaseInfo *b = &new_index_info[idx].base;
5179 if( b->magic == idx_magic ) {
5180 IndexBase *ib = idx<indecies_sz ? indecies[idx] : 0;
5181 if( ib && ib->st->type != b->type ) ib = 0;
5185 new_indecies[idx] = new(ib) IndexBinary(ib,*b,new_index_info[idx].bin);
5189 ret = new_index(new_indecies[idx], b, &new_index_info[idx].bin);
5193 new_indecies[idx] = new(ib) IndexString(ib,*b,new_index_info[idx].str);
5197 ret = new_index(new_indecies[idx], b, &new_index_info[idx].str);
5200 new_indecies[idx] = 0;
5210 for( int idx=0; idx<indecies_sz; ++idx ) {
5211 if( !indecies[idx] ) continue;
5212 delete indecies[idx]; indecies[idx] = 0;
5217 new_page_table_sz = new_root_info->pageTableUsed;
5218 new_page_table_alloc = pageTableHunks(new_page_table_sz) * pageTableHunkSize;
5219 new_page_table = (Page **)new Page*[new_page_table_alloc];
5220 if( !new_page_table ) ret = errNoMemory;
5224 if( page_info_id != db_info->page_info_id ) {
5225 new_page_info = (PageStorage *)get_uint8_t(db_info->page_info_id);
5226 if( !new_page_info ) ret = errCorrupt;
5229 new_page_info = page_info;
5235 for( id=0; !ret && id<new_page_table_sz; ++id ) {
5236 Page *pp = id<page_table_sz ? pageTable[id] : 0;
5237 PageStorage *st = &new_page_info[id];
5239 pageTable[id] = 0; pp->st = st; Page &pg = *pp;
5240 if( pg->chk_flags(fl_rd | fl_free) )
5242 else if( pg->shmid >= 0 && pp->shm_id != pg->shmid ) {
5243 del_uint8_t(pp->addr);
5244 pp->addr = (pagePrefix *)get_uint8_t(pp->shm_id=pg->shmid);
5249 if( !pp ) ret = errNoMemory;
5251 new_page_table[id] = pp;
5253 while( id<page_table_sz ) del_page(id++);
5256 delete [] new_indecies;
5257 delete [] new_page_table;
5258 if( !root_info ) root_info = new_root_info;
5259 else del_uint8_t(new_root_info);
5260 if( !index_info ) index_info = new_index_info;
5261 else del_uint8_t(new_index_info);
5262 if( !page_info ) page_info = new_page_info;
5263 else del_uint8_t(new_page_info);
5269 indecies = new_indecies;
5270 indeciesAllocated = new_indecies_alloc;
5271 indecies_sz = new_indecies_sz;
5272 delete [] pageTable;
5273 pageTable = new_page_table;
5274 pageTableAllocated = new_page_table_alloc;
5275 page_table_sz = new_page_table_sz;
5276 root_info_id = db_info->root_info_id;
5277 del_uint8_t(root_info);
5278 root_info = new_root_info;
5279 index_info_id = db_info->index_info_id;
5280 del_uint8_t(index_info);
5281 index_info = new_index_info;
5282 page_info_id = db_info->page_info_id;
5283 del_uint8_t(page_info);
5284 page_info = new_page_info;
5285 active_transaction = root_info->transaction_id;
5290 attach(int zfd, int zkey, int zrw)
5292 //traceback("attach %s %d\n",zrw ? "wr" : "rd", my_pid);
5294 if_ret( get_info(zkey) );
5295 fd = zfd; key = zkey;
5298 else if( zfd != fd || zkey != key )
5302 ( root_info && active_transaction == root_info->transaction_id &&
5303 db_info->root_info_id >= 0 && root_info_id == db_info->root_info_id &&
5304 db_info->index_info_id >= 0 && index_info_id == db_info->index_info_id &&
5305 db_info->page_info_id >= 0 && page_info_id == db_info->page_info_id ) )
5307 int ret = db_info->root_info_id < 0 ? ireopen() : iattach();
5325 if( fd >= 0 ) Err(errDuplicate);
5329 if( new_info(key) ) Err(errNotFound);
5331 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
5332 if( !root_info ) { Err(errNoMemory); }
5334 db_info->root_info_id = root_info_id = info_id;
5335 if_err( init_idx() );
5336 if_err( seek_data(db_magic_offset) );
5337 int magic = db_magic;
5338 if_err( write_data((char *)&magic,sizeof(magic)) );
5339 write_zeros(entityPageSize);
5344 // in-order traversal copying IndexBinary
5345 int Db::IndexBinary::
5346 keyCopy(pageId s, IndexBase *ib)
5348 IndexBinary *idx = (IndexBinary *)ib;
5350 keyBlock *sbb; Page *spp; char *sn;
5351 if_err( db->indexRead(s,0,sbb,spp,sn) );
5352 if( (r=sbb->right_link()) >= 0 ) {
5353 int lkdSz = kdSz + sizeof(pageId);
5354 int n = spp->iused() / lkdSz;
5355 for( int i=0; i<n; ++i ) {
5356 pageId l = readPageId(sn);
5357 sn += sizeof(pageId);
5358 if_ret( keyCopy(l,idx) );
5359 if_ret( idx->Insert(sn,sn+st->keySz) );
5362 if_ret( keyCopy(r,idx) );
5365 int n = spp->iused() / kdSz;
5366 for( int i=0; i<n; ++i ) {
5367 if_ret( idx->Insert(sn,sn+st->keySz) );
5374 // in-order traversal copying IndexString
5375 int Db::IndexString::
5376 keyCopy(pageId s, IndexBase *ib)
5378 IndexString *idx = (IndexString *)ib;
5379 pageId r; unsigned char lky[keysz];
5380 keyBlock *sbb; Page *spp; char *sn;
5381 if_err( db->indexRead(s,0,sbb,spp,sn) );
5382 unsigned char *lp = (unsigned char *)sn;
5383 unsigned char *rp = lp + spp->iused();
5385 if( (r=sbb->right_link()) >= 0 ) {
5387 pageId l = getPageId(lp);
5388 if_ret( keyCopy(l,idx) );
5389 char *dp = (char *)lp; lp += st->dataSz;
5390 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5391 if_ret( idx->Insert(&lky[0],dp) );
5393 if_ret( keyCopy(r,idx) );
5397 char *dp = (char *)lp; lp += st->dataSz;
5398 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5399 if_ret( idx->Insert(&lky[0],dp) );
5406 EntityObj(EntityObj &eobj, int eid)
5409 memmove(&name[0],&eobj.name[0],nmSz);
5410 recdSz = eobj.recdSz;
5412 indexs[idxId] = eobj.indexs[idxId];
5413 for( int i=idxId+1; i<nidxs; ++i )
5414 indexs[i] = eobj.indexs[i];
5420 copy(ObjectLoc &dobj)
5423 Obj *dp = dobj.addr();
5424 if( !dp ) Err(errNoMemory);
5425 allocPrefix *mp = ((allocPrefix *)dp) - 1;
5426 int sz = mp->size - sizeof(*mp);
5427 if_err( allocate(sz - dobj.entity->ent->recdSz) );
5429 if( !np ) Err(errNoMemory);
5431 // copy varObj data using ref data, if any
5432 for( varObjs vp=dobj.entity->vobjs; vp!=0; vp=vp->next ) {
5434 varObj &vd = dp->*ref;
5435 varObj &vn = np->*ref;
5437 if( (sz=vd.size()) > 0 ) {
5439 memcpy(addr(vn),dobj.addr(vd), sz);
5446 copy(Db *db, Objects objs)
5448 int id, n = db->root_info->indeciesUsed;
5449 for( id=usrIdx; id<n; ++id ) {
5450 IndexBase *ib = db->indecies[id];
5453 switch( ib->st->type ) {
5454 // copy binary index
5456 IndexBinary *bidx = (IndexBinary *)ib;
5457 int idx = get_index(&bidx->st->name[0]);
5459 int kySz = bidx->st->keySz, dtSz = bidx->st->dataSz;
5460 idx = new_binary_index(&bidx->st->name[0], kySz, dtSz, bidx->compare);
5463 IndexBase *bib = indecies[idx];
5464 bib->st->key_type = ib->st->key_type;
5465 // ignore empty indecies
5466 if( bidx->st->rootPageId < 0 ) break;
5467 // ignore allocator indecies
5468 if( bidx->compare == Db::cmprFrSp ) break;
5469 if( bidx->compare == Db::cmprAdSp ) break;
5470 // entity id indecies are processed below
5471 if( !db->entityNmIndex->Find(&ib->st->name[0],0) ) break;
5472 IndexBinary *bip = (IndexBinary *)bib;
5473 // use cmprLast since index is in-order. Avoids using
5474 // user defined class key cmprs and compare functions.
5475 bip->compare = cmprLast;
5476 bib->st->key_type = ktyBin;
5477 ret = bidx->keyCopy(bidx->st->rootPageId, bib);
5478 bip->compare = cmprFns[bip->bst->cmprId];
5479 bib->st->key_type = ib->st->key_type;
5481 // copy string index
5483 IndexString *sidx = (IndexString *)ib;
5484 int idx = get_index(&sidx->st->name[0]);
5486 int dtSz = sidx->st->dataSz;
5487 idx = new_string_index(&sidx->st->name[0], dtSz);
5490 IndexBase *bib = indecies[idx];
5491 bib->st->key_type = ib->st->key_type;
5492 if( sidx->st->rootPageId < 0 ) break;
5494 ret = sidx->keyCopy(sidx->st->rootPageId, bib);
5498 if_err( db->flush() );
5499 if_err( commit(-1) );
5501 // copy entity indecies/data
5502 IndexBinary *eidx = (IndexBinary *)db->entityIdIndex;
5503 int eid, status; pgRef loc;
5504 if( !(status=eidx->First(&eid,&loc)) ) do {
5507 Entity *ep = op->obj->entity;
5508 if( ep->ent->id == eid ) break;
5513 EntityLoc &dent = db_ent.ent;
5515 ObjectLoc objd(&db_ent), *dobj = &objd;
5516 // if old db copy fn, use ObjectLoc::copy
5517 if( op->obj->entity->db == db ) dobj = op->obj;
5518 char name[nmSz]; memset(name,0,sizeof(name));
5519 strncpy(&name[0],&dent->name[0],sizeof(name));
5521 Entity entity(this);
5522 EntityLoc &nent = entity.ent;
5524 if( entityNmIndex->Find(&name[0],&nid) != 0 ) {
5525 int nidx1 = dent->nidxs-1;
5526 int sz = sizeof(EntityObj) + sizeof(dent->indexs[0])*nidx1;
5528 if_err( allocate(dent->id, sz, nent.obj, alloc_cache) );
5529 if( !nent.addr_wr() ) Err(errNoMemory);
5531 new((EntityObj *)nent.addr())
5532 EntityObj(*(EntityObj*)dent.addr(),eid);
5533 if_err( entityIdIndex->Insert(&eid,&nent.obj) );
5534 if_err( entityNmIndex->Insert(&name[0],&eid) );
5535 // connect entity allocator
5536 char idxNm[nmSz]; memset(idxNm,0,sizeof(idxNm));
5537 strncpy(idxNm,name,sizeof(idxNm)-1);
5538 strncat(idxNm,".free",sizeof(idxNm)-1);
5539 int fidx = get_index(idxNm);
5540 if( fidx < 0 ) Err(errCorrupt);
5541 memset(idxNm,0,sizeof(idxNm));
5542 strncpy(idxNm,name,sizeof(idxNm)-1);
5543 strncat(idxNm,".addr",sizeof(idxNm)-1);
5544 int aidx = get_index(idxNm);
5545 if( aidx < 0 ) Err(errCorrupt);
5546 nent->alloc_cache.freeIdx = fidx;
5547 nent->alloc_cache.addrIdx = aidx;
5549 else if( nid == eid )
5550 if_err( entityIdIndex->Find(&eid,&nent.obj) );
5553 ObjectLoc objn(&entity), *nobj = &objn;
5554 // if new db copy fn, use it instead of ObjectLoc::copy
5555 if( op->obj->entity->db == this ) nobj = op->obj;
5557 if( !(status = dobj->FirstId(cur)) ) do {
5559 if_err( nobj->copy(*dobj) );
5561 if_err( entity.construct_(*nobj, dobj->id()) );
5562 } while( !(status=dobj->NextId(cur)) );
5563 if( status == errNotFound ) status = 0;
5565 if_err( db->flush() );
5566 if_err( commit(-1) );
5568 } while( !(status=eidx->Next(&eid,&loc)) );
5569 if( status == errNotFound ) status = 0;
5571 if_err( db->flush() );
5572 if_err( commit(-1) );
5577 cmprFrSt(char *a, char *b)
5579 freeStoreRecord *ap = (freeStoreRecord *)a;
5580 freeStoreRecord *bp = (freeStoreRecord *)b;
5581 if( ap->size > bp->size ) return 1;
5582 if( ap->size < bp->size ) return -1;
5583 if( ap->io_addr > bp->io_addr ) return 1;
5584 if( ap->io_addr < bp->io_addr ) return -1;
5589 cmprAdSt(char *a, char *b)
5591 addrStoreRecord *ap = (addrStoreRecord *)a;
5592 addrStoreRecord *bp = (addrStoreRecord *)b;
5593 if( ap->io_addr > bp->io_addr ) return 1;
5594 if( ap->io_addr < bp->io_addr ) return -1;
5595 if( ap->size > bp->size ) return 1;
5596 if( ap->size < bp->size ) return -1;
5601 cmprFrSp(char *a, char *b)
5603 freeSpaceRecord *ap = (freeSpaceRecord *)a;
5604 freeSpaceRecord *bp = (freeSpaceRecord *)b;
5605 if( ap->size > bp->size ) return 1;
5606 if( ap->size < bp->size ) return -1;
5607 if( ap->id > bp->id ) return 1;
5608 if( ap->id < bp->id ) return -1;
5609 if( ap->offset > bp->offset ) return 1;
5610 if( ap->offset < bp->offset ) return -1;
5615 cmprAdSp(char *a, char *b)
5617 addrSpaceRecord *ap = (addrSpaceRecord *)a;
5618 addrSpaceRecord *bp = (addrSpaceRecord *)b;
5619 if( ap->id > bp->id ) return 1;
5620 if( ap->id < bp->id ) return -1;
5621 if( ap->offset > bp->offset ) return 1;
5622 if( ap->offset < bp->offset ) return -1;
5623 if( ap->size > bp->size ) return 1;
5624 if( ap->size < bp->size ) return -1;
5629 cmprOIds(char *a, char *b)
5633 if( ap->id > bp->id ) return 1;
5634 if( ap->id < bp->id ) return -1;
5639 cmprStr(char *a, char *b)
5641 return strncmp(a,b,keysz);
5645 cmprKey(char *a, char *b)
5648 return kp->cmpr(a,b);
5652 cmprLast(char *a, char *b)
5657 Db::CmprFn Db::cmprFns[] = {
5666 findCmprFn(CmprFn fn)
5669 for( i=lengthof(cmprFns); --i>0; )
5670 if( fn == cmprFns[i] ) return i;
5674 int Db::AllocCache::
5675 init_idx(Db *db,const char *nm)
5678 memset(idxNm,0,sizeof(idxNm));
5679 snprintf(idxNm,sizeof(idxNm),"%s.free",nm);
5680 int fidx = db->new_binary_index(idxNm, sizeof(freeSpaceRecord), 0, cmprFrSp);
5682 memset(idxNm,0,sizeof(idxNm));
5683 snprintf(idxNm,sizeof(idxNm),"%s.addr",nm);
5684 int aidx = db->new_binary_index(idxNm, sizeof(addrSpaceRecord), 0, cmprAdSp);
5685 if( aidx < 0 ) db->del_index(fidx);
5687 freeIdx = fidx; addrIdx = aidx;
5688 loc.id = NIL; loc.offset = 0;
5696 if_err( new_binary_index("entityIdIndex", sizeof(int), sizeof(pgRef), cmprOIds) );
5697 if_err( new_binary_index("entityNmIndex", sizeof(char[nmSz]), sizeof(int), cmprStr) );
5698 if_err( new_binary_index("freeStoreIndex", sizeof(freeStoreRecord), 0, cmprFrSt) );
5699 if_err( new_binary_index("addrStoreIndex", sizeof(addrStoreRecord), 0, cmprAdSt) );
5700 if_err( alloc_cache.init_idx(this,"") );
5709 FILE *fp = fopen("/proc/sys/kernel/shmall","w");
5710 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5711 fp = fopen("/proc/sys/kernel/shmmax","w");
5712 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5713 fp = fopen("/proc/sys/kernel/shmmni","w");
5714 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5720 root_magic = Db::root_magic;
5721 root_info_size = sizeof(RootInfo);
5724 root_info_addr = NIL;
5725 last_info_addr = NIL;
5741 storeBlockSize = defaultStoreBlockSize;
5742 entityPageSize = defaultEntityPageSize;
5743 pageTableHunkSize = defaultPageTableHunkSize;
5744 indexTableHunkSize = defaultIndexTableHunkSize;
5746 root_info_id = -1; root_info = 0;
5747 index_info_id = -1; index_info = 0;
5748 indecies = 0; indeciesAllocated = 0; indecies_sz = 0;
5749 page_info_id = -1; page_info = 0;
5750 pageTable = 0; pageTableAllocated = 0; page_table_sz = 0;
5754 bfr = lmt = inp = 0;
5755 active_transaction = -1;
5762 for( int idx=indecies_sz; --idx>=0; ) delete indecies[idx];
5763 delete [] indecies; indecies = 0;
5765 del_uint8_t(index_info); index_info = 0;
5766 indeciesAllocated = 0; indecies_sz = 0;
5770 for( pageId id=0; id<page_table_sz; ++id ) {
5771 Page *pp = get_Page(id); pageDealloc(*pp); delete pp;
5773 delete [] pageTable; pageTable = 0;
5775 del_uint8_t(page_info); page_info = 0;
5776 pageTableAllocated = 0; page_table_sz = 0;
5779 del_uint8_t(root_info); root_info = 0;
5786 no_shm = defaultNoShm;
5787 get_mem = !no_shm ? &get_shm8_t : &get_mem8_t;
5788 new_mem = !no_shm ? &new_shm8_t : &new_mem8_t;
5789 del_mem = !no_shm ? &del_shm8_t : &del_mem8_t;
5790 root_info_id = index_info_id = page_info_id = -1;
5812 if( error() < 0 ) Fail(errPrevious); \
5813 if( r < 0 || r >= root_info->indeciesUsed || !indecies[r] ) Err(errInvalid); \
5814 return indecies[r]->fn;
5817 ins(int r, void *key, void *data)
5819 Run(r,Insert(key,data));
5823 del(int r, void *key)
5829 find(int r, void *key, void *rtnData)
5831 Run(r,Find(key,rtnData));
5835 locate(int r, int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
5837 Run(r,Locate(op,key,cmpr,rtnKey,rtnData));
5841 first(int r, void *key, void *rtnData)
5843 Run(r,First(key,rtnData));
5847 last(int r, void *key, void *rtnData)
5849 Run(r,Last(key,rtnData));
5853 next(int r, void *key, void *rtnData)
5855 Run(r,Next(key,rtnData));
5859 nextloc(int r, pgRef &loc)
5861 Run(r,NextLoc(loc));
5868 allocate(ObjectLoc &loc, int sz)
5870 if( loc.entity != this ) Err(errObjEntity);
5872 int n = ent->recdSz + sz;
5873 if_err( db->allocate(id, n, loc.obj, ent->alloc_cache) );
5874 Obj *op = loc.addr();
5876 ent._wr(); loc._wr();
5878 op->id = ent->maxId;
5884 construct_(ObjectLoc &loc, int id)
5886 int idx = ent->indexs[idxId];
5887 loc._wr(); loc->id = id;
5888 if_err( db->indecies[idx]->Insert(&id,&loc.obj) );
5889 ent._wr(); ++ent->count;
5890 if( id >= ent->maxId ) ent->maxId = id+1;
5896 destruct_(ObjectLoc &loc, int id)
5898 if_err( index(idxId)->Delete(&id) );
5899 ent._wr(); --ent->count;
5900 if( id+1 == ent->maxId ) {
5901 if( ent->count > 0 ) {
5902 if_err( index(idxId)->Last(&id,0) );
5914 new_entity_(Entity &entity, const char *nm, int sz)
5917 EntityLoc &ent = entity.ent;
5918 // construct EntityObj
5919 if( root_info->entity_max_id >= max_entity_type ) Err(errLimit);
5920 if_err( allocate(root_info->entity_max_id+1,
5921 sizeof(EntityObj), ent.obj, alloc_cache) );
5922 int id = root_info->entity_max_id;
5923 ent._wr(); ent->id = id;
5924 char name[nmSz]; memset(&name[0],0,sizeof(name));
5925 strncpy(name,nm,sizeof(name)-1);
5926 memmove(&ent->name[0],name,sizeof(name));
5927 if_err( ent->alloc_cache.init_idx(this,name) );
5931 // add to entity indecies
5932 if_err( entityIdIndex->Insert(&id,&ent.obj) );
5933 if_err( entityNmIndex->Insert(&name[0],&id) );
5934 // create entity id/loc
5935 int idx = new_binary_index(nm, sizeof(int),sizeof(pgRef),cmprOIds);
5937 ent->indexs[idxId] = idx;
5939 ++root_info->entity_count;
5940 ++root_info->entity_max_id;
5941 entity.rw_lock = &db_info->rw_locks[id];
5946 del_entity(Entity &entity)
5948 EntityLoc &ent = entity.ent;
5949 if( ent.obj.id >= 0 ) {
5951 ObjectLoc loc(&entity);
5952 int status = loc.FirstId();
5955 entity.deallocate(loc.obj);
5956 } while( !(status=loc.NextId()) );
5957 if( status != errNotFound )
5959 cacheDelete(ent->alloc_cache);
5960 for( int i=ent->nidxs; --i>=0; ) entity.del_index_(i);
5962 entityIdIndex->Delete(&id);
5963 entityNmIndex->Delete(&ent->name[0]);
5964 if_err( deallocate(ent.obj, alloc_cache) );
5966 --root_info->entity_count;
5967 if( id+1 == root_info->entity_max_id ) {
5968 if( root_info->entity_count > 0 ) {
5969 if_err( entityIdIndex->Last(&id,0) );
5974 root_info->entity_max_id = id;
5981 new_entity(Entity &entity, const char *nm, int sz)
5983 int ret = new_entity_(entity, nm, sz);
5984 if( ret ) del_entity(entity);
5989 get_entity(Entity &entity, const char *nm)
5991 EntityLoc &ent = entity.ent;
5992 char name[nmSz]; memset(&name[0],0,sizeof(name));
5993 strncpy(name,nm,sizeof(name)-1); int id;
5994 if_fail( entityNmIndex->Find(&name[0], &id) );
5995 if_err( entityIdIndex->Find(&id, &ent.obj) );
5996 entity.rw_lock = &db_info->rw_locks[id];
6001 get_index(const char *nm, CmprFn cmpr)
6003 int idx = ent->nidxs;
6004 while( --idx >= 0 ) {
6005 int i = ent->indexs[idx];
6006 if( i < 0 ) continue;
6007 IndexBase *ib = db->indecies[i];
6008 if( ib && !strncmp(&ib->st->name[0],nm,nmSz) ) {
6009 if( cmpr && ib->st->type == idxBin ) {
6010 IndexBinary *bidx = (IndexBinary *)ib;
6011 bidx->compare = cmpr;
6012 bidx->bst->cmprId = db->findCmprFn(cmpr);
6017 if( idx < 0 ) Fail(errNotFound);
6022 add_index(int idx, int kty)
6024 EntityLoc nent(this);
6025 // construct EntityObj
6026 int nidx = ent->nidxs;
6027 int sz = sizeof(EntityObj) + sizeof(ent->indexs[0])*nidx;
6028 if_err( db->allocate(ent->id, sz, nent.obj, db->alloc_cache) );
6029 nent._wr(); nent->id = ent->id;
6030 memmove(&nent->name[0],&ent->name[0],nmSz);
6031 nent->alloc_cache = ent->alloc_cache;
6032 nent->maxId = ent->maxId;
6033 nent->recdSz = ent->recdSz;
6034 nent->count = ent->count;
6035 // add to entity indecies
6036 if_err( db->entityIdIndex->Modify(&ent->id,&nent.obj) );
6037 for( int i=0; i<nidx; ++i )
6038 nent->indexs[i] = ent->indexs[i];
6040 nent->indexs[nidx] = idx;
6041 nent->nidxs = nidx+1;
6042 if_err( db->deallocate(ent.obj, db->alloc_cache) );
6044 IndexBase *ib = db->indecies[idx];
6045 ib->st->key_type = kty;
6050 del_index(const char *nm)
6052 int idx; if_ret( idx = get_index(nm) );
6053 return del_index(idx);
6059 if( i <= idxId ) Fail(errInvalid);
6060 if( i >= ent->nidxs ) Fail(errInvalid);
6061 return del_index_(i);
6067 int idx = ent->indexs[i];
6068 if( idx < 0 ) Fail(errNotFound);
6069 if( idx >= db->root_info->indeciesUsed ) Fail(errNotFound);
6070 if( !db->indecies[idx] ) Fail(errNotFound);
6071 ent._wr(); ent->indexs[i] = -1;
6072 db->indecies[idx]->Clear();
6078 finit(Objects objects)
6081 Objects op = objects; objects = op->next;
6082 Entity *ep = op->obj->entity;
6083 for( varObjs nxt=0, vp=ep->vobjs; vp!=0; vp=nxt ) {
6084 nxt = vp->next; delete vp;
6090 void Db::ObjectLoc::
6093 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
6095 (op->*(vp->ref)).init();
6099 void Db::ObjectLoc::
6102 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
6104 (op->*(vp->ref)).del(entity);
6110 size(varObj &vobj, int sz)
6112 if( vobj.len != sz ) {
6113 AllocCache &cache = entity->ent->alloc_cache;
6114 if_ret( entity->db->reallocate(sz,vobj.loc,cache) );
6115 vobj.len = sz; entity->ent._wr(); _wr();
6122 last(Index idx, ObjectLoc &last_loc)
6125 if_ret( idx->Last(0,&id) );
6126 if_err( last_loc.FindId(id) );
6130 // get last index id on member accessed with ip
6132 last(const char *nm,int (ObjectLoc::*ip)())
6134 Index idx = entity->index(nm);
6135 if( !idx ) Err(errInvalid);
6136 ObjectLoc last_loc(*this);
6137 int ret = last(idx, last_loc);
6138 if( ret == errNotFound ) return 0;
6140 return (last_loc.*ip)();
6143 unsigned int Db::ObjectLoc::
6144 last(const char *nm,unsigned int (ObjectLoc::*ip)())
6146 Index idx = entity->index(nm);
6147 if( !idx ) Err(errInvalid);
6148 ObjectLoc last_loc(*this);
6149 int ret = last(idx, last_loc);
6150 if( ret == errNotFound ) return 0;
6152 return (last_loc.*ip)();
6156 #define cmpr_type(nm,ty) int Db::ObjectLoc:: \
6157 nm(const ty *ap, int asz, const ty *bp, int bsz) { \
6158 int n = asz < bsz ? asz : bsz; \
6160 if( *ap > *bp ) return 1; \
6161 if( *ap < *bp ) return -1; \
6162 ++ap; ++bp; n -= sizeof(ty); \
6164 if( asz > bsz ) return 1; \
6165 if( asz < bsz ) return -1; \
6169 cmpr_type(cmpr_char, char)
6170 cmpr_type(cmpr_uchar, unsigned char)
6171 cmpr_type(cmpr_short, short)
6172 cmpr_type(cmpr_ushort, unsigned short)
6173 cmpr_type(cmpr_int, int)
6174 cmpr_type(cmpr_uint, unsigned int)
6175 cmpr_type(cmpr_long, long)
6176 cmpr_type(cmpr_ulong, unsigned long)
6177 cmpr_type(cmpr_float, float)
6178 cmpr_type(cmpr_double, double)
6182 cmpr_media(const unsigned char *ap, int asz, const unsigned char *bp, int bsz)
6184 return mediaCmpr((uint8_t *)ap).cmpr((uint8_t *)bp);
6188 #define KeyFn(fn) { \
6190 if_fail( idx->fn ); \
6191 if_err( loc.FindId(id) ); \
6195 int Db::iKey::Find() KeyFn(Find(*this, &id))
6196 int Db::iKey::Locate(int op) KeyFn(Locate(op, *this,0, 0,&id))
6197 int Db::rKey::First() KeyFn(First(0,&id))
6198 int Db::rKey::Last() KeyFn(Last(0,&id))
6199 int Db::rKey::Next() KeyFn(Next(this,&id))
6200 int Db::rKey::First(pgRef &pos) KeyFn(First(pos,0,&id))
6201 int Db::rKey::Next(pgRef &pos) KeyFn(Next(pos,this,&id))
6202 int Db::rKey::Locate(int op) KeyFn(Locate(op, *this,0, 0,&id))
6204 int Db::ioCmpr(const void *a, const void *b, void *c)
6207 Page &pa = *db->get_page(*(pageId*)a);
6208 Page &pb = *db->get_page(*(pageId*)b);
6209 int64_t v = pa->io_addr - pb->io_addr;
6210 return v < 0 ? -1 : v > 0 ? 1 : 0;
6215 int npages = root_info->pageTableUsed;
6216 pageId *pages = new pageId[npages];
6217 for( int i=0 ; i<npages; ++i ) pages[i] = i;
6218 qsort_r(pages, npages, sizeof(*pages), ioCmpr, this);
6219 for( int i=0 ; i<npages; ++i ) {
6220 pgRef loc; char *op = 0;
6221 loc.id = pages[i]; loc.offset = 0;
6222 if_err( addrRead(loc, op) );
6228 int Db::load_indecies()
6230 for( int i=0 ; i<indecies_sz; ++i ) {
6231 Index idx = indecies[i];
6232 if( !idx || idx->st->rootPageId < 0 ) continue;
6233 if_err( idx->keyMap(idx->st->rootPageId, &Db::IndexBase::blockLoad) );