13 #include <sys/types.h>
21 extern "C" void traceback(const char *fmt,...)
23 FILE *fp = fopen("/tmp/x","a");
25 va_list ap; va_start(ap, fmt);
26 vfprintf(fp, fmt, ap);
28 int nptrs; void *buffer[100];
29 nptrs = backtrace(buffer, lengthof(buffer));
30 struct timeval now; gettimeofday(&now,0);
31 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
32 fprintf(fp,"*** stack %d\n",nptrs);
33 char **strings = backtrace_symbols(buffer, nptrs);
34 if( !strings ) return;
35 for( int i=0; i<nptrs; ++i ) fprintf(fp,"%s\n", strings[i]);
42 // use repeated string byte move for decompress operation.
43 // page table design should be changed to hierachical bit
44 // field, so that only the trace of page table blocks in
45 // the update path are written at commit time. Writing
46 // the entire page table on commit can be expensive.
48 // local memory allocator
49 void *Db::get_mem8_t(int id)
54 void *Db::new_mem8_t(int size, int &id)
56 return new uint8_t[size];
59 int Db::del_mem8_t(const void *vp, int id)
61 delete [] (uint8_t*)vp;
66 // shared memory allocator
70 void *vp = shmat(id, NULL, 0);
71 if( vp == (void*)-1 ) { perror("shmat"); sleep(1000000); vp = 0; }
76 new_shm8_t(int size, int &id)
78 id = shmget(IPC_PRIVATE, size, IPC_CREAT+0666);
79 if( id < 0 ) { perror("shmget"); return 0; }
80 uint8_t *addr = (uint8_t *)get_shm8_t(id);
81 if( addr ) shmctl(id, IPC_RMID, 0);
86 del_shm8_t(const void *vp, int id)
91 if( !shmctl(id, IPC_STAT, &ds) ) ret = ds.shm_nattch;
92 else { perror("shmctl"); sleep(1000000); ret = errIoStat; }
101 get_uint8_t(int id, int pg)
103 uint8_t *bp = (uint8_t *) get_mem(id);
108 new_uint8_t(int size, int &id, int pg)
110 void *vp = new_mem(size, id);
111 return (uint8_t *)vp;
115 del_uint8_t(const void *vp, int id, int pg)
117 return del_mem(vp, id);
126 if( err_callback ) err_callback(this, v);
130 //int Db::debug = DBBUG_ERR + DBBUG_FAIL;
131 int Db::debug = DBBUG_ERR;
135 void dmp(unsigned char *bp, int len)
137 unsigned char ch[16], lch[16];
138 int llen = lengthof(lch)+1;
140 unsigned char *adr = 0;
145 for( n=0; n<lengthof(ch) && --len>=0; ch[n++]=*bp++ );
146 if( (i=n) >= llen ) // check for a duplicate
147 while( --i>=0 && ch[i]==lch[i] );
148 if( i >= 0 || len < 0 ) { /* not a duplicate */
149 if( dups > 0 ) fprintf(stderr," ...%d\n",dups);
151 fprintf(stderr,"%p:",adr);
152 for( i=0; i<n; ++i ) fprintf(stderr," %02x",lch[i]=ch[i]);
153 for( i=n; i<lengthof(ch); ++i ) fprintf(stderr," ");
156 fprintf(stderr,"%c", ch[i]>=' ' && ch[i]<='~' ? ch[i] : '.');
157 fprintf(stderr,"\n");
170 "NoCmprFn", "NotFound", "Duplicate", "NoPage", "NoMemory",
171 "IoRead", "IoWrite", "IoSeek", "IoStat", "BadMagic", "Corrupt",
172 "Invalid", "Previous", "ObjEntity", "Limit", "User",
176 dmsg(int msk, const char *msg,...)
178 if( !(msk & debug) ) return;
179 #ifdef DEBUG_TIMESTAMPS
180 struct timeval now; gettimeofday(&now,0);
181 printf("@%ld.03%ld: ",now.tv_sec,now.tv_usec/1000);
187 FILE *fp = fopen("/tmp/x","a"); if( !fp ) return;
188 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
189 va_start(ap, msg); vfprintf(fp, msg, ap); va_end(ap);
194 _err_(int v,const char *fn,int ln)
197 dmsg(DBBUG_ERR,"%s:%d errored %d (%s)\n",fn,ln,v,errMsgs[-v]);
203 _fail_(int v,const char *fn,int ln)
205 dmsg(DBBUG_FAIL,"%s:%d failed %d (%s)\n",fn,ln,v,errMsgs[-v]);
210 Error(int v,const char *msg)
213 dmsg(DBBUG_ERR,"%s\n",msg);
220 printf("freeStoreIndex\n"); fdmp();
221 printf("addrStoreIndex\n"); admp();
222 printf("freeSpaceIndex\n"); edmp();
223 printf("addrSpaceIndex\n"); bdmp();
230 printf("dmp root_info->file_size %016lx\n",
231 root_info->file_size);
232 printf(" rootInfo root_info->transaction_id %d\n",
233 root_info->transaction_id);
234 printf(" root_info->root_info_addr/size %016lx/%08x\n",
235 root_info->root_info_addr,root_info->root_info_size);
236 printf(" root_info->last_info_addr/size %016lx/%08x\n",
237 root_info->last_info_addr,root_info->last_info_size);
238 printf(" root_info->indeciesUsed %d\n",
239 root_info->indeciesUsed);
240 printf(" alloc_cache: "); alloc_cache.dmp();
241 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
242 IndexBase *ib = indecies[idx];
244 printf(" idx %d. %-24s %s pop %5ld"
245 " root %-5d rhs %-5d ky/Dt %2d/%-2d ",
246 idx, &ib->st->name[0], ib->st->type==idxBin ? "bin" :
247 ib->st->type==idxStr ? "str" : "???", ib->st->count,
248 ib->st->rootPageId, ib->st->rightHandSide,
249 ib->st->keySz, ib->st->dataSz);
250 printf(" free %d/",ib->st->freeBlocks);
252 for( pageId id=ib->st->freeBlocks; id>=0; ++n ) {
253 pgRef loc; loc.id = id; loc.offset = 0;
254 keyBlock *kbb; addrRead_(loc,kbb);
255 if( (id=kbb->right_link()) < 0 ) break;
258 printf("%d pages\n",n);
261 printf(" Entities, count %ld\n", entityIdIndex->Count());
262 Entity e(this); EntityLoc &ent = e.ent; int eid;
263 if( !entityIdIndex->First(&eid,&ent.obj) ) do {
264 int nidxs = ent->nidxs;
265 printf(" id %d. %s maxId %d, recdSz %d, count %d, nidxs %d:",
266 eid, &ent->name[0], ent->maxId, ent->recdSz, ent->count, nidxs);
267 printf(" alloc_cache: "); ent->alloc_cache.dmp();
268 for( int i=0; i<nidxs; ++i ) {
269 int idx = ent->indexs[i];
270 printf(" %d(%s),", idx, idx < 0 ? "" :
271 !indecies[idx] ? "(err)" : &indecies[idx]->st->name[0]);
274 } while( !entityIdIndex->Next(&eid,&ent.obj) );
280 printf(" root_info->pageTableUsed %d\n",root_info->pageTableUsed);
281 for( int pid=0; pid<root_info->pageTableUsed; ++pid ) {
282 Page &pg = *get_page(pid);
283 if( !pg.addr && !pg->io_allocated && pg->io_addr==NIL &&
284 pg->trans_id < 0 && pg->shmid < 0 && !pg->flags &&
285 !pg->wr_allocated && pg->wr_io_addr==NIL ) continue;
286 printf(" pid %d. used %d, type %d, link %d, flags %04x addr %p\n",
287 pid, pg->used, pg->type, pg->link, pg->flags, pg.addr);
288 printf(" allocated %d io_alloc/io_addr %d/%016lx, wr_alloc/io_addr %d/%016lx\n",
289 pg->allocated, pg->io_allocated, pg->io_addr,
290 pg->wr_allocated, pg->wr_io_addr);
292 printf(" root_info->freePages %d",root_info->freePages);
294 for( pageId id=root_info->freePages; id>=0; ++n ) id = (*get_page(id))->link;
295 // printf(" %d\n",(id=(*get_page(id))->link));
296 printf(", pages = %d\n",n);
302 freeStoreRecord free;
303 if( !freeStoreIndex->First(&free,0) ) do {
304 printf("free=%04lx/%012lx\n", free.size,free.io_addr);
305 } while( !freeStoreIndex->Next(&free,0) );
311 addrStoreRecord addr;
312 if( !addrStoreIndex->First(&addr,0) ) do {
313 printf("addr=%012lx/%04lx\n", addr.io_addr,addr.size);
314 } while( !addrStoreIndex->Next(&addr,0) );
320 if( !indecies ) return; addrStoreRecord addr;
321 addrStoreRecord last; last.io_addr = 0; last.size = 0;
322 if( !addrStoreIndex->First(&addr,0) ) do {
323 if( last.io_addr > addr.io_addr ||
324 (last.io_addr == addr.io_addr && last.size >= addr.size ) ) {
325 printf("last=%012lx/%04lx, addr=%012lx/%04lx\n",
326 addr.io_addr, addr.size, last.io_addr, last.size);
330 } while( !addrStoreIndex->Next(&addr,0) );
336 if( !indecies ) return; freeStoreRecord free;
337 freeStoreRecord last; last.size = 0; last.io_addr = 0;
338 if( !freeStoreIndex->First(&free,0) ) do {
339 if( last.size > free.size ||
340 (last.size == free.size && last.io_addr >= free.io_addr ) ) {
341 printf("last=%04lx/%012lx, free=%04lx/%012lx\n",
342 free.size, free.io_addr, last.size, last.io_addr);
346 } while( !freeStoreIndex->Next(&free,0) );
352 freeSpaceRecord free;
353 if( !freeSpaceIndex->First(&free,0) ) do {
354 printf("free=%04lx %d/%d\n", free.size,free.id,free.offset);
355 } while( !freeSpaceIndex->Next(&free,0) );
361 addrSpaceRecord addr;
362 if( !addrSpaceIndex->First(&addr,0) ) do {
363 printf("addr=%d/%d %04lx\n", addr.id,addr.offset,addr.size);
364 } while( !addrSpaceIndex->Next(&addr,0) );
370 long store_allocated=0, store_used=0;
371 long loaded_allocated=0, loaded_used=0;
372 long written_allocated=0, written_used=0;
373 int pages_written=0, pages_loaded=0, pages_deleted=0;
374 for( int id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
375 Page &pg = *get_page(id);
376 store_allocated += pg->allocated;
377 store_used += pg->used;
380 loaded_allocated += pg->allocated;
381 loaded_used += pg->used;
383 if( pg->chk_flags(fl_wr) ) {
385 written_allocated += pg->allocated;
386 written_used += pg->used;
387 if( !pg.addr ) ++pages_deleted;
390 #define percent(a,b) (!(a) || !(b) ? 0.0 : (100.0*(a)) / (b))
391 long read_allocated = loaded_allocated - written_allocated;
392 long read_used = loaded_used - written_used;
393 int pages_read = pages_loaded - pages_written;
394 printf("\ncommit %d\n", root_info->transaction_id);
395 printf(" pages %8d/del %-4d alloc:%-12ld used:%-12ld %7.3f%%\n",
396 root_info->pageTableUsed, pages_deleted, store_allocated, store_used,
397 percent(store_used,store_allocated));
398 printf(" loaded %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
399 pages_loaded, percent(pages_loaded, root_info->pageTableUsed),
400 loaded_allocated, loaded_used, percent(loaded_used, loaded_allocated));
401 printf(" read %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
402 pages_read, percent(pages_read, root_info->pageTableUsed),
403 read_allocated, read_used, percent(read_used, read_allocated));
404 printf(" write %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
405 pages_written, percent(pages_written, root_info->pageTableUsed),
406 written_allocated, written_used, percent(written_used, written_allocated));
408 long store_avail=0, space_avail=0;
409 freeStoreRecord store;
410 if( !freeStoreIndex->First(&store,0) ) do {
411 store_avail += store.size;
412 } while( !freeStoreIndex->Next(&store,0) );
413 freeSpaceRecord space;
414 if( !freeSpaceIndex->First(&space,0) ) do {
415 space_avail += space.size;
416 } while( !freeSpaceIndex->Next(&space,0) );
417 printf(" file %-12ld", root_info->file_size);
418 printf(" store %12ld/%-7.3f%% space %12ld/%-7.3f%%\n",
419 store_avail, percent(store_avail, root_info->file_size),
420 space_avail, percent(space_avail, root_info->file_size));
433 return syscall(SYS_sched_yield);
439 return syscall(SYS_gettid);
446 while( (ret=zfutex(FUTEX_WAKE, nwakeups)) < 0 );
451 zwait(int val, timespec *ts)
453 return zfutex(FUTEX_WAIT, val, ts);
459 fprintf(stderr,"unlocking and not locked\n");
466 if( v || zxchg(1,loc) >= 0 ) do {
468 } while ( zxchg(1,loc) >= 0 );
473 zunlock(int nwakeups)
482 zdecr(loc); lk.lock(); lk.unlock(); zincr(loc);
488 if( lk.loc >= 0 ) zwake(1);
494 lk.lock(); timespec ts = { 1, 0 };
495 int v; while( (v=loc) >= 0 ) zwait(v, &ts);
508 if( !db_info ) return -1;
509 if( is_locked() > 0 && db_info->owner == my_pid ) return 0;
511 //traceback(" attach_wr %d/%d/%d\n",my_pid,db_info->owner,db_info->last_owner);
512 db_info->last_owner = db_info->owner;
513 db_info->owner = my_pid;
521 //traceback(" attach_rd %d/%d\n",my_pid,db_info->owner);
528 if( !db_info ) return;
536 //traceback(" detach_rw %d\n",my_pid);
539 // persistent pageTable element initial constructor
540 void Db::PageStorage::
556 // non-persistent pageTable element initial constructor
574 // deletes storage next start_transaction
578 st->used = 0; st->set_flags(fl_wr);
582 // locked pageTable access
586 read_locked by(db_info->pgTblLk);
587 return get_Page(pid);
591 * Db::alloc_pageTable -- alloc page table
594 * sz int input page table size
598 alloc_pageTable(int sz)
600 write_blocked by(db_info->pgTblLk);
601 int n = pageTableHunks(sz) * pageTableHunkSize;
602 Page **pt = new Page*[n];
603 if( !pt ) Err(errNoMemory);
604 int info_id, info_sz = n*sizeof(PageStorage);
605 PageStorage *new_page_info = (PageStorage *)new_uint8_t(info_sz, info_id);
606 if( !new_page_info ) { delete pt; Err(errNoMemory); }
609 for( ; i<root_info->pageTableUsed; ++i ) {
611 new_page_info[i] = *pt[i]->st;
612 pt[i]->st = &new_page_info[i];
615 for( ; i<n; ++i ) pt[i] = 0;
616 db_info->page_info_id = page_info_id = info_id;
617 del_uint8_t(page_info); page_info = new_page_info;
618 delete [] pageTable; pageTable = pt;
619 pageTableAllocated = n;
624 * Db::new_page -- access/construct new page
627 * returns page id on success (>=zero), error code otherwise(< 0)
633 locked by(db_info->pgAlLk);
634 pageId id = root_info->freePages;
636 if( root_info->pageTableUsed >= pageTableAllocated ) {
637 int sz = root_info->pageTableUsed + root_info->pageTableUsed/2 + 1;
638 if_err( alloc_pageTable(sz) );
640 id = root_info->pageTableUsed;
641 Page * pp = new Page(*new(&page_info[id]) PageStorage());
642 if( !pp ) Err(errNoMemory);
644 page_table_sz = ++root_info->pageTableUsed;
647 Page &pg = *get_page(id);
648 root_info->freePages = pg->link;
649 new(&pg) Page(*new(&page_info[id]) PageStorage());
657 Page *pp = get_Page(id);
664 * Db::free_page -- link to root_info->freePages
667 * pid pageId input page id
673 Page &pg = *get_Page(pid);
677 pg->clr_flags(fl_wr | fl_rd);
678 pg->set_flags(fl_free);
679 pageId id = root_info->freePages;
681 Page *lpp = 0; // use sorted order
682 while( id >= 0 && id < pid ) {
690 root_info->freePages = pid;
695 lower_page(pageId mid)
697 locked by(db_info->pgAlLk);
698 pageId id = root_info->freePages;
700 Page *pp = 0, *lpp = 0;
702 if( id < lid ) { lid = id; lpp = pp; }
707 Page &pg = *get_Page(lid);
709 (*lpp)->link = pg->link;
711 root_info->freePages = pg->link;
720 get_index(const char *nm, CmprFn cmpr)
722 int idx = root_info->indeciesUsed;
723 while( --idx >= 0 ) {
724 IndexBase *ib = indecies[idx];
726 if( !strncmp(&ib->st->name[0],nm,nmSz) ) break;
728 if( idx >= 0 && cmpr && indecies[idx]->st->type == idxBin ) {
729 IndexBinary *bidx = (IndexBinary *)indecies[idx];
730 bidx->compare = cmpr;
731 bidx->bst->cmprId = findCmprFn(cmpr);
739 if( r < 0 || r >= root_info->indeciesUsed ) Fail(errInvalid);
740 if( !indecies[r] ) Fail(errNotFound);
741 return indecies[r]->Count();
745 alloc_indecies(int n)
747 IndexBase **it = new IndexBase*[n];
748 if( !it ) Err(errNoMemory);
750 IndexInfo *info = (IndexInfo *)new_uint8_t(n*sizeof(*info), info_id);
751 if( !info ) { delete it; Err(errNoMemory); }
752 memcpy(info, index_info, indeciesAllocated*sizeof(*info));
754 for( ; i<root_info->indeciesUsed; ++i ) {
755 if( !(it[i] = indecies[i]) ) continue;
756 switch( it[i]->st->type ) {
758 IndexBinary *bidx = (IndexBinary *)it[i];
759 bidx->st = info[i].bin;
760 bidx->bst = info[i].bin;
763 IndexString *sidx = (IndexString *)it[i];
764 sidx->st = info[i].str;
765 sidx->sst = info[i].str;
772 for( ; i<n; ++i ) it[i] = 0;
773 db_info->index_info_id = index_info_id = info_id;
774 del_uint8_t(index_info); index_info = info;
775 delete [] indecies; indecies = it;
776 indeciesAllocated = n;
784 while( idx < root_info->indeciesUsed && indecies[idx] )
786 if( idx >= indeciesAllocated ) {
787 int n = indeciesAllocated + indexTableHunkSize;
788 if( alloc_indecies(n) ) Err(errNoMemory);
790 if( idx >= root_info->indeciesUsed ) {
791 if( idx >= max_index_type ) Err(errLimit);
792 root_info->indeciesUsed = idx+1;
793 indecies_sz = root_info->indeciesUsed;
802 delete indecies[idx];
804 for( idx=root_info->indeciesUsed; idx>0 && indecies[idx-1]==0; --idx );
805 indecies_sz = root_info->indeciesUsed = idx;
809 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexBinaryInfo *bi)
811 IndexBaseStorage *st = new(b) IndexBaseStorage();
812 IndexBinaryStorage *bst = new(bi) IndexBinaryStorage();
813 IndexBinary *bidx = new IndexBinary(this, st, bst);
814 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
815 if( bidx ) delete bidx;
823 new_binary_index(const char *nm, int ksz, int dsz, CmprFn cmpr)
825 if( get_index(nm) >= 0 ) Err(errDuplicate);
826 int idx = new_index();
827 if( idx < 0 ) Err(errNoMemory);
828 IndexBinary *bidx = new IndexBinary(this, idx, ksz, dsz, cmpr);
829 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
833 if( nm ) strncpy(&bidx->st->name[0],nm,sizeof(bidx->st->name));
834 indecies[idx] = bidx;
839 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexStringInfo *si)
841 IndexBaseStorage *st = new(b) IndexBaseStorage();
842 IndexStringStorage *sst = new(si) IndexStringStorage();
843 IndexString *sidx = new IndexString(this, st, sst);
844 if( !sidx ) Err(errNoMemory);
850 new_string_index(const char *nm, int dsz)
852 if( get_index(nm) >= 0 ) Err(errDuplicate);
853 int idx = new_index();
854 if( idx < 0 ) Err(errNoMemory);
855 IndexString *sidx = new IndexString(this, idx, dsz);
860 if( nm ) strncpy(&sidx->st->name[0],nm,sizeof(sidx->st->name));
861 indecies[idx] = sidx;
866 * Db::IndexBinary::keyMap - map function on index pages
869 * s pageId input current id
871 * returns 0 on success,
872 * errNotFound if index is empty
873 * error code otherwise
876 int Db::IndexBinary::
877 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
880 keyBlock *sbb; Page *spp; char *sn;
881 if_err( db->indexRead(s,0,sbb,spp,sn) );
882 if( (r=sbb->right_link()) >= 0 ) {
883 int lkdSz = kdSz + sizeof(pageId);
884 int n = spp->iused() / lkdSz;
885 for( int i=0; i<n; ++i ) {
886 pageId l = readPageId(sn);
887 if_ret( keyMap(l,fn) );
890 if_ret( keyMap(r,fn) );
892 if_err( (this->*fn)(s) );
897 * Db::IndexBinary::setLastKey -- conditionally update lastAccess
900 * s pageId input page of last insert
901 * u pageId input rightLink of page
902 * k int input insert offset in block
905 void Db::IndexBinary::
906 setLastKey(pageId s, pageId u, int k)
908 if( keyInterior < 0 ) {
916 lastAccess.offset = sizeof(keyBlock) + k;
921 * Db::IndexBinary::keyLocate -- generalized database key retreival
924 * s pageId input current id
925 * cmpr CmprFn input key compare function
927 * returns 0 on success
928 * errNotFound if not found,
929 * error code otherwise
932 int Db::IndexBinary::
933 keyLocate(pgRef &last, pageId s, int op,void *ky,CmprFn cmpr)
935 int ret = errNotFound;
936 keyBlock *sbb; Page *spp; char *sn;
937 if_err( db->indexRead(s,0,sbb,spp,sn) );
938 int len = spp->iused();
940 if( sbb->right_link() >= 0 )
941 lkdSz += sizeof(pageId);
945 // binary search block for key
946 while( (r - l) > 1 ) {
949 if( sbb->right_link() >= 0 )
952 int n = cmpr((char*)ky,kn);
954 if( op >= keyLE && op <= keyGE ) {
956 last.offset = sizeof(keyBlock) + k;
959 if( op == keyLE || op == keyGT ) n = 1;
961 if( n > 0 ) l = i; else r = i;
965 int k = op < keyEQ ? l*lkdSz : (r < len ? r : -1);
966 if( op != keyEQ && k >= 0 ) {
967 if( sbb->right_link() >= 0 )
970 last.offset = sizeof(keyBlock) + k;
974 if( (s = sbb->right_link()) >= 0 ) {
975 if( r < len ) s = readPageId(sn+r);
977 ret = keyLocate(last,s,op,ky,cmpr);
978 if( k == 0 ) ret = 0;
985 * Db::IndexBinary::Locate -- interface to generalized database key retreival
988 * op int input key relation in keyLT..keyGT
989 * key void * input retreival key image
990 * cmpr CmprFn input retreival key image
991 * rtnKey void * output resulting key value
992 * rtnData void * output resulting recd value
994 * returns 0 on success
995 * errNotFound on not found,
996 * error code otherwise
999 int Db::IndexBinary::
1000 refLocate(pgRef &loc, int op, void *key, CmprFn cmpr)
1002 if( st->rootPageId == NIL )
1004 if( op == keyEQ ) op = keyLE;
1005 if( !cmpr ) cmpr = compare;
1006 if_fail( keyLocate(loc,st->rootPageId,op, key,cmpr) );
1012 int Db::IndexBinary::
1013 Locate(int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
1016 if_fail( refLocate(last, op, key, cmpr) );
1018 if_err( db->addrRead_(last,kp) );
1020 memmove(rtnKey,kp,st->keySz);
1022 memmove(rtnData,kp+st->keySz,st->dataSz);
1027 * Db::IndexBinary::chkFind - check lastAccess block for key
1030 * key char * input key image
1031 * last pgRef * input last position loc
1033 * returns 0 if not found
1034 * error code otherwise
1037 int Db::IndexBinary::
1038 chkFind(pgRef &loc, char *key)
1041 if( s < 0 ) return 0; // must be valid block
1042 keyBlock *sbb; Page *spp; char *sn;
1043 if_err( db->indexRead(s,0,sbb,spp,sn) );
1044 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1045 int slen = spp->iused();
1046 int k = loc.offset - sizeof(keyBlock);
1047 if( k < 0 || k > slen ) return 0; // must be inside/end of block
1048 int cmpr0 = k>=slen ? -1 : compare(key,sn+k); // compare last access
1049 if( cmpr0 ) { // not found here
1051 if( cmpr0 < 0 ? (k-=kdSz) < 0 : (k+=kdSz) >= slen ) return 0;
1052 int cmpr1 = compare(key,sn+k);
1053 if( !cmpr1 ) goto xit; // found here
1055 if( l >= slen ) return 0; // no curr
1056 if( cmpr0 < 0 ) Fail(errNotFound); // btwn prev/curr
1058 cmpr1 = compare(key,sn+k);
1059 if( !cmpr1 ) goto xit; // found here
1060 if( cmpr1 > 0 ) return 0; // key after last in block
1063 if( cmpr0 > 0 ) Fail(errNotFound); // btwn curr/next
1065 cmpr1 = compare(key,sn);
1066 if( !cmpr1 ) goto xit; // found here
1067 if( cmpr1 < 0 ) return 0; // key before first in block
1069 return errNotFound; // key in block range, but not found
1072 loc.offset = sizeof(keyBlock) + k;
1077 * Db::IndexBinary::keyFind -- database unique key retreival
1080 * s pageId input current id
1082 * returns 0 on success
1083 * errNotFound on not found,
1084 * error code otherwise
1087 int Db::IndexBinary::
1088 keyFind(pgRef &loc, void *ky, pageId s)
1091 keyBlock *sbb; Page *spp; char *sn;
1092 if_err( db->indexRead(s,0,sbb,spp,sn) );
1094 if( sbb->right_link() >= 0 )
1095 lkdSz += sizeof(pageId);
1096 int len = spp->iused();
1100 // binary search block for key
1101 while( (r - l) > 1 ) {
1104 if( sbb->right_link() >= 0 )
1105 k += sizeof(pageId);
1107 int n = compare((char*)ky,kn);
1110 loc.offset = sizeof(keyBlock) + k;
1113 if( n > 0 ) l = i; else r = i;
1116 if( (s = sbb->right_link()) < 0 ) break;
1117 if( (r*=lkdSz) < len ) s = readPageId(sn+r);
1124 * Db::IndexBinary::Find -- interface to database unique key retreival
1127 * key void * input retreival key image
1128 * rtnData void * output resulting recd value
1130 * returns 0 on success
1131 * errNotFound on not found,
1132 * error code otherwise
1135 int Db::IndexBinary::
1136 refFind(pgRef &loc, void *ky)
1138 if( st->rootPageId == NIL )
1140 pageId r = st->rootPageId;
1144 if( CHK cFindCount > 2 ) ret = 1; }
1145 if( ret ) { // try the easy way
1146 ret = chkFind(loc, (char *)ky);
1147 if( ret == errNotFound ) {
1148 r = loc.id; ret = 0;
1152 if( !ret ) { // try the hard way
1153 if_fail( keyFind(loc,ky,r) );
1160 int Db::IndexBinary::
1161 Find(void *ky, void *rtnData)
1164 if_fail( refFind(last, ky) );
1166 if_err( db->addrRead_(last,kp) );
1168 memmove(rtnData,kp+st->keySz,st->dataSz);
1173 int Db::IndexBinary::
1174 chkInsert(void *key, void *data)
1177 char *ky = (char *)key;
1178 pageId s = lastInsert.id;
1179 if( s < 0 || cInsCount < 2 ) return 0; /* >= 2 in a row */
1180 keyBlock *sbb; Page *spp; char *sn;
1181 if_err( db->indexRead(s,1,sbb,spp,sn) );
1182 if( sbb->right_link() >= 0 ) return 0; /* must be exterior */
1183 int slen = spp->iused();
1184 int k = lastInsert.offset - sizeof(keyBlock);
1186 char *kn = kp + kdSz;
1187 char *rp = sn + slen;
1188 int n = compare(ky,kp);
1189 if( n == 0 ) Fail(errDuplicate);
1190 if( n > 0 ) { /* after last one */
1191 if( kn >= rp ) { /* no next one */
1192 if( st->rightHandSide == s )
1197 if( n == 0 ) Fail(errDuplicate);
1199 rhs = 1; /* before next one */
1202 if( !rhs ) return 0; /* not a hit */
1203 if( spp->iallocated()-slen < kdSz ) return 0; /* doesnt fit */
1204 if( rp > kn ) memmove(kn+kdSz,kn,rp-kn); /* move data up */
1205 memmove(kn,key,st->keySz);
1206 memmove(kn+st->keySz,data,st->dataSz); /* add new key/data */
1207 spp->iused(slen + kdSz);
1210 lastAccess.offset = sizeof(keyBlock) + kn-sn;
1215 * Db::IndexBinary::keyInsert - add unique key to database
1218 * s pageId input current id
1220 * iky char * input assembled insertion key
1222 * returns 0 on success,
1223 * errDuplicate if key already exists in database
1224 * error code otherwise
1227 int Db::IndexBinary::
1228 keyInsert(pageId s, pageId &t)
1231 /* mark no continued insertion, and check end of search */
1232 /* if not end, read current pageId, search for key */
1233 /* return error if found. */
1234 keyBlock *sbb; Page *spp; char *sn;
1235 if_err( db->indexRead(s,1,sbb,spp,sn) );
1237 pageId u = sbb->right_link();
1239 lkdSz += sizeof(pageId);
1240 int slen = spp->iused();
1241 int keyCount = slen / lkdSz;
1242 int maxKeysInBlock = spp->iallocated() / lkdSz;
1246 /* binary search block for key */
1247 while( (r - l) > 1 ) {
1250 if( sbb->right_link() >= 0 )
1251 k += sizeof(pageId);
1253 int n = compare(this->akey,kn);
1256 lastAccess.offset = sizeof(keyBlock) + k;
1259 if( n > 0 ) l = i; else r = i;
1262 /* not found in this pageId, continue search at lower levels. */
1263 /* process insertion if lower level splits ( t>=0 ). */
1264 int i = sizeof(pageId);
1268 u = readPageId(sn+k);
1269 if_ret( keyInsert(u, t) );
1270 if( t < 0 ) return 0;
1272 writePageId(sn+k,t);
1278 /* if current pageId is not full, insert into this pageId. */
1279 if( keyCount < maxKeysInBlock ) {
1282 memmove(kn+lkdSz,kn,slen-k);
1283 spp->iused(slen + lkdSz);
1284 memmove(kn,&iky[i],lkdSz);
1285 setLastKey(s,u,k); /* save insert loc */
1289 /* current pageId is full, split pageId and promote new parent key */
1290 keyBlock *tbb; Page *tpp; char *tn;
1291 if_err( blockAllocate(t,tbb,tpp,tn) );
1292 /* split point is middle, or end if inserting consecutive on rhs */
1293 int promoted = maxKeysInBlock/2;
1294 if( cInsCount > 2 && s == st->rightHandSide )
1295 promoted = maxKeysInBlock-1;
1297 int tlen = slen - promoted;
1298 if( st->rightHandSide == s ) st->rightHandSide = t;
1300 if( k <= promoted ) { /* at or left of promoted key */
1301 if( k != promoted ) { /* not promoting current key */
1302 kn = sn+promoted-lkdSz;
1303 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1305 memmove(kn+lkdSz,kn,promoted-lkdSz-k);
1306 memmove(kn,&iky[i],lkdSz); /* insert new key */
1307 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1308 setLastKey(s,u,k); /* save insert loc */
1310 memmove(tn,sn+promoted,tlen);
1312 else { /* key is > center */
1314 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1315 int j = k - promoted - lkdSz;
1316 memmove(tn,kn+=lkdSz,j);
1317 memmove(kn=tn+j,&iky[i],lkdSz); /* insert new key */
1318 setLastKey(t,u,j); /* save insert loc */
1319 memmove(kn+=lkdSz,sn+k,slen-k);
1320 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1323 /* set newly split blocks half full, and set new links. */
1327 tbb->right_link(sbb->right_link());
1328 sbb->right_link(readPageId(&iky[0]));
1329 writePageId(&iky[0],s);
1330 /* if root is splitting, create new left */
1331 if( s == st->rootPageId ) {
1332 keyBlock *ubb; Page *upp; char *un;
1333 if_err( blockAllocate(u,ubb,upp,un) );
1334 memmove(un,sn,slen);
1336 ubb->right_link(sbb->right_link());
1337 writePageId(&iky[0],u);
1338 k = st->keySz + st->dataSz + sizeof(pageId);
1339 memmove(sn,&iky[0],k);
1342 setLastKey(s,t,sizeof(pageId));
1344 /* t >= 0 indicates continued insertion, return success */
1348 void Db::IndexBinary::
1349 makeKey(char *cp,char *key,int l,char *recd,int n)
1351 writePageId(cp,NIL);
1352 memmove(cp+=sizeof(pageId),key,l);
1353 if( recd ) memmove(cp+=l,recd,n);
1357 * Db::Insert - interface to database unique key insertion
1360 * key void * input key image
1361 * data void * input recd value
1363 * returns 0 on success,
1364 * errDuplicate if key already exists in database
1365 * error code otherwise
1368 int Db::IndexBinary::
1369 Insert(void *key, void *data)
1371 if_err( MakeRoot() );
1374 if( CHK cInsCount > 2 ) { // try the easy way
1375 ret = chkInsert(key,data);
1378 if( !ret ) { // try the hard way
1379 makeKey(&iky[0],this->akey=(char *)key,st->keySz,(char *)data,st->dataSz);
1380 pageId t = NIL; lastAccess.id = NIL;
1381 if_ret( keyInsert(st->rootPageId, t) );
1383 if( keyInterior > 0 ) lastAccess.id = NIL;
1390 * Db::keyBlockUnderflow -- balences/merges blocks after a deletion
1393 * t int output continuation flag
1394 * lbb keyBlock * input left sibling keyBlock
1395 * p pageId input parent keyBlock id
1396 * pbb keyBlock * input parent keyBlock
1397 * pi int input deletion key offset parent
1400 * returns 0 on success, errorcode otherwise
1403 int Db::IndexBinary::
1404 keyBlockUnderflow(int &t,keyBlock *lbb,pageId p,keyBlock *pbb,int pi)
1410 int psiz = kdSz + sizeof(pageId);
1412 * if deletion was at right end of block
1413 * back up one key otherwise use this key
1415 char *pn = (char *)(pbb+1);
1416 Page *ppp = db->get_page(p);
1417 int plen = ppp->iused();
1418 if( pi >= plen ) { /* read lt side */
1419 r = pbb->right_link();
1422 l = readPageId(pn+pi);
1423 if_err( db->indexRead(l,1,lbb) );
1425 else { /* read rt side */
1426 l = readPageId(pn+pi);
1428 r = i >= plen ? pbb->right_link() : readPageId(pn+i);
1429 if_err( db->indexRead(r,1,rbb) );
1432 int lsz = lbb->right_link() >= 0 ? sizeof(pageId) : 0;
1433 int lkdSz = kdSz + lsz;
1434 char *ln = (char *)(lbb+1);
1435 Page *lpp = db->get_page(l);
1436 int llen = lpp->iused();
1437 char *rn = (char *)(rbb+1);
1438 Page *rpp = db->get_page(r);
1439 int rlen = rpp->iused();
1442 * if both lt&rt blocks+parent key all fit in one block, merge them
1444 int n = llen + rlen;
1445 if( n <= rpp->iallocated()-lkdSz ) { /* merge */
1446 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1447 memmove(ln+llen,pn+pi+sizeof(pageId)-lsz,lkdSz);
1449 memmove(ln+llen,rn,rlen); /* move right to left */
1451 lbb->right_link(rbb->right_link());
1453 memmove(pn+pi,pn+i,plen-i); /* remove parent key */
1455 if( plen == 0 && p == st->rootPageId ) { /* totally mashed root */
1456 if( st->rightHandSide == r ) st->rightHandSide = p;
1457 if_err( blockFree(r) );
1458 memmove(pn,ln,llen); /* copy to parent page */
1459 pbb->right_link(lbb->right_link());
1461 if_err( blockFree(l) );
1464 if( r != pbb->right_link() ) /* not rightLink */
1465 writePageId(pn+pi,l); /* must be next key */
1468 if( st->rightHandSide == r ) st->rightHandSide = l;
1469 if_err( blockFree(r) );
1473 lastAccess.id = NIL;
1474 return 0; /* continue underflow */
1478 if( tsiz < lkdSz ) tsiz = lkdSz; /* slosh threshold */
1479 if( plen > psiz && plen > tsiz ) t = 0; /* discontinue underflow */
1480 if( (i=(n/=2)-llen) >= tsiz || !llen ) { /* slosh left */
1481 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1482 k = pi+sizeof(pageId);
1483 memmove(kn=ln+llen,pn+k-lsz,lkdSz); /* move parent to left */
1485 memmove(kn+=lkdSz,rn,i*=lkdSz); /* migrate keys left */
1486 llen += i+lkdSz; kn = rn+i;
1487 if( lbb->right_link() >=0 )
1488 lbb->right_link(readPageId(kn));
1489 writePageId(pn+pi,l); /* change parent key */
1490 memmove(pn+k,kn+lsz,kdSz);
1491 kn += lkdSz; i += lkdSz;
1492 memmove(rn,kn,rlen-=i); /* migrate keys left */
1494 else if( (i=n-rlen) >= tsiz || !rlen ) { /* slosh right */
1495 i /= lkdSz; i *= lkdSz;
1496 memmove(kn=rn+i,rn,rlen);
1497 rlen += i; /* migrate keys right */
1498 writePageId(pn+pi,lbb->right_link());
1499 k = pi+sizeof(pageId);
1500 memmove(kn-=lkdSz,pn+k-lsz,lkdSz); /* move parent key */
1501 i -= lkdSz; n = llen-i;
1502 memmove(rn,kn=ln+n,i);
1503 kn -= lkdSz; /* migrate keys right */
1504 if( lbb->right_link() >=0 )
1505 lbb->right_link(readPageId(kn));
1506 memmove(pn+k,kn+lsz,kdSz); /* change parent key */
1507 writePageId(pn+pi,l);
1512 lastAccess.id = NIL;
1513 lpp->iused(llen); /* update lengths */
1520 * Db::IndexBinary::keyDelete - remove unique key from database
1523 * t int input/output check underflow flag
1524 * kp void * input key image to be removed
1525 * s pageId input current id
1526 * p pageId input parent id
1527 * pbb keyBlock * input parent
1528 * pi int input deletion key offset parent
1530 * returns 0 on success,
1531 * errNotFound if key does not exists in database
1532 * error code otherwise
1535 int Db::IndexBinary::
1536 keyDelete(int &t,void *kp,pageId s,pageId p,keyBlock *pbb,int pi)
1539 keyBlock *sbb; Page *spp; char *sn;
1540 if_err( db->indexRead(s,1,sbb,spp,sn) );
1541 int slen = spp->iused();
1542 t = 1; /* check underflow */
1543 if( idf == 0 ) { /* not interior deletion */
1545 if( sbb->right_link() >= 0 )
1546 lkdSz += sizeof(pageId);
1548 int r = slen / lkdSz;
1549 while( (r - l) > 1 ) { /* binary search for key */
1552 if( sbb->right_link() >= 0 )
1553 k += sizeof(pageId);
1555 int n = compare(this->akey,kn);
1557 if( sbb->right_link() < 0 ) { /* terminal key */
1559 memmove(kn,kn+lkdSz,slen-k);
1560 spp->iused(slen); /* delete key */
1561 lastAccess.id = s; /* lastAccess = key */
1562 lastAccess.offset = sizeof(keyBlock) + k;
1564 else { /* interior key */
1565 k -= sizeof(pageId);
1568 idf = 1; /* interior delete */
1569 if_ret( keyDelete(t,(void *)kn,u,s,sbb,k) );
1573 if( n > 0 ) l = i; else r = i;
1575 if( (u=sbb->right_link()) < 0 ) { /* could not find it */
1579 if( (r *= lkdSz) < slen )
1580 u = readPageId(sn+r);
1581 if_ret( keyDelete(t,kp,u,s,sbb,r) ); /* continue search */
1583 else { /* interior deletion */
1584 if( (u=sbb->right_link()) < 0 ) { /* equivalent exterior key */
1585 int i = slen - kdSz;
1586 char *kn = sn + i; /* translate to interior */
1587 memmove((char *)kp+sizeof(pageId),kn,kdSz);
1588 spp->iused(i); /* remove key */
1590 else { /* continue decending */
1591 if_ret( keyDelete(t,kp,u,s,sbb,slen) );
1595 if( t != 0 ) { /* underflow possible */
1597 t = 0; /* no parent, root */
1599 if_err( keyBlockUnderflow(t,sbb,p,pbb,pi) );
1604 int Db::IndexBinary::
1605 chkDelete(pgRef &loc, void *kp)
1609 ret = chkFind(loc, (char*)kp); // try last delete
1610 if( !ret && lastFind.id != loc.id ) {
1612 ret = chkFind(loc, (char*)kp); // try last find
1614 if( !ret ) return 0;
1615 if( ret == errNotFound ) ret = 0;
1618 keyBlock *sbb; Page *spp; char *sn;
1619 if_err( db->indexRead(s,1,sbb,spp,sn) );
1620 int dlen = spp->iused() - kdSz;
1621 if( dlen < kdSz ) return 0; // at least 1 key must remain
1622 if( !ret ) return errNotFound;
1623 spp->iused(dlen); // delete
1624 int k = loc.offset - sizeof(keyBlock);
1627 memmove(kp,kp+kdSz,dlen-k);
1633 * Db::IndexBinary::Delete - interface to remove unique key
1636 * key void * input key image to be removed
1638 * returns 0 on success,
1639 * errNotFound key does not exists in database
1640 * error code otherwise
1643 int Db::IndexBinary::
1646 if( st->rootPageId == NIL ) Fail(errNotFound);
1647 this->akey = (char *)key;
1649 pageId r = st->rootPageId;
1651 if( CHK cDelCount > 2 ) { // try the easy way
1653 ret = chkDelete(loc, key);
1654 if( ret == errNotFound ) { // in exterior block
1655 r = loc.id; ret = 0;
1658 if( !ret ) { // try the hard way
1659 makeKey(&iky[0],this->akey=(char *)key,st->keySz,0,0);
1660 lastAccess.id = NIL; int t = 1;
1661 (void)r; // use full search, r works but is not traditional
1662 if_fail( keyDelete(t,(void *)&iky[0],/*r*/st->rootPageId,0,0,0) );
1663 if_err( UnmakeRoot() );
1671 * Db::IndexBinary::keyFirst - access leftmost key in tree
1674 * s pageId input current id
1676 * returns 0 on success,
1677 * errNotFound if index is empty
1678 * error code otherwise
1681 int Db::IndexBinary::
1682 keyFirst(pgRef &loc, pageId s)
1686 if_err( db->indexRead(s,0,sbb) );
1687 if( sbb->right_link() < 0 ) break;
1688 char *sn = (char *)(sbb+1);
1693 loc.offset = sizeof(keyBlock);
1698 * Db::IndexBinary::First -- interface to database access leftmost key in tree
1701 * rtnKey void * output resulting key value
1702 * rtnData void * output resulting recd value
1704 * returns 0 on success
1705 * errNotFound on not found,
1706 * error code otherwise
1709 int Db::IndexBinary::
1710 First(void *rtnKey,void *rtnData)
1712 if( st->rootPageId == NIL ) Fail(errNotFound);
1714 if_fail( keyFirst(first, st->rootPageId) );
1716 if_err( db->addrRead_(first,kp) );
1718 memmove(rtnKey,kp,st->keySz);
1720 memmove(rtnData,kp+st->keySz,st->dataSz);
1722 lastNext = lastAccess = first; }
1727 * Db::IndexBinary::keyLast - access rightmost key in tree
1730 * s pageId input current id
1732 * returns 0 on success,
1733 * errNotFound if index is empty
1734 * error code otherwise
1737 int Db::IndexBinary::
1738 keyLast(pgRef &loc, pageId s)
1742 if_err( db->indexRead(s,0,sbb) );
1743 pageId u = sbb->right_link();
1748 Page *spp = db->get_page(s);
1750 int k = spp->iused() - kdSz;
1751 loc.offset = sizeof(keyBlock) + k;
1756 * Db::IndexBinary::Last -- interface to database access rightmost key in tree
1759 * rtnKey void * output resulting key value
1760 * rtnData void * output resulting recd value
1762 * returns 0 on success
1763 * errNotFound on not found,
1764 * error code otherwise
1767 int Db::IndexBinary::
1768 Last(void *rtnKey,void *rtnData)
1770 if( st->rootPageId == NIL ) Fail(errNotFound);
1772 if_fail( keyLast(last, st->rootPageId) );
1774 if_err( db->addrRead_(last,kp) );
1776 memmove(rtnKey,kp,st->keySz);
1778 memmove(rtnData,kp+st->keySz,st->dataSz);
1780 lastNext = lastAccess = last; }
1785 * Db::IndexBinary::Modify -- interface to database record modify
1788 * keyDef index input key definition block
1789 * key void * input retreival key image
1790 * recd void * input new recd value
1792 * returns 0 on success
1793 * errNotFound on not found,
1794 * error code otherwise
1797 int Db::IndexBinary::
1798 Modify(void *key,void *recd)
1800 if_fail( Find(key,0) );
1802 if_err( db->addrWrite_(lastAccess,bp) );
1803 memmove(bp+st->keySz,recd,st->dataSz);
1807 int Db::IndexBinary::
1808 chkNext(pgRef &loc, char *&kp)
1811 if( s < 0 ) return 0; // must be valid
1812 keyBlock *sbb; Page *spp; char *sn;
1813 if_err( db->indexRead(s,0,sbb,spp,sn) );
1814 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1815 int k = loc.offset - sizeof(keyBlock);
1816 if( k < 0 || k >= spp->iused() ) return 0; // curr must be in block
1817 if( (k+=kdSz) >= spp->iused() ) return 0; // next must be in block
1819 loc.offset = sizeof(keyBlock) + k;
1823 int Db::IndexBinary::
1824 keyNext(pgRef &loc, char *kp)
1826 if_fail( keyLocate(loc,st->rootPageId, keyGT,kp,compare) );
1831 * Db::IndexBinary::Next -- interface to sequential database key retreival
1834 * loc pgRef & input last known retreival key
1835 * rtnKey void * output resulting key value
1836 * rtnData void * output resulting recd value
1838 * returns 0 on success
1839 * error code otherwise
1842 int Db::IndexBinary::
1843 Next(pgRef &loc,void *rtnKey,void *rtnData)
1845 if( st->rootPageId == NIL ) Fail(errNotFound);
1847 int ret = CHK chkNext(loc,kp); // try the easy way
1851 if( !st->keySz && rtnKey ) // rtnKey is rKey class
1852 ky = (char *)rtnKey;
1854 if_err( db->addrRead_(loc,ky) );
1855 if_ret( keyNext(loc, ky) ); // try the hard way
1857 if_err( db->addrRead_(loc,kp) );
1859 memmove(rtnKey,kp,st->keySz);
1861 memmove(rtnData,kp+st->keySz,st->dataSz);
1867 void Db::IndexBinary::
1875 IndexBinary(Db *zdb, int zidx, int ksz, int dsz, CmprFn cmpr)
1876 : IndexBase(zdb, idxBin, zidx, ksz, dsz)
1878 compare = !cmpr && !ksz ? cmprKey : cmpr;
1879 bst = new(st+1) IndexBinaryStorage(zdb->findCmprFn(compare));
1880 iky = new char[st->blockSize/2+1];
1881 tky = new char[st->blockSize/2+1];
1886 IndexBinary(Db *zdb, IndexBaseStorage *b, IndexBinaryStorage *d)
1887 : IndexBase(zdb, *b)
1889 bst = new(d) IndexBinaryStorage();
1890 compare = !bst->cmprId && !b->keySz ? cmprKey : cmprFns[bst->cmprId];
1891 iky = new char[st->blockSize/2+1];
1892 tky = new char[st->blockSize/2+1];
1897 IndexBinary(IndexBase *ib, IndexBaseStorage *b, IndexBinaryStorage *d)
1898 : IndexBase(ib->db, *b)
1900 bst = new(d) IndexBinaryStorage();
1901 compare = !bst->cmprId && !ib->st->keySz ? cmprKey : cmprFns[bst->cmprId];
1913 int Db::IndexString::
1914 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
1917 keyBlock *sbb; Page *spp; char *sn;
1918 if_err( db->indexRead(s,0,sbb,spp,sn) );
1919 unsigned char *lp = (unsigned char *)sn;
1920 unsigned char *rp = lp + spp->iused();
1921 if( (r=sbb->right_link()) >= 0 ) {
1923 pageId l = getPageId(lp);
1924 lp += st->dataSz; ++lp;
1925 while( *lp++ != 0 );
1926 if_ret( keyMap(l,fn) );
1928 if_ret( keyMap(r,fn) );
1930 if_err( (this->*fn)(s) );
1935 * Db::IndexString::kpress -- compresses kp with prefix
1938 * kp unsigned char * input key string to compress
1939 * lp unsigned char * input left prefix
1940 * cp unsigned char * output compressed string
1942 * returns length of compressed string cp
1943 * safe to kpress buffer in place over cp or lp
1946 int Db::IndexString::
1947 kpress(unsigned char *kp, unsigned char *lp, unsigned char *cp)
1951 while( *kp == *lp ) {
1955 ch = *kp++; *cp++ = n;
1963 * divide tbfr length n into sectors l and s with rlink r
1964 * if i >= 0 then the insert point is i
1966 * promoted key in tky, return left sector
1969 int Db::IndexString::
1970 split(int n, int i, pageId s, pageId &l, pageId r)
1972 unsigned char lky[keysz];
1973 unsigned char *bp = &tbfr[0];
1974 unsigned char *lp = bp;
1975 unsigned char *rp = bp + n;
1976 unsigned char *kp = lp;
1977 unsigned char *tp = 0;
1978 pageId t = NIL, u = NIL;
1979 keyBlock *lbb; Page *lpp; char *ln;
1981 db->indexRead(l,1,lbb,lpp,ln) :
1982 blockAllocate(l,lbb,lpp,ln) );
1984 int blen = lpp->iallocated()-1 - st->dataSz;
1985 if( r >= 0 ) blen -= sizeof(pageId);
1986 if( n > blen ) n = blen;
1987 /* split point is middle, or end if inserting consecutive on rhs */
1989 if( i >= 0 && cInsCount > 2 && s == st->rightHandSide )
1991 unsigned char *mp = lp + promoted;
1992 // get promoted key in lky
1995 // expand key to lky
1996 if( r >= 0 ) u = getPageId(lp);
1997 tp = lp; lp += st->dataSz;
1998 for( i=*lp++; (lky[i++]=*lp++) != 0; );
1999 // stop copy if past mid pt and
2000 // remaining bytes + expanded key bytes fit in block
2002 if( kp != &tbfr[0] && lp >= mp && rlen+i <= blen )
2004 // copy promoted data/key
2005 unsigned char *tkp = tky;
2006 umemmove(tkp, tp, st->dataSz);
2008 // save end of left block, move to next key
2009 kp = bp; bp = lp; t = u;
2013 // must be at least 3 keys in tbfr (left,parent,right)
2014 if( !n || bp >= rp ) Err(errCorrupt);
2015 memmove(ln,&tbfr[0],n);
2018 // add first key in next block, order of moves important
2019 // memmove may be move up since key may expand past split
2021 if( r >= 0 ) putPageId(mp,u);
2022 umemmove(mp, tp, st->dataSz); // data recd
2023 *mp++ = 0; // first prefix is zero length
2024 memmove(mp+i, lp, rlen); // rest of block
2025 memmove(mp, &lky[0], i); // expanded key
2027 int slen = mp - &tbfr[0];
2029 * if root is splitting, allocate new right sibling
2030 * and set root to contain only new parent key.
2032 keyBlock *sbb; Page *spp; char *sn;
2033 if_err( db->indexRead(s,1,sbb,spp,sn) );
2034 if( s == st->rootPageId ) {
2035 keyBlock *rbb; Page *rpp; char *rn;
2036 if_err( blockAllocate(u,rbb,rpp,rn) );
2039 memmove(rn,&tbfr[0],slen);
2041 if( st->rightHandSide == s ) st->rightHandSide = r;
2042 mp = (unsigned char *)sn;
2044 umemmove(mp, kp=&tky[0], st->dataSz);
2046 for( *mp++=0; (*mp++=*kp++)!=0; );
2047 slen = mp - (unsigned char *)sn;
2050 memmove(sn, &tbfr[0], slen);
2056 int Db::IndexString::
2057 chkInsert(void *key, void *data)
2059 unsigned char *ky = (unsigned char *)key;
2060 pageId s = lastInsert.id;
2061 if( s < 0 ) return 0; // must be valid
2062 int n = ustrcmp(&lastInsKey[0],ky);
2063 if( !n ) Fail(errDuplicate);
2064 keyBlock *sbb; Page *spp; char *sn;
2065 if_err( db->indexRead(s,0,sbb,spp,sn) );
2066 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2067 int slen = spp->iused();
2068 int k = lastInsert.offset - sizeof(keyBlock);
2069 if( k < 0 || k >= slen ) return 0; // must be inside block
2070 unsigned char *bp = (unsigned char *)sn;
2071 unsigned char *lp = bp;
2072 unsigned char *rp = bp + k; // beginning to current
2073 unsigned char *tp = bp + slen;
2074 unsigned char nky[keysz]; nky[0] = 0;
2075 if( n < 0 ) { // past here
2076 ustrcpy(&nky[0],&lastInsKey[0]);
2079 else { // before here
2080 n = ustrcmp(bp+st->dataSz+1,ky);
2081 if( !n ) Fail(errDuplicate);
2082 if( n > 0 ) return 0; // before first
2083 unsigned char rb; rp += st->dataSz; // move past curr data
2084 for( int i=*rp++; (rb=*rp++) == lastInsKey[i] && rb != 0; ++i );
2085 if( rb ) return 0; // must match curr
2087 unsigned char lky[keysz]; lky[0] = 0;
2088 unsigned char *kp; n = 0;
2089 while( (kp=lp) < rp ) {
2090 lp += st->dataSz; // scan next
2091 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2092 n = ustrcmp(ky,&nky[0]);
2093 if( !n ) Fail(errDuplicate);
2094 if( n < 0 ) break; // btwn last,next
2095 ustrcpy(&lky[0],&nky[0]);
2097 if( lp >= tp && s != st->rightHandSide ) return 0;// not found, not rhs
2098 // recompress and compute storage demand
2099 int llen = kp - (unsigned char *)sn; // left
2100 int lsz = kpress(ky, &lky[0], &lky[0]); // lky = kpress new key
2101 int mlen = st->dataSz + lsz;
2102 int klen = mlen; // new key demand
2104 if( kp < rp ) { // if next key exists
2105 nsz = kpress(&nky[0], ky, &nky[0]);
2106 mlen += st->dataSz + nsz; // new+next key demand
2108 int rlen = tp - lp; // right
2109 int blen = llen + mlen + rlen; // left + demand + right
2111 if( blen > spp->iallocated() ) return 0; // if insertion wont fit
2113 spg->set_flags(fl_wr);
2114 if( kp < rp ) { // insert not at end of block
2115 memmove(kp+mlen, lp, rlen); // move right up
2116 memmove(kp+klen, kp, st->dataSz); // move next data up
2117 memmove(kp+klen+st->dataSz,&nky[0],nsz); // kpress next key
2119 k = kp - (unsigned char *)sn;
2120 lastAccess.id = lastInsert.id;
2121 lastAccess.offset = sizeof(keyBlock) + k;
2122 ustrcpy(&lastAccKey[0],ky);
2123 umemmove(kp,(unsigned char *)data,st->dataSz); // insert new key
2124 umemmove(kp,&lky[0],lsz);
2130 * insert - add node to compressed b-tree.
2131 * entry - tky - data/key to add
2132 * s - root of tree/subtree
2133 * t - NIL/discontinue or tky->left_link
2135 int Db::IndexString::
2136 keyInsert(pageId &t, pageId s)
2138 unsigned char lky[keysz]; // last key
2139 unsigned char nky[keysz]; // next key
2140 keyBlock *sbb; Page *spp; char *sn;
2141 if_err( db->indexRead(s,1,sbb,spp,sn) );
2142 t = NIL; // set discontinue insertion
2144 pageId r = sbb->right_link();
2146 int n = spp->iused();
2147 unsigned char *lp = (unsigned char *)sn; // rest of block
2148 unsigned char *rp = lp + n; // end of block
2149 unsigned char *kp = 0; // insertion point
2150 unsigned char *tkp = &tky[st->dataSz]; // tky = data/key
2152 while( (kp=lp) < rp ) { // search
2153 if( r >= 0 ) l = getPageId(lp);
2155 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2156 n = ustrcmp(&tkp[0], &nky[0]);
2158 if( n == 0 ) Fail(errDuplicate);
2161 // if non-terminal block, continue search at lower levels
2162 // if lower level splits( t>=0 ), process insertion
2164 if_ret( keyInsert(t, kp>=rp ? r : l) );
2165 if( t < 0 ) return 0; // discontinue
2168 // recompress and compute storage demand
2169 int llen = kp - (unsigned char *)sn; // left
2170 int lsz = kpress(tkp, &lky[0], &lky[0]); // lky = kpress new key
2171 int dsz = st->dataSz;
2172 if( r >= 0 ) dsz += sizeof(pageId); // link+data size
2173 int mlen = dsz + lsz;
2174 int klen = mlen; // new key demand
2176 if( kp < rp ) { // if next key exists
2177 nsz = kpress(&nky[0], &tkp[0], &nky[0]);
2178 mlen += dsz + nsz; // new+next key demand
2180 int rlen = rp - lp; // right
2181 int blen = llen + mlen + rlen; // left + demand + right
2183 if( blen <= spp->iallocated() ) { // if insertion will fit
2184 if( kp < rp ) { // insert not at end of block
2185 memmove(kp+mlen, lp, rlen); // move right up
2186 memmove(kp+klen, kp, dsz); // move next link/data up
2187 memmove(kp+klen+dsz,&nky[0],nsz); // kpress next key
2189 if( r >= 0 ) putPageId(kp, t);
2190 if( lastAccess.id < 0 ) { // update lastAccess
2192 int k = kp - (unsigned char *)sn;
2193 lastAccess.offset = sizeof(keyBlock) + k;
2194 ustrcpy(&lastAccKey[0],&tkp[0]);
2196 umemmove(kp,&tky[0],st->dataSz); // insert new key
2197 memmove(kp,&lky[0],lsz);
2202 unsigned char *bp = &tbfr[0]; // overflowed, insert to tbfr
2203 umemmove(bp,(unsigned char *)sn,llen);
2204 if( r >= 0 ) putPageId(bp, t);
2205 umemmove(bp,&tky[0],st->dataSz); // insert new key
2206 umemmove(bp,&lky[0],lsz);
2207 if( kp < rp ) { // insert not at end of block
2208 umemmove(bp,kp,dsz);
2209 umemmove(bp,&nky[0],nsz); // kpress next key
2210 umemmove(bp,lp,rlen); // copy rest of block
2213 if_err( split(blen,llen,s,t,r) ); // split block, and continue
2218 int Db::IndexString::
2219 Insert(void *key,void *data)
2221 if_err( MakeRoot() );
2223 if( CHK cInsCount > 2 ) { // try the easy way
2224 ret = chkInsert(key,data);
2227 if( !ret ) { // try the hard way
2228 unsigned char *tp = &tky[0];
2229 umemmove(tp,(unsigned char *)data,st->dataSz);
2230 ustrcpy(tp,(unsigned char*)key);
2231 pageId t = NIL; lastAccess.id = NIL;
2232 if_ret( keyInsert(t, st->rootPageId) );
2234 ustrcpy(&lastInsKey[0],&lastAccKey[0]);
2241 int Db::IndexString::
2245 if_err( db->indexRead(s,0,sbb) );
2246 char *sn = (char *)(sbb+1);
2248 while( sbb->right_link() >= 0 ) {
2250 if_err( db->indexRead(s,0,sbb) );
2251 sn = (char *)(sbb+1);
2255 lastAccess.offset = sizeof(keyBlock);
2256 unsigned char *kp = (unsigned char *)sn;
2257 ustrcpy(&lastAccKey[0],kp+st->dataSz+1);
2261 int Db::IndexString::
2262 First(void *rtnKey,void *rtnData)
2264 if( st->rootPageId == NIL ) Fail(errNotFound);
2265 if_ret( keyFirst(st->rootPageId) );
2267 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2270 if_err( db->addrRead_(lastAccess,kp) );
2271 memmove(rtnData,kp,st->dataSz);
2273 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2274 lastNext = lastAccess;
2278 int Db::IndexString::
2282 if_err( db->indexRead(s,0,sbb) );
2285 while( (u=sbb->right_link()) >= 0 ) {
2286 if_err( db->indexRead(s=u,0,sbb) );
2289 unsigned char lky[keysz];
2290 Page *spp = db->get_page(s);
2291 char *sn = (char *)(sbb+1);
2292 unsigned char *lp = (unsigned char *)sn;
2293 unsigned char *rp = lp + spp->iused();
2294 unsigned char *kp = 0;
2298 kp = lp; lp += st->dataSz;
2299 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2302 if( !kp ) Fail(errNotFound);
2303 int k = kp - (unsigned char *)sn;
2305 lastAccess.offset = sizeof(keyBlock) + k;
2306 ustrcpy(&lastAccKey[0],&lky[0]);
2310 int Db::IndexString::
2311 Last(void *rtnKey,void *rtnData)
2313 if( st->rootPageId == NIL ) Fail(errNotFound);
2314 if_ret( keyLast(st->rootPageId) );
2316 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2319 if_err( db->addrRead_(lastAccess,kp) );
2320 memmove(rtnData,kp,st->dataSz);
2322 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2323 lastNext = lastAccess;
2327 int Db::IndexString::
2328 chkFind(pgRef &loc, char *key, unsigned char *lkey, unsigned char *lkp)
2331 if( s < 0 ) return 0; // must be valid block
2332 keyBlock *sbb; Page *spp; char *sn;
2333 if_err( db->indexRead(s,0,sbb,spp,sn) );
2334 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2335 int slen = spp->iused();
2336 int k = loc.offset - sizeof(keyBlock);
2337 if( k < 0 || k >= slen ) return 0; // must be inside/end of block
2338 unsigned char *ky = (unsigned char *)key;
2339 unsigned char *bp = (unsigned char *)sn;
2340 unsigned char *kp = bp + k;
2341 unsigned char *rp = bp + slen;
2342 unsigned char *lp = kp; kp += st->dataSz;
2343 unsigned char *ip = &lkey[*kp++];
2344 while( kp<rp && *kp!=0 && *ip==*kp ) { ++ip; ++kp; }
2345 if( *ip || *kp++ ) return 0; // must match curr
2346 int n = ustrcmp(&lkey[0], ky);
2347 if( !n && !lkp ) goto xit; // found here, and no last key
2348 unsigned char lky[keysz];
2350 if( n > 0 ) { // before here
2351 rp = lp; lp = kp = bp;
2352 ip = lp + st->dataSz;
2353 if( *ip++ ) Err(errCorrupt);
2354 if( (n=ustrcmp(ip, ky)) > 0 ) return 0; // before first
2355 if( !n ) { lky[0] = 0; goto xit; } // found here, first
2357 ustrcpy(&lky[0], ip);
2359 lp = kp; kp += st->dataSz;
2360 unsigned char nky[keysz];
2361 ustrcpy(&nky[0], &lky[0]);
2362 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2363 n = ustrcmp(ky, &nky[0]);
2364 if( !n ) goto xit; // found here
2365 if( n < 0 ) Fail(errNotFound); // btwn prev,next
2366 ustrcpy(&lky[0], &nky[0]);
2368 return 0; // not in block
2370 if( lkp ) ustrcpy(lkp, &lky[0]);
2372 loc.offset = sizeof(keyBlock) + k;
2376 int Db::IndexString::
2377 keyFind(pgRef &loc,unsigned char *ky)
2379 unsigned char nky[keysz];
2381 for( pageId s=st->rootPageId; s>=0; ) {
2382 keyBlock *sbb; Page *spp; char *sn;
2383 if_err( db->indexRead(s,0,sbb,spp,sn) );
2384 unsigned char *lp = (unsigned char *)sn;
2385 unsigned char *rp = lp + spp->iused();
2387 pageId r = sbb->right_link();
2389 if( r >= 0 ) l = getPageId(lp);
2390 unsigned char *kp = lp;
2392 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2393 int n = ustrcmp(ky, &nky[0]);
2396 int k = kp - (unsigned char *)sn;
2397 loc.offset = sizeof(keyBlock) + k;
2400 if( n < 0 ) { r = l; break; }
2408 int Db::IndexString::
2409 refFind(pgRef &loc, void *key)
2411 if( st->rootPageId == NIL ) Fail(errNotFound);
2415 if( CHK cFindCount > 2 ) ret = 1; }
2416 if( ret ) { // try the easy way
2417 ret = chkFind(loc,(char *)key,&lastFndKey[0]);
2420 if( !ret ) { // try the hard way
2421 if_fail( keyFind(loc, (unsigned char *)key) );
2425 ustrcpy(&lastAccKey[0],(unsigned char *)key);
2426 ustrcpy(&lastFndKey[0],&lastAccKey[0]);
2427 ustrcpy(&lastNxtKey[0],&lastAccKey[0]); }
2431 int Db::IndexString::
2432 Find(void *key, void *rtnData)
2435 if_fail( refFind(last, key) );
2437 if( !db->addrRead_(last,kp) ) {
2439 memmove(rtnData,kp,st->dataSz);
2446 * remap sectors on underflow
2447 * s - parent sector, t - pageId if overflowed
2448 * k - parent key offset
2449 * updates tky with parent data/key
2452 int Db::IndexString::
2453 keyUnderflow(pageId s, pageId &t, int k)
2455 unsigned char lky[keysz], nky[keysz];
2456 keyBlock *sbb; Page *spp; char *sn;
2457 if_err( db->indexRead(s,1,sbb,spp,sn) );
2458 unsigned char *lp = (unsigned char *)sn;
2459 unsigned char *rp = lp + spp->iused();
2460 unsigned char *kp = lp + k;
2461 unsigned char *mp = &tbfr[0];
2462 // mark continue underlow
2464 pageId r = sbb->right_link();
2465 lky[0] = nky[0] = 0;
2466 // if underflow, copy to parent key offset
2468 putPageId(mp,getPageId(lp));
2469 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2470 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2472 // read l, key, r -- remove key from parent block
2473 unsigned char *tp = &tky[0];
2474 pageId l = getPageId(lp);
2475 umemmove(tp,lp,st->dataSz); lp += st->dataSz;
2476 ustrcpy(tp,&lky[0]);
2477 for( int i=*lp++; (tp[i]=*lp++) != 0; ++i );
2479 putPageId(mp, r=getPageId(lp));
2480 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2481 ustrcpy(&nky[0],tp);
2482 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2483 mp += kpress(&nky[0],&lky[0],mp);
2484 umemmove(mp,lp,rp-lp);
2486 int n = mp-&tbfr[0];
2487 memmove(sn,&tbfr[0],n);
2490 keyBlock *lbb; Page *lpp; char *ln;
2491 if_err( db->indexRead(l,1,lbb,lpp,ln) );
2492 lp = (unsigned char *)ln;
2493 rp = lp + lpp->iused();
2494 pageId m = lbb->right_link();
2495 mp = &tbfr[0]; nky[0] = 0;
2497 if( m >= 0 ) putPageId(mp,getPageId(lp));
2498 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2499 for( int i=*mp++=*lp++; (nky[i]=*mp++=*lp++) != 0; ++i );
2501 // parent key to tbfr
2502 if( m >= 0 ) putPageId(mp,m);
2503 umemmove(mp,&tky[0],st->dataSz);
2504 mp += kpress(tp,&nky[0],mp);
2506 keyBlock *rbb; Page *rpp; char *rn;
2507 if_err( db->indexRead(r,1,rbb,rpp,rn) );
2508 lp = (unsigned char *)rn;
2509 rp = lp + rpp->iused();
2510 m = rbb->right_link();
2512 if( m >= 0 ) putPageId(mp,getPageId(lp));
2513 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2514 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2515 mp += kpress(&nky[0],tp,mp);
2516 umemmove(mp,lp,rp-lp);
2519 if( n > rpp->iallocated() ) {
2520 // tbfr too big for r
2521 if_err( split(n, -1, r, l, m) );
2522 t = l; // start overflow
2525 if( s == st->rootPageId && !spp->iused() ) {
2526 // if root, delete r
2527 memmove(sn,&tbfr[0],n);
2530 if_err( blockFree(r) );
2531 st->rightHandSide = s;
2534 // update r with tbfr
2535 memmove(rn,&tbfr[0],n);
2539 if_err( blockFree(l) );
2545 * remap sectors on overflow
2546 * s - parent sector, k - parent key offset, o - insertion offset
2547 * t parent link on input,
2548 * t == DDONE on output, end overflow
2549 * tky with parent data/key
2551 int Db::IndexString::
2552 keyOverflow(pageId s, pageId &t, int k, int o)
2554 unsigned char lky[keysz], nky[keysz];
2555 keyBlock *sbb; Page *spp; char *sn;
2556 if_err( db->indexRead(s,1,sbb,spp,sn) );
2557 unsigned char *lp = (unsigned char *)sn;
2558 unsigned char *rp = lp + spp->iused();
2559 unsigned char *kp = lp + k;
2560 unsigned char *mp = &tbfr[0];
2561 pageId r = sbb->right_link();
2562 lky[0] = nky[0] = 0;
2563 // copy parent up to parent key to tbfr
2565 putPageId(mp,getPageId(lp));
2566 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2567 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2569 // if at insertion point, add new key
2572 unsigned char *tp = &tky[0];
2573 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2574 mp += kpress(kp=tp, &lky[0], mp);
2578 // copy parent key, if insertion - add tky, copy rest
2580 ustrcpy(&nky[0],kp);
2581 putPageId(mp,getPageId(lp));
2582 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2583 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2584 mp += kpress(&lky[0],&nky[0],mp);
2587 unsigned char *tp = &tky[0];
2588 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2589 mp += kpress(tp, &lky[0], mp);
2590 ustrcpy(&lky[0],tp);
2593 putPageId(mp,getPageId(lp));
2594 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2595 ustrcpy(&nky[0],&lky[0]);
2596 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2597 mp += kpress(&lky[0],&nky[0],mp);
2599 umemmove(mp,lp,rp-lp);
2601 int n = mp - &tbfr[0];
2602 // if overflow, split sector and promote new parent key
2603 if( n > spp->iallocated() ) {
2604 t = NIL; lastAccess.id = NIL;
2605 if_ret( split(n, -1, s, t, r) );
2611 memmove(sn,&tbfr[0],n);
2616 int Db::IndexString::
2617 keyRemap(pageId s, pageId &t, int k, int o)
2620 if_err( keyUnderflow(s,t,k) );
2624 if_err( keyOverflow(s,t,k,o) );
2629 * delete or replace key
2630 * s - starting sector, nky - replacement key if != 0
2631 * t - returned state -2/done,-1/underflow,pageid/overflow
2634 int Db::IndexString::
2635 keyDelete(pageId s, pageId &t)
2637 unsigned char lky[keysz], nky[keysz];
2638 unsigned char *ky = &akey[0];
2640 keyBlock *sbb; Page *spp; char *sn;
2641 if_err( db->indexRead(s,1,sbb,spp,sn) );
2642 unsigned char *bp = (unsigned char *)sn;
2643 unsigned char *lp = bp;
2644 unsigned char *rp = lp + spp->iused();
2645 unsigned char *kp = lp;
2646 unsigned char *mp = 0;
2649 pageId r = sbb->right_link();
2650 lky[0] = nky[0] = 0;
2651 // mark delete done and search
2653 while( (kp=lp) < rp ) {
2655 if( r >= 0 ) l = getPageId(lp);
2657 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2658 if( idf <= 0 && (n=ustrcmp(ky,&nky[0])) <= 0)
2660 ustrcpy(&lky[0],&nky[0]);
2664 if( !idf ) Fail(errNotFound);
2665 // found greatest node less than ky, delete and return with underflow
2666 // deleted key must be on rt side of block to be next to interior key
2667 unsigned char *dp = &dky[0];
2668 umemmove(dp,mp,st->dataSz);
2669 ustrcpy(dp,&nky[0]);
2671 t = NIL; // start underflow
2674 // not found in block, try next level
2675 int status = keyDelete(kp<rp ? l : r,t);
2678 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2681 else if( r < 0 || idf < 0 ) { // found here
2684 int dsz = st->dataSz;
2685 if( r >= 0 ) dsz += sizeof(pageId);
2686 unsigned char *tp = &lky[0];
2688 if( idf < 0 ) { // translating data/key
2689 lsz = kpress(&dky[st->dataSz],tp,tp); // kpress dky to lky
2691 tp = &dky[st->dataSz];
2694 unsigned char *np = lp;
2697 lp += dsz; // get next key
2698 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2699 if( r < 0 ) ustrcpy(&lastAccKey[0],&nky[0]);// new curr key
2700 nsz = kpress(&nky[0],tp,&nky[0]);
2704 int slen = llen + mlen + rlen;
2705 unsigned char *sp = &tbfr[0];
2707 if( (llen > 0 || rlen > 0) && // at least 3 data/keys
2708 slen <= spp->iallocated() ) { // and fits in block
2709 sp = bp; mp = kp; // edit in place
2712 umemmove(mp,bp,llen); // use tbfr, copy left
2713 if( np < rp ) { // next exists
2715 unsigned char *ip = mp;
2716 if( idf < 0 ) ip += dsz + lsz;
2717 if( sp == bp && tp > lp ) {
2718 memmove(tp,lp,rlen); // up copy/move right
2719 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2720 memmove(ip,&nky[0],nsz);
2723 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2724 memmove(ip,&nky[0],nsz);
2726 memmove(tp,lp,rlen); // down copy/move right
2729 if( idf < 0 ) { // move exterior key
2730 if(r >= 0) putPageId(mp,getPageId(kp));
2731 umemmove(mp,&dky[0],st->dataSz); // add dky data/key
2732 umemmove(mp,&lky[0],lsz);
2734 if( sp == bp ) { // in place
2738 lastAccess.id = s; // position to curr
2739 lastAccess.offset = sizeof(keyBlock) + llen;
2740 ustrcpy(&lastAccKey[0],ky);
2745 if( slen > spp->iallocated() ) {
2746 if_ret( split(slen, -1, s, t, r) ); // continue with overflow
2749 memmove(sn,&tbfr[0],slen); // continue with underflow
2755 // deleting nonterminal key, translate to greatest node less than ky
2757 int status = keyDelete(l,t);
2760 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2766 int Db::IndexString::
2769 if( st->rootPageId == NIL ) Fail(errNotFound);
2770 unsigned char lky[keysz]; lky[0] = 0;
2771 pgRef *last = &lastDelete;
2772 unsigned char *lkey = &lastDelKey[0];
2774 if( CHK cDelCount > 2 ) { // try the easy way
2775 if( lastFind.id >= 0 ) { // chk find/delete
2776 if( !ustrcmp((unsigned char *)key,&lastFndKey[0]) ) {
2777 last = &lastFind; lkey = &lastFndKey[0];
2780 ret = chkFind(*last,(char *)key,lkey,&lky[0]);
2784 pageId s = lastAccess.id;
2785 keyBlock *sbb; Page *spp; char *sn;
2786 if_err( db->indexRead(s,1,sbb,spp,sn) );
2787 unsigned char *bp = (unsigned char *)sn;
2788 int slen = spp->iused();
2789 int llen = lastAccess.offset - sizeof(keyBlock);
2790 unsigned char *kp = bp + llen; // curr data/key
2791 unsigned char *lp = kp;
2792 unsigned char *rp = bp + slen;
2794 lp += st->dataSz; ++lp; while( *lp++ ); // skip curr
2798 unsigned char *np = lp;
2799 unsigned char nky[keysz]; nky[0] = 0;
2801 lp += st->dataSz; // get next key
2802 ustrcpy(&nky[0],(unsigned char *)key);
2803 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2804 nsz = kpress(&nky[0],&lky[0],&lky[0]);
2805 mlen += st->dataSz + nsz;
2808 slen = llen + mlen + rlen;
2809 if( (llen > 0 || rlen > 0 ) && // at least 3 data/keys
2810 slen <= spp->iallocated() ) { // and fits in block
2811 if( np < rp ) { // right exists
2812 unsigned char *tp = kp + mlen;
2813 umemmove(kp,np,st->dataSz); // next data/key
2814 memmove(kp,&lky[0],nsz);
2815 if( tp != lp && rlen > 0 )
2816 memmove(tp,lp,rlen); // move right down
2819 ustrcpy(&lastAccKey[0],&nky[0]); // new curr key
2824 if( !ret ) { // try the hard way
2825 ustrcpy(&this->akey[0],(unsigned char*)key);
2826 lastAccess.id = NIL;
2827 pageId t = NIL; idf = 0;
2828 if_ret( keyDelete(st->rootPageId,t) );
2831 if_ret( keyDelete(st->rootPageId,t) );
2833 if_err( UnmakeRoot() );
2835 ustrcpy(&lastDelKey[0],&lastAccKey[0]);
2841 int Db::IndexString::
2842 keyLocate(pgRef &last,pageId s, int &t, int op,
2843 unsigned char *ky, CmprFn cmpr, unsigned char *rky)
2845 unsigned char lky[keysz], nky[keysz];
2846 keyBlock *sbb; Page *spp; char *sn;
2847 if_err( db->indexRead(s,0,sbb,spp,sn) );
2849 pageId r = sbb->right_link();
2850 unsigned char *lp = (unsigned char *)sn;
2851 unsigned char *rp = lp + spp->iused();
2852 unsigned char *kp = 0, *mp = 0;
2853 lky[0] = nky[0] = 0;
2856 while( (kp=lp) < rp ) {
2857 if( r >= 0 ) l = getPageId(lp);
2858 kp = lp; lp += st->dataSz;
2859 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2860 int n = cmpr((char *)ky,(char *)&nky[0]);
2861 if( op <= keyEQ ? n <= 0 : n < 0 ) break;
2862 ustrcpy(&lky[0],&nky[0]);
2867 int status = keyLocate(last, (kp<rp ? l : r), t, op, ky, cmpr, rky);
2868 if( !t && status == errNotFound ) status = 0;
2873 if( op == keyLT || op == keyGE ) {
2874 if( !mp ) Fail(errNotFound);
2876 ustrcpy(rky,&lky[0]);
2879 if( kp >= rp ) Fail(errNotFound);
2880 ustrcpy(rky,&nky[0]);
2883 int k = kp - (unsigned char *)sn;
2884 last.offset = sizeof(keyBlock) + k;
2891 int Db::IndexString::
2892 refLocate(pgRef &loc, int op,void *key,CmprFn cmpr, unsigned char *rkey)
2894 if( st->rootPageId == NIL ) Fail(errNotFound);
2895 if( op == keyEQ ) op = keyLE;
2896 if( !cmpr ) cmpr = cmprStr;
2898 if_fail( keyLocate(loc,st->rootPageId,t,op,(unsigned char*)key,cmpr, rkey) );
2901 ustrcpy(&lastAccKey[0], rkey);
2902 ustrcpy(&lastNxtKey[0],&lastAccKey[0]); }
2906 int Db::IndexString::
2907 Locate(int op,void *key,CmprFn cmpr,void *rtnKey,void *rtnData)
2909 pgRef last; uint8_t rkey[keysz];
2910 if_fail( refLocate(last, op, key, cmpr, rkey) );
2912 ustrcpy((unsigned char *)rtnKey, rkey);
2915 if_err( db->addrRead_(last,kp) );
2916 memmove(rtnData,kp,st->dataSz);
2922 int Db::IndexString::
2923 Modify(void *key,void *recd)
2925 if_fail( Find(key,0) );
2927 if_err( db->addrWrite_(lastAccess,bp) );
2928 memmove(bp,recd,st->dataSz);
2932 int Db::IndexString::
2933 keyNext(pgRef &loc, unsigned char *rky)
2935 unsigned char lky[keysz]; lky[0] = 0;
2937 keyBlock *sbb; Page *spp; char *sn;
2938 if_err( db->indexRead(s,0,sbb,spp,sn) );
2939 unsigned char *bp = (unsigned char *)sn;
2940 int k = loc.offset - sizeof(keyBlock);
2941 unsigned char *lp = bp;
2942 unsigned char *kp = bp + k;
2943 unsigned char *rp = bp + spp->iused();
2944 if( kp >= rp ) Err(errInvalid);
2945 pageId r = sbb->right_link();
2946 if( &loc != &lastNext ) { // find last key
2947 while( lp <= kp ) { // until past last
2948 if( r >= 0 ) lp += sizeof(pageId);
2949 bp = lp; lp += st->dataSz;
2950 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2954 ustrcpy(&lky[0],&lastNxtKey[0]);
2955 lp = kp; lp += st->dataSz; // verify lastNext
2956 unsigned char *ip = &lky[*lp++];
2957 while( lp<rp && *lp!=0 && *ip==*lp ) { ++ip; ++lp; }
2958 if( *ip || *lp++ ) Fail(errInvalid); // bad lastNxtKey
2960 if( r < 0 && lp < rp ) { // exterior and more data
2962 bp = lp; lp += st->dataSz;
2963 for( int i=*lp++; lp<rp && (rky[i]=*lp) != 0; ++i,++lp );
2964 if( *lp ) Err(errCorrupt);
2965 loc.offset = (bp-(unsigned char *)sn) + sizeof(keyBlock);
2969 if_fail( keyLocate(loc,st->rootPageId,t,keyGT,lky,cmprStr, rky) );
2973 int Db::IndexString::
2974 Next(pgRef &loc,void *rtnKey,void *rtnData)
2976 if( st->rootPageId == NIL ) Fail(errNotFound);
2977 unsigned char rky[keysz];
2978 if_ret( keyNext(loc, rky) );
2980 ustrcpy(&lastAccKey[0], rky);
2982 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2985 if_err( db->addrRead_(loc,kp) );
2986 memmove(rtnData,kp,st->dataSz);
2988 if( &loc == &lastNext )
2989 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2990 lastAccess = lastNext = loc; }
2994 void Db::IndexString::
3000 IndexString(Db *zdb, int zidx, int dsz)
3001 : IndexBase(zdb, idxStr, zidx, 0, dsz)
3003 sst = new(st+1) IndexStringStorage();
3004 dky = new unsigned char[st->dataSz+keysz+1];
3005 tky = new unsigned char[st->dataSz+keysz+1];
3006 tbfr = new unsigned char[3*st->blockSize];
3011 IndexString(Db *zdb, IndexBaseStorage *b, IndexStringStorage *d)
3012 : IndexBase(zdb, *b)
3015 dky = new unsigned char[st->dataSz+keysz+1];
3016 tky = new unsigned char[st->dataSz+keysz+1];
3017 tbfr = new unsigned char[3*st->blockSize];
3022 IndexString(IndexBase *ib, IndexBaseStorage *b, IndexStringStorage *d)
3023 : IndexBase(ib->db, *b)
3025 sst = new(d) IndexStringStorage();
3040 * Db::IndexBase::Clear - clear index
3044 * returns 0 on success,
3045 * error code otherwise
3051 while( st->freeBlocks >= 0 ) {
3052 keyBlock *kbb; pgRef loc;
3053 pageId id = st->freeBlocks;
3054 loc.id = id; loc.offset = 0;
3055 if_err( db->addrWrite_(loc,kbb) );
3056 st->freeBlocks = kbb->right_link();
3059 if( st->rootPageId >= 0 ) {
3060 if_err( keyMap(st->rootPageId, &Db::IndexBase::blockRelease) );
3061 st->rootPageId = st->rightHandSide = NIL;
3070 if( st->rootPageId == NIL ) {
3071 pageId u; keyBlock *ubb; Page *upp; char *un;
3072 if_err( blockAllocate(u,ubb,upp,un) );
3074 ubb->right_link(NIL);
3075 st->rootPageId = st->rightHandSide = u;
3083 pageId u = st->rootPageId;
3084 Page *upp = db->get_page(u);
3085 if( !upp->iused() ) {
3086 if_err( blockFree(u) );
3087 st->rootPageId = st->rightHandSide = NIL;
3092 void Db::IndexBase::
3095 if( lastAccess.id >= 0 && lastAccess.id == lastInsert.id )
3099 lastInsert = lastAccess;
3100 cDelCount = 0; lastDelete.id = NIL;
3101 cFindCount = 0; lastFind.id = NIL;
3102 lastOp = opInsert; lastNext.id = NIL;
3105 void Db::IndexBase::
3108 if( lastAccess.id >= 0 && lastAccess.id == lastDelete.id )
3112 lastDelete = lastAccess;
3113 cInsCount = 0; lastInsert.id = NIL;
3114 cFindCount = 0; lastFind.id = NIL;
3115 lastOp = opDelete; lastNext.id = NIL;
3118 void Db::IndexBase::
3119 chkLastFind(pgRef &last)
3121 if( last.id >= 0 && last.id == lastFind.id )
3132 IndexBaseType(int typ)
3135 memset(&name[0],0,sizeof(name));
3140 defaultBlockSizes[] = {
3142 defaultBinaryBlockSize, // idxBin
3143 defaultStringBlockSize, // idxStr
3147 IndexBaseRecd(int typ, int zidx, int ksz, int dsz)
3153 rightHandSide = NIL;
3155 count = 0; pad1 = 0;
3156 blockSize = defaultBlockSizes[typ];
3159 Db::IndexBaseStorage::
3160 IndexBaseStorage(int typ, int zidx, int ksz, int dsz)
3162 new((IndexTypeInfo *)this) IndexBaseType(typ);
3163 new((IndexRecdInfo *)this) IndexBaseRecd(typ, zidx, ksz, dsz);
3166 void Db::IndexBase::
3170 kdSz = st->keySz + st->dataSz;
3171 lastAccess.id = lastDelete.id = lastInsert.id =
3172 lastFind.id = lastNext.id = NIL;
3173 lastAccess.offset = lastDelete.offset = lastInsert.offset =
3174 lastFind.offset = lastNext.offset = 0;
3175 cFindCount = cDelCount = cInsCount = 0;
3179 IndexBase(Db *zdb, IndexBaseStorage &d)
3186 IndexBase(Db *db, int typ, int zidx, int ksz, int dsz)
3188 st = new(&db->index_info[zidx]) IndexBaseStorage(typ, zidx, ksz, dsz);
3199 /*** int Db::objectHeapInsert(int sz,int pg,int off)
3201 * insert a free space record into the entity heap
3204 * sz int record size
3206 * offset int record offset
3208 * returns zero on success, error code on failure
3212 objectHeapInsert(int sz,int id,int offset)
3214 freeSpaceRecord free;
3215 free.size = sz; free.id = id; free.offset = offset;
3216 if_err( freeSpaceIndex->Insert(&free,0) );
3217 addrSpaceRecord addr;
3218 addr.id = id; addr.offset = offset; addr.size = sz;
3219 if_err( addrSpaceIndex->Insert(&addr,0) );
3223 /*** int Db::objectHeapDelete(int sz,int pg,int off)
3225 * delete a free space record from the entity heap
3228 * sz int record size
3230 * offset int record offset
3232 * returns zero on success, error code on failure
3236 objectHeapDelete(int sz,int id,int offset)
3238 freeSpaceRecord free;
3239 free.size = sz; free.id = id; free.offset = offset;
3240 if_err( freeSpaceIndex->Delete(&free) );
3241 addrSpaceRecord addr;
3242 addr.id = id; addr.offset = offset; addr.size = sz;
3243 if_err( addrSpaceIndex->Delete(&addr) );
3247 /*** int Db::pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3249 * find existing persistent free storage.
3252 * size int input/output requested/allocated size
3253 * loc pgRef & output locator returned for new storage
3254 * cache AllocCache& output allocator cache to operate
3256 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3260 pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3262 freeSpaceRecord look, find;
3263 look.size = size; look.id = 0; look.offset = 0;
3264 int status = freeSpaceIndex->Locate(keyGE, &look, 0, &find, 0);
3265 if( status == errNotFound ) return 1;
3267 // if record is at offset 0, need extra space for prefix
3268 while( !find.offset && look.size+(int)sizeof(pagePrefix) > find.size ) {
3269 status = freeSpaceIndex->Next(&find, 0);
3270 if( status == errNotFound ) return 1;
3273 if_err( objectHeapDelete(find.size,find.id,find.offset) );
3275 loc.offset = find.offset ? find.offset : sizeof(pagePrefix);
3276 Page &pg = *get_page(loc.id);
3277 unsigned int ofs = loc.offset + look.size;
3278 if( ofs > pg->used ) pg->used = ofs;
3279 int sz = find.offset+find.size - ofs;
3280 if( sz >= min_heap_allocation ) {
3281 //if_err( objectHeapInsert(sz,find.id,ofs) );
3282 if_err( cache.Load(this, find.id, ofs, sz) );
3285 size = look.size + sz;
3289 /*** int Db::pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3291 * allocate new persistent free storage.
3294 * size int input/output requested/allocated size
3295 * loc pgRef & output locator returned for new storage
3296 * cache AllocCache& output allocator cache to operate
3298 * returns: zero on success, error code.
3302 pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3304 pageId id = new_page();
3306 unsigned int sz = size + sizeof(pagePrefix);
3307 sz = entityPages(sz) * entityPageSize;
3308 Page &pg = *get_page(id);
3309 if( pg->allocated != sz ) {
3315 loc.offset = sizeof(pagePrefix);
3316 int used = loc.offset + size;
3317 int free = pg->allocated - used;
3318 if( free >= min_heap_allocation ) {
3319 //if_err( objectHeapInsert(free,id,used) );
3320 if_err( cache.Load(this, id, used, free) );
3323 size = pg->allocated - loc.offset;
3327 /*** int Db::pgRefAllocate(int &size, pgRef &loc)
3329 * find persistent free storage. existing/new
3330 * does not allocate virtual memory storage
3333 * size int input/output requested/allocated size
3334 * loc pgRef & output locator returned for new storage
3335 * cache AllocCache& output allocator cache to operate
3337 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3341 pgRefAllocate(int &size, pgRef &loc, AllocCache &cache)
3343 int status = pgRefGet(size, loc, cache);
3346 if_err( pgRefNew(size, loc, cache) );
3350 /*** int Db::objectAllocate(int typ, int &size, pgRef &loc)
3352 * find persistent free storage. existing/new
3353 * does allocate virtual memory storage
3354 * inits pagePrefix, inits allocPrefix
3357 * type int input entity type id
3358 * size int input/output requested/allocated size
3359 * loc pgRef & output locator returned for new storage
3360 * cache AllocCache& output allocator cache to operate
3362 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3366 objectAllocate(int typ, int &size, pgRef &loc, AllocCache &cache)
3368 int status = cache.Get(this, size, loc);
3371 if_err( pgRefAllocate(size, loc, cache) );
3372 Page &pg = *get_page(loc.id);
3374 pagePrefix *bpp; pgRef ref;
3375 ref.id = loc.id; ref.offset = 0;
3376 if_err( addrWrite_(ref,bpp) );
3377 bpp->magic = page_magic;
3378 bpp->type = pg_entity + typ;
3379 pg->type = bpp->type;
3381 unsigned int sz = loc.offset + size;
3388 /*** int Db::objectFree(pgRef &loc)
3390 * Purpose: Immediate release of entity memory
3393 * loc pgRef & Page/offset
3395 * returns zero on success, error code on failure
3398 int Db::objectFree(pgRef &loc)
3401 if_err( addrRead_(loc,mp) );
3402 if( mp->magic != entity_magic ) Err(errBadMagic);
3403 addrSpaceRecord addr, prev, next;
3404 /* get prev, next addrSpace free heap items near this addr */
3405 addr.size = mp->size;
3407 addr.offset = loc.offset;
3408 int status = addrSpaceIndex->Locate(keyLT,&addr,0,&prev,0);
3409 if( status == errNotFound ) {
3411 status = addrSpaceIndex->Locate(keyGT,&addr,0,&next,0);
3414 status = addrSpaceIndex->Next(&next,0);
3415 if( status == errNotFound ) {
3420 /* merge with prev if possible */
3421 if( prev.id == addr.id &&
3422 prev.offset + prev.size == addr.offset ) {
3423 if_err( objectHeapDelete(prev.size,prev.id,prev.offset) );
3424 addr.offset = prev.offset;
3425 addr.size += prev.size;
3427 /* merge with next if possible */
3428 if( addr.id == next.id &&
3429 addr.offset + addr.size == next.offset ) {
3430 if_err( objectHeapDelete(next.size,next.id,next.offset) );
3431 addr.size += next.size;
3433 /* reduce used block bytes if possible */
3434 Page &pg = *get_page(loc.id);
3435 if( pg->used <= addr.offset + addr.size )
3436 pg->used = addr.offset;
3437 // if only page prefix, release page, otherwise save new fragment
3438 if( pg->used == sizeof(pagePrefix) )
3441 if_err( objectHeapInsert(addr.size,addr.id,addr.offset) );
3445 /*** int Db::blockAllocate(pageId *pid, keyBlock *&pkbb)
3447 * allocate persistent storage.
3450 * pageId & pid page allocated
3451 * keyBlock *& pkbb keyblock* pointer
3455 blockAllocate(pageId &pid, keyBlock *&pkbb)
3457 locked by(db->db_info->blkAlLk);
3458 pageId id; Page *kpp;
3459 pgRef loc; keyBlock *kbb;
3460 if( st->freeBlocks != NIL ) {
3462 id = st->freeBlocks;
3463 loc.id = id; loc.offset = 0;
3464 if_err( db->addrWrite_(loc,kbb) );
3465 st->freeBlocks = kbb->right_link();
3466 Page &kpg = *db->get_page(id);
3467 if( kpg->allocated != st->blockSize ) {
3468 db->pageDealloc(kpg);
3469 kpg->allocated = st->blockSize;
3470 if_err( db->addrWrite_(loc, kbb) );
3475 id = db->new_page();
3477 loc.id = id; loc.offset = 0;
3478 Page &kpg = *db->get_page(id);
3479 kpg->allocated = st->blockSize;
3480 if_err( db->addrWrite_(loc, kbb) );
3483 kbb->magic = page_magic;
3485 kbb->type = pg_index + st->idx;
3488 kpg->type = kbb->type;
3494 /*** int Db::blockFree(pageId pid)
3496 * Purpose: delayed release database memory
3499 * pid int input Page
3501 * returns zero on success, error code on failure
3505 blockFree(pageId pid)
3507 locked by(db->db_info->blkAlLk);
3509 pageId id = st->freeBlocks; pgRef loc;
3510 loc.id = NIL; loc.offset = 0;
3513 while( id >= 0 && id < pid ) {
3515 if_err( db->addrRead_(loc,kbb) );
3516 id = kbb->right_link();
3520 if_err( db->addrWrite_(loc,kbb) );
3521 kbb->right_link(pid);
3524 st->freeBlocks = pid;
3526 if_err( db->addrWrite_(loc,kbb) );
3527 kbb->right_link(id);
3528 Page *kpp = db->get_page(pid);
3534 blockRelease(pageId pid)
3536 Page *pp = db->get_page(pid);
3537 return pp->release();
3540 /*** int Db::deleteFreeBlock()
3542 * Purpose: release database memory/storage
3544 * returns zero on success, error code on failure
3551 while( (id=st->freeBlocks) != NIL ) {
3552 keyBlock *kbb; pgRef loc;
3553 loc.id = id; loc.offset = 0;
3554 if_err( db->addrRead_(loc,kbb) );
3555 st->freeBlocks = kbb->right_link();
3556 Page &pg = *db->get_Page(id);
3557 if_err( db->storeFree(pg->wr_allocated, pg->wr_io_addr) );
3558 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
3559 if_err( db->storeFree(pg->io_allocated, pg->io_addr) );
3560 pg->io_allocated = 0; pg->io_addr = NIL;
3566 /*** int Db::storeInsert(int sz, ioAddr io_addr)
3568 * insert a storage record into the storage heap
3572 * io_addr ioAddr blob addr
3574 * returns zero on success, error code on failure
3578 storeInsert(long sz, ioAddr io_addr)
3580 //dmsg(-1, "storeInsert %08lx/%012lx\n",sz,io_addr);
3581 freeStoreRecord free;
3582 free.size = sz; free.io_addr = io_addr;
3583 if_err( freeStoreIndex->Insert(&free,0) );
3585 addrStoreRecord addr;
3586 addr.io_addr = io_addr; addr.size = sz;
3587 if_err( addrStoreIndex->Insert(&addr,0) );
3592 /*** int Db::storeDelete(int sz, ioAddr io_addr)
3594 * delete a storage record from the storage heap
3598 * io_addr ioAddr blob addr
3600 * returns zero on success, error code on failure
3604 storeDelete(long sz, ioAddr io_addr)
3606 //dmsg(-1, "storeDelete %08lx/%012lx\n",sz,io_addr);
3607 freeStoreRecord free;
3608 free.size = sz; free.io_addr = io_addr;
3609 if_err( freeStoreIndex->Delete(&free) );
3611 addrStoreRecord addr;
3612 addr.io_addr = io_addr; addr.size = sz;
3613 if_err( addrStoreIndex->Delete(&addr) );
3618 /*** int Db::storeGet(int &size, ioAddr &io_addr)
3620 * allocate storage record from the storage heap
3622 * size int input/output requested/allocated blob size
3623 * io_addr ioAddr output blob addr
3625 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3629 storeGet(int &size, ioAddr &io_addr)
3631 freeStoreRecord look, find;
3632 look.size = size; look.io_addr = 0;
3633 int status = freeStoreIndex->Locate(keyGE, &look,0, &find,0);
3634 if( status == errNotFound ) return 1;
3636 if_err( storeDelete(find.size,find.io_addr) );
3637 io_addr = find.io_addr;
3638 long sz = find.size - look.size;
3639 if( sz >= min_heap_allocation ) {
3640 /* put back extra */
3641 find.size = sz; find.io_addr += look.size;
3642 if_err( storeInsert(find.size,find.io_addr) );
3645 look.size = find.size;
3650 /*** int Db::storeNew(int &size, ioAddr &io_addr)
3652 * allocate storage by extending file
3654 * size int input/output requested/allocated blob size
3655 * io_addr ioAddr output blob addr
3657 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3661 storeNew(int &size, ioAddr &io_addr)
3663 if_err( seek_data(root_info->file_size) );
3664 io_addr = root_info->file_size;
3665 size = storeBlocks(size) * storeBlockSize;
3666 /* make sure file storage exists */
3667 if_err( write_padding(root_info->file_size + size) );
3671 /*** int Db::storeAllocate(int &size, ioAddr &io_addr)
3673 * find persistent free storage. existing/new
3674 * does not allocate virtual memory storage
3677 * size int input/output requested/allocated size
3678 * io_addr ioAddr & output returned storage address
3680 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3684 storeAllocate(int &size, ioAddr &io_addr)
3686 int status = storeGet(size, io_addr);
3688 status = storeNew(size, io_addr);
3693 /*** int Db::storeFree(int size, ioAddr io_addr)
3695 * Immediate release of store heap
3697 * size int input blob size
3698 * io_addr ioAddr input blob addr
3700 * returns zero on success, error code on failure
3704 storeFree(int size, ioAddr io_addr)
3706 if( io_addr < 0 || size == 0 ) return 0;
3707 /* get prev, next addrStore heap items near this io_addr */
3708 addrStoreRecord addr, prev, next;
3709 addr.io_addr = io_addr; addr.size = size;
3710 int status = addrStoreIndex->Locate(keyLT,&addr,0, &prev,0);
3711 if( status == errNotFound ) {
3713 status = addrStoreIndex->Locate(keyGT,&addr,0, &next,0);
3716 status = addrStoreIndex->Next(&next,0);
3717 if( status == errNotFound ) {
3722 /* merge with prev if possible */
3723 if( prev.io_addr >= 0 && prev.io_addr + prev.size == addr.io_addr ) {
3724 if_err( storeDelete(prev.size,prev.io_addr) );
3725 addr.io_addr = prev.io_addr;
3726 addr.size += prev.size;
3728 /* merge with next if possible */
3729 if( next.io_addr >= 0 && addr.io_addr + addr.size == next.io_addr ) {
3730 if_err( storeDelete(next.size,next.io_addr) );
3731 addr.size += next.size;
3734 return storeInsert(addr.size,addr.io_addr);
3737 /*** int Db::pageLoad(pageId id)
3742 * id pageId pageTable element to load
3747 pageLoad(pageId id, Page &pg)
3750 uint8_t *bp = (uint8_t*)pg.addr;
3751 int rd = pg->chk_flags(fl_rd);
3752 int shmid = pg->shmid, used = pg->used;
3754 if( no_shm || shmid < 0 ) {
3755 bp = new_uint8_t(pg->allocated, shmid, id);
3756 if( used ) rd = fl_rd;
3759 bp = get_uint8_t(shmid, id);
3762 if( !bp ) Err(errNoMemory);
3764 if( likely(rd && used > 0) ) {
3765 if_err( pageRead(pg->io_addr, bp, used) );
3767 pg.addr = (pagePrefix*)bp;
3769 pg->clr_flags(fl_rd);
3773 /*** int Db::pageRead(ioAddr io_adr, uint8_t *bp, int len)
3775 * read data from the database file
3778 * pg Page input Page to read
3783 pageRead(ioAddr io_adr, uint8_t *bp, int len)
3785 //locked by(db_info->pgLdLk);
3786 //if_err( seek_data(io_adr) );
3787 //if_err( read_data((char*)bp, len) );
3788 long sz = pread(fd, bp, len, io_adr);
3789 if( len != sz ) Err(errIoRead);
3790 file_position = io_adr+len;
3791 pagePrefix *bpp = (pagePrefix *)bp;
3792 if( bpp->magic != page_magic ) Err(errBadMagic);
3796 /*** int Db::pageWrite(Page *pp)
3798 * writes data to the database file
3801 * pg Page input Page to write
3808 pagePrefix *bpp = pg.addr;
3809 // when io is block aligned and not disk block sized, but
3810 // the allocated areas exceed disk block size, increase
3811 // write to whole blocks to prevent read/modify/write.
3812 int len = bpp->used = pg->used;
3813 unsigned int blen = (len+0xfff) & ~0xfff;
3814 if( blen > pg->allocated ) blen = pg->allocated;
3815 if_err( seek_data(pg->wr_io_addr) );
3816 if_err( write_data((char *)bpp, blen) );
3817 pg->trans_id = active_transaction;
3821 // deallocate page data buffer
3822 // mode<0: delete without shm deallocate
3823 // mode=0: delete and deallocate written page
3824 // mode>0: delete and deallocate page (default)
3827 pageDealloc(Page &pg, int mode)
3829 uint8_t *bp = (uint8_t *)pg.addr;
3830 //int pg=page_table_sz; while( --pg>=0 && pp!=pageTable[pg] );
3831 if( del_uint8_t(bp, pg.shm_id /*, pg*/) <= 1 ||
3832 mode > 0 || (!mode && pg->chk_flags(fl_wr)) )
3838 int Db::AllocCache::
3842 if_ret( db->objectHeapInsert(avail,loc.id,loc.offset) );
3848 int Db::AllocCache::
3849 Get(Db *db,int &size, pgRef &ref)
3852 int isz = avail - size;
3854 unsigned int sz = isz;
3855 if( sz < min_heap_allocation ) {
3858 ref = loc; avail = sz;
3859 if( sz == 0 ) loc.id = NIL;
3860 sz = (loc.offset += size);
3861 Page &pg = *db->get_page(ref.id);
3862 if( pg->used < sz ) pg->used = sz;
3865 if_ret( cacheFlush(db) );
3870 int Db::AllocCache::
3871 Load(Db *db, pageId id, int ofs, int sz)
3875 if_ret( db->objectHeapInsert(sz,id,ofs) );
3884 int Db::cache_all_flush()
3886 if_err( cacheFlush() );
3887 Entity e(this); EntityLoc &ent = e.ent; int ret;
3888 if( !(ret=entityIdIndex->First(0,&ent.obj)) ) do {
3889 if_err( ent.cacheFlush() );
3890 } while( !(ret=entityIdIndex->Next(0,&ent.obj)) );
3891 if( ret == errNotFound ) ret = 0;
3897 allocate(int typ, int size, pgRef &loc, AllocCache &cache)
3899 locked by(db_info->objAlLk);
3901 int sz = size + sizeof(allocPrefix);
3902 // granularity is sizeof pagePrefix
3903 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3904 if_err( objectAllocate(typ, sz, loc, cache) );
3906 if_err( addrWrite_(loc,mp) );
3907 mp->magic = entity_magic;
3914 deallocate(pgRef &loc, AllocCache &cache)
3916 locked by(db_info->objAlLk);
3917 cache.cacheFlush(this);
3918 if( loc.id < 0 ) return 0;
3919 if_fail( objectFree(loc) );
3920 loc.id = NIL; loc.offset = 0;
3925 reallocate(int size, pgRef &loc, AllocCache &cache)
3927 int sz = size + sizeof(allocPrefix);
3928 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3929 int typ = 0, msz = 0;
3932 if_err( addrRead_(loc,mp) );
3938 ref.id = NIL; ref.offset = 0;
3940 if_err( allocate(typ, size, ref, cache) );
3941 if( (msz-=sizeof(allocPrefix)) > 0 ) {
3942 char *bp = 0, *cp = 0;
3943 if_err( addrRead(loc,bp) );
3944 if_err( addrWrite(ref,cp) );
3945 if( msz > size ) msz = size;
3949 if_err( deallocate(loc, cache) );
3957 int Db::pack::aligned = 0;
3959 void Db::pack::put(uint64_t v, int n)
3961 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
3964 int k = 8-n-(idx&7), m = (1<<n)-1;
3965 bits[i] = (bits[i] & ~(m<<k)) | (v<<k);
3969 case 8: *((uint8_t *)&bits[i]) = v; break;
3970 case 16: *((uint16_t*)&bits[i]) = htobe16(v); break;
3971 case 32: *((uint32_t*)&bits[i]) = htobe32(v); break;
3972 case 64: *((uint64_t*)&bits[i]) = htobe64(v); break;
3979 if( !aligned && n <= 32 ) {
3980 int i = idx/32, k = 64-n-idx%32;
3981 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
3982 *bp = htobe64(((be64toh(*bp) & ~(m<<k)) | (v<<k)));
3988 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
3989 uint64_t *bp = (uint64_t*)&bits[8*i];
3990 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1; v &= m;
3991 b = l>=0 ? (b & ~(m<<l)) | (v<<l) : (b & ~(m>>-l)) | (v>>-l);
3999 uint64_t Db::pack::get(int n)
4002 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
4005 int k = 8-n-(idx&7), m = (1<<n)-1;
4006 v = (bits[i]>>k) & m;
4010 case 8: v = *((uint8_t *)&bits[i]); break;
4011 case 16: v = be16toh(*(uint16_t*)&bits[i]); break;
4012 case 32: v = be32toh(*(uint32_t*)&bits[i]); break;
4013 case 64: v = be64toh(*(uint64_t*)&bits[i]); break;
4020 if( !aligned && n <= 32 ) {
4021 int i = idx/32, k = 64-n-idx%32;
4022 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
4023 v = (be64toh(*bp) >> k) & m;
4029 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
4030 uint64_t *bp = (uint64_t*)&bits[8*i];
4031 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1;
4032 uint64_t vv = l>=0 ? (b>>l) & m : b & (m>>-l);
4041 int64_t Db::mediaKey::bit_size(int len, int w)
4044 for( int i=0, m=len; m>1; ++i ) {
4046 int b = m * (i+w+1);
4047 b = (b+pack::alignBits-1) & ~(pack::alignBits-1);
4053 int64_t Db::mediaKey::byte_size(int len, int w)
4055 int64_t sz = bit_size(len, w);
4056 sz = (sz+(8*sizeof(uint64_t)-1)) & ~(8*sizeof(uint64_t)-1);
4057 return sz/8 + 2*sizeof(int)+sizeof(int64_t);
4060 int64_t Db::mediaKey::count1(uint8_t *kp)
4063 int w = pk.iget(), len = pk.iget();
4064 pk.seek(pk.pos()+sizeof(int64_t)*8);
4065 int k = high_bit_no(len-1);
4067 int64_t z = (uint64_t)1<<sz++;
4068 return pk.get(sz) - z;
4072 mediaKey(uint8_t *kp, uint8_t *bp, int w, int len)
4075 pk.iput(this->w = w);
4076 pk.iput(this->len = len);
4080 pk.lput(this->cnt = pb.get(w));
4084 // allocate memory, every level length m requires (m+1)/2 differences
4085 // need an extra one when length is power of 2
4086 int k = high_bit_no(len-1) + 1;
4087 struct { int64_t cnt, sz, *wgt; } lt[k+1], rt[k+1];
4088 for( int i=0, m=len; i<=k; ++i ) {
4090 lt[i].wgt = new int64_t[m];
4091 rt[i].wgt = new int64_t[m];
4092 lt[i].sz = rt[i].sz = 0;
4093 lt[i].cnt = rt[i].cnt = 0;
4097 for( int i=0,l=0; ++i<=len; l=i ) {
4099 int m = i&1 ? 0 : high_bit_no(i ^ l); // highest flipped bit
4100 rt[m].cnt = n; // 0->1, begin right
4101 for( int j=0; j<m; ++j ) {
4102 rt[j].wgt[rt[j].sz++] = n-rt[j].cnt;// 1->0, end right
4103 lt[j].cnt = n; // 1->0, begin left
4105 lt[m].wgt[lt[m].sz++] = n-lt[m].cnt; // 0->1, end left
4108 for( int i=0, m=len; m>1; ++i ) {
4110 if( lt[i].sz < m ) { // finish left
4111 lt[i].wgt[lt[i].sz++] = n-lt[i].cnt;
4114 if( lt[i].sz != rt[i].sz ) // finish right
4115 rt[i].wgt[rt[i].sz++] = n-rt[i].cnt;
4120 for( int i=k; --i>=0; ) {
4121 n = (uint64_t)1<<(i+w); // offset for signed difference
4123 for( int j=0; j<sz; ++j ) {
4124 uint64_t v = (lt[i].wgt[j]-rt[i].wgt[j]) + n;
4125 pk.put(v, i+w+1); // output pair differences
4127 if( (n=pk.pos()%pack::alignBits) != 0 ) // align next level
4128 pk.put(0, pack::alignBits-n);
4131 for( int i=0; i<=k; ++i ) {
4132 delete [] lt[i].wgt;
4133 delete [] rt[i].wgt;
4144 mediaLoad(uint8_t *bp)
4147 w = pb.iget(); len = pb.iget(); cnt = pb.lget();
4149 psz = high_bit_no(len-1)+1;
4150 if( psz < 1 ) psz = 1;
4159 void Db::mediaLoad::
4160 get_fields(int n, int k)
4164 dp[k] = dat; dat += m;
4167 int64_t *ap = dp[k];
4169 int64_t z = (uint64_t)1<<sz++;
4171 int64_t *avp = dp[k-1];
4172 for( int j=n/2; --j>=0; ) {
4173 int64_t av = pb.get(sz)-z, a = *ap++;
4174 *avp++ = (a+av)/2; *avp++ = (a-av)/2;
4177 int64_t av = pb.get(sz)-z, a = *ap;
4182 int64_t *ap = dp[0];
4183 for( int j=n/2; --j>=0; ) {
4184 int64_t av = pb.get(sz)-z, a = *ap++;
4185 pk.putc((a+av)/2, w); pk.putc((a-av)/2, w);
4188 int64_t av = pb.get(sz)-z, a = *ap;
4189 pk.putc((a+av)/2, w);
4195 void Db::mediaLoad::
4198 pb.seek(spos); pk.init(kp);
4199 pk.set_max((1<<w)-1);
4200 int64_t d[dsz]; dat = d;
4201 int64_t *p[psz]; dp = p;
4203 for( int i=psz-1; --i>=0; ) dp[i] = 0;
4212 mediaCmpr(uint8_t *bp)
4214 this->err = 0; this->lmt = ~0;
4215 pb.init(bp); w = pb.iget(); len = pb.iget();
4217 psz = high_bit_no(len-1)+1;
4218 if( psz < 1 ) psz = 1;
4227 uint64_t Db::mediaCmpr::
4228 chk_fields(int n, int k)
4232 adp[k] = adat; adat += m;
4233 bdp[k] = bdat; bdat += m;
4234 if( chk_fields(m, k+1) > lmt ) return err;
4236 int64_t *ap = adp[k], *bp = bdp[k];
4237 //uint64_t serr = 0;
4238 err = 0; int sz = k+w;
4239 int64_t z = (uint64_t)1<<sz++;
4241 int64_t *avp = adp[k-1], *bvp = bdp[k-1];
4242 for( int j=n/2; --j>=0; ) {
4243 int64_t a = *ap++, b = *bp++;
4244 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4245 int64_t alt = (a+av)/2, art = (a-av)/2;
4246 int64_t blt = (b+bv)/2, brt = (b-bv)/2;
4247 //serr += sqr(alt-blt) + sqr(art-brt);
4248 *avp++ = alt; *avp++ = art;
4249 *bvp++ = blt; *bvp++ = brt;
4250 err += labs(alt-blt) + labs(art-brt);
4253 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4254 int64_t a = *ap, b = *bp;
4255 int64_t alt = (a+av)/2, blt = (b+bv)/2;
4256 //serr += sqr(alt-blt);
4257 *avp++ = alt; *bvp++ = blt;
4258 err += labs(alt-blt);
4262 int64_t *ap = adp[0], *bp = bdp[0];
4263 for( int j=n/2; --j>=0; ) {
4264 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4265 int64_t a = *ap++, b = *bp++;
4266 int64_t v = a-b, dv = av-bv;
4267 //serr += sqr(v) + sqr(dv);
4268 err += labs(v+dv)/2 + labs(v-dv)/2;
4272 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4273 int64_t a = *ap, b = *bp;
4274 int64_t v = a-b, dv = av-bv, d = v + dv;
4279 pb.align(); pk.align();
4280 //printf("n=%d,k=%d,lerr=%lu,err=%lu\n",n,k,lerr,(int64_t)sqrt((double)serr));
4284 uint64_t Db::mediaCmpr::
4285 chk(uint8_t *kp, uint64_t lmt)
4287 pb.seek(spos); pk.init(kp); err = 0;
4288 if( pk.iget() != w || len != pk.iget() ) return ~0;
4289 acnt = pb.lget(); bcnt = pk.lget();
4290 //if( (err=sqr(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4291 if( (err=labs(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4292 int64_t a[dsz], b[dsz]; adat = a; bdat = b;
4293 int64_t *ap[psz], *bp[psz]; adp = ap; bdp = bp;
4294 adp[psz-1] = &acnt; bdp[psz-1] = &bcnt;
4295 for( int i=psz-1; --i>=0; ) adp[i] = bdp[i] = 0;
4296 return len > 1 ? chk_fields(len, 0) : err;
4300 cmpr(uint8_t *kp, uint64_t lmt)
4302 pb.seek(spos); pk.init(kp);
4303 if( pk.iget() != w || len != pk.iget() ) return ~0;
4304 acnt = pb.lget(); bcnt = pk.lget();
4305 if( acnt < bcnt ) return -1;
4306 if( acnt > bcnt ) return 1;
4307 if( len == 1 ) return 0;
4308 int bsz = Db::mediaKey::bit_size(len, w);
4309 int i = bsz / (8*sizeof(uint64_t));
4310 uint64_t *ap = (uint64_t *)pb.addr(), *bp = (uint64_t *)pk.addr();
4312 if( *ap != *bp ) return be64toh(*ap)<be64toh(*bp) ? -1 : 1;
4315 if( (i=bsz % (8*sizeof(uint64_t))) > 0 ) {
4316 int l = (8*sizeof(uint64_t)) - i;
4317 uint64_t av = be64toh(*ap)>>l, bv = be64toh(*bp)>>l;
4318 if( av != bv ) return av<bv ? -1 : 1;
4327 start_transaction(int undo_save)
4329 //traceback(" start_transaction %d\n",my_pid);
4331 // activate written pages
4332 for( id=0; id<root_info->pageTableUsed; ++id ) {
4333 Page &pg = *get_Page(id);
4334 if( !pg.addr && pg->used ) pg->set_flags(fl_rd);
4335 if( !pg->chk_flags(fl_new) ) continue;
4336 int io_allocated = pg->io_allocated;
4337 pg->io_allocated = pg->wr_allocated;
4338 pg->wr_allocated = io_allocated;
4339 ioAddr io_addr = pg->io_addr;
4340 pg->io_addr = pg->wr_io_addr;
4341 pg->wr_io_addr = io_addr;
4343 // release inactivate page storage
4344 for( id=0; id<root_info->pageTableUsed; ++id ) {
4345 Page &pg = *get_Page(id);
4346 if( !pg->chk_flags(fl_new) ) continue;
4347 pg->clr_flags(fl_new);
4348 if_err( storeFree(pg->wr_allocated, pg->wr_io_addr) );
4349 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
4350 if( pg->used ) continue;
4351 if_err( storeFree(pg->io_allocated, pg->io_addr) );
4352 pg->io_allocated = 0; pg->io_addr = NIL;
4357 addrStoreRecord addr;
4358 int status = addrStoreIndex->Last(&addr,0);
4360 if( addr.io_addr+addr.size == root_info->file_size ) {
4361 if_err( storeDelete(addr.size, addr.io_addr) );
4362 ioAddr fsz = storeBlocks(addr.io_addr) * storeBlockSize;
4363 if( root_info->file_size > fsz ) {
4364 status = ftruncate(fd, fsz);
4365 if( status ) Err(errIoWrite);
4366 addr.size = fsz - addr.io_addr;
4368 if_err( storeInsert(addr.size, addr.io_addr) );
4369 root_info->file_size = fsz;
4374 if( status != errNotFound ) Err(status);
4376 for( int idx=0; idx<root_info->indeciesUsed; ++idx )
4377 if( indecies[idx] ) indecies[idx]->deleteFreeBlocks();
4379 // move root pages lower if possible
4380 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
4381 IndexBase *bip = indecies[idx];
4382 if( !bip ) continue;
4383 pageId r = bip->st->rootPageId;
4384 if( r < 0 ) continue;
4385 if( r != bip->st->rightHandSide ) continue;
4386 bip->st->rootPageId = bip->st->rightHandSide = lower_page(r);
4389 // truncate pageTable
4390 for( id=root_info->pageTableUsed; id>0; --id ) {
4391 Page &pg = *get_Page(id-1);
4392 if( pg->used ) break;
4394 page_table_sz = root_info->pageTableUsed = id;
4396 // remove released pages from free list
4398 id = root_info->freePages;
4400 Page &pg = *get_Page(id);
4401 pageId next_id = pg->link;
4402 if( id < root_info->pageTableUsed ) {
4403 if( prev ) prev->st->link = id;
4404 else root_info->freePages = id;
4411 if( prev ) prev->st->link = NIL;
4412 else root_info->freePages = NIL;
4414 if( pageTableHunks(root_info->pageTableUsed)*pageTableHunkSize < pageTableAllocated )
4415 alloc_pageTable(root_info->pageTableUsed);
4417 if( undo_save ) undata.save(this);
4418 active_transaction = ++root_info->transaction_id;
4424 #define bfr_sz 0x40000
4429 bfr = inp = new char[bfr_sz];
4430 memset(bfr, 0, bfr_sz);
4438 int ret = flush_bfr();
4440 bfr = inp = lmt = 0;
4449 return n > 0 ? write_data(bfr, n) : 0;
4453 write_bfr(char *dp, int sz)
4457 if( n > sz ) n = sz;
4459 if( (inp+=n) >= lmt && flush_bfr() < 0 )
4471 //traceback(" commit %d\n",my_pid);
4472 int v = attach_wr();
4473 int ret = icommit(force);
4474 if( !ret && force < 0 ) ret = icommit(1);
4476 if( v ) detach_rw();
4484 //traceback(" icommit %d\n",my_pid);
4491 // check for written pages
4492 for( id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
4493 Page &pg = *get_Page(id);
4494 if( pg->chk_flags(fl_wr) ) break;
4496 // if no pages are written
4497 if( n < 0 ) return 0;
4501 // release last root info, allows storage to move down
4502 if_err( storeFree(root_info->last_info_size, root_info->last_info_addr) );
4503 root_info->last_info_addr = NIL; root_info->last_info_size = 0;
4506 ioAddr ri_addr = root_info->last_info_addr;
4507 int ri_size = root_info->last_info_size;
4508 int k = root_info_extra_pages;
4511 // allocate new storage
4514 for( id=0; id<root_info->pageTableUsed; ++id ) {
4515 Page &pg = *get_Page(id);
4516 if( !pg->chk_flags(fl_wr) ) continue;
4517 if( pg->chk_flags(fl_new) ) continue;
4518 pg->set_flags(fl_new);
4520 pg->wr_allocated = pg->allocated;
4521 if_err( storeAllocate(pg->wr_allocated, pg->wr_io_addr) );
4526 // compute rootInfo size
4527 int rsz = writeRootInfo(&Db::size_data);
4528 int rsz0 = entityPages(rsz) * entityPageSize;
4529 int rsz1 = rsz0 + k * entityPageSize;
4530 // retry while no storage, too little, or too big
4531 if( !(ri_addr < 0 || ri_size < rsz0 || ri_size > rsz1) ) break;
4532 // delete/allocate can change pageTable;
4533 if_err( storeFree(ri_size, ri_addr) );
4534 ri_addr = NIL; ri_size = 0;
4535 rsz1 = rsz0 + k/2 * entityPageSize;
4536 if_err( storeAllocate(rsz1, ri_addr) );
4537 ri_size = rsz1; k += 2;
4540 // delete free index pages
4541 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
4542 IndexBase *ib = indecies[idx];
4544 while( (id=ib->st->freeBlocks) != NIL ) {
4545 keyBlock *kbb; pgRef loc;
4546 loc.id = id; loc.offset = 0;
4547 if_err( addrWrite_(loc,kbb) );
4548 ib->st->freeBlocks = kbb->right_link();
4549 Page &pg = *get_Page(id);
4550 pg->used = 0; pg->set_flags(fl_new);
4554 root_info->last_info_addr = root_info->root_info_addr;
4555 root_info->last_info_size = root_info->root_info_size;
4556 root_info->root_info_addr = ri_addr;
4557 root_info->root_info_size = ri_size;
4559 // write page storage
4560 for( id=0; id<root_info->pageTableUsed; ++id ) {
4561 Page &pg = *get_Page(id);
4562 if( pg->chk_flags(fl_wr) ) {
4563 pg->clr_flags(fl_wr);
4565 if_err( pageWrite(pg) );
4569 pg->set_flags(fl_rd);
4573 // write rootInfo storage
4574 if_err( seek_data(ri_addr) );
4576 if_err( open_bfr() );
4577 if_err( writeRootInfo(&Db::write_bfr) );
4578 if_err( close_bfr() );
4580 if_err( writeRootInfo(&Db::write_data) );
4582 if_err( write_zeros(ri_addr+ri_size) );
4584 // update rootInfo pointer
4585 if_err( seek_data(root_info_offset) );
4586 if_err( write_data((char*)&ri_addr,sizeof(ri_addr)) );
4588 // commit is finished
4589 return start_transaction();
4595 // evict unwritten page storage
4596 for( int id=0; id<root_info->pageTableUsed; ++id ) {
4597 Page &pg = *get_page(id);
4598 if( !pg.addr ) continue;
4599 if( pg->chk_flags(fl_wr) ) continue;
4601 pg->set_flags(fl_rd);
4607 seek_data(ioAddr io_addr)
4609 if( io_addr < 0 ) Err(errIoSeek);
4610 if( file_position != io_addr ) {
4611 long offset = lseek(fd,io_addr,SEEK_SET);
4612 if( offset != io_addr ) Err(errIoSeek);
4613 file_position = io_addr;
4619 size_data(char *dp, int sz)
4621 return sz > 0 ? sz : 0;
4625 read_data(char *dp, int sz)
4628 long len = read(fd,dp,sz);
4629 if( len != sz ) Err(errIoRead);
4630 file_position += sz;
4636 write_data(char *dp, int sz)
4639 long len = write(fd,dp,sz);
4640 if( len != sz ) Err(errIoWrite);
4641 file_position += sz;
4642 if( file_position > root_info->file_size )
4643 root_info->file_size = file_position;
4649 write_zeros(ioAddr io_addr)
4651 ioAddr len = io_addr - file_position;
4652 if( len < 0 ) Err(errIoWrite);
4653 int sz = len > entityPageSize ? entityPageSize : len;
4654 char bfr[sz]; memset(&bfr[0],0,sz);
4656 sz = len > entityPageSize ? entityPageSize : len;
4657 if_err( write_data(&bfr[0], sz) );
4658 len = io_addr - file_position;
4664 write_padding(ioAddr io_addr)
4666 if( root_info->file_size > io_addr ) Err(errIoWrite);
4668 if_err( write_zeros(io_addr) );
4670 int status = ftruncate(fd, io_addr);
4671 if( status ) Err(errIoSeek);
4672 root_info->file_size = io_addr;
4677 #define Read(n) do { \
4678 if( (count+=sizeof(n)) > root_info->root_info_size ) Err(errCorrupt); \
4679 if_err( (this->*fn)((char*)&(n),sizeof(n)) ); \
4683 readRootInfo(int(Db::*fn)(char *dp,int sz))
4689 root_info->root_info_size = sizeof(RootInfo);
4691 if( root_info->root_magic != root_magic ) Err(errBadMagic);
4694 sz = indeciesHunks(root_info->indeciesUsed) * indexTableHunkSize;
4695 indecies = new IndexBase*[sz];
4696 if( !indecies ) Err(errNoMemory);
4697 index_info = (IndexInfo *)new_uint8_t(sz*sizeof(IndexInfo),id);
4698 if( !index_info ) Err(errNoMemory);
4699 db_info->index_info_id = index_info_id = id;
4700 indeciesAllocated = sz;
4702 indecies_sz = root_info->indeciesUsed;
4703 for( int idx=0; idx<indecies_sz; ++idx ) {
4704 IndexBaseInfo *b = &index_info[idx].base;
4705 Read(*(IndexTypeInfo*)b);
4706 if( b->magic != idx_magic ) Err(errBadMagic);
4707 if( b->type != idxNil )
4708 Read(*(IndexRecdInfo*)b);
4711 IndexBinaryInfo *bi = &index_info[idx].bin;
4713 if_err( new_index(indecies[idx], b, bi) );
4716 IndexStringInfo *si = &index_info[idx].str;
4718 if_err( new_index(indecies[idx], b, si) );
4729 page_table_sz = root_info->pageTableUsed;
4730 sz = pageTableHunks(page_table_sz) * pageTableHunkSize;
4731 pageTable = (Page **)new Page*[sz];
4732 if( !pageTable ) Err(errNoMemory);
4733 pageTableAllocated = sz;
4734 sz = pageTableAllocated*sizeof(PageStorage);
4735 page_info = (PageStorage *)new_uint8_t(sz, id);
4736 if( !page_info ) Err(errNoMemory);
4737 db_info->page_info_id = page_info_id = id;
4738 for( id=0; id<root_info->pageTableUsed; ++id ) {
4739 Read(page_info[id]);
4740 Page *pp = new Page(page_info[id]);
4741 if( !pp ) Err(errNoMemory);
4744 if( !pg->chk_flags(fl_free) ) pg->shmid = -1;
4745 if( pg->used ) pg->set_flags(fl_rd);
4747 while( id < pageTableAllocated )
4755 #define Write(n) do { \
4756 if_err( (sz=(this->*fn)((char*)&(n),sizeof(n))) ); \
4761 writeRootInfo(int(Db::*fn)(char *data, int sz))
4770 for( id=0; id<root_info->indeciesUsed; ++id ) {
4771 IndexBase *ib = indecies[id];
4773 switch( ib->st->type ) {
4775 IndexBinary *b = (IndexBinary*)ib;
4780 IndexString *b = (IndexString*)ib;
4787 IndexBaseType b(idxNil);
4793 for( id=0; id<root_info->pageTableUsed; ++id ) {
4794 Page *pp = get_Page(id);
4806 int n = db->indeciesAllocated - usrIdx + 1;
4807 if( cfnAllocated != n ) {
4809 cfn = new cfnData[n];
4812 cfnData *cdp = &cfn[0];
4813 cfnUsed = db->root_info->indeciesUsed;
4814 for( int idx=usrIdx; idx<cfnUsed; ++idx,++cdp ) {
4815 IndexBase *ib = db->indecies[idx];
4817 switch( ib->st->type ) {
4819 IndexBinary *bidx = (IndexBinary *)ib;
4820 cdp->cmprId = bidx->bst->cmprId;
4821 cdp->compare = bidx->compare;
4834 cfnData *cdp = &cfn[0];
4835 int n = db->root_info->indeciesUsed<cfnUsed ? db->root_info->indeciesUsed : cfnUsed;
4836 for( int idx=usrIdx; idx<n; ++idx,++cdp ) {
4837 IndexBase *ib = db->indecies[idx];
4839 switch( ib->st->type ) {
4841 IndexBinary *bidx = (IndexBinary *)ib;
4842 int cid = cdp->cmprId;
4843 bidx->bst->cmprId = cid;
4844 bidx->compare = cid>0 ? db->cmprFns[cid] : cdp->compare;
4857 int v = attach_wr();
4860 if( v ) detach_rw();
4870 undata.restore(this);
4877 DbInfo(int pid, int key, int id)
4892 int id = -1, flk = 0, ret = 0;
4895 if( !shm_init ) init_shm();
4896 get_mem = &get_shm8_t;
4897 new_mem = &new_shm8_t;
4898 del_mem = &del_shm8_t;
4899 if( flock(fd, LOCK_EX) ) ret = errInvalid;
4902 id = shmget(key, sizeof(DbInfo), IPC_CREAT+IPC_EXCL+0666);
4903 if( id < 0 ) ret = errno==EEXIST ? errDuplicate : errNoMemory;
4906 vp = shmat(id, NULL, 0);
4907 if( vp == (void*)-1 ) ret = errNoMemory;
4911 get_mem = &get_mem8_t;
4912 new_mem = &new_mem8_t;
4913 del_mem = &del_mem8_t;
4914 vp = (void *)new uint8_t[sizeof(DbInfo)];
4915 if( !vp ) Err(errNoMemory);
4918 db_info = new(vp) DbInfo(my_pid, key, id);
4922 if( flk ) flock(fd, LOCK_UN);
4923 //traceback("new_info %s\n",ret ? "failed" : "success");
4931 if( no_shm ) Err(errInvalid);
4932 get_mem = &get_shm8_t;
4933 new_mem = &new_shm8_t;
4934 del_mem = &del_shm8_t;
4935 int id = shmget(key, sizeof(DbInfo), 0666);
4936 if( id < 0 ) Fail(errNotFound);
4937 void *vp = shmat(id, NULL, 0);
4938 if( vp == (void*)-1 ) Err(errNoMemory);
4939 if( del_uint8_t(0, id) <= 1 ) {
4940 shmctl(id, IPC_RMID, 0);
4942 //traceback("get_info failed\n");
4945 DbInfo *dip = (DbInfo *)vp;
4946 if( dip->magic != info_magic ) Err(errBadMagic);
4947 if( dip->info_key != key || dip->info_id != id ) Err(errCorrupt);
4949 //traceback("get_info success\n");
4957 db_info->index_info_id = -1;
4958 db_info->page_info_id = -1;
4959 db_info->root_info_id = -1;
4960 db_info->owner = -1;
4961 db_info->last_owner = my_pid;
4963 int flk = !flock(fd, LOCK_EX) ? 1 : 0;
4965 int ret = del_uint8_t(0, db_info->info_id);
4967 shmctl(db_info->info_id, IPC_RMID, 0);
4971 delete [] (uint8_t*)db_info;
4972 if( flk ) flock(fd, LOCK_UN);
4973 //traceback("del_info %d, refs=%d\n",my_pid,ret);
4980 open(int zfd, int zkey)
4982 //traceback("open %d\n",my_pid);
4984 if( fd >= 0 ) Err(errDuplicate);
4987 if( fd < 0 ) Err(errNotFound);
4989 if( fstat(fd,&st) < 0 ) Err(errIoStat);
4990 if( zkey >= 0 ) use_shm(1);
4992 if_err( new_info(zkey) );
5000 iopen(int undo_save)
5002 //traceback(" iopen %d\n",my_pid);
5004 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
5005 int ret = !root_info ? errNoMemory : 0;
5008 db_info->root_info_id = root_info_id = info_id;
5011 if( !ret ) ret = seek_data(db_magic_offset);
5012 if( !ret ) ret = read_data((char*)&magic,sizeof(magic));
5013 if( !ret && magic != db_magic ) ret = errBadMagic;
5014 if( !ret ) ret = seek_data(root_info_offset);
5015 if( !ret ) ret = read_data((char*)&root_info->root_info_addr,sizeof(root_info->root_info_addr));
5016 if( !ret ) ret = seek_data(root_info->root_info_addr);
5017 if( !ret ) ret = readRootInfo(&Db::read_data) >= 0 ? 0 : ret;
5018 if( !ret ) ret = start_transaction(undo_save);
5020 active_transaction = root_info->transaction_id;
5029 //traceback("close %d\n",my_pid);
5030 if( !db_info || fd < 0 ) return 0;
5038 //traceback(" iclose %d\n",my_pid);
5048 //traceback(" ireopen %d\n",my_pid);
5049 Page **old_page_table = pageTable; pageTable = 0;
5050 PageStorage *old_page_info = page_info; page_info = 0;
5051 int old_page_table_sz = page_table_sz; page_table_sz = 0;
5054 for( pageId id=0; id<old_page_table_sz; ++id ) {
5055 Page *opp = old_page_table[id], &opg = *opp;
5056 if( id < page_table_sz && opg.addr && !opg->chk_flags(fl_wr | fl_rd) ) {
5057 Page &pg = *get_Page(id);
5058 if( !pg.addr && pg->shmid < 0 && opg.shm_id == opg->shmid &&
5059 pg->io_addr == opg->io_addr && pg->trans_id == opg->trans_id ) {
5060 pg.addr = opg.addr; pg->shmid = pg.shm_id = opg.shm_id;
5061 pg->clr_flags(fl_rd);
5069 delete [] old_page_table;
5070 del_uint8_t(old_page_info);
5077 //traceback(" iattach %d\n",my_pid);
5078 RootInfo *new_root_info = 0;
5079 IndexInfo *new_index_info = 0;
5080 PageStorage *new_page_info = 0;
5081 int new_indecies_alloc = 0;
5082 int new_indecies_sz = 0;
5083 IndexBase **new_indecies = 0;
5084 int new_page_table_alloc = 0;
5085 int new_page_table_sz = 0;
5086 Page **new_page_table = 0;
5089 if( !ret && root_info_id != db_info->root_info_id ) {
5090 new_root_info = (RootInfo *)get_uint8_t(db_info->root_info_id);
5091 if( !new_root_info ) ret = errCorrupt;
5094 new_root_info = root_info;
5099 new_indecies_sz = new_root_info->indeciesUsed;
5100 new_indecies_alloc = indeciesHunks(new_indecies_sz) * indexTableHunkSize;
5101 new_indecies = new IndexBase*[new_indecies_alloc];
5102 if( !new_indecies ) ret = errNoMemory;
5106 if( index_info_id != db_info->index_info_id ) {
5107 new_index_info = (IndexInfo *)get_uint8_t(db_info->index_info_id);
5108 if( !new_index_info ) ret = errCorrupt;
5111 new_index_info = index_info;
5116 for( int idx=0; !ret && idx<new_indecies_sz; ++idx ) {
5117 IndexBaseInfo *b = &new_index_info[idx].base;
5118 if( b->magic == idx_magic ) {
5119 IndexBase *ib = idx<indecies_sz ? indecies[idx] : 0;
5120 if( ib && ib->st->type != b->type ) ib = 0;
5124 new_indecies[idx] = new(ib) IndexBinary(ib,*b,new_index_info[idx].bin);
5128 ret = new_index(new_indecies[idx], b, &new_index_info[idx].bin);
5132 new_indecies[idx] = new(ib) IndexString(ib,*b,new_index_info[idx].str);
5136 ret = new_index(new_indecies[idx], b, &new_index_info[idx].str);
5139 new_indecies[idx] = 0;
5149 for( int idx=0; idx<indecies_sz; ++idx ) {
5150 if( !indecies[idx] ) continue;
5151 delete indecies[idx]; indecies[idx] = 0;
5156 new_page_table_sz = new_root_info->pageTableUsed;
5157 new_page_table_alloc = pageTableHunks(new_page_table_sz) * pageTableHunkSize;
5158 new_page_table = (Page **)new Page*[new_page_table_alloc];
5159 if( !new_page_table ) ret = errNoMemory;
5163 if( page_info_id != db_info->page_info_id ) {
5164 new_page_info = (PageStorage *)get_uint8_t(db_info->page_info_id);
5165 if( !new_page_info ) ret = errCorrupt;
5168 new_page_info = page_info;
5174 for( id=0; !ret && id<new_page_table_sz; ++id ) {
5175 Page *pp = id<page_table_sz ? pageTable[id] : 0;
5176 PageStorage *st = &new_page_info[id];
5178 pageTable[id] = 0; pp->st = st; Page &pg = *pp;
5179 if( pg->chk_flags(fl_rd | fl_free) )
5181 else if( pg->shmid >= 0 && pp->shm_id != pg->shmid ) {
5182 del_uint8_t(pp->addr);
5183 pp->addr = (pagePrefix *)get_uint8_t(pp->shm_id=pg->shmid);
5188 if( !pp ) ret = errNoMemory;
5190 new_page_table[id] = pp;
5192 while( id<page_table_sz ) del_page(id++);
5195 delete [] new_indecies;
5196 delete [] new_page_table;
5197 if( !root_info ) root_info = new_root_info;
5198 else del_uint8_t(new_root_info);
5199 if( !index_info ) index_info = new_index_info;
5200 else del_uint8_t(new_index_info);
5201 if( !page_info ) page_info = new_page_info;
5202 else del_uint8_t(new_page_info);
5208 indecies = new_indecies;
5209 indeciesAllocated = new_indecies_alloc;
5210 indecies_sz = new_indecies_sz;
5211 delete [] pageTable;
5212 pageTable = new_page_table;
5213 pageTableAllocated = new_page_table_alloc;
5214 page_table_sz = new_page_table_sz;
5215 root_info_id = db_info->root_info_id;
5216 del_uint8_t(root_info);
5217 root_info = new_root_info;
5218 index_info_id = db_info->index_info_id;
5219 del_uint8_t(index_info);
5220 index_info = new_index_info;
5221 page_info_id = db_info->page_info_id;
5222 del_uint8_t(page_info);
5223 page_info = new_page_info;
5224 active_transaction = root_info->transaction_id;
5229 attach(int zfd, int zkey, int zrw)
5231 //traceback("attach %s %d\n",zrw ? "wr" : "rd", my_pid);
5233 if_ret( get_info(zkey) );
5234 fd = zfd; key = zkey;
5237 else if( zfd != fd || zkey != key )
5241 ( root_info && active_transaction == root_info->transaction_id &&
5242 db_info->root_info_id >= 0 && root_info_id == db_info->root_info_id &&
5243 db_info->index_info_id >= 0 && index_info_id == db_info->index_info_id &&
5244 db_info->page_info_id >= 0 && page_info_id == db_info->page_info_id ) )
5246 int ret = db_info->root_info_id < 0 ? ireopen() : iattach();
5264 if( fd >= 0 ) Err(errDuplicate);
5268 if( new_info(key) ) Err(errNotFound);
5270 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
5271 if( !root_info ) { Err(errNoMemory); }
5273 db_info->root_info_id = root_info_id = info_id;
5274 if_err( init_idx() );
5275 if_err( seek_data(db_magic_offset) );
5276 int magic = db_magic;
5277 if_err( write_data((char *)&magic,sizeof(magic)) );
5278 write_zeros(entityPageSize);
5283 // in-order traversal copying IndexBinary
5284 int Db::IndexBinary::
5285 keyCopy(pageId s, IndexBase *ib)
5287 IndexBinary *idx = (IndexBinary *)ib;
5289 keyBlock *sbb; Page *spp; char *sn;
5290 if_err( db->indexRead(s,0,sbb,spp,sn) );
5291 if( (r=sbb->right_link()) >= 0 ) {
5292 int lkdSz = kdSz + sizeof(pageId);
5293 int n = spp->iused() / lkdSz;
5294 for( int i=0; i<n; ++i ) {
5295 pageId l = readPageId(sn);
5296 sn += sizeof(pageId);
5297 if_ret( keyCopy(l,idx) );
5298 if_ret( idx->Insert(sn,sn+st->keySz) );
5301 if_ret( keyCopy(r,idx) );
5304 int n = spp->iused() / kdSz;
5305 for( int i=0; i<n; ++i ) {
5306 if_ret( idx->Insert(sn,sn+st->keySz) );
5313 // in-order traversal copying IndexString
5314 int Db::IndexString::
5315 keyCopy(pageId s, IndexBase *ib)
5317 IndexString *idx = (IndexString *)ib;
5318 pageId r; unsigned char lky[keysz];
5319 keyBlock *sbb; Page *spp; char *sn;
5320 if_err( db->indexRead(s,0,sbb,spp,sn) );
5321 unsigned char *lp = (unsigned char *)sn;
5322 unsigned char *rp = lp + spp->iused();
5324 if( (r=sbb->right_link()) >= 0 ) {
5326 pageId l = getPageId(lp);
5327 if_ret( keyCopy(l,idx) );
5328 char *dp = (char *)lp; lp += st->dataSz;
5329 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5330 if_ret( idx->Insert(&lky[0],dp) );
5332 if_ret( keyCopy(r,idx) );
5336 char *dp = (char *)lp; lp += st->dataSz;
5337 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5338 if_ret( idx->Insert(&lky[0],dp) );
5345 EntityObj(EntityObj &eobj, int eid)
5348 memmove(&name[0],&eobj.name[0],nmSz);
5349 recdSz = eobj.recdSz;
5351 indexs[idxId] = eobj.indexs[idxId];
5352 for( int i=idxId+1; i<nidxs; ++i )
5353 indexs[i] = eobj.indexs[i];
5359 copy(ObjectLoc &dobj)
5362 Obj *dp = dobj.addr();
5363 if( !dp ) Err(errNoMemory);
5364 allocPrefix *mp = ((allocPrefix *)dp) - 1;
5365 int sz = mp->size - sizeof(*mp);
5366 if_err( allocate(sz - dobj.entity->ent->recdSz) );
5368 if( !np ) Err(errNoMemory);
5370 // copy varObj data using ref data, if any
5371 for( varObjs vp=dobj.entity->vobjs; vp!=0; vp=vp->next ) {
5373 varObj &vd = dp->*ref;
5374 varObj &vn = np->*ref;
5376 if( (sz=vd.size()) > 0 ) {
5378 memcpy(addr(vn),dobj.addr(vd), sz);
5385 copy(Db *db, Objects objs)
5387 int id, n = db->root_info->indeciesUsed;
5388 for( id=usrIdx; id<n; ++id ) {
5389 IndexBase *ib = db->indecies[id];
5390 if( ib && ib->st->type != idxNil ) {
5392 switch( ib->st->type ) {
5393 // copy binary index
5395 IndexBinary *bidx = (IndexBinary *)ib;
5396 int kySz = bidx->st->keySz, dtSz = bidx->st->dataSz;
5397 int idx = get_index(&bidx->st->name[0]);
5399 idx = new_binary_index(&bidx->st->name[0], kySz, dtSz, bidx->compare);
5401 // ignore empty indecies
5402 if( bidx->st->rootPageId >= 0 ) {
5403 // entity id indecies are processed below
5404 if( db->entityNmIndex->Find(&ib->st->name[0],0) != 0 ) {
5405 IndexBinary *bip = (IndexBinary *)indecies[idx];
5406 // use cmprLast since index is in-order. Avoids using
5407 // user defined class key cmprs and compare functions.
5408 bip->compare = cmprLast;
5409 ret = bidx->keyCopy(bidx->st->rootPageId, indecies[idx]);
5410 bip->compare = cmprFns[bip->bst->cmprId];
5414 // copy string index
5416 IndexString *sidx = (IndexString *)ib;
5417 int dtSz = sidx->st->dataSz;
5418 int idx = get_index(&sidx->st->name[0]);
5420 idx = new_string_index(&sidx->st->name[0], dtSz);
5423 if( sidx->st->rootPageId >= 0 )
5424 ret = sidx->keyCopy(sidx->st->rootPageId, indecies[idx]);
5428 if_err( db->flush() );
5429 if_err( commit(-1) );
5434 // copy entity indecies/data
5435 IndexBinary *eidx = (IndexBinary *)db->entityIdIndex;
5436 int eid, status; pgRef loc;
5437 if( !(status=eidx->First(&eid,&loc)) ) do {
5440 Entity *ep = op->obj->entity;
5441 if( ep->ent->id == eid ) break;
5446 EntityLoc &dent = db_ent.ent;
5448 ObjectLoc objd(&db_ent), *dobj = &objd;
5449 // if old db copy fn, use ObjectLoc::copy
5450 if( op->obj->entity->db == db ) dobj = op->obj;
5451 char name[nmSz]; memset(name,0,sizeof(name));
5452 strncpy(&name[0],&dent->name[0],sizeof(name));
5454 Entity entity(this);
5455 EntityLoc &nent = entity.ent;
5457 if( entityNmIndex->Find(&name[0],&nid) != 0 ) {
5458 int nidx1 = dent->nidxs-1;
5459 int sz = sizeof(EntityObj) + sizeof(dent->indexs[0])*nidx1;
5461 if_err( allocate(dent->id, sz, nent.obj, alloc_cache) );
5462 if( !nent.addr_wr() ) Err(errNoMemory);
5464 new((EntityObj *)nent.addr())
5465 EntityObj(*(EntityObj*)dent.addr(),eid);
5466 if_err( entityIdIndex->Insert(&eid,&nent.obj) );
5467 if_err( entityNmIndex->Insert(&name[0],&eid) );
5469 else if( nid == eid )
5470 if_err( entityIdIndex->Find(&eid,&nent.obj) );
5473 ObjectLoc objn(&entity), *nobj = &objn;
5474 // if new db copy fn, use it instead of ObjectLoc::copy
5475 if( op->obj->entity->db == this ) nobj = op->obj;
5477 if( !(status = dobj->FirstId(cur)) ) do {
5479 if_err( nobj->copy(*dobj) );
5481 if_err( entity.construct_(*nobj, dobj->id()) );
5482 } while( !(status=dobj->NextId(cur)) );
5483 if( status == errNotFound ) status = 0;
5485 if_err( db->flush() );
5486 if_err( commit(-1) );
5488 } while( !(status=eidx->Next(&eid,&loc)) );
5489 if( status == errNotFound ) status = 0;
5491 if_err( db->flush() );
5492 if_err( commit(-1) );
5497 cmprFrSt(char *a, char *b)
5499 freeStoreRecord *ap = (freeStoreRecord *)a;
5500 freeStoreRecord *bp = (freeStoreRecord *)b;
5501 if( ap->size > bp->size ) return 1;
5502 if( ap->size < bp->size ) return -1;
5503 if( ap->io_addr > bp->io_addr ) return 1;
5504 if( ap->io_addr < bp->io_addr ) return -1;
5509 cmprAdSt(char *a, char *b)
5511 addrStoreRecord *ap = (addrStoreRecord *)a;
5512 addrStoreRecord *bp = (addrStoreRecord *)b;
5513 if( ap->io_addr > bp->io_addr ) return 1;
5514 if( ap->io_addr < bp->io_addr ) return -1;
5515 if( ap->size > bp->size ) return 1;
5516 if( ap->size < bp->size ) return -1;
5521 cmprFrSp(char *a, char *b)
5523 freeSpaceRecord *ap = (freeSpaceRecord *)a;
5524 freeSpaceRecord *bp = (freeSpaceRecord *)b;
5525 if( ap->size > bp->size ) return 1;
5526 if( ap->size < bp->size ) return -1;
5527 if( ap->id > bp->id ) return 1;
5528 if( ap->id < bp->id ) return -1;
5529 if( ap->offset > bp->offset ) return 1;
5530 if( ap->offset < bp->offset ) return -1;
5535 cmprAdSp(char *a, char *b)
5537 addrSpaceRecord *ap = (addrSpaceRecord *)a;
5538 addrSpaceRecord *bp = (addrSpaceRecord *)b;
5539 if( ap->id > bp->id ) return 1;
5540 if( ap->id < bp->id ) return -1;
5541 if( ap->offset > bp->offset ) return 1;
5542 if( ap->offset < bp->offset ) return -1;
5543 if( ap->size > bp->size ) return 1;
5544 if( ap->size < bp->size ) return -1;
5549 cmprOIds(char *a, char *b)
5553 if( ap->id > bp->id ) return 1;
5554 if( ap->id < bp->id ) return -1;
5559 cmprStr(char *a, char *b)
5561 return strncmp(a,b,keysz);
5565 cmprKey(char *a, char *b)
5568 return kp->cmpr(a,b);
5572 cmprLast(char *a, char *b)
5577 Db::CmprFn Db::cmprFns[] = {
5586 findCmprFn(CmprFn fn)
5589 for( i=lengthof(cmprFns); --i>0; )
5590 if( fn == cmprFns[i] ) return i;
5597 if_err( new_binary_index("freeStoreIndex", sizeof(freeStoreRecord), 0, cmprFrSt) );
5598 if_err( new_binary_index("addrStoreIndex", sizeof(addrStoreRecord), 0, cmprAdSt) );
5599 if_err( new_binary_index("freeSpaceIndex", sizeof(freeSpaceRecord), 0, cmprFrSp) );
5600 if_err( new_binary_index("addrSpaceIndex", sizeof(addrSpaceRecord), 0, cmprAdSp) );
5601 if_err( new_binary_index("entityIdIndex", sizeof(int), sizeof(pgRef), cmprOIds) );
5602 if_err( new_binary_index("entityNmIndex", sizeof(char[nmSz]), sizeof(int), cmprStr) );
5611 FILE *fp = fopen("/proc/sys/kernel/shmall","w");
5612 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5613 fp = fopen("/proc/sys/kernel/shmmax","w");
5614 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5615 fp = fopen("/proc/sys/kernel/shmmni","w");
5616 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5622 root_magic = Db::root_magic;
5623 root_info_size = sizeof(RootInfo);
5626 root_info_addr = NIL;
5627 last_info_addr = NIL;
5643 storeBlockSize = defaultStoreBlockSize;
5644 entityPageSize = defaultEntityPageSize;
5645 pageTableHunkSize = defaultPageTableHunkSize;
5646 indexTableHunkSize = defaultIndexTableHunkSize;
5648 root_info_id = -1; root_info = 0;
5649 index_info_id = -1; index_info = 0;
5650 indecies = 0; indeciesAllocated = 0; indecies_sz = 0;
5651 page_info_id = -1; page_info = 0;
5652 pageTable = 0; pageTableAllocated = 0; page_table_sz = 0;
5656 bfr = lmt = inp = 0;
5657 active_transaction = -1;
5664 for( int idx=indecies_sz; --idx>=0; ) delete indecies[idx];
5665 delete [] indecies; indecies = 0;
5667 del_uint8_t(index_info); index_info = 0;
5668 indeciesAllocated = 0; indecies_sz = 0;
5672 for( pageId id=0; id<page_table_sz; ++id ) {
5673 Page *pp = get_Page(id); pageDealloc(*pp); delete pp;
5675 delete [] pageTable; pageTable = 0;
5677 del_uint8_t(page_info); page_info = 0;
5678 pageTableAllocated = 0; page_table_sz = 0;
5681 del_uint8_t(root_info); root_info = 0;
5688 no_shm = defaultNoShm;
5689 get_mem = !no_shm ? &get_shm8_t : &get_mem8_t;
5690 new_mem = !no_shm ? &new_shm8_t : &new_mem8_t;
5691 del_mem = !no_shm ? &del_shm8_t : &del_mem8_t;
5692 root_info_id = index_info_id = page_info_id = -1;
5714 if( error() < 0 ) Fail(errPrevious); \
5715 if( r < 0 || r >= root_info->indeciesUsed || !indecies[r] ) Err(errInvalid); \
5716 return indecies[r]->fn;
5719 ins(int r, void *key, void *data)
5721 Run(r,Insert(key,data));
5725 del(int r, void *key)
5731 find(int r, void *key, void *rtnData)
5733 Run(r,Find(key,rtnData));
5737 locate(int r, int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
5739 Run(r,Locate(op,key,cmpr,rtnKey,rtnData));
5743 first(int r, void *key, void *rtnData)
5745 Run(r,First(key,rtnData));
5749 last(int r, void *key, void *rtnData)
5751 Run(r,Last(key,rtnData));
5755 next(int r, void *key, void *rtnData)
5757 Run(r,Next(key,rtnData));
5761 nextloc(int r, pgRef &loc)
5763 Run(r,NextLoc(loc));
5770 allocate(ObjectLoc &loc, int sz)
5772 if( loc.entity != this ) Err(errObjEntity);
5774 int n = ent->recdSz + sz;
5775 if_err( db->allocate(id, n, loc.obj, ent->alloc_cache) );
5776 Obj *op = loc.addr();
5778 ent._wr(); loc._wr();
5780 op->id = ent->maxId;
5786 construct_(ObjectLoc &loc, int id)
5788 int idx = ent->indexs[idxId];
5789 loc._wr(); loc->id = id;
5790 if_err( db->indecies[idx]->Insert(&id,&loc.obj) );
5791 ent._wr(); ++ent->count;
5792 if( id >= ent->maxId ) ent->maxId = id+1;
5798 destruct_(ObjectLoc &loc, int id)
5800 if_err( index(idxId)->Delete(&id) );
5801 ent._wr(); --ent->count;
5802 if( id+1 == ent->maxId ) {
5803 if( ent->count > 0 ) {
5804 if_err( index(idxId)->Last(&id,0) );
5816 new_entity_(Entity &entity, const char *nm, int sz)
5819 EntityLoc &ent = entity.ent;
5820 // construct EntityObj
5821 if( root_info->entity_max_id >= max_entity_type ) Err(errLimit);
5822 if_err( allocate(root_info->entity_max_id+1,
5823 sizeof(EntityObj), ent.obj, alloc_cache) );
5824 int id = root_info->entity_max_id;
5825 ent._wr(); ent->id = id;
5826 char name[nmSz]; memset(&name[0],0,sizeof(name));
5827 strncpy(name,nm,sizeof(name)-1);
5828 memmove(&ent->name[0],name,sizeof(name));
5829 ent->alloc_cache.init();
5833 // add to entity indecies
5834 if_err( entityIdIndex->Insert(&id,&ent.obj) );
5835 if_err( entityNmIndex->Insert(&name[0],&id) );
5836 // create entity id/loc
5837 int idx = new_binary_index(nm, sizeof(int),sizeof(pgRef),cmprOIds);
5839 ent->indexs[idxId] = idx;
5841 ++root_info->entity_count;
5842 ++root_info->entity_max_id;
5843 entity.rw_lock = &db_info->rw_locks[id];
5848 del_entity(Entity &entity)
5850 EntityLoc &ent = entity.ent;
5851 if( ent.obj.id >= 0 ) {
5853 ObjectLoc loc(&entity);
5854 int status = loc.FirstId();
5857 entity.deallocate(loc.obj);
5858 } while( !(status=loc.NextId()) );
5859 if( status != errNotFound )
5861 for( int i=ent->nidxs; --i>=0; ) entity.del_index_(i);
5863 entityIdIndex->Delete(&id);
5864 entityNmIndex->Delete(&ent->name[0]);
5865 if_err( deallocate(ent.obj, alloc_cache) );
5867 --root_info->entity_count;
5868 if( id+1 == root_info->entity_max_id ) {
5869 if( root_info->entity_count > 0 ) {
5870 if_err( entityIdIndex->Last(&id,0) );
5875 root_info->entity_max_id = id;
5882 new_entity(Entity &entity, const char *nm, int sz)
5884 int ret = new_entity_(entity, nm, sz);
5885 if( ret ) del_entity(entity);
5890 get_entity(Entity &entity, const char *nm)
5892 EntityLoc &ent = entity.ent;
5893 char name[nmSz]; memset(&name[0],0,sizeof(name));
5894 strncpy(name,nm,sizeof(name)-1); int id;
5895 if_fail( entityNmIndex->Find(&name[0], &id) );
5896 if_err( entityIdIndex->Find(&id, &ent.obj) );
5897 entity.rw_lock = &db_info->rw_locks[id];
5902 get_index(const char *nm, CmprFn cmpr)
5904 int idx = ent->nidxs;
5905 while( --idx >= 0 ) {
5906 int i = ent->indexs[idx];
5907 if( i < 0 ) continue;
5908 IndexBase *ib = db->indecies[i];
5909 if( ib && !strncmp(&ib->st->name[0],nm,nmSz) ) {
5910 if( cmpr && ib->st->type == idxBin ) {
5911 IndexBinary *bidx = (IndexBinary *)ib;
5912 bidx->compare = cmpr;
5913 bidx->bst->cmprId = db->findCmprFn(cmpr);
5918 if( idx < 0 ) Fail(errNotFound);
5925 EntityLoc nent(this);
5926 // construct EntityObj
5927 int nidx = ent->nidxs;
5928 int sz = sizeof(EntityObj) + sizeof(ent->indexs[0])*nidx;
5929 if_err( db->allocate(ent->id, sz, nent.obj, db->alloc_cache) );
5930 nent._wr(); nent->id = ent->id;
5931 memmove(&nent->name[0],&ent->name[0],nmSz);
5932 nent->alloc_cache = ent->alloc_cache;
5933 nent->maxId = ent->maxId;
5934 nent->recdSz = ent->recdSz;
5935 nent->count = ent->count;
5936 // add to entity indecies
5937 if_err( db->entityIdIndex->Modify(&ent->id,&nent.obj) );
5938 for( int i=0; i<nidx; ++i )
5939 nent->indexs[i] = ent->indexs[i];
5941 nent->indexs[nidx] = idx;
5942 nent->nidxs = nidx+1;
5943 if_err( db->deallocate(ent.obj, db->alloc_cache) );
5949 del_index(const char *nm)
5951 int idx; if_ret( idx = get_index(nm) );
5952 return del_index(idx);
5958 if( i <= idxId ) Fail(errInvalid);
5959 if( i >= ent->nidxs ) Fail(errInvalid);
5960 return del_index_(i);
5966 int idx = ent->indexs[i];
5967 if( idx < 0 ) Fail(errNotFound);
5968 if( idx >= db->root_info->indeciesUsed ) Fail(errNotFound);
5969 if( !db->indecies[idx] ) Fail(errNotFound);
5970 ent._wr(); ent->indexs[i] = -1;
5971 db->indecies[idx]->Clear();
5977 finit(Objects objects)
5980 Objects op = objects; objects = op->next;
5981 Entity *ep = op->obj->entity;
5982 for( varObjs nxt=0, vp=ep->vobjs; vp!=0; vp=nxt ) {
5983 nxt = vp->next; delete vp;
5989 void Db::ObjectLoc::
5992 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
5994 (op->*(vp->ref)).init();
5998 void Db::ObjectLoc::
6001 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
6003 (op->*(vp->ref)).del(entity);
6009 size(varObj &vobj, int sz)
6011 if( vobj.len != sz ) {
6012 AllocCache &cache = entity->ent->alloc_cache;
6013 if_ret( entity->db->reallocate(sz,vobj.loc,cache) );
6014 vobj.len = sz; entity->ent._wr(); _wr();
6019 // get last index id on member accessed with ip
6021 last(const char *nm,int (ObjectLoc::*ip)())
6023 int idx; if_ret( idx = entity->get_index(nm) );
6024 return last(idx, ip);
6028 last(int idx,int (ObjectLoc::*ip)())
6030 ObjectLoc last_loc(*this);
6031 int id, ret = entity->index(idx)->Last(0,&id);
6032 if( ret < 0 ) return ret == errNotFound ? 0 : ret;
6033 if_ret( entity->index(idxId)->Find((void*)&id, &last_loc.obj) );
6034 return (last_loc.*ip)();
6037 // get last index unsigned id on member accessed with ip
6038 unsigned int Db::ObjectLoc::
6039 last(const char *nm,unsigned int (ObjectLoc::*ip)())
6041 int idx; if_ret( idx = entity->get_index(nm) );
6042 return last(idx, ip);
6045 unsigned int Db::ObjectLoc::
6046 last(int idx,unsigned int (ObjectLoc::*ip)())
6048 ObjectLoc last_loc(*this);
6049 int id, ret = entity->index(idx)->Last(0,&id);
6050 if( ret < 0 ) return ret == errNotFound ? 0 : ret;
6051 if_ret( entity->index(idxId)->Find((void*)&id, &last_loc.obj) );
6052 return (last_loc.*ip)();
6055 #define cmpr_type(nm,ty) int Db::ObjectLoc:: \
6056 nm(const ty *ap, int asz, const ty *bp, int bsz) { \
6057 int n = asz < bsz ? asz : bsz; \
6059 if( *ap > *bp ) return 1; \
6060 if( *ap < *bp ) return -1; \
6061 ++ap; ++bp; n -= sizeof(ty); \
6063 if( asz > bsz ) return 1; \
6064 if( asz < bsz ) return -1; \
6068 cmpr_type(cmpr_char, char)
6069 cmpr_type(cmpr_uchar, unsigned char)
6070 cmpr_type(cmpr_short, short)
6071 cmpr_type(cmpr_ushort, unsigned short)
6072 cmpr_type(cmpr_int, int)
6073 cmpr_type(cmpr_uint, unsigned int)
6074 cmpr_type(cmpr_long, long)
6075 cmpr_type(cmpr_ulong, unsigned long)
6076 cmpr_type(cmpr_float, float)
6077 cmpr_type(cmpr_double, double)
6081 cmpr_media(const unsigned char *ap, int asz, const unsigned char *bp, int bsz)
6083 return mediaCmpr((uint8_t *)ap).cmpr((uint8_t *)bp);
6090 if( !idx ) Err(errInvalid);
6091 int id; if_fail( idx->Find(*this, &id) );
6092 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6099 if( !idx ) Err(errInvalid);
6100 int id; if_fail( idx->Locate(op, *this,0, 0,&id) );
6101 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6108 if( !idx ) Err(errInvalid);
6109 int id; if_fail( idx->First(0,&id) );
6110 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6117 if( !idx ) Err(errInvalid);
6118 int id; if_fail( idx->Last(0,&id) );
6119 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6126 if( !idx ) Err(errInvalid);
6127 int id; if_fail( idx->Next(this,&id) );
6128 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6135 if( !idx ) Err(errInvalid);
6136 int id; if_fail( idx->First(pos,0,&id) );
6137 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6144 if( !idx ) Err(errInvalid);
6145 int id; if_fail( idx->Next(pos,this,&id) );
6146 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6153 if( !idx ) Err(errInvalid);
6154 int id; if_fail( idx->Locate(op, *this,0, 0,&id) );
6155 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6159 int Db::ioCmpr(const void *a, const void *b, void *c)
6162 Page &pa = *db->get_page(*(pageId*)a);
6163 Page &pb = *db->get_page(*(pageId*)b);
6164 int64_t v = pa->io_addr - pb->io_addr;
6165 return v < 0 ? -1 : v > 0 ? 1 : 0;
6170 int npages = root_info->pageTableUsed;
6171 pageId *pages = new pageId[npages];
6172 for( int i=0 ; i<npages; ++i ) pages[i] = i;
6173 qsort_r(pages, npages, sizeof(*pages), ioCmpr, this);
6174 for( int i=0 ; i<npages; ++i )
6175 pageLoad(pages[i], *get_page(pages[i]));