13 #include <sys/types.h>
21 extern "C" void traceback(const char *fmt,...)
23 FILE *fp = fopen("/tmp/x","a");
25 va_list ap; va_start(ap, fmt);
26 vfprintf(fp, fmt, ap);
28 int nptrs; void *buffer[100];
29 nptrs = backtrace(buffer, lengthof(buffer));
30 struct timeval now; gettimeofday(&now,0);
31 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
32 fprintf(fp,"*** stack %d\n",nptrs);
33 char **strings = backtrace_symbols(buffer, nptrs);
34 if( !strings ) return;
35 for( int i=0; i<nptrs; ++i ) fprintf(fp,"%s\n", strings[i]);
42 // use repeated string byte move for decompress operation.
43 // page table design should be changed to hierachical bit
44 // field, so that only the trace of page table blocks in
45 // the update path are written at commit time. Writing
46 // the entire page table on commit can be expensive.
48 // local memory allocator
49 void *Db::get_mem8_t(int id)
54 void *Db::new_mem8_t(int size, int &id)
56 return new uint8_t[size];
59 int Db::del_mem8_t(const void *vp, int id)
61 delete [] (uint8_t*)vp;
66 // shared memory allocator
70 void *vp = shmat(id, NULL, 0);
71 if( vp == (void*)-1 ) { perror("shmat"); sleep(1000000); vp = 0; }
76 new_shm8_t(int size, int &id)
78 id = shmget(IPC_PRIVATE, size, IPC_CREAT+0666);
79 if( id < 0 ) { perror("shmget"); return 0; }
80 uint8_t *addr = (uint8_t *)get_shm8_t(id);
81 if( addr ) shmctl(id, IPC_RMID, 0);
86 del_shm8_t(const void *vp, int id)
91 if( !shmctl(id, IPC_STAT, &ds) ) ret = ds.shm_nattch;
92 else { perror("shmctl"); sleep(1000000); ret = errIoStat; }
101 get_uint8_t(int id, int pg)
103 uint8_t *bp = (uint8_t *) get_mem(id);
108 new_uint8_t(int size, int &id, int pg)
110 void *vp = new_mem(size, id);
111 return (uint8_t *)vp;
115 del_uint8_t(const void *vp, int id, int pg)
117 return del_mem(vp, id);
126 if( err_callback ) err_callback(this, v);
130 //int Db::debug = DBBUG_ERR + DBBUG_FAIL;
131 int Db::debug = DBBUG_ERR;
135 void dmp(unsigned char *bp, int len)
137 unsigned char ch[16], lch[16];
138 int llen = lengthof(lch)+1;
140 unsigned char *adr = 0;
145 for( n=0; n<lengthof(ch) && --len>=0; ch[n++]=*bp++ );
146 if( (i=n) >= llen ) // check for a duplicate
147 while( --i>=0 && ch[i]==lch[i] );
148 if( i >= 0 || len < 0 ) { /* not a duplicate */
149 if( dups > 0 ) fprintf(stderr," ...%d\n",dups);
151 fprintf(stderr,"%p:",adr);
152 for( i=0; i<n; ++i ) fprintf(stderr," %02x",lch[i]=ch[i]);
153 for( i=n; i<lengthof(ch); ++i ) fprintf(stderr," ");
156 fprintf(stderr,"%c", ch[i]>=' ' && ch[i]<='~' ? ch[i] : '.');
157 fprintf(stderr,"\n");
170 "NoCmprFn", "NotFound", "Duplicate", "NoPage", "NoMemory",
171 "IoRead", "IoWrite", "IoSeek", "IoStat", "BadMagic", "Corrupt",
172 "Invalid", "Previous", "ObjEntity", "Limit", "User",
176 dmsg(int msk, const char *msg,...)
178 if( !(msk & debug) ) return;
179 #ifdef DEBUG_TIMESTAMPS
180 struct timeval now; gettimeofday(&now,0);
181 printf("@%ld.03%ld: ",now.tv_sec,now.tv_usec/1000);
187 FILE *fp = fopen("/tmp/x","a"); if( !fp ) return;
188 fprintf(fp,"@%ld.03%ld %5d: ",now.tv_sec,now.tv_usec/1000,getpid());
189 va_start(ap, msg); vfprintf(fp, msg, ap); va_end(ap);
194 _err_(int v,const char *fn,int ln)
197 dmsg(DBBUG_ERR,"%s:%d errored %d (%s)\n",fn,ln,v,errMsgs[-v]);
203 _fail_(int v,const char *fn,int ln)
205 dmsg(DBBUG_FAIL,"%s:%d failed %d (%s)\n",fn,ln,v,errMsgs[-v]);
210 Error(int v,const char *msg)
213 dmsg(DBBUG_ERR,"%s\n",msg);
219 printf("dmp root_info->file_size %016lx\n",
220 root_info->file_size);
221 printf(" rootInfo root_info->transaction_id %d\n",
222 root_info->transaction_id);
223 printf(" root_info->root_info_addr/size %016lx/%08x\n",
224 root_info->root_info_addr,root_info->root_info_size);
225 printf(" root_info->last_info_addr/size %016lx/%08x\n",
226 root_info->last_info_addr,root_info->last_info_size);
227 printf(" root_info->indeciesUsed %d\n",
228 root_info->indeciesUsed);
229 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
230 IndexBase *ib = indecies[idx];
232 printf(" idx %d. %-24s %s pop %5ld"
233 " root %-5d rhs %-5d ky/Dt %2d/%-2d ",
234 idx, &ib->st->name[0], ib->st->type==idxBin ? "bin" :
235 ib->st->type==idxStr ? "str" : "???", ib->st->count,
236 ib->st->rootPageId, ib->st->rightHandSide,
237 ib->st->keySz, ib->st->dataSz);
238 printf(" free %d/",ib->st->freeBlocks);
240 for( pageId id=ib->st->freeBlocks; id>=0; ++n ) {
241 pgRef loc; loc.id = id; loc.offset = 0;
242 keyBlock *kbb; addrRead_(loc,kbb);
243 if( (id=kbb->right_link()) < 0 ) break;
246 printf("%d pages\n",n);
249 printf(" Entities, count %ld\n", entityIdIndex->Count());
250 Entity e(this); EntityLoc &ent = e.ent; int eid;
251 if( !entityIdIndex->First(&eid,&ent.obj) ) do {
252 int nidxs = ent->nidxs;
253 printf(" id %d. %s maxId %d, recdSz %d, count %d, nidxs %d:",
254 eid, &ent->name[0], ent->maxId, ent->recdSz, ent->count, nidxs);
255 for( int i=0; i<nidxs; ++i ) {
256 int idx = ent->indexs[i];
257 printf(" %d(%s),", idx, idx < 0 ? "" :
258 !indecies[idx] ? "(err)" : &indecies[idx]->st->name[0]);
261 } while( !entityIdIndex->Next(&eid,&ent.obj) );
263 printf(" root_info->pageTableUsed %d\n",root_info->pageTableUsed);
264 for( int pid=0; pid<root_info->pageTableUsed; ++pid ) {
265 Page &pg = *get_page(pid);
266 if( !pg.addr && !pg->io_allocated && pg->io_addr==NIL &&
267 pg->trans_id < 0 && pg->shmid < 0 && !pg->flags &&
268 !pg->wr_allocated && pg->wr_io_addr==NIL ) continue;
269 printf(" pid %d. used %d, type %d, link %d, flags %04x addr %p\n",
270 pid, pg->used, pg->type, pg->link, pg->flags, pg.addr);
271 printf(" allocated %d io_alloc/io_addr %d/%016lx, wr_alloc/io_addr %d/%016lx\n",
272 pg->allocated, pg->io_allocated, pg->io_addr,
273 pg->wr_allocated, pg->wr_io_addr);
275 printf(" root_info->freePages %d",root_info->freePages);
277 for( pageId id=root_info->freePages; id>=0; ++n ) id = (*get_page(id))->link;
278 // printf(", %d",(id=get_page(id)->link));
279 printf(", pages = %d\n",n);
280 printf("freeStoreIndex\n"); fdmp();
281 printf("addrStoreIndex\n"); admp();
282 printf("freeSpaceIndex\n"); edmp();
283 printf("addrSpaceIndex\n"); bdmp();
290 freeStoreRecord free;
291 if( !freeStoreIndex->First(&free,0) ) do {
292 printf("free=%04lx/%012lx\n", free.size,free.io_addr);
293 } while( !freeStoreIndex->Next(&free,0) );
299 addrStoreRecord addr;
300 if( !addrStoreIndex->First(&addr,0) ) do {
301 printf("addr=%012lx/%04lx\n", addr.io_addr,addr.size);
302 } while( !addrStoreIndex->Next(&addr,0) );
308 if( !indecies ) return; addrStoreRecord addr;
309 addrStoreRecord last; last.io_addr = 0; last.size = 0;
310 if( !addrStoreIndex->First(&addr,0) ) do {
311 if( last.io_addr > addr.io_addr ||
312 (last.io_addr == addr.io_addr && last.size >= addr.size ) ) {
313 printf("last=%012lx/%04lx, addr=%012lx/%04lx\n",
314 addr.io_addr, addr.size, last.io_addr, last.size);
318 } while( !addrStoreIndex->Next(&addr,0) );
324 if( !indecies ) return; freeStoreRecord free;
325 freeStoreRecord last; last.size = 0; last.io_addr = 0;
326 if( !freeStoreIndex->First(&free,0) ) do {
327 if( last.size > free.size ||
328 (last.size == free.size && last.io_addr >= free.io_addr ) ) {
329 printf("last=%04lx/%012lx, free=%04lx/%012lx\n",
330 free.size, free.io_addr, last.size, last.io_addr);
334 } while( !freeStoreIndex->Next(&free,0) );
340 freeSpaceRecord free;
341 if( !freeSpaceIndex->First(&free,0) ) do {
342 printf("free=%04lx %d/%d\n", free.size,free.id,free.offset);
343 } while( !freeSpaceIndex->Next(&free,0) );
349 addrSpaceRecord addr;
350 if( !addrSpaceIndex->First(&addr,0) ) do {
351 printf("addr=%d/%d %04lx\n", addr.id,addr.offset,addr.size);
352 } while( !addrSpaceIndex->Next(&addr,0) );
358 long store_allocated=0, store_used=0;
359 long loaded_allocated=0, loaded_used=0;
360 long written_allocated=0, written_used=0;
361 int pages_written=0, pages_loaded=0, pages_deleted=0;
362 for( int id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
363 Page &pg = *get_page(id);
364 store_allocated += pg->allocated;
365 store_used += pg->used;
368 loaded_allocated += pg->allocated;
369 loaded_used += pg->used;
371 if( pg->chk_flags(fl_wr) ) {
373 written_allocated += pg->allocated;
374 written_used += pg->used;
375 if( !pg.addr ) ++pages_deleted;
378 #define percent(a,b) (!(a) || !(b) ? 0.0 : (100.0*(a)) / (b))
379 long read_allocated = loaded_allocated - written_allocated;
380 long read_used = loaded_used - written_used;
381 int pages_read = pages_loaded - pages_written;
382 printf("\ncommit %d\n", root_info->transaction_id);
383 printf(" pages %8d/del %-4d alloc:%-12ld used:%-12ld %7.3f%%\n",
384 root_info->pageTableUsed, pages_deleted, store_allocated, store_used,
385 percent(store_used,store_allocated));
386 printf(" loaded %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
387 pages_loaded, percent(pages_loaded, root_info->pageTableUsed),
388 loaded_allocated, loaded_used, percent(loaded_used, loaded_allocated));
389 printf(" read %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
390 pages_read, percent(pages_read, root_info->pageTableUsed),
391 read_allocated, read_used, percent(read_used, read_allocated));
392 printf(" write %8d/%-7.3f%% alloc:%-12ld used:%-12ld %7.3f%%\n",
393 pages_written, percent(pages_written, root_info->pageTableUsed),
394 written_allocated, written_used, percent(written_used, written_allocated));
396 long store_avail=0, space_avail=0;
397 freeStoreRecord store;
398 if( !freeStoreIndex->First(&store,0) ) do {
399 store_avail += store.size;
400 } while( !freeStoreIndex->Next(&store,0) );
401 freeSpaceRecord space;
402 if( !freeSpaceIndex->First(&space,0) ) do {
403 space_avail += space.size;
404 } while( !freeSpaceIndex->Next(&space,0) );
405 printf(" file %-12ld", root_info->file_size);
406 printf(" store %12ld/%-7.3f%% space %12ld/%-7.3f%%\n",
407 store_avail, percent(store_avail, root_info->file_size),
408 space_avail, percent(space_avail, root_info->file_size));
421 return syscall(SYS_sched_yield);
427 return syscall(SYS_gettid);
434 while( (ret=zfutex(FUTEX_WAKE, nwakeups)) < 0 );
439 zwait(int val, timespec *ts)
441 return zfutex(FUTEX_WAIT, val, ts);
447 fprintf(stderr,"unlocking and not locked\n");
454 if( v || zxchg(1,loc) >= 0 ) do {
456 } while ( zxchg(1,loc) >= 0 );
461 zunlock(int nwakeups)
470 zdecr(loc); lk.lock(); lk.unlock(); zincr(loc);
476 if( lk.loc >= 0 ) zwake(1);
482 lk.lock(); timespec ts = { 1, 0 };
483 int v; while( (v=loc) >= 0 ) zwait(v, &ts);
496 if( !db_info ) return -1;
497 if( is_locked() > 0 && db_info->owner == my_pid ) return 0;
499 //traceback(" attach_wr %d/%d/%d\n",my_pid,db_info->owner,db_info->last_owner);
500 db_info->last_owner = db_info->owner;
501 db_info->owner = my_pid;
509 //traceback(" attach_rd %d/%d\n",my_pid,db_info->owner);
516 if( !db_info ) return;
524 //traceback(" detach_rw %d\n",my_pid);
527 // persistent pageTable element initial constructor
544 // non-persistent pageTable element initial constructor
552 // deletes storage next start_transaction
556 st->used = 0; st->set_flags(fl_wr);
560 // locked pageTable access
564 read_locked by(db_info->pgTblLk);
565 return get_Page(pid);
569 * Db::alloc_pageTable -- alloc page table
572 * sz int input page table size
576 alloc_pageTable(int sz)
578 write_blocked by(db_info->pgTblLk);
579 int n = pageTableHunks(sz) * pageTableHunkSize;
580 Page **pt = new Page*[n];
581 if( !pt ) Err(errNoMemory);
582 int info_id, info_sz = n*sizeof(PageStorage);
583 PageStorage *new_page_info = (PageStorage *)new_uint8_t(info_sz, info_id);
584 if( !new_page_info ) { delete pt; Err(errNoMemory); }
585 if( page_info && root_info->pageTableUsed > 0 )
586 memcpy(new_page_info, page_info, root_info->pageTableUsed*sizeof(*page_info));
588 for( ; i<root_info->pageTableUsed; ++i ) {
590 pt[i]->st = &new_page_info[i];
592 for( ; i<n; ++i ) pt[i] = 0;
593 db_info->page_info_id = page_info_id = info_id;
594 del_uint8_t(page_info); page_info = new_page_info;
595 delete [] pageTable; pageTable = pt;
596 pageTableAllocated = n;
601 * Db::new_page -- access/construct new page
604 * returns page id on success (>=zero), error code otherwise(< 0)
610 locked by(db_info->pgAlLk);
611 pageId id = root_info->freePages;
613 if( root_info->pageTableUsed >= pageTableAllocated ) {
614 int sz = root_info->pageTableUsed + root_info->pageTableUsed/2 + 1;
615 if_err( alloc_pageTable(sz) );
617 id = root_info->pageTableUsed;
618 Page * pp = new Page(*new(&page_info[id]) PageStorage());
619 if( !pp ) Err(errNoMemory);
621 page_table_sz = ++root_info->pageTableUsed;
624 Page &pg = *get_page(id);
625 root_info->freePages = pg->link;
626 new(&pg) Page(*new(&page_info[id]) PageStorage());
634 Page *pp = get_Page(id);
641 * Db::free_page -- link to root_info->freePages
644 * pid pageId input page id
650 locked by(db_info->pgAlLk);
651 Page &pg = *get_page(pid);
655 pg->clr_flags(fl_wr | fl_rd);
656 pg->set_flags(fl_free);
657 pageId id = root_info->freePages;
659 Page *lpp = 0; // use sorted order
660 while( id >= 0 && id < pid ) {
668 root_info->freePages = pid;
673 get_index(const char *nm, CmprFn cmpr)
675 int idx = root_info->indeciesUsed;
676 while( --idx >= 0 ) {
677 IndexBase *ib = indecies[idx];
679 if( !strncmp(&ib->st->name[0],nm,nmSz) ) break;
681 if( idx >= 0 && cmpr && indecies[idx]->st->type == idxBin ) {
682 IndexBinary *bidx = (IndexBinary *)indecies[idx];
683 bidx->compare = cmpr;
684 bidx->bst->cmprId = findCmprFn(cmpr);
692 if( r < 0 || r >= root_info->indeciesUsed ) Fail(errInvalid);
693 if( !indecies[r] ) Fail(errNotFound);
694 return indecies[r]->Count();
698 alloc_indecies(int n)
700 IndexBase **it = new IndexBase*[n];
701 if( !it ) Err(errNoMemory);
703 IndexInfo *info = (IndexInfo *)new_uint8_t(n*sizeof(*info), info_id);
704 if( !info ) { delete it; Err(errNoMemory); }
705 memcpy(info, index_info, indeciesAllocated*sizeof(*info));
707 for( ; i<root_info->indeciesUsed; ++i ) {
708 if( !(it[i] = indecies[i]) ) continue;
709 switch( it[i]->st->type ) {
711 IndexBinary *bidx = (IndexBinary *)it[i];
712 bidx->st = info[i].bin;
713 bidx->bst = info[i].bin;
716 IndexString *sidx = (IndexString *)it[i];
717 sidx->st = info[i].str;
718 sidx->sst = info[i].str;
725 for( ; i<n; ++i ) it[i] = 0;
726 db_info->index_info_id = index_info_id = info_id;
727 del_uint8_t(index_info); index_info = info;
728 delete [] indecies; indecies = it;
729 indeciesAllocated = n;
737 while( idx < root_info->indeciesUsed && indecies[idx] )
739 if( idx >= indeciesAllocated ) {
740 int n = indeciesAllocated + indexTableHunkSize;
741 if( alloc_indecies(n) ) Err(errNoMemory);
743 if( idx >= root_info->indeciesUsed ) {
744 if( idx >= max_index_type ) Err(errLimit);
745 root_info->indeciesUsed = idx+1;
746 indecies_sz = root_info->indeciesUsed;
755 delete indecies[idx];
757 for( idx=root_info->indeciesUsed; idx>0 && indecies[idx-1]!=0; --idx );
758 indecies_sz = root_info->indeciesUsed = idx;
762 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexBinaryInfo *bi)
764 IndexBaseStorage *st = new(b) IndexBaseStorage();
765 IndexBinaryStorage *bst = new(bi) IndexBinaryStorage();
766 IndexBinary *bidx = new IndexBinary(this, st, bst);
767 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
768 if( bidx ) delete bidx;
776 new_binary_index(const char *nm, int ksz, int dsz, CmprFn cmpr)
778 if( get_index(nm) >= 0 ) Err(errDuplicate);
779 int idx = new_index();
780 if( idx < 0 ) Err(errNoMemory);
781 IndexBinary *bidx = new IndexBinary(this, idx, ksz, dsz, cmpr);
782 if( !bidx || !bidx->ikey() || !bidx->tkey() ) {
786 if( nm ) strncpy(&bidx->st->name[0],nm,sizeof(bidx->st->name));
787 indecies[idx] = bidx;
792 new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexStringInfo *si)
794 IndexBaseStorage *st = new(b) IndexBaseStorage();
795 IndexStringStorage *sst = new(si) IndexStringStorage();
796 IndexString *sidx = new IndexString(this, st, sst);
797 if( !sidx ) Err(errNoMemory);
803 new_string_index(const char *nm, int dsz)
805 if( get_index(nm) >= 0 ) Err(errDuplicate);
806 int idx = new_index();
807 if( idx < 0 ) Err(errNoMemory);
808 IndexString *sidx = new IndexString(this, idx, dsz);
813 if( nm ) strncpy(&sidx->st->name[0],nm,sizeof(sidx->st->name));
814 indecies[idx] = sidx;
819 * Db::IndexBinary::keyMap - map function on index pages
822 * s pageId input current id
824 * returns 0 on success,
825 * errNotFound if index is empty
826 * error code otherwise
829 int Db::IndexBinary::
830 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
833 keyBlock *sbb; Page *spp; char *sn;
834 if_err( db->indexRead(s,0,sbb,spp,sn) );
835 if( (r=sbb->right_link()) >= 0 ) {
836 int lkdSz = kdSz + sizeof(pageId);
837 int n = spp->iused() / lkdSz;
838 for( int i=0; i<n; ++i ) {
839 pageId l = readPageId(sn);
840 if_ret( keyMap(l,fn) );
843 if_ret( keyMap(r,fn) );
845 if_err( (this->*fn)(s) );
850 * Db::IndexBinary::setLastKey -- conditionally update lastAccess
853 * s pageId input page of last insert
854 * u pageId input rightLink of page
855 * k int input insert offset in block
858 void Db::IndexBinary::
859 setLastKey(pageId s, pageId u, int k)
861 if( keyInterior < 0 ) {
869 lastAccess.offset = sizeof(keyBlock) + k;
874 * Db::IndexBinary::keyLocate -- generalized database key retreival
877 * s pageId input current id
878 * cmpr CmprFn input key compare function
880 * returns 0 on success
881 * errNotFound if not found,
882 * error code otherwise
885 int Db::IndexBinary::
886 keyLocate(pageId s, CmprFn cmpr)
888 int ret = errNotFound;
889 keyBlock *sbb; Page *spp; char *sn;
890 if_err( db->indexRead(s,0,sbb,spp,sn) );
891 int len = spp->iused();
893 if( sbb->right_link() >= 0 )
894 lkdSz += sizeof(pageId);
898 // binary search block for key
899 while( (r - l) > 1 ) {
902 if( sbb->right_link() >= 0 )
905 int n = cmpr(key,kn);
907 if( relationship >= keyLE && relationship <= keyGE ) {
909 lastAccess.offset = sizeof(keyBlock) + k;
912 if( relationship == keyLE || relationship == keyGT ) n = 1;
914 if( n > 0 ) l = i; else r = i;
918 int k = relationship < keyEQ ? l*lkdSz : (r < len ? r : -1);
919 if( relationship != keyEQ && k >= 0 ) {
920 if( sbb->right_link() >= 0 )
923 lastAccess.offset = sizeof(keyBlock) + k;
927 if( (s = sbb->right_link()) >= 0 ) {
928 if( r < len ) s = readPageId(sn+r);
930 ret = keyLocate(s,cmpr);
931 if( k == 0 ) ret = 0;
938 * Db::IndexBinary::Locate -- interface to generalized database key retreival
941 * op int input key relation in keyLT..keyGT
942 * key void * input retreival key image
943 * cmpr CmprFn input retreival key image
944 * rtnKey void * output resulting key value
945 * rtnData void * output resulting recd value
947 * returns 0 on success
948 * errNotFound on not found,
949 * error code otherwise
952 int Db::IndexBinary::
953 Locate(int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
955 if( st->rootPageId == NIL ) Fail(errNotFound);
956 this->key = (char *)key;
957 if( op == keyEQ ) op = keyLE;
959 if_fail( keyLocate(st->rootPageId,!cmpr ? compare : cmpr) );
961 if_err( db->addrRead_(lastAccess,kp) );
963 memmove(rtnKey,kp,st->keySz);
965 memmove(rtnData,kp+st->keySz,st->dataSz);
966 lastNext = lastAccess;
971 * Db::IndexBinary::chkFind - check lastAccess block for key
974 * key char * input key image
975 * last pgRef * input last position loc
977 * returns 0 if not found
978 * error code otherwise
981 int Db::IndexBinary::
982 chkFind(char *key, pgRef *last)
985 if( s < 0 ) return 0; // must be valid block
986 keyBlock *sbb; Page *spp; char *sn;
987 if_err( db->indexRead(s,0,sbb,spp,sn) );
988 if( sbb->right_link() >= 0 ) return 0; // must be leaf
989 int slen = spp->iused();
990 int k = last->offset - sizeof(keyBlock);
991 if( k < 0 || k > slen ) return 0; // must be inside/end of block
992 lastAccess.id = last->id;
993 int cmpr0 = k>=slen ? -1 : compare(key,sn+k); // compare last access
994 if( cmpr0 ) { // not found here
996 if( cmpr0 < 0 ? (k-=kdSz) < 0 : (k+=kdSz) >= slen ) return 0;
997 int cmpr1 = compare(key,sn+k);
998 if( !cmpr1 ) goto xit; // found here
1000 if( l >= slen ) return 0; // no curr
1001 if( cmpr0 < 0 ) Fail(errNotFound); // btwn prev/curr
1003 cmpr1 = compare(key,sn+k);
1004 if( !cmpr1 ) goto xit; // found here
1005 if( cmpr1 > 0 ) return 0; // key after last in block
1008 if( cmpr0 > 0 ) Fail(errNotFound); // btwn curr/next
1010 cmpr1 = compare(key,sn);
1011 if( !cmpr1 ) goto xit; // found here
1012 if( cmpr1 < 0 ) return 0; // key before first in block
1014 lastAccess.id = NIL; // in block, but not located
1017 lastAccess.offset = sizeof(keyBlock) + k;
1022 * Db::IndexBinary::keyFind -- database unique key retreival
1025 * s pageId input current id
1027 * returns 0 on success
1028 * errNotFound on not found,
1029 * error code otherwise
1032 int Db::IndexBinary::
1036 keyBlock *sbb; Page *spp; char *sn;
1037 if_err( db->indexRead(s,0,sbb,spp,sn) );
1039 if( sbb->right_link() >= 0 )
1040 lkdSz += sizeof(pageId);
1041 int len = spp->iused();
1045 // binary search block for key
1046 while( (r - l) > 1 ) {
1049 if( sbb->right_link() >= 0 )
1050 k += sizeof(pageId);
1052 int n = compare(key,kn);
1055 lastAccess.offset = sizeof(keyBlock) + k;
1058 if( n > 0 ) l = i; else r = i;
1061 if( (s = sbb->right_link()) < 0 ) break;
1062 if( (r*=lkdSz) < len ) s = readPageId(sn+r);
1069 * Db::IndexBinary::Find -- interface to database unique key retreival
1072 * key void * input retreival key image
1073 * rtnData void * output resulting recd value
1075 * returns 0 on success
1076 * errNotFound on not found,
1077 * error code otherwise
1080 int Db::IndexBinary::
1081 Find(void *key, void *rtnData)
1083 if( st->rootPageId == NIL ) Fail(errNotFound);
1084 pageId r = st->rootPageId;
1086 if( CHK cFindCount > 2 ) { // try the easy way
1087 ret = chkFind((char *)key,&lastFind);
1091 if( lastAccess.id < 0 ) { // not found here, but in block
1092 r = lastFind.id; ret = 0; // search just this block
1095 if( !ret ) { // try the hard way
1096 this->key = (char *)key;
1097 if_fail( keyFind(r) );
1101 if_err( db->addrRead_(lastAccess,kp) );
1102 memmove(rtnData,kp+st->keySz,st->dataSz);
1108 int Db::IndexBinary::
1109 chkInsert(void *key, void *data)
1112 char *ky = (char *)key;
1113 pageId s = lastInsert.id;
1114 if( s < 0 || cInsCount < 2 ) return 0; /* >= 2 in a row */
1115 keyBlock *sbb; Page *spp; char *sn;
1116 if_err( db->indexRead(s,1,sbb,spp,sn) );
1117 if( sbb->right_link() >= 0 ) return 0; /* must be exterior */
1118 int slen = spp->iused();
1119 int k = lastInsert.offset - sizeof(keyBlock);
1121 char *kn = kp + kdSz;
1122 char *rp = sn + slen;
1123 int n = compare(ky,kp);
1124 if( n == 0 ) Fail(errDuplicate);
1125 if( n > 0 ) { /* after last one */
1126 if( kn >= rp ) { /* no next one */
1127 if( st->rightHandSide == s )
1132 if( n == 0 ) Fail(errDuplicate);
1134 last = 1; /* before next one */
1137 if( !last ) return 0; /* not a hit */
1138 if( spp->iallocated()-slen < kdSz ) return 0; /* doesnt fit */
1139 if( rp > kn ) memmove(kn+kdSz,kn,rp-kn); /* move data up */
1140 memmove(kn,key,st->keySz);
1141 memmove(kn+st->keySz,data,st->dataSz); /* add new key/data */
1142 spp->iused(slen + kdSz);
1145 lastAccess.offset = sizeof(keyBlock) + kn-sn;
1150 * Db::IndexBinary::keyInsert - add unique key to database
1153 * s pageId input current id
1155 * iky char * input assembled insertion key
1157 * returns 0 on success,
1158 * errDuplicate if key already exists in database
1159 * error code otherwise
1162 int Db::IndexBinary::
1163 keyInsert(pageId s, pageId &t)
1166 /* mark no continued insertion, and check end of search */
1167 /* if not end, read current pageId, search for key */
1168 /* return error if found. */
1169 keyBlock *sbb; Page *spp; char *sn;
1170 if_err( db->indexRead(s,1,sbb,spp,sn) );
1172 pageId u = sbb->right_link();
1174 lkdSz += sizeof(pageId);
1175 int slen = spp->iused();
1176 int keyCount = slen / lkdSz;
1177 int maxKeysInBlock = spp->iallocated() / lkdSz;
1181 /* binary search block for key */
1182 while( (r - l) > 1 ) {
1185 if( sbb->right_link() >= 0 )
1186 k += sizeof(pageId);
1188 int n = compare(key,kn);
1191 lastAccess.offset = sizeof(keyBlock) + k;
1194 if( n > 0 ) l = i; else r = i;
1197 /* not found in this pageId, continue search at lower levels. */
1198 /* process insertion if lower level splits ( t>=0 ). */
1199 int i = sizeof(pageId);
1203 u = readPageId(sn+k);
1204 if_ret( keyInsert(u, t) );
1205 if( t < 0 ) return 0;
1207 writePageId(sn+k,t);
1213 /* if current pageId is not full, insert into this pageId. */
1214 if( keyCount < maxKeysInBlock ) {
1217 memmove(kn+lkdSz,kn,slen-k);
1218 spp->iused(slen + lkdSz);
1219 memmove(kn,&iky[i],lkdSz);
1220 setLastKey(s,u,k); /* save insert loc */
1224 /* current pageId is full, split pageId and promote new parent key */
1225 keyBlock *tbb; Page *tpp; char *tn;
1226 if_err( blockAllocate(t,tbb,tpp,tn) );
1227 /* split point is middle, or end if inserting consecutive on rhs */
1228 int promoted = maxKeysInBlock/2;
1229 if( cInsCount > 2 && s == st->rightHandSide )
1230 promoted = maxKeysInBlock-1;
1232 int tlen = slen - promoted;
1233 if( st->rightHandSide == s ) st->rightHandSide = t;
1235 if( k <= promoted ) { /* at or left of promoted key */
1236 if( k != promoted ) { /* not promoting current key */
1237 kn = sn+promoted-lkdSz;
1238 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1240 memmove(kn+lkdSz,kn,promoted-lkdSz-k);
1241 memmove(kn,&iky[i],lkdSz); /* insert new key */
1242 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1243 setLastKey(s,u,k); /* save insert loc */
1245 memmove(tn,sn+promoted,tlen);
1247 else { /* key is > center */
1249 memmove(&tky[0],kn,lkdSz); /* save promoted key */
1250 int j = k - promoted - lkdSz;
1251 memmove(tn,kn+=lkdSz,j);
1252 memmove(kn=tn+j,&iky[i],lkdSz); /* insert new key */
1253 setLastKey(t,u,j); /* save insert loc */
1254 memmove(kn+=lkdSz,sn+k,slen-k);
1255 memmove(&iky[i],&tky[0],lkdSz); /* promote key */
1258 /* set newly split blocks half full, and set new links. */
1262 tbb->right_link(sbb->right_link());
1263 sbb->right_link(readPageId(&iky[0]));
1264 writePageId(&iky[0],s);
1265 /* if root is splitting, create new left */
1266 if( s == st->rootPageId ) {
1267 keyBlock *ubb; Page *upp; char *un;
1268 if_err( blockAllocate(u,ubb,upp,un) );
1269 memmove(un,sn,slen);
1271 ubb->right_link(sbb->right_link());
1272 writePageId(&iky[0],u);
1273 k = st->keySz + st->dataSz + sizeof(pageId);
1274 memmove(sn,&iky[0],k);
1277 setLastKey(s,t,sizeof(pageId));
1279 /* t >= 0 indicates continued insertion, return success */
1283 void Db::IndexBinary::
1284 makeKey(char *cp,char *key,int l,char *recd,int n)
1286 writePageId(cp,NIL);
1287 memmove(cp+=sizeof(pageId),key,l);
1288 if( recd ) memmove(cp+=l,recd,n);
1292 * Db::Insert - interface to database unique key insertion
1295 * key void * input key image
1296 * data void * input recd value
1298 * returns 0 on success,
1299 * errDuplicate if key already exists in database
1300 * error code otherwise
1303 int Db::IndexBinary::
1304 Insert(void *key, void *data)
1306 if_err( MakeRoot() );
1309 if( CHK cInsCount > 2 ) { // try the easy way
1310 ret = chkInsert(key,data);
1313 if( !ret ) { // try the hard way
1314 makeKey(&iky[0],this->key=(char *)key,st->keySz,(char *)data,st->dataSz);
1315 pageId t = NIL; lastAccess.id = NIL;
1316 if_ret( keyInsert(st->rootPageId, t) );
1318 if( keyInterior > 0 ) lastAccess.id = NIL;
1325 * Db::keyBlockUnderflow -- balences/merges blocks after a deletion
1328 * t int output continuation flag
1329 * lbb keyBlock * input left sibling keyBlock
1330 * p pageId input parent keyBlock id
1331 * pbb keyBlock * input parent keyBlock
1332 * pi int input deletion key offset parent
1335 * returns 0 on success, errorcode otherwise
1338 int Db::IndexBinary::
1339 keyBlockUnderflow(int &t,keyBlock *lbb,pageId p,keyBlock *pbb,int pi)
1345 int psiz = kdSz + sizeof(pageId);
1347 * if deletion was at right end of block
1348 * back up one key otherwise use this key
1350 char *pn = (char *)(pbb+1);
1351 Page *ppp = db->get_page(p);
1352 int plen = ppp->iused();
1353 if( pi >= plen ) { /* read lt side */
1354 r = pbb->right_link();
1357 l = readPageId(pn+pi);
1358 if_err( db->indexRead(l,1,lbb) );
1360 else { /* read rt side */
1361 l = readPageId(pn+pi);
1363 r = i >= plen ? pbb->right_link() : readPageId(pn+i);
1364 if_err( db->indexRead(r,1,rbb) );
1367 int lsz = lbb->right_link() >= 0 ? sizeof(pageId) : 0;
1368 int lkdSz = kdSz + lsz;
1369 char *ln = (char *)(lbb+1);
1370 Page *lpp = db->get_page(l);
1371 int llen = lpp->iused();
1372 char *rn = (char *)(rbb+1);
1373 Page *rpp = db->get_page(r);
1374 int rlen = rpp->iused();
1377 * if both lt&rt blocks+parent key all fit in one block, merge them
1379 int n = llen + rlen;
1380 if( n <= rpp->iallocated()-lkdSz ) { /* merge */
1381 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1382 memmove(ln+llen,pn+pi+sizeof(pageId)-lsz,lkdSz);
1384 memmove(ln+llen,rn,rlen); /* move right to left */
1386 lbb->right_link(rbb->right_link());
1388 memmove(pn+pi,pn+i,plen-i); /* remove parent key */
1390 if( plen == 0 && p == st->rootPageId ) { /* totally mashed root */
1391 if( st->rightHandSide == r ) st->rightHandSide = p;
1392 if_err( blockFree(r) );
1393 memmove(pn,ln,llen); /* copy to parent page */
1394 pbb->right_link(lbb->right_link());
1396 if_err( blockFree(l) );
1399 if( r != pbb->right_link() ) /* not rightLink */
1400 writePageId(pn+pi,l); /* must be next key */
1403 if( st->rightHandSide == r ) st->rightHandSide = l;
1404 if_err( blockFree(r) );
1408 lastAccess.id = NIL;
1409 return 0; /* continue underflow */
1413 if( tsiz < lkdSz ) tsiz = lkdSz; /* slosh threshold */
1414 if( plen > psiz && plen > tsiz ) t = 0; /* discontinue underflow */
1415 if( (i=(n/=2)-llen) >= tsiz || !llen ) { /* slosh left */
1416 writePageId(pn+pi,lbb->right_link()); /* reset parent key left ptr */
1417 k = pi+sizeof(pageId);
1418 memmove(kn=ln+llen,pn+k-lsz,lkdSz); /* move parent to left */
1420 memmove(kn+=lkdSz,rn,i*=lkdSz); /* migrate keys left */
1421 llen += i+lkdSz; kn = rn+i;
1422 if( lbb->right_link() >=0 )
1423 lbb->right_link(readPageId(kn));
1424 writePageId(pn+pi,l); /* change parent key */
1425 memmove(pn+k,kn+lsz,kdSz);
1426 kn += lkdSz; i += lkdSz;
1427 memmove(rn,kn,rlen-=i); /* migrate keys left */
1429 else if( (i=n-rlen) >= tsiz || !rlen ) { /* slosh right */
1430 i /= lkdSz; i *= lkdSz;
1431 memmove(kn=rn+i,rn,rlen);
1432 rlen += i; /* migrate keys right */
1433 writePageId(pn+pi,lbb->right_link());
1434 k = pi+sizeof(pageId);
1435 memmove(kn-=lkdSz,pn+k-lsz,lkdSz); /* move parent key */
1436 i -= lkdSz; n = llen-i;
1437 memmove(rn,kn=ln+n,i);
1438 kn -= lkdSz; /* migrate keys right */
1439 if( lbb->right_link() >=0 )
1440 lbb->right_link(readPageId(kn));
1441 memmove(pn+k,kn+lsz,kdSz); /* change parent key */
1442 writePageId(pn+pi,l);
1447 lastAccess.id = NIL;
1448 lpp->iused(llen); /* update lengths */
1455 * Db::IndexBinary::keyDelete - remove unique key from database
1458 * t int input/output check underflow flag
1459 * kp void * input key image to be removed
1460 * s pageId input current id
1461 * p pageId input parent id
1462 * pbb keyBlock * input parent
1463 * pi int input deletion key offset parent
1465 * returns 0 on success,
1466 * errNotFound if key does not exists in database
1467 * error code otherwise
1470 int Db::IndexBinary::
1471 keyDelete(int &t,void *kp,pageId s,pageId p,keyBlock *pbb,int pi)
1474 keyBlock *sbb; Page *spp; char *sn;
1475 if_err( db->indexRead(s,1,sbb,spp,sn) );
1476 int slen = spp->iused();
1477 t = 1; /* check underflow */
1478 if( idf == 0 ) { /* not interior deletion */
1480 if( sbb->right_link() >= 0 )
1481 lkdSz += sizeof(pageId);
1483 int r = slen / lkdSz;
1484 while( (r - l) > 1 ) { /* binary search for key */
1487 if( sbb->right_link() >= 0 )
1488 k += sizeof(pageId);
1490 int n = compare(key,kn);
1492 if( sbb->right_link() < 0 ) { /* terminal key */
1494 memmove(kn,kn+lkdSz,slen-k);
1495 spp->iused(slen); /* delete key */
1496 lastAccess.id = s; /* lastAccess = key */
1497 lastAccess.offset = sizeof(keyBlock) + k;
1499 else { /* interior key */
1500 k -= sizeof(pageId);
1503 idf = 1; /* interior delete */
1504 if_ret( keyDelete(t,(void *)kn,u,s,sbb,k) );
1508 if( n > 0 ) l = i; else r = i;
1510 if( (u=sbb->right_link()) < 0 ) { /* could not find it */
1514 if( (r *= lkdSz) < slen )
1515 u = readPageId(sn+r);
1516 if_ret( keyDelete(t,kp,u,s,sbb,r) ); /* continue search */
1518 else { /* interior deletion */
1519 if( (u=sbb->right_link()) < 0 ) { /* equivalent exterior key */
1520 int i = slen - kdSz;
1521 char *kn = sn + i; /* translate to interior */
1522 memmove((char *)kp+sizeof(pageId),kn,kdSz);
1523 spp->iused(i); /* remove key */
1525 else { /* continue decending */
1526 if_ret( keyDelete(t,kp,u,s,sbb,slen) );
1530 if( t != 0 ) { /* underflow possible */
1532 t = 0; /* no parent, root */
1534 if_err( keyBlockUnderflow(t,sbb,p,pbb,pi) );
1540 * Db::IndexBinary::Delete - interface to remove unique key
1543 * key void * input key image to be removed
1545 * returns 0 on success,
1546 * errNotFound key does not exists in database
1547 * error code otherwise
1550 int Db::IndexBinary::
1553 if( st->rootPageId == NIL ) Fail(errNotFound);
1554 this->key = (char *)key;
1556 pageId r = st->rootPageId;
1557 pgRef *last = &lastDelete;
1559 if( CHK cDelCount > 2 ) { // try the easy way
1560 if( lastOp == opFind && lastFind.id >= 0 ) {// chk find/delete
1562 if_ret( db->addrRead_(lastFind,kp) );
1563 if( !compare(this->key,kp) ) last = &lastFind;
1565 ret = chkFind(this->key,last);
1568 ret = 0; pageId s = last->id;
1569 keyBlock *sbb; Page *spp; char *sn;
1570 if_err( db->indexRead(s,1,sbb,spp,sn) );
1571 int slen = spp->iused() - kdSz;
1572 if( slen >= kdSz ) { // at least 1 key will remain
1573 if( lastAccess.id >= 0 ) { // found here
1574 spp->iused(slen); // delete
1575 int k = lastAccess.offset - sizeof(keyBlock);
1578 memmove(kp,kp+kdSz,slen-k);
1583 r = s; // search just this block
1587 if( !ret ) { // try the hard way
1588 makeKey(&iky[0],this->key=(char *)key,st->keySz,0,0);
1589 lastAccess.id = NIL; int t = 1;
1590 (void)r; // use full search, r works but is not traditional
1591 if_fail( keyDelete(t,(void *)&iky[0],/*r*/st->rootPageId,0,0,0) );
1592 if_err( UnmakeRoot() );
1600 * Db::IndexBinary::keyFirst - access leftmost key in tree
1603 * s pageId input current id
1605 * returns 0 on success,
1606 * errNotFound if index is empty
1607 * error code otherwise
1610 int Db::IndexBinary::
1615 if_err( db->indexRead(s,0,sbb) );
1616 if( sbb->right_link() < 0 ) break;
1617 char *sn = (char *)(sbb+1);
1622 lastAccess.offset = sizeof(keyBlock);
1627 * Db::IndexBinary::First -- interface to database access leftmost key in tree
1630 * rtnKey void * output resulting key value
1631 * rtnData void * output resulting recd value
1633 * returns 0 on success
1634 * errNotFound on not found,
1635 * error code otherwise
1638 int Db::IndexBinary::
1639 First(void *rtnKey,void *rtnData)
1641 if( st->rootPageId == NIL ) Fail(errNotFound);
1642 if_fail( keyFirst(st->rootPageId) );
1644 if_err( db->addrRead_(lastAccess,kp) );
1646 memmove(rtnKey,kp,st->keySz);
1648 memmove(rtnData,kp+st->keySz,st->dataSz);
1649 lastNext = lastAccess;
1654 * Db::IndexBinary::keyLast - access rightmost key in tree
1657 * s pageId input current id
1659 * returns 0 on success,
1660 * errNotFound if index is empty
1661 * error code otherwise
1664 int Db::IndexBinary::
1669 if_err( db->indexRead(s,0,sbb) );
1670 pageId u = sbb->right_link();
1675 Page *spp = db->get_page(s);
1677 int k = spp->iused() - kdSz;
1678 lastAccess.offset = sizeof(keyBlock) + k;
1683 * Db::IndexBinary::Last -- interface to database access rightmost key in tree
1686 * rtnKey void * output resulting key value
1687 * rtnData void * output resulting recd value
1689 * returns 0 on success
1690 * errNotFound on not found,
1691 * error code otherwise
1694 int Db::IndexBinary::
1695 Last(void *rtnKey,void *rtnData)
1697 if( st->rootPageId == NIL ) Fail(errNotFound);
1698 if_fail( keyLast(st->rootPageId) );
1700 if_err( db->addrRead_(lastAccess,kp) );
1702 memmove(rtnKey,kp,st->keySz);
1704 memmove(rtnData,kp+st->keySz,st->dataSz);
1705 lastNext = lastAccess;
1710 * Db::IndexBinary::Modify -- interface to database record modify
1713 * keyDef index input key definition block
1714 * key void * input retreival key image
1715 * recd void * input new recd value
1717 * returns 0 on success
1718 * errNotFound on not found,
1719 * error code otherwise
1722 int Db::IndexBinary::
1723 Modify(void *key,void *recd)
1725 if_fail( Find(key,0) );
1727 if_err( db->addrWrite_(lastAccess,bp) );
1728 memmove(bp+st->keySz,recd,st->dataSz);
1732 int Db::IndexBinary::
1733 chkNext(pgRef &loc, char *&kp)
1736 if( s < 0 ) return 0; // must be valid
1737 keyBlock *sbb; Page *spp; char *sn;
1738 if_err( db->indexRead(s,0,sbb,spp,sn) );
1739 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1740 int k = loc.offset - sizeof(keyBlock);
1741 if( k < 0 || k >= spp->iused() ) return 0; // curr must be in block
1742 if( (k+=kdSz) >= spp->iused() ) return 0; // next must be in block
1745 lastAccess.offset = sizeof(keyBlock) + k;
1749 int Db::IndexBinary::
1750 keyNext(pgRef &loc, char *kp)
1752 this->key = kp; relationship = keyGT;
1753 if_fail( keyLocate(st->rootPageId,compare) );
1758 * Db::IndexBinary::Next -- interface to sequential database key retreival
1761 * loc pgRef & input last known retreival key
1762 * rtnKey void * output resulting key value
1763 * rtnData void * output resulting recd value
1765 * returns 0 on success
1766 * error code otherwise
1769 int Db::IndexBinary::
1770 Next(pgRef &loc,void *rtnKey,void *rtnData)
1772 if( st->rootPageId == NIL ) Fail(errNotFound);
1774 int ret = CHK chkNext(loc,kp); // try the easy way
1778 if( !st->keySz && rtnKey ) // rtnKey is rKey class
1779 ky = (char *)rtnKey;
1781 if_err( db->addrRead_(loc,ky) );
1782 if_ret( keyNext(loc, ky) ); // try the hard way
1784 if_err( db->addrRead_(lastAccess,kp) );
1786 memmove(rtnKey,kp,st->keySz);
1788 memmove(rtnData,kp+st->keySz,st->dataSz);
1793 void Db::IndexBinary::
1796 relationship = keyEQ;
1802 IndexBinary(Db *zdb, int zidx, int ksz, int dsz, CmprFn cmpr)
1803 : IndexBase(zdb, idxBin, zidx, ksz, dsz)
1805 compare = !cmpr && !ksz ? cmprKey : cmpr;
1806 bst = new(st+1) IndexBinaryStorage(zdb->findCmprFn(compare));
1807 iky = new char[st->blockSize/2+1];
1808 tky = new char[st->blockSize/2+1];
1813 IndexBinary(Db *zdb, IndexBaseStorage *b, IndexBinaryStorage *d)
1814 : IndexBase(zdb, *b)
1816 bst = new(d) IndexBinaryStorage();
1817 compare = !bst->cmprId && !b->keySz ? cmprKey : cmprFns[bst->cmprId];
1818 iky = new char[st->blockSize/2+1];
1819 tky = new char[st->blockSize/2+1];
1824 IndexBinary(IndexBase *ib, IndexBaseStorage *b, IndexBinaryStorage *d)
1825 : IndexBase(ib->db, *b)
1827 bst = new(d) IndexBinaryStorage();
1828 compare = !bst->cmprId && !ib->st->keySz ? cmprKey : cmprFns[bst->cmprId];
1840 int Db::IndexString::
1841 keyMap(pageId s, int(IndexBase::*fn)(pageId id))
1844 keyBlock *sbb; Page *spp; char *sn;
1845 if_err( db->indexRead(s,0,sbb,spp,sn) );
1846 unsigned char *lp = (unsigned char *)sn;
1847 unsigned char *rp = lp + spp->iused();
1848 if( (r=sbb->right_link()) >= 0 ) {
1850 pageId l = getPageId(lp);
1851 lp += st->dataSz; ++lp;
1852 while( *lp++ != 0 );
1853 if_ret( keyMap(l,fn) );
1855 if_ret( keyMap(r,fn) );
1857 if_err( (this->*fn)(s) );
1862 * Db::IndexString::kpress -- compresses kp with prefix
1865 * kp unsigned char * input key string to compress
1866 * lp unsigned char * input left prefix
1867 * cp unsigned char * output compressed string
1869 * returns length of compressed string cp
1870 * safe to kpress buffer in place over cp or lp
1873 int Db::IndexString::
1874 kpress(unsigned char *kp, unsigned char *lp, unsigned char *cp)
1878 while( *kp == *lp ) {
1882 ch = *kp++; *cp++ = n;
1890 * divide tbfr length n into sectors l and s with rlink r
1891 * if i >= 0 then the insert point is i
1893 * promoted key in tky, return left sector
1896 int Db::IndexString::
1897 split(int n, int i, pageId s, pageId &l, pageId r)
1899 unsigned char lky[keysz];
1900 unsigned char *bp = &tbfr[0];
1901 unsigned char *lp = bp;
1902 unsigned char *rp = bp + n;
1903 unsigned char *kp = lp;
1904 unsigned char *tp = 0;
1905 pageId t = NIL, u = NIL;
1906 keyBlock *lbb; Page *lpp; char *ln;
1908 db->indexRead(l,1,lbb,lpp,ln) :
1909 blockAllocate(l,lbb,lpp,ln) );
1911 int blen = lpp->iallocated()-1 - st->dataSz;
1912 if( r >= 0 ) blen -= sizeof(pageId);
1913 if( n > blen ) n = blen;
1914 /* split point is middle, or end if inserting consecutive on rhs */
1916 if( i >= 0 && cInsCount > 2 && s == st->rightHandSide )
1918 unsigned char *mp = lp + promoted;
1919 // get promoted key in lky
1922 // expand key to lky
1923 if( r >= 0 ) u = getPageId(lp);
1924 tp = lp; lp += st->dataSz;
1925 for( i=*lp++; (lky[i++]=*lp++) != 0; );
1926 // stop copy if past mid pt and
1927 // remaining bytes + expanded key bytes fit in block
1929 if( kp != &tbfr[0] && lp >= mp && rlen+i <= blen )
1931 // copy promoted data/key
1932 unsigned char *tkp = tky;
1933 umemmove(tkp, tp, st->dataSz);
1935 // save end of left block, move to next key
1936 kp = bp; bp = lp; t = u;
1940 // must be at least 3 keys in tbfr (left,parent,right)
1941 if( !n || bp >= rp ) Err(errCorrupt);
1942 memmove(ln,&tbfr[0],n);
1945 // add first key in next block, order of moves important
1946 // memmove may be move up since key may expand past split
1948 if( r >= 0 ) putPageId(mp,u);
1949 umemmove(mp, tp, st->dataSz); // data recd
1950 *mp++ = 0; // first prefix is zero length
1951 memmove(mp+i, lp, rlen); // rest of block
1952 memmove(mp, &lky[0], i); // expanded key
1954 int slen = mp - &tbfr[0];
1956 * if root is splitting, allocate new right sibling
1957 * and set root to contain only new parent key.
1959 keyBlock *sbb; Page *spp; char *sn;
1960 if_err( db->indexRead(s,1,sbb,spp,sn) );
1961 if( s == st->rootPageId ) {
1962 keyBlock *rbb; Page *rpp; char *rn;
1963 if_err( blockAllocate(u,rbb,rpp,rn) );
1966 memmove(rn,&tbfr[0],slen);
1968 if( st->rightHandSide == s ) st->rightHandSide = r;
1969 mp = (unsigned char *)sn;
1971 umemmove(mp, kp=&tky[0], st->dataSz);
1973 for( *mp++=0; (*mp++=*kp++)!=0; );
1974 slen = mp - (unsigned char *)sn;
1977 memmove(sn, &tbfr[0], slen);
1983 int Db::IndexString::
1984 chkInsert(void *key, void *data)
1986 unsigned char *ky = (unsigned char *)key;
1987 pageId s = lastInsert.id;
1988 if( s < 0 ) return 0; // must be valid
1989 int n = ustrcmp(&lastInsKey[0],ky);
1990 if( !n ) Fail(errDuplicate);
1991 keyBlock *sbb; Page *spp; char *sn;
1992 if_err( db->indexRead(s,0,sbb,spp,sn) );
1993 if( sbb->right_link() >= 0 ) return 0; // must be leaf
1994 int slen = spp->iused();
1995 int k = lastInsert.offset - sizeof(keyBlock);
1996 if( k < 0 || k >= slen ) return 0; // must be inside block
1997 unsigned char *bp = (unsigned char *)sn;
1998 unsigned char *lp = bp;
1999 unsigned char *rp = bp + k; // beginning to current
2000 unsigned char *tp = bp + slen;
2001 unsigned char nky[keysz]; nky[0] = 0;
2002 if( n < 0 ) { // past here
2003 ustrcpy(&nky[0],&lastInsKey[0]);
2006 else { // before here
2007 n = ustrcmp(bp+st->dataSz+1,ky);
2008 if( !n ) Fail(errDuplicate);
2009 if( n > 0 ) return 0; // before first
2010 unsigned char rb; rp += st->dataSz; // move past curr data
2011 for( int i=*rp++; (rb=*rp++) == lastInsKey[i] && rb != 0; ++i );
2012 if( rb ) return 0; // must match curr
2014 unsigned char lky[keysz]; lky[0] = 0;
2015 unsigned char *kp; n = 0;
2016 while( (kp=lp) < rp ) {
2017 lp += st->dataSz; // scan next
2018 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2019 n = ustrcmp(ky,&nky[0]);
2020 if( !n ) Fail(errDuplicate);
2021 if( n < 0 ) break; // btwn last,next
2022 ustrcpy(&lky[0],&nky[0]);
2024 if( lp >= tp && s != st->rightHandSide ) return 0;// not found, not rhs
2025 // recompress and compute storage demand
2026 int llen = kp - (unsigned char *)sn; // left
2027 int lsz = kpress(ky, &lky[0], &lky[0]); // lky = kpress new key
2028 int mlen = st->dataSz + lsz;
2029 int klen = mlen; // new key demand
2031 if( kp < rp ) { // if next key exists
2032 nsz = kpress(&nky[0], ky, &nky[0]);
2033 mlen += st->dataSz + nsz; // new+next key demand
2035 int rlen = tp - lp; // right
2036 int blen = llen + mlen + rlen; // left + demand + right
2038 if( blen > spp->iallocated() ) return 0; // if insertion wont fit
2040 spg->set_flags(fl_wr);
2041 if( kp < rp ) { // insert not at end of block
2042 memmove(kp+mlen, lp, rlen); // move right up
2043 memmove(kp+klen, kp, st->dataSz); // move next data up
2044 memmove(kp+klen+st->dataSz,&nky[0],nsz); // kpress next key
2046 k = kp - (unsigned char *)sn;
2047 lastAccess.id = lastInsert.id;
2048 lastAccess.offset = sizeof(keyBlock) + k;
2049 ustrcpy(&lastAccKey[0],ky);
2050 umemmove(kp,(unsigned char *)data,st->dataSz); // insert new key
2051 umemmove(kp,&lky[0],lsz);
2057 * insert - add node to compressed b-tree.
2058 * entry - tky - data/key to add
2059 * s - root of tree/subtree
2060 * t - NIL/discontinue or tky->left_link
2062 int Db::IndexString::
2063 keyInsert(pageId &t, pageId s)
2065 unsigned char lky[keysz]; // last key
2066 unsigned char nky[keysz]; // next key
2067 keyBlock *sbb; Page *spp; char *sn;
2068 if_err( db->indexRead(s,1,sbb,spp,sn) );
2069 t = NIL; // set discontinue insertion
2071 pageId r = sbb->right_link();
2073 int n = spp->iused();
2074 unsigned char *lp = (unsigned char *)sn; // rest of block
2075 unsigned char *rp = lp + n; // end of block
2076 unsigned char *kp = 0; // insertion point
2077 unsigned char *tkp = &tky[st->dataSz]; // tky = data/key
2079 while( (kp=lp) < rp ) { // search
2080 if( r >= 0 ) l = getPageId(lp);
2082 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2083 n = ustrcmp(&tkp[0], &nky[0]);
2085 if( n == 0 ) Fail(errDuplicate);
2088 // if non-terminal block, continue search at lower levels
2089 // if lower level splits( t>=0 ), process insertion
2091 if_ret( keyInsert(t, kp>=rp ? r : l) );
2092 if( t < 0 ) return 0; // discontinue
2095 // recompress and compute storage demand
2096 int llen = kp - (unsigned char *)sn; // left
2097 int lsz = kpress(tkp, &lky[0], &lky[0]); // lky = kpress new key
2098 int dsz = st->dataSz;
2099 if( r >= 0 ) dsz += sizeof(pageId); // link+data size
2100 int mlen = dsz + lsz;
2101 int klen = mlen; // new key demand
2103 if( kp < rp ) { // if next key exists
2104 nsz = kpress(&nky[0], &tkp[0], &nky[0]);
2105 mlen += dsz + nsz; // new+next key demand
2107 int rlen = rp - lp; // right
2108 int blen = llen + mlen + rlen; // left + demand + right
2110 if( blen <= spp->iallocated() ) { // if insertion will fit
2111 if( kp < rp ) { // insert not at end of block
2112 memmove(kp+mlen, lp, rlen); // move right up
2113 memmove(kp+klen, kp, dsz); // move next link/data up
2114 memmove(kp+klen+dsz,&nky[0],nsz); // kpress next key
2116 if( r >= 0 ) putPageId(kp, t);
2117 if( lastAccess.id < 0 ) { // update lastAccess
2119 int k = kp - (unsigned char *)sn;
2120 lastAccess.offset = sizeof(keyBlock) + k;
2121 ustrcpy(&lastAccKey[0],&tkp[0]);
2123 umemmove(kp,&tky[0],st->dataSz); // insert new key
2124 memmove(kp,&lky[0],lsz);
2129 unsigned char *bp = &tbfr[0]; // overflowed, insert to tbfr
2130 umemmove(bp,(unsigned char *)sn,llen);
2131 if( r >= 0 ) putPageId(bp, t);
2132 umemmove(bp,&tky[0],st->dataSz); // insert new key
2133 umemmove(bp,&lky[0],lsz);
2134 if( kp < rp ) { // insert not at end of block
2135 umemmove(bp,kp,dsz);
2136 umemmove(bp,&nky[0],nsz); // kpress next key
2137 umemmove(bp,lp,rlen); // copy rest of block
2140 if_err( split(blen,llen,s,t,r) ); // split block, and continue
2145 int Db::IndexString::
2146 Insert(void *key,void *data)
2148 if_err( MakeRoot() );
2150 if( CHK cInsCount > 2 ) { // try the easy way
2151 ret = chkInsert(key,data);
2154 if( !ret ) { // try the hard way
2155 unsigned char *tp = &tky[0];
2156 umemmove(tp,(unsigned char *)data,st->dataSz);
2157 ustrcpy(tp,(unsigned char*)key);
2158 pageId t = NIL; lastAccess.id = NIL;
2159 if_ret( keyInsert(t, st->rootPageId) );
2161 ustrcpy(&lastInsKey[0],&lastAccKey[0]);
2168 int Db::IndexString::
2172 if_err( db->indexRead(s,0,sbb) );
2173 char *sn = (char *)(sbb+1);
2175 while( sbb->right_link() >= 0 ) {
2177 if_err( db->indexRead(s,0,sbb) );
2178 sn = (char *)(sbb+1);
2182 lastAccess.offset = sizeof(keyBlock);
2183 unsigned char *kp = (unsigned char *)sn;
2184 ustrcpy(&lastAccKey[0],kp+st->dataSz+1);
2188 int Db::IndexString::
2189 First(void *rtnKey,void *rtnData)
2191 if( st->rootPageId == NIL ) Fail(errNotFound);
2192 if_ret( keyFirst(st->rootPageId) );
2194 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2197 if_err( db->addrRead_(lastAccess,kp) );
2198 memmove(rtnData,kp,st->dataSz);
2200 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2201 lastNext = lastAccess;
2205 int Db::IndexString::
2209 if_err( db->indexRead(s,0,sbb) );
2212 while( (u=sbb->right_link()) >= 0 ) {
2213 if_err( db->indexRead(s=u,0,sbb) );
2216 unsigned char lky[keysz];
2217 Page *spp = db->get_page(s);
2218 char *sn = (char *)(sbb+1);
2219 unsigned char *lp = (unsigned char *)sn;
2220 unsigned char *rp = lp + spp->iused();
2221 unsigned char *kp = 0;
2225 kp = lp; lp += st->dataSz;
2226 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2229 if( !kp ) Fail(errNotFound);
2230 int k = kp - (unsigned char *)sn;
2232 lastAccess.offset = sizeof(keyBlock) + k;
2233 ustrcpy(&lastAccKey[0],&lky[0]);
2237 int Db::IndexString::
2238 Last(void *rtnKey,void *rtnData)
2240 if( st->rootPageId == NIL ) Fail(errNotFound);
2241 if_ret( keyLast(st->rootPageId) );
2243 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2246 if_err( db->addrRead_(lastAccess,kp) );
2247 memmove(rtnData,kp,st->dataSz);
2249 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2250 lastNext = lastAccess;
2254 int Db::IndexString::
2255 chkFind(char *key, pgRef *last, unsigned char *lkey, unsigned char *lkp)
2257 pageId s = last->id;
2258 if( s < 0 ) return 0; // must be valid block
2259 keyBlock *sbb; Page *spp; char *sn;
2260 if_err( db->indexRead(s,0,sbb,spp,sn) );
2261 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2262 int slen = spp->iused();
2263 int k = last->offset - sizeof(keyBlock);
2264 if( k < 0 || k > slen ) return 0; // must be inside/end of block
2265 unsigned char *ky = (unsigned char *)key;
2266 unsigned char *bp = (unsigned char *)sn;
2267 unsigned char *lp = bp;
2268 unsigned char *rp = bp + k; // current or end
2269 unsigned char *tp, *kp = 0;
2272 unsigned char rb; tp = rp; rp += st->dataSz; // move past curr data
2273 for( int i=*rp++; (rb=*rp++) == lkey[i] && rb != 0; ++i );
2274 if( rb ) return 0; // must match curr
2275 n = ustrcmp(ky,&lkey[0]);
2276 if( !n && !lkp ) { kp = tp; goto xit; } // found here, and no last key
2278 unsigned char lky[keysz];
2279 if( !lkp ) lkp = &lky[0]; // need lky buffer
2280 if( n > 0 ) { // past here, use next to end
2281 lp = rp; rp = bp + slen;
2282 ustrcpy(&lkp[0],&lkey[0]);
2286 tp = lp+st->dataSz+1;
2287 n = ustrcmp(tp,ky); // try first
2288 if( n > 0 ) return 0; // before first
2290 ustrcpy(&lkp[0],tp);
2292 kp = lp; // found here
2294 unsigned char nky[keysz];
2295 ustrcpy(&nky[0],&lkp[0]);
2296 while( !kp && lp < rp ) {
2297 tp = lp; lp += st->dataSz;
2298 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2299 n = ustrcmp(ky,&nky[0]);
2300 if( n < 0 ) Fail(errNotFound); // btwn prev,next
2302 ustrcpy(lkp,&nky[0]);
2304 kp = tp; // found here
2306 if( !kp ) return 0; // not in block
2310 lastAccess.offset = sizeof(keyBlock) + k;
2311 ustrcpy(&lastAccKey[0],ky);
2315 int Db::IndexString::
2318 unsigned char nky[keysz];
2319 unsigned char *ky = &key[0];
2321 for( pageId s=st->rootPageId; s>=0; ) {
2322 keyBlock *sbb; Page *spp; char *sn;
2323 if_err( db->indexRead(s,0,sbb,spp,sn) );
2324 unsigned char *lp = (unsigned char *)sn;
2325 unsigned char *rp = lp + spp->iused();
2327 pageId r = sbb->right_link();
2329 if( r >= 0 ) l = getPageId(lp);
2330 unsigned char *kp = lp;
2332 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2333 int n = ustrcmp(ky, &nky[0]);
2336 int k = kp - (unsigned char *)sn;
2337 lastAccess.offset = sizeof(keyBlock) + k;
2338 ustrcpy(&lastAccKey[0],ky);
2341 if( n < 0 ) { r = l; break; }
2348 int Db::IndexString::
2349 Find(void *key,void *rtnData)
2351 if( st->rootPageId == NIL ) Fail(errNotFound);
2353 if( CHK cFindCount > 2 ) { // try the easy way
2354 ret = chkFind((char *)key,&lastFind,&lastFndKey[0]);
2357 if( !ret ) { // try the hard way
2358 lastAccess.id = NIL;
2359 ustrcpy(this->key,(unsigned char *)key);
2360 if_fail( keyFind() );
2364 if_err( db->addrRead_(lastAccess,kp) );
2365 memmove(rtnData,kp,st->dataSz);
2367 ustrcpy(&lastFndKey[0],&lastAccKey[0]);
2368 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2375 * remap sectors on underflow
2376 * s - parent sector, t - pageId if overflowed
2377 * k - parent key offset
2378 * updates tky with parent data/key
2381 int Db::IndexString::
2382 keyUnderflow(pageId s, pageId &t, int k)
2384 unsigned char lky[keysz], nky[keysz];
2385 keyBlock *sbb; Page *spp; char *sn;
2386 if_err( db->indexRead(s,1,sbb,spp,sn) );
2387 unsigned char *lp = (unsigned char *)sn;
2388 unsigned char *rp = lp + spp->iused();
2389 unsigned char *kp = lp + k;
2390 unsigned char *mp = &tbfr[0];
2391 // mark continue underlow
2393 pageId r = sbb->right_link();
2394 lky[0] = nky[0] = 0;
2395 // if underflow, copy to parent key offset
2397 putPageId(mp,getPageId(lp));
2398 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2399 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2401 // read l, key, r -- remove key from parent block
2402 unsigned char *tp = &tky[0];
2403 pageId l = getPageId(lp);
2404 umemmove(tp,lp,st->dataSz); lp += st->dataSz;
2405 ustrcpy(tp,&lky[0]);
2406 for( int i=*lp++; (tp[i]=*lp++) != 0; ++i );
2408 putPageId(mp, r=getPageId(lp));
2409 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2410 ustrcpy(&nky[0],tp);
2411 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2412 mp += kpress(&nky[0],&lky[0],mp);
2413 umemmove(mp,lp,rp-lp);
2415 int n = mp-&tbfr[0];
2416 memmove(sn,&tbfr[0],n);
2419 keyBlock *lbb; Page *lpp; char *ln;
2420 if_err( db->indexRead(l,1,lbb,lpp,ln) );
2421 lp = (unsigned char *)ln;
2422 rp = lp + lpp->iused();
2423 pageId m = lbb->right_link();
2424 mp = &tbfr[0]; nky[0] = 0;
2426 if( m >= 0 ) putPageId(mp,getPageId(lp));
2427 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2428 for( int i=*mp++=*lp++; (nky[i]=*mp++=*lp++) != 0; ++i );
2430 // parent key to tbfr
2431 if( m >= 0 ) putPageId(mp,m);
2432 umemmove(mp,&tky[0],st->dataSz);
2433 mp += kpress(tp,&nky[0],mp);
2435 keyBlock *rbb; Page *rpp; char *rn;
2436 if_err( db->indexRead(r,1,rbb,rpp,rn) );
2437 lp = (unsigned char *)rn;
2438 rp = lp + rpp->iused();
2439 m = rbb->right_link();
2441 if( m >= 0 ) putPageId(mp,getPageId(lp));
2442 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2443 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2444 mp += kpress(&nky[0],tp,mp);
2445 umemmove(mp,lp,rp-lp);
2448 if( n > rpp->iallocated() ) {
2449 // tbfr too big for r
2450 if_err( split(n, -1, r, l, m) );
2451 t = l; // start overflow
2454 if( s == st->rootPageId && !spp->iused() ) {
2455 // if root, delete r
2456 memmove(sn,&tbfr[0],n);
2459 if_err( blockFree(r) );
2460 st->rightHandSide = s;
2463 // update r with tbfr
2464 memmove(rn,&tbfr[0],n);
2468 if_err( blockFree(l) );
2474 * remap sectors on overflow
2475 * s - parent sector, k - parent key offset, o - insertion offset
2476 * t parent link on input,
2477 * t == DDONE on output, end overflow
2478 * tky with parent data/key
2480 int Db::IndexString::
2481 keyOverflow(pageId s, pageId &t, int k, int o)
2483 unsigned char lky[keysz], nky[keysz];
2484 keyBlock *sbb; Page *spp; char *sn;
2485 if_err( db->indexRead(s,1,sbb,spp,sn) );
2486 unsigned char *lp = (unsigned char *)sn;
2487 unsigned char *rp = lp + spp->iused();
2488 unsigned char *kp = lp + k;
2489 unsigned char *mp = &tbfr[0];
2490 pageId r = sbb->right_link();
2491 lky[0] = nky[0] = 0;
2492 // copy parent up to parent key to tbfr
2494 putPageId(mp,getPageId(lp));
2495 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2496 for( int i=*mp++=*lp++; (lky[i]=*mp++=*lp++) != 0; ++i );
2498 // if at insertion point, add new key
2501 unsigned char *tp = &tky[0];
2502 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2503 mp += kpress(kp=tp, &lky[0], mp);
2507 // copy parent key, if insertion - add tky, copy rest
2509 ustrcpy(&nky[0],kp);
2510 putPageId(mp,getPageId(lp));
2511 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2512 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2513 mp += kpress(&lky[0],&nky[0],mp);
2516 unsigned char *tp = &tky[0];
2517 umemmove(mp,tp,st->dataSz); tp += st->dataSz;
2518 mp += kpress(tp, &lky[0], mp);
2519 ustrcpy(&lky[0],tp);
2522 putPageId(mp,getPageId(lp));
2523 umemmove(mp,lp,st->dataSz); lp += st->dataSz;
2524 ustrcpy(&nky[0],&lky[0]);
2525 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2526 mp += kpress(&lky[0],&nky[0],mp);
2528 umemmove(mp,lp,rp-lp);
2530 int n = mp - &tbfr[0];
2531 // if overflow, split sector and promote new parent key
2532 if( n > spp->iallocated() ) {
2533 t = NIL; lastAccess.id = NIL;
2534 if_ret( split(n, -1, s, t, r) );
2540 memmove(sn,&tbfr[0],n);
2545 int Db::IndexString::
2546 keyRemap(pageId s, pageId &t, int k, int o)
2549 if_err( keyUnderflow(s,t,k) );
2553 if_err( keyOverflow(s,t,k,o) );
2558 * delete or replace key
2559 * s - starting sector, nky - replacement key if != 0
2560 * t - returned state -2/done,-1/underflow,pageid/overflow
2563 int Db::IndexString::
2564 keyDelete(pageId s, pageId &t)
2566 unsigned char lky[keysz], nky[keysz];
2567 unsigned char *ky = &key[0];
2569 keyBlock *sbb; Page *spp; char *sn;
2570 if_err( db->indexRead(s,1,sbb,spp,sn) );
2571 unsigned char *bp = (unsigned char *)sn;
2572 unsigned char *lp = bp;
2573 unsigned char *rp = lp + spp->iused();
2574 unsigned char *kp = lp;
2575 unsigned char *mp = 0;
2578 pageId r = sbb->right_link();
2579 lky[0] = nky[0] = 0;
2580 // mark delete done and search
2582 while( (kp=lp) < rp ) {
2584 if( r >= 0 ) l = getPageId(lp);
2586 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2587 if( idf <= 0 && (n=ustrcmp(ky,&nky[0])) <= 0)
2589 ustrcpy(&lky[0],&nky[0]);
2593 if( !idf ) Fail(errNotFound);
2594 // found greatest node less than ky, delete and return with underflow
2595 // deleted key must be on rt side of block to be next to interior key
2596 unsigned char *dp = &dky[0];
2597 umemmove(dp,mp,st->dataSz);
2598 ustrcpy(dp,&nky[0]);
2600 t = NIL; // start underflow
2603 // not found in block, try next level
2604 int status = keyDelete(kp<rp ? l : r,t);
2607 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2610 else if( r < 0 || idf < 0 ) { // found here
2613 int dsz = st->dataSz;
2614 if( r >= 0 ) dsz += sizeof(pageId);
2615 unsigned char *tp = &lky[0];
2617 if( idf < 0 ) { // translating data/key
2618 lsz = kpress(&dky[st->dataSz],tp,tp); // kpress dky to lky
2620 tp = &dky[st->dataSz];
2623 unsigned char *np = lp;
2626 lp += dsz; // get next key
2627 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2628 if( r < 0 ) ustrcpy(&lastAccKey[0],&nky[0]);// new curr key
2629 nsz = kpress(&nky[0],tp,&nky[0]);
2633 int slen = llen + mlen + rlen;
2634 unsigned char *sp = &tbfr[0];
2636 if( (llen > 0 || rlen > 0) && // at least 3 data/keys
2637 slen <= spp->iallocated() ) { // and fits in block
2638 sp = bp; mp = kp; // edit in place
2641 umemmove(mp,bp,llen); // use tbfr, copy left
2642 if( np < rp ) { // next exists
2644 unsigned char *ip = mp;
2645 if( idf < 0 ) ip += dsz + lsz;
2646 if( sp == bp && tp > lp ) {
2647 memmove(tp,lp,rlen); // up copy/move right
2648 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2649 memmove(ip,&nky[0],nsz);
2652 memmove(ip,np,dsz); ip += dsz; // next link/data/key
2653 memmove(ip,&nky[0],nsz);
2655 memmove(tp,lp,rlen); // down copy/move right
2658 if( idf < 0 ) { // move exterior key
2659 if(r >= 0) putPageId(mp,getPageId(kp));
2660 umemmove(mp,&dky[0],st->dataSz); // add dky data/key
2661 umemmove(mp,&lky[0],lsz);
2663 if( sp == bp ) { // in place
2667 lastAccess.id = s; // position to curr
2668 lastAccess.offset = sizeof(keyBlock) + llen;
2669 ustrcpy(&lastAccKey[0],ky);
2674 if( slen > spp->iallocated() ) {
2675 if_ret( split(slen, -1, s, t, r) ); // continue with overflow
2678 memmove(sn,&tbfr[0],slen); // continue with underflow
2684 // deleting nonterminal key, translate to greatest node less than ky
2686 int status = keyDelete(l,t);
2689 if_ret( keyRemap(s,t,mp-bp,kp-bp) );
2695 int Db::IndexString::
2698 if( st->rootPageId == NIL ) Fail(errNotFound);
2699 unsigned char lky[keysz]; lky[0] = 0;
2700 pgRef *last = &lastDelete;
2701 unsigned char *lkey = &lastDelKey[0];
2703 if( CHK cDelCount > 2 ) { // try the easy way
2704 if( lastFind.id >= 0 ) { // chk find/delete
2705 if( !ustrcmp((unsigned char *)key,&lastFndKey[0]) ) {
2706 last = &lastFind; lkey = &lastFndKey[0];
2709 ret = chkFind((char *)key,last,lkey,&lky[0]);
2713 pageId s = lastAccess.id;
2714 keyBlock *sbb; Page *spp; char *sn;
2715 if_err( db->indexRead(s,1,sbb,spp,sn) );
2716 unsigned char *bp = (unsigned char *)sn;
2717 int slen = spp->iused();
2718 int llen = lastAccess.offset - sizeof(keyBlock);
2719 unsigned char *kp = bp + llen; // curr data/key
2720 unsigned char *lp = kp;
2721 unsigned char *rp = bp + slen;
2723 lp += st->dataSz; ++lp; while( *lp++ ); // skip curr
2727 unsigned char *np = lp;
2728 unsigned char nky[keysz]; nky[0] = 0;
2730 lp += st->dataSz; // get next key
2731 ustrcpy(&nky[0],(unsigned char *)key);
2732 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2733 nsz = kpress(&nky[0],&lky[0],&lky[0]);
2734 mlen += st->dataSz + nsz;
2737 slen = llen + mlen + rlen;
2738 if( (llen > 0 || rlen > 0 ) && // at least 3 data/keys
2739 slen <= spp->iallocated() ) { // and fits in block
2740 if( np < rp ) { // right exists
2741 unsigned char *tp = kp + mlen;
2742 umemmove(kp,np,st->dataSz); // next data/key
2743 memmove(kp,&lky[0],nsz);
2744 if( tp != lp && rlen > 0 )
2745 memmove(tp,lp,rlen); // move right down
2748 ustrcpy(&lastAccKey[0],&nky[0]); // new curr key
2753 if( !ret ) { // try the hard way
2754 ustrcpy(&this->key[0],(unsigned char*)key);
2755 lastAccess.id = NIL;
2756 pageId t = NIL; idf = 0;
2757 if_ret( keyDelete(st->rootPageId,t) );
2760 if_ret( keyDelete(st->rootPageId,t) );
2762 if_err( UnmakeRoot() );
2764 ustrcpy(&lastDelKey[0],&lastAccKey[0]);
2770 int Db::IndexString::
2771 keyLocate(pageId s, int &t, CmprFn cmpr)
2773 unsigned char lky[keysz], nky[keysz];
2774 unsigned char *ky = &key[0];
2775 keyBlock *sbb; Page *spp; char *sn;
2776 if_err( db->indexRead(s,0,sbb,spp,sn) );
2778 pageId r = sbb->right_link();
2779 unsigned char *lp = (unsigned char *)sn;
2780 unsigned char *rp = lp + spp->iused();
2781 unsigned char *kp = 0, *mp = 0;
2782 lky[0] = nky[0] = 0;
2785 while( (kp=lp) < rp ) {
2786 if( r >= 0 ) l = getPageId(lp);
2787 kp = lp; lp += st->dataSz;
2788 for( int i=*lp++; (nky[i]=*lp++) != 0; ++i );
2789 int n = cmpr((char *)ky,(char *)&nky[0]);
2790 if( relationship <= keyEQ ? n <= 0 : n < 0 ) break;
2791 ustrcpy(&lky[0],&nky[0]);
2796 int status = keyLocate(kp<rp ? l : r,t,cmpr);
2797 if( !t && status == errNotFound ) status = 0;
2802 if( relationship == keyLT || relationship == keyGE ) {
2803 if( !mp ) Fail(errNotFound);
2805 ustrcpy(&lastAccKey[0],&lky[0]);
2808 if( kp >= rp ) Fail(errNotFound);
2809 ustrcpy(&lastAccKey[0],&nky[0]);
2812 int k = kp - (unsigned char *)sn;
2813 lastAccess.offset = sizeof(keyBlock) + k;
2819 int Db::IndexString::
2820 Locate(int op,void *key,CmprFn cmpr,void *rtnKey,void *rtnData)
2822 if( st->rootPageId == NIL ) Fail(errNotFound);
2823 ustrcpy(this->key,(unsigned char *)key);
2824 if( op == keyEQ ) op = keyLE;
2825 relationship = op; int t = 0;
2826 if_fail( keyLocate(st->rootPageId,t,!cmpr ? cmprStr : cmpr) );
2828 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2831 if_err( db->addrRead_(lastAccess,kp) );
2832 memmove(rtnData,kp,st->dataSz);
2834 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2835 lastNext = lastAccess;
2840 int Db::IndexString::
2841 Modify(void *key,void *recd)
2843 if_fail( Find(key,0) );
2845 if_err( db->addrWrite_(lastAccess,bp) );
2846 memmove(bp,recd,st->dataSz);
2850 int Db::IndexString::
2853 if( &loc != &lastNext ) return 0; // must be default loc
2855 if( s < 0 ) return 0; // must be valid
2856 keyBlock *sbb; Page *spp; char *sn;
2857 if_err( db->indexRead(s,0,sbb,spp,sn) );
2858 if( sbb->right_link() >= 0 ) return 0; // must be leaf
2859 int slen = spp->iused();
2860 int k = loc.offset - sizeof(keyBlock);
2861 if( k < 0 || k >= slen ) return 0; // must be inside block
2862 unsigned char *bp = (unsigned char *)sn;
2863 unsigned char *lp = bp + k;
2864 unsigned char *rp = bp + slen;
2865 unsigned char lb; lp += st->dataSz; // move past curr data
2866 for( int i=*lp++; (lb=*lp++) == lastNxtKey[i] && lb != 0; ++i );
2867 if( lb ) return 0; // must match curr
2868 if( lp >= rp ) return 0; // next must exist
2869 unsigned char *kp = lp;
2870 lp += st->dataSz; // scan next
2871 ustrcpy(&lastAccKey[0],&lastNxtKey[0]);
2872 for( int i=*lp++; (lastAccKey[i]=*lp++) != 0; ++i );
2874 k = kp - (unsigned char *)sn;
2875 lastAccess.offset = sizeof(keyBlock) + k;
2879 int Db::IndexString::
2882 unsigned char lky[keysz];
2883 if( &loc != &lastNext ) { // find last key
2885 keyBlock *sbb; Page *spp; char *sn;
2886 if_err( db->indexRead(s,0,sbb,spp,sn) );
2887 unsigned char *bp = (unsigned char *)sn;
2888 int k = loc.offset - sizeof(keyBlock);
2889 unsigned char *lp = bp;
2890 unsigned char *kp = bp + k;
2891 unsigned char *rp = bp + spp->iused();
2892 if( kp >= rp ) Err(errInvalid);
2893 pageId r = sbb->right_link();
2895 while( lp <= kp ) { // until past last
2896 if( r >= 0 ) lp += sizeof(pageId);
2897 bp = lp; lp += st->dataSz;
2898 for( int i=*lp++; (lky[i]=*lp++) != 0; ++i );
2900 ustrcpy(&key[0],&lky[0]);
2903 ustrcpy(&key[0],&lastNxtKey[0]);
2904 relationship = keyGT; int t = 0;
2905 if_fail( keyLocate(st->rootPageId,t,cmprStr) );
2909 int Db::IndexString::
2910 Next(pgRef &loc,void *rtnKey,void *rtnData)
2912 if( st->rootPageId == NIL ) Fail(errNotFound);
2913 int ret = CHK chkNext(loc); // try the easy way
2916 if_ret( keyNext(loc) ); // try the hard way
2918 ustrcpy((unsigned char *)rtnKey,&lastAccKey[0]);
2921 if_err( db->addrRead_(lastAccess,kp) );
2922 memmove(rtnData,kp,st->dataSz);
2924 if( &loc == &lastNext )
2925 ustrcpy(&lastNxtKey[0],&lastAccKey[0]);
2930 void Db::IndexString::
2933 relationship = keyEQ;
2937 IndexString(Db *zdb, int zidx, int dsz)
2938 : IndexBase(zdb, idxStr, zidx, 0, dsz)
2940 sst = new(st+1) IndexStringStorage();
2941 dky = new unsigned char[st->dataSz+keysz+1];
2942 tky = new unsigned char[st->dataSz+keysz+1];
2943 tbfr = new unsigned char[3*st->blockSize];
2948 IndexString(Db *zdb, IndexBaseStorage *b, IndexStringStorage *d)
2949 : IndexBase(zdb, *b)
2952 dky = new unsigned char[st->dataSz+keysz+1];
2953 tky = new unsigned char[st->dataSz+keysz+1];
2954 tbfr = new unsigned char[3*st->blockSize];
2959 IndexString(IndexBase *ib, IndexBaseStorage *b, IndexStringStorage *d)
2960 : IndexBase(ib->db, *b)
2962 sst = new(d) IndexStringStorage();
2977 * Db::IndexBase::Clear - clear index
2981 * returns 0 on success,
2982 * error code otherwise
2988 while( st->freeBlocks >= 0 ) {
2989 keyBlock *kbb; pgRef loc;
2990 pageId id = st->freeBlocks;
2991 loc.id = id; loc.offset = 0;
2992 if_err( db->addrWrite_(loc,kbb) );
2993 st->freeBlocks = kbb->right_link();
2996 if( st->rootPageId >= 0 ) {
2997 if_err( keyMap(st->rootPageId, &Db::IndexBase::blockRelease) );
2998 st->rootPageId = st->rightHandSide = NIL;
3007 if( st->rootPageId == NIL ) {
3008 pageId u; keyBlock *ubb; Page *upp; char *un;
3009 if_err( blockAllocate(u,ubb,upp,un) );
3011 ubb->right_link(NIL);
3012 st->rootPageId = st->rightHandSide = u;
3020 pageId u = st->rootPageId;
3021 Page *upp = db->get_page(u);
3022 if( !upp->iused() ) {
3023 if_err( blockFree(u) );
3024 st->rootPageId = st->rightHandSide = NIL;
3029 void Db::IndexBase::
3032 if( lastAccess.id >= 0 && lastAccess.id == lastInsert.id )
3036 lastInsert = lastAccess;
3037 cDelCount = 0; lastDelete.id = NIL;
3038 cFindCount = 0; lastFind.id = NIL;
3039 lastOp = opInsert; lastNext.id = NIL;
3042 void Db::IndexBase::
3045 if( lastAccess.id >= 0 && lastAccess.id == lastDelete.id )
3049 lastDelete = lastAccess;
3050 cInsCount = 0; lastInsert.id = NIL;
3051 cFindCount = 0; lastFind.id = NIL;
3052 lastOp = opDelete; lastNext.id = NIL;
3055 void Db::IndexBase::
3058 if( lastAccess.id >= 0 && lastAccess.id == lastFind.id )
3062 lastFind = lastAccess;
3063 lastNext = lastAccess;
3068 IndexBaseType(int typ)
3071 memset(&name[0],0,sizeof(name));
3076 defaultBlockSizes[] = {
3078 defaultBinaryBlockSize, // idxBin
3079 defaultStringBlockSize, // idxStr
3083 IndexBaseRecd(int typ, int zidx, int ksz, int dsz)
3089 rightHandSide = NIL;
3091 count = 0; pad1 = 0;
3092 blockSize = defaultBlockSizes[typ];
3095 Db::IndexBaseStorage::
3096 IndexBaseStorage(int typ, int zidx, int ksz, int dsz)
3098 new((IndexTypeInfo *)this) IndexBaseType(typ);
3099 new((IndexRecdInfo *)this) IndexBaseRecd(typ, zidx, ksz, dsz);
3102 void Db::IndexBase::
3106 kdSz = st->keySz + st->dataSz;
3107 lastAccess.id = lastDelete.id = lastInsert.id =
3108 lastFind.id = lastNext.id = NIL;
3109 lastAccess.offset = lastDelete.offset = lastInsert.offset =
3110 lastFind.offset = lastNext.offset = 0;
3111 cFindCount = cDelCount = cInsCount = 0;
3115 IndexBase(Db *zdb, IndexBaseStorage &d)
3122 IndexBase(Db *db, int typ, int zidx, int ksz, int dsz)
3124 st = new(&db->index_info[zidx]) IndexBaseStorage(typ, zidx, ksz, dsz);
3135 /*** int Db::objectHeapInsert(int sz,int pg,int off)
3137 * insert a free space record into the entity heap
3140 * sz int record size
3142 * offset int record offset
3144 * returns zero on success, error code on failure
3148 objectHeapInsert(int sz,int id,int offset)
3150 freeSpaceRecord free;
3151 free.size = sz; free.id = id; free.offset = offset;
3152 if_err( freeSpaceIndex->Insert(&free,0) );
3153 addrSpaceRecord addr;
3154 addr.id = id; addr.offset = offset; addr.size = sz;
3155 if_err( addrSpaceIndex->Insert(&addr,0) );
3159 /*** int Db::objectHeapDelete(int sz,int pg,int off)
3161 * delete a free space record from the entity heap
3164 * sz int record size
3166 * offset int record offset
3168 * returns zero on success, error code on failure
3172 objectHeapDelete(int sz,int id,int offset)
3174 freeSpaceRecord free;
3175 free.size = sz; free.id = id; free.offset = offset;
3176 if_err( freeSpaceIndex->Delete(&free) );
3177 addrSpaceRecord addr;
3178 addr.id = id; addr.offset = offset; addr.size = sz;
3179 if_err( addrSpaceIndex->Delete(&addr) );
3183 /*** int Db::pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3185 * find existing persistent free storage.
3188 * size int input/output requested/allocated size
3189 * loc pgRef & output locator returned for new storage
3190 * cache AllocCache& output allocator cache to operate
3192 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3196 pgRefGet(int &size, pgRef &loc, AllocCache &cache)
3198 freeSpaceRecord look, find;
3199 look.size = size; look.id = 0; look.offset = 0;
3200 int status = freeSpaceIndex->Locate(keyGE, &look, &find, 0);
3201 if( status == errNotFound ) return 1;
3203 // if record is at offset 0, need extra space for prefix
3204 while( !find.offset && look.size+(int)sizeof(pagePrefix) > find.size ) {
3205 status = freeSpaceIndex->Next(&find, 0);
3206 if( status == errNotFound ) return 1;
3209 if_err( objectHeapDelete(find.size,find.id,find.offset) );
3211 loc.offset = find.offset ? find.offset : sizeof(pagePrefix);
3212 Page &pg = *get_page(loc.id);
3213 unsigned int ofs = loc.offset + look.size;
3214 if( ofs > pg->used ) pg->used = ofs;
3215 int sz = find.offset+find.size - ofs;
3216 if( sz >= min_heap_allocation ) {
3217 //if_err( objectHeapInsert(sz,find.id,ofs) );
3218 if_err( cache.Load(this, find.id, ofs, sz) );
3221 size = look.size + sz;
3225 /*** int Db::pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3227 * allocate new persistent free storage.
3230 * size int input/output requested/allocated size
3231 * loc pgRef & output locator returned for new storage
3232 * cache AllocCache& output allocator cache to operate
3234 * returns: zero on success, error code.
3238 pgRefNew(int &size, pgRef &loc, AllocCache &cache)
3240 pageId id = new_page();
3242 unsigned int sz = size + sizeof(pagePrefix);
3243 sz = entityPages(sz) * entityPageSize;
3244 Page &pg = *get_page(id);
3245 if( pg->allocated != sz ) {
3251 loc.offset = sizeof(pagePrefix);
3252 int used = loc.offset + size;
3253 int free = pg->allocated - used;
3254 if( free >= min_heap_allocation ) {
3255 //if_err( objectHeapInsert(free,id,used) );
3256 if_err( cache.Load(this, id, used, free) );
3259 size = pg->allocated - loc.offset;
3263 /*** int Db::pgRefAllocate(int &size, pgRef &loc)
3265 * find persistent free storage. existing/new
3266 * does not allocate virtual memory storage
3269 * size int input/output requested/allocated size
3270 * loc pgRef & output locator returned for new storage
3271 * cache AllocCache& output allocator cache to operate
3273 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3277 pgRefAllocate(int &size, pgRef &loc, AllocCache &cache)
3279 int status = pgRefGet(size, loc, cache);
3282 if_err( pgRefNew(size, loc, cache) );
3286 /*** int Db::objectAllocate(int typ, int &size, pgRef &loc)
3288 * find persistent free storage. existing/new
3289 * does allocate virtual memory storage
3290 * inits pagePrefix, inits allocPrefix
3293 * type int input entity type id
3294 * size int input/output requested/allocated size
3295 * loc pgRef & output locator returned for new storage
3296 * cache AllocCache& output allocator cache to operate
3298 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3302 objectAllocate(int typ, int &size, pgRef &loc, AllocCache &cache)
3304 int status = cache.Get(this, size, loc);
3307 if_err( pgRefAllocate(size, loc, cache) );
3308 Page &pg = *get_page(loc.id);
3310 pagePrefix *bpp; pgRef ref;
3311 ref.id = loc.id; ref.offset = 0;
3312 if_err( addrWrite_(ref,bpp) );
3313 bpp->magic = page_magic;
3314 bpp->type = pg_entity + typ;
3315 pg->type = bpp->type;
3317 unsigned int sz = loc.offset + size;
3324 /*** int Db::objectFree(pgRef &loc)
3326 * Purpose: Immediate release of entity memory
3329 * loc pgRef & Page/offset
3331 * returns zero on success, error code on failure
3334 int Db::objectFree(pgRef &loc)
3337 if_err( addrRead_(loc,mp) );
3338 if( mp->magic != entity_magic ) Err(errBadMagic);
3339 addrSpaceRecord addr, prev, next;
3340 /* get prev, next addrSpace free heap items near this addr */
3341 addr.size = mp->size;
3343 addr.offset = loc.offset;
3344 int status = addrSpaceIndex->Locate(keyLT,&addr,&prev,0);
3345 if( status == errNotFound ) {
3347 status = addrSpaceIndex->Locate(keyGT,&addr,&next,0);
3350 status = addrSpaceIndex->Next(&next,0);
3351 if( status == errNotFound ) {
3356 /* merge with prev if possible */
3357 if( prev.id == addr.id &&
3358 prev.offset + prev.size == addr.offset ) {
3359 if_err( objectHeapDelete(prev.size,prev.id,prev.offset) );
3360 addr.offset = prev.offset;
3361 addr.size += prev.size;
3363 /* merge with next if possible */
3364 if( addr.id == next.id &&
3365 addr.offset + addr.size == next.offset ) {
3366 if_err( objectHeapDelete(next.size,next.id,next.offset) );
3367 addr.size += next.size;
3369 /* reduce used block bytes if possible */
3370 Page &pg = *get_page(loc.id);
3371 if( pg->used <= addr.offset + addr.size )
3372 pg->used = addr.offset;
3373 // if only page prefix, release page, otherwise save new fragment
3374 if( pg->used == sizeof(pagePrefix) )
3377 if_err( objectHeapInsert(addr.size,addr.id,addr.offset) );
3381 /*** int Db::blockAllocate(pageId *pid, keyBlock *&pkbb)
3383 * allocate persistent storage.
3386 * pageId & pid page allocated
3387 * keyBlock *& pkbb keyblock* pointer
3391 blockAllocate(pageId &pid, keyBlock *&pkbb)
3393 locked by(db->db_info->blkAlLk);
3394 pageId id; Page *kpp;
3395 pgRef loc; keyBlock *kbb;
3396 if( st->freeBlocks != NIL ) {
3398 id = st->freeBlocks;
3399 loc.id = id; loc.offset = 0;
3400 if_err( db->addrWrite_(loc,kbb) );
3401 st->freeBlocks = kbb->right_link();
3402 Page &kpg = *db->get_page(id);
3403 if( kpg->allocated != st->blockSize ) {
3404 db->pageDealloc(kpg);
3405 kpg->allocated = st->blockSize;
3406 if_err( db->addrWrite_(loc, kbb) );
3411 id = db->new_page();
3413 loc.id = id; loc.offset = 0;
3414 Page &kpg = *db->get_page(id);
3415 kpg->allocated = st->blockSize;
3416 if_err( db->addrWrite_(loc, kbb) );
3419 kbb->magic = page_magic;
3421 kbb->type = pg_index + st->idx;
3424 kpg->type = kbb->type;
3430 /*** int Db::blockFree(pageId pid)
3432 * Purpose: delayed release database memory
3435 * pid int input Page
3437 * returns zero on success, error code on failure
3441 blockFree(pageId pid)
3443 locked by(db->db_info->blkAlLk);
3445 pageId id = st->freeBlocks; pgRef loc;
3446 loc.id = NIL; loc.offset = 0;
3449 while( id >= 0 && id < pid ) {
3451 if_err( db->addrRead_(loc,kbb) );
3452 id = kbb->right_link();
3456 if_err( db->addrWrite_(loc,kbb) );
3457 kbb->right_link(pid);
3460 st->freeBlocks = pid;
3462 if_err( db->addrWrite_(loc,kbb) );
3463 kbb->right_link(id);
3464 Page *kpp = db->get_page(pid);
3470 blockRelease(pageId pid)
3472 Page *pp = db->get_page(pid);
3473 return pp->release();
3476 /*** int Db::deleteFreeBlock()
3478 * Purpose: release database memory/storage
3480 * returns zero on success, error code on failure
3487 while( (id=st->freeBlocks) != NIL ) {
3488 keyBlock *kbb; pgRef loc;
3489 loc.id = id; loc.offset = 0;
3490 if_err( db->addrRead_(loc,kbb) );
3491 st->freeBlocks = kbb->right_link();
3492 Page &pg = *db->get_Page(id);
3493 if_err( db->storeFree(pg->wr_allocated, pg->wr_io_addr) );
3494 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
3495 if_err( db->storeFree(pg->io_allocated, pg->io_addr) );
3496 pg->io_allocated = 0; pg->io_addr = NIL;
3502 /*** int Db::storeInsert(int sz, ioAddr io_addr)
3504 * insert a storage record into the storage heap
3508 * io_addr ioAddr blob addr
3510 * returns zero on success, error code on failure
3514 storeInsert(long sz, ioAddr io_addr)
3516 //dmsg(-1, "storeInsert %08lx/%012lx\n",sz,io_addr);
3517 freeStoreRecord free;
3518 free.size = sz; free.io_addr = io_addr;
3519 if_err( freeStoreIndex->Insert(&free,0) );
3521 addrStoreRecord addr;
3522 addr.io_addr = io_addr; addr.size = sz;
3523 if_err( addrStoreIndex->Insert(&addr,0) );
3528 /*** int Db::storeDelete(int sz, ioAddr io_addr)
3530 * delete a storage record from the storage heap
3534 * io_addr ioAddr blob addr
3536 * returns zero on success, error code on failure
3540 storeDelete(long sz, ioAddr io_addr)
3542 //dmsg(-1, "storeDelete %08lx/%012lx\n",sz,io_addr);
3543 freeStoreRecord free;
3544 free.size = sz; free.io_addr = io_addr;
3545 if_err( freeStoreIndex->Delete(&free) );
3547 addrStoreRecord addr;
3548 addr.io_addr = io_addr; addr.size = sz;
3549 if_err( addrStoreIndex->Delete(&addr) );
3554 /*** int Db::storeGet(int &size, ioAddr &io_addr)
3556 * allocate storage record from the storage heap
3558 * size int input/output requested/allocated blob size
3559 * io_addr ioAddr output blob addr
3561 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3565 storeGet(int &size, ioAddr &io_addr)
3567 freeStoreRecord look, find;
3568 look.size = size; look.io_addr = 0;
3569 int status = freeStoreIndex->Locate(keyGE, &look, &find, 0);
3570 if( status == errNotFound ) return 1;
3572 if_err( storeDelete(find.size,find.io_addr) );
3573 io_addr = find.io_addr;
3574 long sz = find.size - look.size;
3575 if( sz >= min_heap_allocation ) {
3576 /* put back extra */
3577 find.size = sz; find.io_addr += look.size;
3578 if_err( storeInsert(find.size,find.io_addr) );
3581 look.size = find.size;
3586 /*** int Db::storeNew(int &size, ioAddr &io_addr)
3588 * allocate storage by extending file
3590 * size int input/output requested/allocated blob size
3591 * io_addr ioAddr output blob addr
3593 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3597 storeNew(int &size, ioAddr &io_addr)
3599 if_err( seek_data(root_info->file_size) );
3600 io_addr = root_info->file_size;
3601 size = storeBlocks(size) * storeBlockSize;
3602 /* make sure file storage exists */
3603 if_err( write_padding(root_info->file_size + size) );
3607 /*** int Db::storeAllocate(int &size, ioAddr &io_addr)
3609 * find persistent free storage. existing/new
3610 * does not allocate virtual memory storage
3613 * size int input/output requested/allocated size
3614 * io_addr ioAddr & output returned storage address
3616 * returns: zero on success, error code (< 0) on failure, (> 0) no avail
3620 storeAllocate(int &size, ioAddr &io_addr)
3622 int status = storeGet(size, io_addr);
3624 status = storeNew(size, io_addr);
3629 /*** int Db::storeFree(int size, ioAddr io_addr)
3631 * Immediate release of store heap
3633 * size int input blob size
3634 * io_addr ioAddr input blob addr
3636 * returns zero on success, error code on failure
3640 storeFree(int size, ioAddr io_addr)
3642 if( io_addr < 0 || size == 0 ) return 0;
3643 /* get prev, next addrStore heap items near this io_addr */
3644 addrStoreRecord addr, prev, next;
3645 addr.io_addr = io_addr; addr.size = size;
3646 int status = addrStoreIndex->Locate(keyLT,&addr,&prev,0);
3647 if( status == errNotFound ) {
3649 status = addrStoreIndex->Locate(keyGT,&addr,&next,0);
3652 status = addrStoreIndex->Next(&next,0);
3653 if( status == errNotFound ) {
3658 /* merge with prev if possible */
3659 if( prev.io_addr >= 0 && prev.io_addr + prev.size == addr.io_addr ) {
3660 if_err( storeDelete(prev.size,prev.io_addr) );
3661 addr.io_addr = prev.io_addr;
3662 addr.size += prev.size;
3664 /* merge with next if possible */
3665 if( next.io_addr >= 0 && addr.io_addr + addr.size == next.io_addr ) {
3666 if_err( storeDelete(next.size,next.io_addr) );
3667 addr.size += next.size;
3670 return storeInsert(addr.size,addr.io_addr);
3673 /*** int Db::pageLoad(pageId id)
3678 * id pageId pageTable element to load
3685 if( unlikely(id < 0 || id >= root_info->pageTableUsed) ) Err(errNoPage);
3686 locked by(db_info->pgLdLk);
3687 Page &pg = *get_page(id);
3690 if( no_shm || pg->shmid < 0 ) {
3691 bp = new_uint8_t(pg->allocated, pg->shmid, id);
3692 if( pg->used ) pg->set_flags(fl_rd);
3695 bp = get_uint8_t(pg->shmid, id);
3696 pg->clr_flags(fl_rd);
3698 if( !bp ) Err(errNoMemory);
3699 pg.addr = (pagePrefix *)bp;
3700 pg.shm_id = pg->shmid;
3702 if( pg->chk_flags(fl_rd) ) {
3703 pg->clr_flags(fl_rd);
3704 if( likely( pg->used > 0 ) )
3705 if_err( pageRead(pg) );
3710 /*** int Db::pageRead(Page *pp)
3712 * read data from the database file
3715 * pg Page input Page to read
3722 if_err( seek_data(pg->io_addr) );
3723 if_err( read_data((char *)pg.addr, pg->used) );
3724 pagePrefix *bpp = pg.addr;
3725 if( bpp->magic != page_magic ) Err(errBadMagic);
3729 /*** int Db::pageWrite(Page *pp)
3731 * writes data to the database file
3734 * pg Page input Page to write
3741 pagePrefix *bpp = pg.addr;
3742 // when io is block aligned and not disk block sized, but
3743 // the allocated areas exceed disk block size, increase
3744 // write to whole blocks to prevent read/modify/write.
3745 int len = bpp->used = pg->used;
3746 unsigned int blen = (len+0xfff) & ~0xfff;
3747 if( blen > pg->allocated ) blen = pg->allocated;
3748 if_err( seek_data(pg->wr_io_addr) );
3749 if_err( write_data((char *)bpp, blen) );
3750 pg->trans_id = active_transaction;
3754 // deallocate page data buffer
3755 // mode<0: delete without shm deallocate
3756 // mode=0: delete and deallocate written page
3757 // mode>0: delete and deallocate page (default)
3760 pageDealloc(Page &pg, int mode)
3762 uint8_t *bp = (uint8_t *)pg.addr;
3763 //int pg=page_table_sz; while( --pg>=0 && pp!=pageTable[pg] );
3764 if( del_uint8_t(bp, pg.shm_id /*, pg*/) <= 1 ||
3765 mode > 0 || (!mode && pg->chk_flags(fl_wr)) )
3771 int Db::AllocCache::
3775 if_ret( db->objectHeapInsert(avail,loc.id,loc.offset) );
3781 int Db::AllocCache::
3782 Get(Db *db,int &size, pgRef &ref)
3785 int isz = avail - size;
3787 unsigned int sz = isz;
3788 if( sz < min_heap_allocation ) {
3791 ref = loc; avail = sz;
3792 if( sz == 0 ) loc.id = NIL;
3793 sz = (loc.offset += size);
3794 Page &pg = *db->get_page(ref.id);
3795 if( pg->used < sz ) pg->used = sz;
3798 if_ret( cacheFlush(db) );
3803 int Db::AllocCache::
3804 Load(Db *db, pageId id, int ofs, int sz)
3808 if_ret( db->objectHeapInsert(sz,id,ofs) );
3817 int Db::cache_all_flush()
3819 if_err( cacheFlush() );
3820 Entity e(this); EntityLoc &ent = e.ent; int ret;
3821 if( !(ret=entityIdIndex->First(0,&ent.obj)) ) do {
3822 if_err( ent.cacheFlush() );
3823 } while( !(ret=entityIdIndex->Next(0,&ent.obj)) );
3824 if( ret == errNotFound ) ret = 0;
3830 allocate(int typ, int size, pgRef &loc, AllocCache &cache)
3832 locked by(db_info->objAlLk);
3834 int sz = size + sizeof(allocPrefix);
3835 // granularity is sizeof pagePrefix
3836 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3837 if_err( objectAllocate(typ, sz, loc, cache) );
3839 if_err( addrWrite_(loc,mp) );
3840 mp->magic = entity_magic;
3847 deallocate(pgRef &loc)
3849 locked by(db_info->objAlLk);
3850 if( loc.id < 0 ) return 0;
3851 if_fail( objectFree(loc) );
3852 loc.id = NIL; loc.offset = 0;
3857 reallocate(int size, pgRef &loc, AllocCache &cache)
3859 int sz = size + sizeof(allocPrefix);
3860 sz = pagePrefixHunks(sz) * sizeof(pagePrefix);
3861 int typ = 0, msz = 0;
3864 if_err( addrRead_(loc,mp) );
3870 ref.id = NIL; ref.offset = 0;
3872 if_err( allocate(typ, size, ref, cache) );
3873 if( (msz-=sizeof(allocPrefix)) > 0 ) {
3874 char *bp = 0, *cp = 0;
3875 if_err( addrRead(loc,bp) );
3876 if_err( addrWrite(ref,cp) );
3877 if( msz > size ) msz = size;
3881 if_err( deallocate(loc) );
3889 int Db::pack::aligned = 0;
3891 void Db::pack::put(uint64_t v, int n)
3893 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
3896 int k = 8-n-(idx&7), m = (1<<n)-1;
3897 bits[i] = (bits[i] & ~(m<<k)) | (v<<k);
3901 case 8: *((uint8_t *)&bits[i]) = v; break;
3902 case 16: *((uint16_t*)&bits[i]) = htobe16(v); break;
3903 case 32: *((uint32_t*)&bits[i]) = htobe32(v); break;
3904 case 64: *((uint64_t*)&bits[i]) = htobe64(v); break;
3911 if( !aligned && n <= 32 ) {
3912 int i = idx/32, k = 64-n-idx%32;
3913 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
3914 *bp = htobe64(((be64toh(*bp) & ~(m<<k)) | (v<<k)));
3920 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
3921 uint64_t *bp = (uint64_t*)&bits[8*i];
3922 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1; v &= m;
3923 b = l>=0 ? (b & ~(m<<l)) | (v<<l) : (b & ~(m>>-l)) | (v>>-l);
3931 uint64_t Db::pack::get(int n)
3934 if( (n & (n-1)) == 0 && (idx & (n-1)) == 0 ) {
3937 int k = 8-n-(idx&7), m = (1<<n)-1;
3938 v = (bits[i]>>k) & m;
3942 case 8: v = *((uint8_t *)&bits[i]); break;
3943 case 16: v = be16toh(*(uint16_t*)&bits[i]); break;
3944 case 32: v = be32toh(*(uint32_t*)&bits[i]); break;
3945 case 64: v = be64toh(*(uint64_t*)&bits[i]); break;
3952 if( !aligned && n <= 32 ) {
3953 int i = idx/32, k = 64-n-idx%32;
3954 uint64_t *bp = (uint64_t *)&bits[4*i], m = ((uint64_t)1<<n)-1;
3955 v = (be64toh(*bp) >> k) & m;
3961 int i = idx/64, k = 64-(idx&(64-1)), l = k - n;
3962 uint64_t *bp = (uint64_t*)&bits[8*i];
3963 uint64_t b = be64toh(*bp), m = ((uint64_t)1<<n)-1;
3964 uint64_t vv = l>=0 ? (b>>l) & m : b & (m>>-l);
3973 int64_t Db::mediaKey::bit_size(int len, int w)
3976 for( int i=0, m=len; m>1; ++i ) {
3978 int b = m * (i+w+1);
3979 b = (b+pack::alignBits-1) & ~(pack::alignBits-1);
3985 int64_t Db::mediaKey::byte_size(int len, int w)
3987 int64_t sz = bit_size(len, w);
3988 sz = (sz+(8*sizeof(uint64_t)-1)) & ~(8*sizeof(uint64_t)-1);
3989 return sz/8 + 2*sizeof(int)+sizeof(int64_t);
3992 int64_t Db::mediaKey::count1(uint8_t *kp)
3995 int w = pk.iget(), len = pk.iget();
3996 pk.seek(pk.pos()+sizeof(int64_t)*8);
3997 int k = high_bit_no(len-1);
3999 int64_t z = (uint64_t)1<<sz++;
4000 return pk.get(sz) - z;
4004 mediaKey(uint8_t *kp, uint8_t *bp, int w, int len)
4007 pk.iput(this->w = w);
4008 pk.iput(this->len = len);
4012 pk.lput(this->cnt = pb.get(w));
4016 // allocate memory, every level length m requires (m+1)/2 differences
4017 // need an extra one when length is power of 2
4018 int k = high_bit_no(len-1) + 1;
4019 struct { int64_t cnt, sz, *wgt; } lt[k+1], rt[k+1];
4020 for( int i=0, m=len; i<=k; ++i ) {
4022 lt[i].wgt = new int64_t[m];
4023 rt[i].wgt = new int64_t[m];
4024 lt[i].sz = rt[i].sz = 0;
4025 lt[i].cnt = rt[i].cnt = 0;
4029 for( int i=0,l=0; ++i<=len; l=i ) {
4031 int m = i&1 ? 0 : high_bit_no(i ^ l); // highest flipped bit
4032 rt[m].cnt = n; // 0->1, begin right
4033 for( int j=0; j<m; ++j ) {
4034 rt[j].wgt[rt[j].sz++] = n-rt[j].cnt;// 1->0, end right
4035 lt[j].cnt = n; // 1->0, begin left
4037 lt[m].wgt[lt[m].sz++] = n-lt[m].cnt; // 0->1, end left
4040 for( int i=0, m=len; m>1; ++i ) {
4042 if( lt[i].sz < m ) { // finish left
4043 lt[i].wgt[lt[i].sz++] = n-lt[i].cnt;
4046 if( lt[i].sz != rt[i].sz ) // finish right
4047 rt[i].wgt[rt[i].sz++] = n-rt[i].cnt;
4052 for( int i=k; --i>=0; ) {
4053 n = (uint64_t)1<<(i+w); // offset for signed difference
4055 for( int j=0; j<sz; ++j ) {
4056 uint64_t v = (lt[i].wgt[j]-rt[i].wgt[j]) + n;
4057 pk.put(v, i+w+1); // output pair differences
4059 if( (n=pk.pos()%pack::alignBits) != 0 ) // align next level
4060 pk.put(0, pack::alignBits-n);
4063 for( int i=0; i<=k; ++i ) {
4064 delete [] lt[i].wgt;
4065 delete [] rt[i].wgt;
4076 mediaLoad(uint8_t *bp)
4079 w = pb.iget(); len = pb.iget(); cnt = pb.lget();
4081 psz = high_bit_no(len-1)+1;
4082 if( psz < 1 ) psz = 1;
4091 void Db::mediaLoad::
4092 get_fields(int n, int k)
4096 dp[k] = dat; dat += m;
4099 int64_t *ap = dp[k];
4101 int64_t z = (uint64_t)1<<sz++;
4103 int64_t *avp = dp[k-1];
4104 for( int j=n/2; --j>=0; ) {
4105 int64_t av = pb.get(sz)-z, a = *ap++;
4106 *avp++ = (a+av)/2; *avp++ = (a-av)/2;
4109 int64_t av = pb.get(sz)-z, a = *ap;
4114 int64_t *ap = dp[0];
4115 for( int j=n/2; --j>=0; ) {
4116 int64_t av = pb.get(sz)-z, a = *ap++;
4117 pk.putc((a+av)/2, w); pk.putc((a-av)/2, w);
4120 int64_t av = pb.get(sz)-z, a = *ap;
4121 pk.putc((a+av)/2, w);
4127 void Db::mediaLoad::
4130 pb.seek(spos); pk.init(kp);
4131 pk.set_max((1<<w)-1);
4132 int64_t d[dsz]; dat = d;
4133 int64_t *p[psz]; dp = p;
4135 for( int i=psz-1; --i>=0; ) dp[i] = 0;
4144 mediaCmpr(uint8_t *bp)
4146 this->err = 0; this->lmt = ~0;
4147 pb.init(bp); w = pb.iget(); len = pb.iget();
4149 psz = high_bit_no(len-1)+1;
4150 if( psz < 1 ) psz = 1;
4159 uint64_t Db::mediaCmpr::
4160 chk_fields(int n, int k)
4164 adp[k] = adat; adat += m;
4165 bdp[k] = bdat; bdat += m;
4166 if( chk_fields(m, k+1) > lmt ) return err;
4168 int64_t *ap = adp[k], *bp = bdp[k];
4169 //uint64_t serr = 0;
4170 err = 0; int sz = k+w;
4171 int64_t z = (uint64_t)1<<sz++;
4173 int64_t *avp = adp[k-1], *bvp = bdp[k-1];
4174 for( int j=n/2; --j>=0; ) {
4175 int64_t a = *ap++, b = *bp++;
4176 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4177 int64_t alt = (a+av)/2, art = (a-av)/2;
4178 int64_t blt = (b+bv)/2, brt = (b-bv)/2;
4179 //serr += sqr(alt-blt) + sqr(art-brt);
4180 *avp++ = alt; *avp++ = art;
4181 *bvp++ = blt; *bvp++ = brt;
4182 err += labs(alt-blt) + labs(art-brt);
4185 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4186 int64_t a = *ap, b = *bp;
4187 int64_t alt = (a+av)/2, blt = (b+bv)/2;
4188 //serr += sqr(alt-blt);
4189 *avp++ = alt; *bvp++ = blt;
4190 err += labs(alt-blt);
4194 int64_t *ap = adp[0], *bp = bdp[0];
4195 for( int j=n/2; --j>=0; ) {
4196 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4197 int64_t a = *ap++, b = *bp++;
4198 int64_t v = a-b, dv = av-bv;
4199 //serr += sqr(v) + sqr(dv);
4200 err += labs(v+dv)/2 + labs(v-dv)/2;
4204 int64_t av = pb.get(sz)-z, bv = pk.get(sz)-z;
4205 int64_t a = *ap, b = *bp;
4206 int64_t v = a-b, dv = av-bv, d = v + dv;
4211 pb.align(); pk.align();
4212 //printf("n=%d,k=%d,lerr=%lu,err=%lu\n",n,k,lerr,(int64_t)sqrt((double)serr));
4216 uint64_t Db::mediaCmpr::
4217 chk(uint8_t *kp, uint64_t lmt)
4219 pb.seek(spos); pk.init(kp); err = 0;
4220 if( pk.iget() != w || len != pk.iget() ) return ~0;
4221 acnt = pb.lget(); bcnt = pk.lget();
4222 //if( (err=sqr(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4223 if( (err=labs(acnt-bcnt)) > (this->lmt=lmt) ) return err;
4224 int64_t a[dsz], b[dsz]; adat = a; bdat = b;
4225 int64_t *ap[psz], *bp[psz]; adp = ap; bdp = bp;
4226 adp[psz-1] = &acnt; bdp[psz-1] = &bcnt;
4227 for( int i=psz-1; --i>=0; ) adp[i] = bdp[i] = 0;
4228 return len > 1 ? chk_fields(len, 0) : err;
4232 cmpr(uint8_t *kp, uint64_t lmt)
4234 pb.seek(spos); pk.init(kp);
4235 if( pk.iget() != w || len != pk.iget() ) return ~0;
4236 acnt = pb.lget(); bcnt = pk.lget();
4237 if( acnt < bcnt ) return -1;
4238 if( acnt > bcnt ) return 1;
4239 if( len == 1 ) return 0;
4240 int bsz = Db::mediaKey::bit_size(len, w);
4241 int i = bsz / (8*sizeof(uint64_t));
4242 uint64_t *ap = (uint64_t *)pb.addr(), *bp = (uint64_t *)pk.addr();
4244 if( *ap != *bp ) return be64toh(*ap)<be64toh(*bp) ? -1 : 1;
4247 if( (i=bsz % (8*sizeof(uint64_t))) > 0 ) {
4248 int l = (8*sizeof(uint64_t)) - i;
4249 uint64_t av = be64toh(*ap)>>l, bv = be64toh(*bp)>>l;
4250 if( av != bv ) return av<bv ? -1 : 1;
4259 start_transaction(int undo_save)
4261 //traceback(" start_transaction %d\n",my_pid);
4263 // activate written pages
4264 for( id=0; id<root_info->pageTableUsed; ++id ) {
4265 Page &pg = *get_Page(id);
4266 if( !pg.addr && pg->used ) pg->set_flags(fl_rd);
4267 if( !pg->chk_flags(fl_new) ) continue;
4268 int io_allocated = pg->io_allocated;
4269 pg->io_allocated = pg->wr_allocated;
4270 pg->wr_allocated = io_allocated;
4271 ioAddr io_addr = pg->io_addr;
4272 pg->io_addr = pg->wr_io_addr;
4273 pg->wr_io_addr = io_addr;
4275 // release inactivate page storage
4276 for( id=0; id<root_info->pageTableUsed; ++id ) {
4277 Page &pg = *get_Page(id);
4278 if( !pg->chk_flags(fl_new) ) continue;
4279 pg->clr_flags(fl_new);
4280 if_err( storeFree(pg->wr_allocated, pg->wr_io_addr) );
4281 pg->wr_allocated = 0; pg->wr_io_addr = NIL;
4282 if( pg->used ) continue;
4283 if_err( storeFree(pg->io_allocated, pg->io_addr) );
4284 pg->io_allocated = 0; pg->io_addr = NIL;
4289 addrStoreRecord addr;
4290 int status = addrStoreIndex->Last(&addr,0);
4292 if( addr.io_addr+addr.size == root_info->file_size ) {
4293 if_err( storeDelete(addr.size, addr.io_addr) );
4294 ioAddr fsz = storeBlocks(addr.io_addr) * storeBlockSize;
4295 if( root_info->file_size > fsz ) {
4296 status = ftruncate(fd, fsz);
4297 if( status ) Err(errIoWrite);
4298 addr.size = fsz - addr.io_addr;
4300 if_err( storeInsert(addr.size, addr.io_addr) );
4301 root_info->file_size = fsz;
4306 if( status != errNotFound ) Err(status);
4308 for( int idx=0; idx<root_info->indeciesUsed; ++idx )
4309 if( indecies[idx] ) indecies[idx]->deleteFreeBlocks();
4311 // truncate pageTable
4312 for( id=root_info->pageTableUsed; id>0; --id ) {
4313 Page &pg = *get_Page(id-1);
4314 if( pg->used ) break;
4316 page_table_sz = root_info->pageTableUsed = id;
4318 // remove released pages from free list
4320 id = root_info->freePages;
4322 Page &pg = *get_Page(id);
4323 pageId next_id = pg->link;
4324 if( id < root_info->pageTableUsed ) {
4325 if( prev ) prev->st->link = id;
4326 else root_info->freePages = id;
4333 if( prev ) prev->st->link = NIL;
4334 else root_info->freePages = NIL;
4336 if( pageTableHunks(root_info->pageTableUsed)*pageTableHunkSize < pageTableAllocated )
4337 alloc_pageTable(root_info->pageTableUsed);
4339 if( undo_save ) undata.save(this);
4340 active_transaction = ++root_info->transaction_id;
4346 #define bfr_sz 0x40000
4351 bfr = inp = new char[bfr_sz];
4352 memset(bfr, 0, bfr_sz);
4360 int ret = flush_bfr();
4362 bfr = inp = lmt = 0;
4371 return n > 0 ? write_data(bfr, n) : 0;
4375 write_bfr(char *dp, int sz)
4379 if( n > sz ) n = sz;
4381 if( (inp+=n) >= lmt && flush_bfr() < 0 )
4393 //traceback(" commit %d\n",my_pid);
4394 int v = attach_wr();
4395 int ret = icommit(force);
4396 if( !ret && force < 0 ) ret = icommit(1);
4398 if( v ) detach_rw();
4406 //traceback(" icommit %d\n",my_pid);
4413 // check for written pages
4414 for( id=0,n=root_info->pageTableUsed; --n>=0; ++id ) {
4415 Page &pg = *get_Page(id);
4416 if( pg->chk_flags(fl_wr) ) break;
4418 // if no pages are written
4419 if( n < 0 ) return 0;
4423 // release last root info, allows storage to move down
4424 if_err( storeFree(root_info->last_info_size, root_info->last_info_addr) );
4425 root_info->last_info_addr = NIL; root_info->last_info_size = 0;
4428 ioAddr ri_addr = root_info->last_info_addr;
4429 int ri_size = root_info->last_info_size;
4430 int k = root_info_extra_pages;
4433 // allocate new storage
4436 for( id=0; id<root_info->pageTableUsed; ++id ) {
4437 Page &pg = *get_Page(id);
4438 if( !pg->chk_flags(fl_wr) ) continue;
4439 if( pg->chk_flags(fl_new) ) continue;
4440 pg->set_flags(fl_new);
4442 pg->wr_allocated = pg->allocated;
4443 if_err( storeAllocate(pg->wr_allocated, pg->wr_io_addr) );
4448 // compute rootInfo size
4449 int rsz = writeRootInfo(&Db::size_data);
4450 int rsz0 = entityPages(rsz) * entityPageSize;
4451 int rsz1 = rsz0 + k * entityPageSize;
4452 // retry while no storage, too little, or too big
4453 if( !(ri_addr < 0 || ri_size < rsz0 || ri_size > rsz1) ) break;
4454 // delete/allocate can change pageTable;
4455 if_err( storeFree(ri_size, ri_addr) );
4456 ri_addr = NIL; ri_size = 0;
4457 rsz1 = rsz0 + k/2 * entityPageSize;
4458 if_err( storeAllocate(rsz1, ri_addr) );
4459 ri_size = rsz1; k += 2;
4462 // delete free index pages
4463 for( int idx=0; idx<root_info->indeciesUsed; ++idx ) {
4464 IndexBase *ib = indecies[idx];
4466 while( (id=ib->st->freeBlocks) != NIL ) {
4467 keyBlock *kbb; pgRef loc;
4468 loc.id = id; loc.offset = 0;
4469 if_err( addrWrite_(loc,kbb) );
4470 ib->st->freeBlocks = kbb->right_link();
4471 Page &pg = *get_Page(id);
4472 pg->used = 0; pg->set_flags(fl_new);
4476 root_info->last_info_addr = root_info->root_info_addr;
4477 root_info->last_info_size = root_info->root_info_size;
4478 root_info->root_info_addr = ri_addr;
4479 root_info->root_info_size = ri_size;
4481 // write page storage
4482 for( id=0; id<root_info->pageTableUsed; ++id ) {
4483 Page &pg = *get_Page(id);
4484 if( pg->chk_flags(fl_wr) ) {
4485 pg->clr_flags(fl_wr);
4487 if_err( pageWrite(pg) );
4491 pg->set_flags(fl_rd);
4495 // write rootInfo storage
4496 if_err( seek_data(ri_addr) );
4498 if_err( open_bfr() );
4499 if_err( writeRootInfo(&Db::write_bfr) );
4500 if_err( close_bfr() );
4502 if_err( writeRootInfo(&Db::write_data) );
4504 if_err( write_zeros(ri_addr+ri_size) );
4506 // update rootInfo pointer
4507 if_err( seek_data(root_info_offset) );
4508 if_err( write_data((char*)&ri_addr,sizeof(ri_addr)) );
4510 // commit is finished
4511 return start_transaction();
4517 // evict unwritten page storage
4518 for( int id=0; id<root_info->pageTableUsed; ++id ) {
4519 Page &pg = *get_page(id);
4520 if( !pg.addr ) continue;
4521 if( pg->chk_flags(fl_wr) ) continue;
4523 pg->set_flags(fl_rd);
4529 seek_data(ioAddr io_addr)
4531 if( io_addr < 0 ) Err(errIoSeek);
4532 if( file_position != io_addr ) {
4533 long offset = lseek(fd,io_addr,SEEK_SET);
4534 if( offset != io_addr ) Err(errIoSeek);
4535 file_position = io_addr;
4541 size_data(char *dp, int sz)
4543 return sz > 0 ? sz : 0;
4547 read_data(char *dp, int sz)
4550 long len = read(fd,dp,sz);
4551 if( len != sz ) Err(errIoRead);
4552 file_position += sz;
4558 write_data(char *dp, int sz)
4561 long len = write(fd,dp,sz);
4562 if( len != sz ) Err(errIoWrite);
4563 file_position += sz;
4564 if( file_position > root_info->file_size )
4565 root_info->file_size = file_position;
4571 write_zeros(ioAddr io_addr)
4573 ioAddr len = io_addr - file_position;
4574 if( len < 0 ) Err(errIoWrite);
4575 int sz = len > entityPageSize ? entityPageSize : len;
4576 char bfr[sz]; memset(&bfr[0],0,sz);
4578 sz = len > entityPageSize ? entityPageSize : len;
4579 if_err( write_data(&bfr[0], sz) );
4580 len = io_addr - file_position;
4586 write_padding(ioAddr io_addr)
4588 if( root_info->file_size > io_addr ) Err(errIoWrite);
4590 if_err( write_zeros(io_addr) );
4592 int status = ftruncate(fd, io_addr);
4593 if( status ) Err(errIoSeek);
4594 root_info->file_size = io_addr;
4599 #define Read(n) do { \
4600 if( (count+=sizeof(n)) > root_info->root_info_size ) Err(errCorrupt); \
4601 if_err( (this->*fn)((char*)&(n),sizeof(n)) ); \
4605 readRootInfo(int(Db::*fn)(char *dp,int sz))
4611 root_info->root_info_size = sizeof(RootInfo);
4613 if( root_info->root_magic != root_magic ) Err(errBadMagic);
4616 sz = indeciesHunks(root_info->indeciesUsed) * indexTableHunkSize;
4617 indecies = new IndexBase*[sz];
4618 if( !indecies ) Err(errNoMemory);
4619 index_info = (IndexInfo *)new_uint8_t(sz*sizeof(IndexInfo),id);
4620 if( !index_info ) Err(errNoMemory);
4621 db_info->index_info_id = index_info_id = id;
4622 indeciesAllocated = sz;
4624 indecies_sz = root_info->indeciesUsed;
4625 for( int idx=0; idx<indecies_sz; ++idx ) {
4626 IndexBaseInfo *b = &index_info[idx].base;
4627 Read(*(IndexTypeInfo*)b);
4628 if( b->magic != idx_magic ) Err(errBadMagic);
4629 if( b->type != idxNil )
4630 Read(*(IndexRecdInfo*)b);
4633 IndexBinaryInfo *bi = &index_info[idx].bin;
4635 if_err( new_index(indecies[idx], b, bi) );
4638 IndexStringInfo *si = &index_info[idx].str;
4640 if_err( new_index(indecies[idx], b, si) );
4651 page_table_sz = root_info->pageTableUsed;
4652 sz = pageTableHunks(page_table_sz) * pageTableHunkSize;
4653 pageTable = (Page **)new Page*[sz];
4654 if( !pageTable ) Err(errNoMemory);
4655 pageTableAllocated = sz;
4656 sz = pageTableAllocated*sizeof(PageStorage);
4657 page_info = (PageStorage *)new_uint8_t(sz, id);
4658 if( !page_info ) Err(errNoMemory);
4659 db_info->page_info_id = page_info_id = id;
4660 for( id=0; id<root_info->pageTableUsed; ++id ) {
4661 Read(page_info[id]);
4662 Page *pp = new Page(page_info[id]);
4663 if( !pp ) Err(errNoMemory);
4666 if( !pg->chk_flags(fl_free) ) pg->shmid = -1;
4667 if( pg->used ) pg->set_flags(fl_rd);
4669 while( id < pageTableAllocated )
4677 #define Write(n) do { \
4678 if_err( (sz=(this->*fn)((char*)&(n),sizeof(n))) ); \
4683 writeRootInfo(int(Db::*fn)(char *data, int sz))
4692 for( id=0; id<root_info->indeciesUsed; ++id ) {
4693 IndexBase *ib = indecies[id];
4695 switch( ib->st->type ) {
4697 IndexBinary *b = (IndexBinary*)ib;
4702 IndexString *b = (IndexString*)ib;
4709 IndexBaseType b(idxNil);
4715 for( id=0; id<root_info->pageTableUsed; ++id ) {
4716 Page *pp = get_Page(id);
4728 int n = db->indeciesAllocated - usrIdx + 1;
4729 if( cfnAllocated != n ) {
4731 cfn = new cfnData[n];
4734 cfnData *cdp = &cfn[0];
4735 cfnUsed = db->root_info->indeciesUsed;
4736 for( int idx=usrIdx; idx<cfnUsed; ++idx,++cdp ) {
4737 IndexBase *ib = db->indecies[idx];
4739 switch( ib->st->type ) {
4741 IndexBinary *bidx = (IndexBinary *)ib;
4742 cdp->cmprId = bidx->bst->cmprId;
4743 cdp->compare = bidx->compare;
4756 cfnData *cdp = &cfn[0];
4757 int n = db->root_info->indeciesUsed<cfnUsed ? db->root_info->indeciesUsed : cfnUsed;
4758 for( int idx=usrIdx; idx<n; ++idx,++cdp ) {
4759 IndexBase *ib = db->indecies[idx];
4761 switch( ib->st->type ) {
4763 IndexBinary *bidx = (IndexBinary *)ib;
4764 int cid = cdp->cmprId;
4765 bidx->bst->cmprId = cid;
4766 bidx->compare = cid>0 ? db->cmprFns[cid] : cdp->compare;
4779 int v = attach_wr();
4782 if( v ) detach_rw();
4792 undata.restore(this);
4799 DbInfo(int pid, int key, int id)
4814 int id = -1, flk = 0, ret = 0;
4817 if( !shm_init ) init_shm();
4818 get_mem = &get_shm8_t;
4819 new_mem = &new_shm8_t;
4820 del_mem = &del_shm8_t;
4821 if( flock(fd, LOCK_EX) ) ret = errInvalid;
4824 id = shmget(key, sizeof(DbInfo), IPC_CREAT+IPC_EXCL+0666);
4825 if( id < 0 ) ret = errno==EEXIST ? errDuplicate : errNoMemory;
4828 vp = shmat(id, NULL, 0);
4829 if( vp == (void*)-1 ) ret = errNoMemory;
4833 get_mem = &get_mem8_t;
4834 new_mem = &new_mem8_t;
4835 del_mem = &del_mem8_t;
4836 vp = (void *)new uint8_t[sizeof(DbInfo)];
4837 if( !vp ) Err(errNoMemory);
4840 db_info = new(vp) DbInfo(my_pid, key, id);
4844 if( flk ) flock(fd, LOCK_UN);
4845 //traceback("new_info %s\n",ret ? "failed" : "success");
4853 if( no_shm ) Err(errInvalid);
4854 get_mem = &get_shm8_t;
4855 new_mem = &new_shm8_t;
4856 del_mem = &del_shm8_t;
4857 int id = shmget(key, sizeof(DbInfo), 0666);
4858 if( id < 0 ) Fail(errNotFound);
4859 void *vp = shmat(id, NULL, 0);
4860 if( vp == (void*)-1 ) Err(errNoMemory);
4861 if( del_uint8_t(0, id) <= 1 ) {
4862 shmctl(id, IPC_RMID, 0);
4864 //traceback("get_info failed\n");
4867 DbInfo *dip = (DbInfo *)vp;
4868 if( dip->magic != info_magic ) Err(errBadMagic);
4869 if( dip->info_key != key || dip->info_id != id ) Err(errCorrupt);
4871 //traceback("get_info success\n");
4879 db_info->index_info_id = -1;
4880 db_info->page_info_id = -1;
4881 db_info->root_info_id = -1;
4882 db_info->owner = -1;
4883 db_info->last_owner = my_pid;
4885 int flk = !flock(fd, LOCK_EX) ? 1 : 0;
4887 int ret = del_uint8_t(0, db_info->info_id);
4889 shmctl(db_info->info_id, IPC_RMID, 0);
4893 delete [] (uint8_t*)db_info;
4894 if( flk ) flock(fd, LOCK_UN);
4895 //traceback("del_info %d, refs=%d\n",my_pid,ret);
4902 open(int zfd, int zkey)
4904 //traceback("open %d\n",my_pid);
4906 if( fd >= 0 ) Err(errDuplicate);
4909 if( fd < 0 ) Err(errNotFound);
4911 if( fstat(fd,&st) < 0 ) Err(errIoStat);
4912 if( zkey >= 0 ) use_shm(1);
4914 if_err( new_info(zkey) );
4922 iopen(int undo_save)
4924 //traceback(" iopen %d\n",my_pid);
4926 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
4927 int ret = !root_info ? errNoMemory : 0;
4930 db_info->root_info_id = root_info_id = info_id;
4933 if( !ret ) ret = seek_data(db_magic_offset);
4934 if( !ret ) ret = read_data((char*)&magic,sizeof(magic));
4935 if( !ret && magic != db_magic ) ret = errBadMagic;
4936 if( !ret ) ret = seek_data(root_info_offset);
4937 if( !ret ) ret = read_data((char*)&root_info->root_info_addr,sizeof(root_info->root_info_addr));
4938 if( !ret ) ret = seek_data(root_info->root_info_addr);
4939 if( !ret ) ret = readRootInfo(&Db::read_data) >= 0 ? 0 : ret;
4940 if( !ret ) ret = start_transaction(undo_save);
4942 active_transaction = root_info->transaction_id;
4951 //traceback("close %d\n",my_pid);
4952 if( !db_info || fd < 0 ) return 0;
4960 //traceback(" iclose %d\n",my_pid);
4970 //traceback(" ireopen %d\n",my_pid);
4971 Page **old_page_table = pageTable; pageTable = 0;
4972 PageStorage *old_page_info = page_info; page_info = 0;
4973 int old_page_table_sz = page_table_sz; page_table_sz = 0;
4976 for( pageId id=0; id<old_page_table_sz; ++id ) {
4977 Page *opp = old_page_table[id], &opg = *opp;
4978 if( id < page_table_sz && opg.addr && !opg->chk_flags(fl_wr | fl_rd) ) {
4979 Page &pg = *get_Page(id);
4980 if( !pg.addr && pg->shmid < 0 && opg.shm_id == opg->shmid &&
4981 pg->io_addr == opg->io_addr && pg->trans_id == opg->trans_id ) {
4982 pg.addr = opg.addr; pg->shmid = pg.shm_id = opg.shm_id;
4983 pg->clr_flags(fl_rd);
4991 delete [] old_page_table;
4992 del_uint8_t(old_page_info);
4999 //traceback(" iattach %d\n",my_pid);
5000 RootInfo *new_root_info = 0;
5001 IndexInfo *new_index_info = 0;
5002 PageStorage *new_page_info = 0;
5003 int new_indecies_alloc = 0;
5004 int new_indecies_sz = 0;
5005 IndexBase **new_indecies = 0;
5006 int new_page_table_alloc = 0;
5007 int new_page_table_sz = 0;
5008 Page **new_page_table = 0;
5011 if( !ret && root_info_id != db_info->root_info_id ) {
5012 new_root_info = (RootInfo *)get_uint8_t(db_info->root_info_id);
5013 if( !new_root_info ) ret = errCorrupt;
5016 new_root_info = root_info;
5021 new_indecies_sz = new_root_info->indeciesUsed;
5022 new_indecies_alloc = indeciesHunks(new_indecies_sz) * indexTableHunkSize;
5023 new_indecies = new IndexBase*[new_indecies_alloc];
5024 if( !new_indecies ) ret = errNoMemory;
5028 if( index_info_id != db_info->index_info_id ) {
5029 new_index_info = (IndexInfo *)get_uint8_t(db_info->index_info_id);
5030 if( !new_index_info ) ret = errCorrupt;
5033 new_index_info = index_info;
5038 for( int idx=0; !ret && idx<new_indecies_sz; ++idx ) {
5039 IndexBaseInfo *b = &new_index_info[idx].base;
5040 if( b->magic == idx_magic ) {
5041 IndexBase *ib = idx<indecies_sz ? indecies[idx] : 0;
5042 if( ib && ib->st->type != b->type ) ib = 0;
5046 new_indecies[idx] = new(ib) IndexBinary(ib,*b,new_index_info[idx].bin);
5050 ret = new_index(new_indecies[idx], b, &new_index_info[idx].bin);
5054 new_indecies[idx] = new(ib) IndexString(ib,*b,new_index_info[idx].str);
5058 ret = new_index(new_indecies[idx], b, &new_index_info[idx].str);
5061 new_indecies[idx] = 0;
5071 for( int idx=0; idx<indecies_sz; ++idx ) {
5072 if( !indecies[idx] ) continue;
5073 delete indecies[idx]; indecies[idx] = 0;
5078 new_page_table_sz = new_root_info->pageTableUsed;
5079 new_page_table_alloc = pageTableHunks(new_page_table_sz) * pageTableHunkSize;
5080 new_page_table = (Page **)new Page*[new_page_table_alloc];
5081 if( !new_page_table ) ret = errNoMemory;
5085 if( page_info_id != db_info->page_info_id ) {
5086 new_page_info = (PageStorage *)get_uint8_t(db_info->page_info_id);
5087 if( !new_page_info ) ret = errCorrupt;
5090 new_page_info = page_info;
5096 for( id=0; !ret && id<new_page_table_sz; ++id ) {
5097 Page *pp = id<page_table_sz ? pageTable[id] : 0;
5098 PageStorage *st = &new_page_info[id];
5100 pageTable[id] = 0; pp->st = st; Page &pg = *pp;
5101 if( pg->chk_flags(fl_rd | fl_free) )
5103 else if( pg->shmid >= 0 && pp->shm_id != pg->shmid ) {
5104 del_uint8_t(pp->addr);
5105 pp->addr = (pagePrefix *)get_uint8_t(pp->shm_id=pg->shmid);
5110 if( !pp ) ret = errNoMemory;
5112 new_page_table[id] = pp;
5114 while( id<page_table_sz ) del_page(id++);
5117 delete [] new_indecies;
5118 delete [] new_page_table;
5119 if( !root_info ) root_info = new_root_info;
5120 else del_uint8_t(new_root_info);
5121 if( !index_info ) index_info = new_index_info;
5122 else del_uint8_t(new_index_info);
5123 if( !page_info ) page_info = new_page_info;
5124 else del_uint8_t(new_page_info);
5130 indecies = new_indecies;
5131 indeciesAllocated = new_indecies_alloc;
5132 indecies_sz = new_indecies_sz;
5133 delete [] pageTable;
5134 pageTable = new_page_table;
5135 pageTableAllocated = new_page_table_alloc;
5136 page_table_sz = new_page_table_sz;
5137 root_info_id = db_info->root_info_id;
5138 del_uint8_t(root_info);
5139 root_info = new_root_info;
5140 index_info_id = db_info->index_info_id;
5141 del_uint8_t(index_info);
5142 index_info = new_index_info;
5143 page_info_id = db_info->page_info_id;
5144 del_uint8_t(page_info);
5145 page_info = new_page_info;
5146 active_transaction = root_info->transaction_id;
5151 attach(int zfd, int zkey, int zrw)
5153 //traceback("attach %s %d\n",zrw ? "wr" : "rd", my_pid);
5155 if_ret( get_info(zkey) );
5156 fd = zfd; key = zkey;
5159 else if( zfd != fd || zkey != key )
5163 ( root_info && active_transaction == root_info->transaction_id &&
5164 db_info->root_info_id >= 0 && root_info_id == db_info->root_info_id &&
5165 db_info->index_info_id >= 0 && index_info_id == db_info->index_info_id &&
5166 db_info->page_info_id >= 0 && page_info_id == db_info->page_info_id ) )
5168 int ret = db_info->root_info_id < 0 ? ireopen() : iattach();
5186 if( fd >= 0 ) Err(errDuplicate);
5190 if( new_info(key) ) Err(errNotFound);
5192 root_info = (RootInfo *)new_uint8_t(sizeof(*root_info), info_id);
5193 if( !root_info ) { Err(errNoMemory); }
5195 db_info->root_info_id = root_info_id = info_id;
5196 if_err( init_idx() );
5197 if_err( seek_data(db_magic_offset) );
5198 int magic = db_magic;
5199 if_err( write_data((char *)&magic,sizeof(magic)) );
5200 write_zeros(entityPageSize);
5205 // in-order traversal copying IndexBinary
5206 int Db::IndexBinary::
5207 keyCopy(pageId s, IndexBase *ib)
5209 IndexBinary *idx = (IndexBinary *)ib;
5211 keyBlock *sbb; Page *spp; char *sn;
5212 if_err( db->indexRead(s,0,sbb,spp,sn) );
5213 if( (r=sbb->right_link()) >= 0 ) {
5214 int lkdSz = kdSz + sizeof(pageId);
5215 int n = spp->iused() / lkdSz;
5216 for( int i=0; i<n; ++i ) {
5217 pageId l = readPageId(sn);
5218 sn += sizeof(pageId);
5219 if_ret( keyCopy(l,idx) );
5220 if_ret( idx->Insert(sn,sn+st->keySz) );
5223 if_ret( keyCopy(r,idx) );
5226 int n = spp->iused() / kdSz;
5227 for( int i=0; i<n; ++i ) {
5228 if_ret( idx->Insert(sn,sn+st->keySz) );
5235 // in-order traversal copying IndexString
5236 int Db::IndexString::
5237 keyCopy(pageId s, IndexBase *ib)
5239 IndexString *idx = (IndexString *)ib;
5240 pageId r; unsigned char lky[keysz];
5241 keyBlock *sbb; Page *spp; char *sn;
5242 if_err( db->indexRead(s,0,sbb,spp,sn) );
5243 unsigned char *lp = (unsigned char *)sn;
5244 unsigned char *rp = lp + spp->iused();
5246 if( (r=sbb->right_link()) >= 0 ) {
5248 pageId l = getPageId(lp);
5249 if_ret( keyCopy(l,idx) );
5250 char *dp = (char *)lp; lp += st->dataSz;
5251 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5252 if_ret( idx->Insert(&lky[0],dp) );
5254 if_ret( keyCopy(r,idx) );
5258 char *dp = (char *)lp; lp += st->dataSz;
5259 for( int i=*lp++; (lky[i++]=*lp++) != 0; );
5260 if_ret( idx->Insert(&lky[0],dp) );
5267 EntityObj(EntityObj &eobj, int eid)
5270 memmove(&name[0],&eobj.name[0],nmSz);
5271 recdSz = eobj.recdSz;
5273 indexs[idxId] = eobj.indexs[idxId];
5274 for( int i=idxId+1; i<nidxs; ++i )
5275 indexs[i] = eobj.indexs[i];
5281 copy(ObjectLoc &dobj)
5284 Obj *dp = dobj.addr();
5285 if( !dp ) Err(errNoMemory);
5286 allocPrefix *mp = ((allocPrefix *)dp) - 1;
5287 int sz = mp->size - sizeof(*mp);
5288 if_err( allocate(sz - dobj.entity->ent->recdSz) );
5290 if( !np ) Err(errNoMemory);
5292 // copy varObj data using ref data, if any
5293 for( varObjs vp=dobj.entity->vobjs; vp!=0; vp=vp->next ) {
5295 varObj &vd = dp->*ref;
5296 varObj &vn = np->*ref;
5298 if( (sz=vd.size()) > 0 ) {
5300 memcpy(addr(vn),dobj.addr(vd), sz);
5307 copy(Db *db, Objects objs)
5309 int id, n = db->root_info->indeciesUsed;
5310 for( id=usrIdx; id<n; ++id ) {
5311 IndexBase *ib = db->indecies[id];
5312 if( ib && ib->st->type != idxNil ) {
5314 switch( ib->st->type ) {
5315 // copy binary index
5317 IndexBinary *bidx = (IndexBinary *)ib;
5318 int kySz = bidx->st->keySz, dtSz = bidx->st->dataSz;
5319 int idx = get_index(&bidx->st->name[0]);
5321 idx = new_binary_index(&bidx->st->name[0], kySz, dtSz, bidx->compare);
5323 // ignore empty indecies
5324 if( bidx->st->rootPageId >= 0 ) {
5325 // entity id indecies are processed below
5326 if( db->entityNmIndex->Find(&ib->st->name[0],0) != 0 ) {
5327 IndexBinary *bip = (IndexBinary *)indecies[idx];
5328 // use cmprLast since index is in-order. Avoids using
5329 // user defined class key cmprs and compare functions.
5330 bip->compare = cmprLast;
5331 ret = bidx->keyCopy(bidx->st->rootPageId, indecies[idx]);
5332 bip->compare = cmprFns[bip->bst->cmprId];
5336 // copy string index
5338 IndexString *sidx = (IndexString *)ib;
5339 int dtSz = sidx->st->dataSz;
5340 int idx = get_index(&sidx->st->name[0]);
5342 idx = new_string_index(&sidx->st->name[0], dtSz);
5345 if( sidx->st->rootPageId >= 0 )
5346 ret = sidx->keyCopy(sidx->st->rootPageId, indecies[idx]);
5350 if_err( db->flush() );
5351 if_err( commit(-1) );
5356 // copy entity indecies/data
5357 IndexBinary *eidx = (IndexBinary *)db->entityIdIndex;
5358 int eid, status; pgRef loc;
5359 if( !(status=eidx->First(&eid,&loc)) ) do {
5362 Entity *ep = op->obj->entity;
5363 if( ep->ent->id == eid ) break;
5368 EntityLoc &dent = db_ent.ent;
5370 ObjectLoc objd(&db_ent), *dobj = &objd;
5371 // if old db copy fn, use ObjectLoc::copy
5372 if( op->obj->entity->db == db ) dobj = op->obj;
5373 char name[nmSz]; memset(name,0,sizeof(name));
5374 strncpy(&name[0],&dent->name[0],sizeof(name));
5376 Entity entity(this);
5377 EntityLoc &nent = entity.ent;
5379 if( entityNmIndex->Find(&name[0],&nid) != 0 ) {
5380 int nidx1 = dent->nidxs-1;
5381 int sz = sizeof(EntityObj) + sizeof(dent->indexs[0])*nidx1;
5383 if_err( allocate(dent->id, sz, nent.obj) );
5384 if( !nent.addr_wr() ) Err(errNoMemory);
5386 new((EntityObj *)nent.addr())
5387 EntityObj(*(EntityObj*)dent.addr(),eid);
5388 if_err( entityIdIndex->Insert(&eid,&nent.obj) );
5389 if_err( entityNmIndex->Insert(&name[0],&eid) );
5391 else if( nid == eid )
5392 if_err( entityIdIndex->Find(&eid,&nent.obj) );
5395 ObjectLoc objn(&entity), *nobj = &objn;
5396 // if new db copy fn, use it instead of ObjectLoc::copy
5397 if( op->obj->entity->db == this ) nobj = op->obj;
5399 if( !(status = dobj->FirstId(cur)) ) do {
5401 if_err( nobj->copy(*dobj) );
5403 if_err( entity.construct_(*nobj, dobj->id()) );
5404 } while( !(status=dobj->NextId(cur)) );
5405 if( status == errNotFound ) status = 0;
5407 if_err( db->flush() );
5408 if_err( commit(-1) );
5410 } while( !(status=eidx->Next(&eid,&loc)) );
5411 if( status == errNotFound ) status = 0;
5413 if_err( db->flush() );
5414 if_err( commit(-1) );
5419 cmprFrSt(char *a, char *b)
5421 freeStoreRecord *ap = (freeStoreRecord *)a;
5422 freeStoreRecord *bp = (freeStoreRecord *)b;
5423 if( ap->size > bp->size ) return 1;
5424 if( ap->size < bp->size ) return -1;
5425 if( ap->io_addr > bp->io_addr ) return 1;
5426 if( ap->io_addr < bp->io_addr ) return -1;
5431 cmprAdSt(char *a, char *b)
5433 addrStoreRecord *ap = (addrStoreRecord *)a;
5434 addrStoreRecord *bp = (addrStoreRecord *)b;
5435 if( ap->io_addr > bp->io_addr ) return 1;
5436 if( ap->io_addr < bp->io_addr ) return -1;
5437 if( ap->size > bp->size ) return 1;
5438 if( ap->size < bp->size ) return -1;
5443 cmprFrSp(char *a, char *b)
5445 freeSpaceRecord *ap = (freeSpaceRecord *)a;
5446 freeSpaceRecord *bp = (freeSpaceRecord *)b;
5447 if( ap->size > bp->size ) return 1;
5448 if( ap->size < bp->size ) return -1;
5449 if( ap->id > bp->id ) return 1;
5450 if( ap->id < bp->id ) return -1;
5451 if( ap->offset > bp->offset ) return 1;
5452 if( ap->offset < bp->offset ) return -1;
5457 cmprAdSp(char *a, char *b)
5459 addrSpaceRecord *ap = (addrSpaceRecord *)a;
5460 addrSpaceRecord *bp = (addrSpaceRecord *)b;
5461 if( ap->id > bp->id ) return 1;
5462 if( ap->id < bp->id ) return -1;
5463 if( ap->offset > bp->offset ) return 1;
5464 if( ap->offset < bp->offset ) return -1;
5465 if( ap->size > bp->size ) return 1;
5466 if( ap->size < bp->size ) return -1;
5471 cmprOIds(char *a, char *b)
5475 if( ap->id > bp->id ) return 1;
5476 if( ap->id < bp->id ) return -1;
5481 cmprStr(char *a, char *b)
5483 return strncmp(a,b,keysz);
5487 cmprKey(char *a, char *b)
5490 return kp->cmpr(a,b);
5494 cmprLast(char *a, char *b)
5499 Db::CmprFn Db::cmprFns[] = {
5508 findCmprFn(CmprFn fn)
5511 for( i=lengthof(cmprFns); --i>0; )
5512 if( fn == cmprFns[i] ) return i;
5519 if_err( new_binary_index("freeStoreIndex", sizeof(freeStoreRecord), 0, cmprFrSt) );
5520 if_err( new_binary_index("addrStoreIndex", sizeof(addrStoreRecord), 0, cmprAdSt) );
5521 if_err( new_binary_index("freeSpaceIndex", sizeof(freeSpaceRecord), 0, cmprFrSp) );
5522 if_err( new_binary_index("addrSpaceIndex", sizeof(addrSpaceRecord), 0, cmprAdSp) );
5523 if_err( new_binary_index("entityIdIndex", sizeof(int), sizeof(pgRef), cmprOIds) );
5524 if_err( new_binary_index("entityNmIndex", sizeof(char[nmSz]), sizeof(int), cmprStr) );
5533 FILE *fp = fopen("/proc/sys/kernel/shmall","w");
5534 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5535 fp = fopen("/proc/sys/kernel/shmmax","w");
5536 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5537 fp = fopen("/proc/sys/kernel/shmmni","w");
5538 if( fp ) { fprintf(fp,"%d",0x7fffffff); fclose(fp); }
5544 root_magic = Db::root_magic;
5545 root_info_size = sizeof(RootInfo);
5548 root_info_addr = NIL;
5549 last_info_addr = NIL;
5565 storeBlockSize = defaultStoreBlockSize;
5566 entityPageSize = defaultEntityPageSize;
5567 pageTableHunkSize = defaultPageTableHunkSize;
5568 indexTableHunkSize = defaultIndexTableHunkSize;
5570 root_info_id = -1; root_info = 0;
5571 index_info_id = -1; index_info = 0;
5572 indecies = 0; indeciesAllocated = 0; indecies_sz = 0;
5573 page_info_id = -1; page_info = 0;
5574 pageTable = 0; pageTableAllocated = 0; page_table_sz = 0;
5578 bfr = lmt = inp = 0;
5579 active_transaction = -1;
5586 for( int idx=indecies_sz; --idx>=0; ) delete indecies[idx];
5587 delete [] indecies; indecies = 0;
5589 del_uint8_t(index_info); index_info = 0;
5590 indeciesAllocated = 0; indecies_sz = 0;
5594 for( pageId id=0; id<page_table_sz; ++id ) {
5595 Page *pp = get_Page(id); pageDealloc(*pp); delete pp;
5597 delete [] pageTable; pageTable = 0;
5599 del_uint8_t(page_info); page_info = 0;
5600 pageTableAllocated = 0; page_table_sz = 0;
5603 del_uint8_t(root_info); root_info = 0;
5610 no_shm = defaultNoShm;
5611 get_mem = !no_shm ? &get_shm8_t : &get_mem8_t;
5612 new_mem = !no_shm ? &new_shm8_t : &new_mem8_t;
5613 del_mem = !no_shm ? &del_shm8_t : &del_mem8_t;
5614 root_info_id = index_info_id = page_info_id = -1;
5636 if( error() < 0 ) Fail(errPrevious); \
5637 if( r < 0 || r >= root_info->indeciesUsed || !indecies[r] ) Err(errInvalid); \
5638 return indecies[r]->fn;
5641 ins(int r, void *key, void *data)
5643 Run(r,Insert(key,data));
5647 del(int r, void *key)
5653 find(int r, void *key, void *rtnData)
5655 Run(r,Find(key,rtnData));
5659 locate(int r, int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData)
5661 Run(r,Locate(op,key,cmpr,rtnKey,rtnData));
5665 first(int r, void *key, void *rtnData)
5667 Run(r,First(key,rtnData));
5671 last(int r, void *key, void *rtnData)
5673 Run(r,Last(key,rtnData));
5677 next(int r, void *key, void *rtnData)
5679 Run(r,Next(key,rtnData));
5683 nextloc(int r, pgRef &loc)
5685 Run(r,NextLoc(loc));
5692 allocate(ObjectLoc &loc, int sz)
5694 if( loc.entity != this ) Err(errObjEntity);
5696 int n = ent->recdSz + sz;
5697 if_err( db->allocate(id, n, loc.obj, ent->alloc_cache) );
5698 Obj *op = loc.addr();
5700 ent._wr(); loc._wr();
5702 op->id = ent->maxId;
5708 construct_(ObjectLoc &loc, int id)
5710 int idx = ent->indexs[idxId];
5711 loc._wr(); loc->id = id;
5712 if_err( db->indecies[idx]->Insert(&id,&loc.obj) );
5713 ent._wr(); ++ent->count;
5714 if( id >= ent->maxId ) ent->maxId = id+1;
5720 destruct_(ObjectLoc &loc, int id)
5722 if_err( index(idxId)->Delete(&id) );
5723 ent._wr(); --ent->count;
5724 if( id+1 == ent->maxId ) {
5725 if( ent->count > 0 ) {
5726 if_err( index(idxId)->Last(&id,0) );
5738 deallocate(ObjectLoc &loc)
5740 if_err( db->deallocate(loc.obj) );
5745 new_entity_(Entity &entity, const char *nm, int sz)
5748 EntityLoc &ent = entity.ent;
5749 // construct EntityObj
5750 if( root_info->entity_max_id >= max_entity_type ) Err(errLimit);
5751 if_err( allocate(root_info->entity_max_id+1, sizeof(EntityObj), ent.obj) );
5752 int id = root_info->entity_max_id;
5753 ent._wr(); ent->id = id;
5754 char name[nmSz]; memset(&name[0],0,sizeof(name));
5755 strncpy(name,nm,sizeof(name)-1);
5756 memmove(&ent->name[0],name,sizeof(name));
5757 ent->alloc_cache.init();
5761 // add to entity indecies
5762 if_err( entityIdIndex->Insert(&id,&ent.obj) );
5763 if_err( entityNmIndex->Insert(&name[0],&id) );
5764 // create entity id/loc
5765 int idx = new_binary_index(nm, sizeof(int),sizeof(pgRef),cmprOIds);
5767 ent->indexs[idxId] = idx;
5769 ++root_info->entity_count;
5770 ++root_info->entity_max_id;
5771 entity.rw_lock = &db_info->rw_locks[id];
5776 del_entity(Entity &entity)
5778 EntityLoc &ent = entity.ent;
5779 if( ent.obj.id >= 0 ) {
5781 ObjectLoc loc(&entity);
5782 int status = loc.FirstId();
5785 deallocate(loc.obj);
5786 } while( !(status=loc.NextId()) );
5787 if( status != errNotFound )
5789 for( int i=ent->nidxs; --i>=0; ) entity.del_index_(i);
5791 entityIdIndex->Delete(&id);
5792 entityNmIndex->Delete(&ent->name[0]);
5793 if_err( deallocate(ent.obj) );
5795 --root_info->entity_count;
5796 if( id+1 == root_info->entity_max_id ) {
5797 if( root_info->entity_count > 0 ) {
5798 if_err( entityIdIndex->Last(&id,0) );
5803 root_info->entity_max_id = id;
5810 new_entity(Entity &entity, const char *nm, int sz)
5812 int ret = new_entity_(entity, nm, sz);
5813 if( ret ) del_entity(entity);
5818 get_entity(Entity &entity, const char *nm)
5820 EntityLoc &ent = entity.ent;
5821 char name[nmSz]; memset(&name[0],0,sizeof(name));
5822 strncpy(name,nm,sizeof(name)-1); int id;
5823 if_fail( entityNmIndex->Find(&name[0], &id) );
5824 if_err( entityIdIndex->Find(&id, &ent.obj) );
5825 entity.rw_lock = &db_info->rw_locks[id];
5830 get_index(const char *nm, CmprFn cmpr)
5832 int idx = ent->nidxs;
5833 while( --idx >= 0 ) {
5834 int i = ent->indexs[idx];
5835 if( i < 0 ) continue;
5836 IndexBase *ib = db->indecies[i];
5837 if( ib && !strncmp(&ib->st->name[0],nm,nmSz) ) {
5838 if( cmpr && ib->st->type == idxBin ) {
5839 IndexBinary *bidx = (IndexBinary *)ib;
5840 bidx->compare = cmpr;
5841 bidx->bst->cmprId = db->findCmprFn(cmpr);
5846 if( idx < 0 ) Fail(errNotFound);
5853 EntityLoc nent(this);
5854 // construct EntityObj
5855 int nidx = ent->nidxs;
5856 int sz = sizeof(EntityObj) + sizeof(ent->indexs[0])*nidx;
5857 if_err( db->allocate(ent->id, sz, nent.obj) );
5858 nent._wr(); nent->id = ent->id;
5859 memmove(&nent->name[0],&ent->name[0],nmSz);
5860 nent->alloc_cache = ent->alloc_cache;
5861 nent->maxId = ent->maxId;
5862 nent->recdSz = ent->recdSz;
5863 nent->count = ent->count;
5864 // add to entity indecies
5865 if_err( db->entityIdIndex->Modify(&ent->id,&nent.obj) );
5866 for( int i=0; i<nidx; ++i )
5867 nent->indexs[i] = ent->indexs[i];
5869 nent->indexs[nidx] = idx;
5870 nent->nidxs = nidx+1;
5871 if_err( db->deallocate(ent.obj) );
5879 if( i <= idxId ) Fail(errInvalid);
5880 if( i >= ent->nidxs ) Fail(errInvalid);
5881 return del_index_(i);
5887 int idx = ent->indexs[i];
5888 if( idx < 0 ) Fail(errNotFound);
5889 if( idx >= db->root_info->indeciesUsed ) Fail(errNotFound);
5890 if( !db->indecies[idx] ) Fail(errNotFound);
5891 ent._wr(); ent->indexs[i] = -1;
5892 db->indecies[idx]->Clear();
5898 finit(Objects objects)
5901 Objects op = objects; objects = op->next;
5902 Entity *ep = op->obj->entity;
5903 for( varObjs nxt=0, vp=ep->vobjs; vp!=0; vp=nxt ) {
5904 nxt = vp->next; delete vp;
5910 void Db::ObjectLoc::
5913 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
5915 (op->*(vp->ref)).init();
5919 void Db::ObjectLoc::
5922 for( varObjs vp = entity->vobjs; vp!=0; vp=vp->next ) {
5924 (op->*(vp->ref)).del(entity->db);
5930 size(varObj &vobj, int sz)
5932 if( vobj.len != sz ) {
5933 AllocCache &cache = entity->ent->alloc_cache;
5934 if_ret( entity->db->reallocate(sz,vobj.loc,cache) );
5935 vobj.len = sz; entity->ent._wr(); _wr();
5940 // get last index id on member accessed with ip
5942 last(const char *nm,int (ObjectLoc::*ip)())
5944 int idx; if_ret( idx = entity->get_index(nm) );
5945 return last(idx, ip);
5949 last(int idx,int (ObjectLoc::*ip)())
5951 ObjectLoc last_loc(*this);
5952 int id, ret = entity->index(idx)->Last(0,&id);
5953 if( ret < 0 ) return ret == errNotFound ? 0 : ret;
5954 if_ret( entity->index(idxId)->Find((void*)&id, &last_loc.obj) );
5955 return (last_loc.*ip)();
5958 // get last index unsigned id on member accessed with ip
5959 unsigned int Db::ObjectLoc::
5960 last(const char *nm,unsigned int (ObjectLoc::*ip)())
5962 int idx; if_ret( idx = entity->get_index(nm) );
5963 return last(idx, ip);
5966 unsigned int Db::ObjectLoc::
5967 last(int idx,unsigned int (ObjectLoc::*ip)())
5969 ObjectLoc last_loc(*this);
5970 int id, ret = entity->index(idx)->Last(0,&id);
5971 if( ret < 0 ) return ret == errNotFound ? 0 : ret;
5972 if_ret( entity->index(idxId)->Find((void*)&id, &last_loc.obj) );
5973 return (last_loc.*ip)();
5976 #define cmpr_type(nm,ty) int Db::ObjectLoc:: \
5977 nm(const ty *ap, int asz, const ty *bp, int bsz) { \
5978 int n = asz < bsz ? asz : bsz; \
5980 if( *ap > *bp ) return 1; \
5981 if( *ap < *bp ) return -1; \
5982 ++ap; ++bp; n -= sizeof(ty); \
5984 if( asz > bsz ) return 1; \
5985 if( asz < bsz ) return -1; \
5989 cmpr_type(cmpr_char, char)
5990 cmpr_type(cmpr_uchar, unsigned char)
5991 cmpr_type(cmpr_short, short)
5992 cmpr_type(cmpr_ushort, unsigned short)
5993 cmpr_type(cmpr_int, int)
5994 cmpr_type(cmpr_uint, unsigned int)
5995 cmpr_type(cmpr_long, long)
5996 cmpr_type(cmpr_ulong, unsigned long)
5997 cmpr_type(cmpr_float, float)
5998 cmpr_type(cmpr_double, double)
6002 cmpr_media(const unsigned char *ap, int asz, const unsigned char *bp, int bsz)
6004 return mediaCmpr((uint8_t *)ap).cmpr((uint8_t *)bp);
6011 if( !idx ) Err(errInvalid);
6012 int id; if_fail( idx->Find(*this, &id) );
6013 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6020 if( !idx ) Err(errInvalid);
6021 int id; if_fail( idx->Locate(op, *this, 0, &id) );
6022 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6029 if( !idx ) Err(errInvalid);
6030 int id; if_fail( idx->First(0,&id) );
6031 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6038 if( !idx ) Err(errInvalid);
6039 int id; if_fail( idx->Last(0,&id) );
6040 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6047 if( !idx ) Err(errInvalid);
6048 int id; if_fail( idx->Next(this,&id) );
6049 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6056 if( !idx ) Err(errInvalid);
6057 int id; if_fail( idx->First(pos,0,&id) );
6058 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6065 if( !idx ) Err(errInvalid);
6066 int id; if_fail( idx->Next(pos,this,&id) );
6067 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );
6074 if( !idx ) Err(errInvalid);
6075 int id; if_fail( idx->Locate(op, *this, 0, &id) );
6076 if_err( loc.entity->index(idxId)->Find(&id, &loc.obj) );