9 #define freeStoreIndex indecies[freeStoreIdx]
10 #define addrStoreIndex indecies[addrStoreIdx]
11 #define freeSpaceIndex indecies[freeSpaceIdx]
12 #define addrSpaceIndex indecies[addrSpaceIdx]
13 #define entityIdIndex indecies[entityIdIdx]
14 #define entityNmIndex indecies[entityNmIdx]
16 #define noThrow std::nothrow
/* Branch-prediction hints.  A compile-time constant condition is passed
 * through as a plain boolean; any other condition is handed to
 * __builtin_expect with the expected outcome (1 for likely, 0 for unlikely). */
#define likely(x) (__builtin_constant_p(x) ? !!(x) : __builtin_expect(!!(x), 1))
#define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __builtin_expect(!!(x), 0))
/* Element count of a true array (not a pointer), as an int.  The argument is
 * parenthesized before it is subscripted so that a cast or other
 * low-precedence expression is measured as a whole instead of having the
 * [0] bind to only part of it. */
#define lengthof(x) ((int)(sizeof(x)/sizeof((x)[0])))
/* Global allocation operators: every block handed out by new / new[] is
 * zero-filled, and storage goes back to the C heap through free(). */

/* Zero-filled scalar allocation.  calloc() does the clearing, so a failed
 * allocation yields a null pointer instead of a write through it; a
 * zero-byte request is rounded up to one byte so it still gets storage. */
inline void *operator new(size_t n) { return calloc(1, n ? n : 1); }
inline void operator delete(void *t) { free(t); }
/* Sized form: free() does not need the size, so it is ignored. */
inline void operator delete(void *t,size_t) { free(t); }
/* Array forms mirror the scalar ones. */
inline void *operator new[](size_t n) { return calloc(1, n ? n : 1); }
inline void operator delete[](void *t) { free(t); }
inline void operator delete[](void *t,size_t) { free(t); }
35 #include <linux/futex.h>
36 #include <sys/syscall.h>
44 class nm##Obj : public Db::Obj { public:
47 class nm##Loc : public Db::ObjectLoc { public: \
48 nm##Obj *operator ->() { return (nm##Obj *)addr(); } \
49 nm##Loc(Db::Entity *ep) : Db::ObjectLoc(ep) {} \
50 nm##Loc(Db::Entity &e) : Db::ObjectLoc(&e) {}
53 #define basic_def(ty,n) class t_##n { public: ty v; t_##n() {} \
54 t_##n(const ty &i) : v(i) {} \
55 t_##n(const t_##n &i) : v(i.v) {} \
56 t_##n &operator =(const t_##n &i) { v = i.v; return *this; } \
57 ty &operator =(const ty &i) { return v = i; } \
58 ty *addr() { return &v; } int size() { return sizeof(v); } \
62 #define array_def(ty,n,l) class t_##n { public: ty v[l]; t_##n() {} \
63 t_##n(const t_##n &i) { memcpy(&v,&i.v,sizeof(v)); } \
64 t_##n(ty *i) { memcpy(&v,i,sizeof(v)); } \
65 t_##n(ty(*i)[l]) { memcpy(&v,i,sizeof(v)); } \
66 ty *operator =(const ty *i) { memcpy(&v,i,sizeof(v)); return &v[0]; } \
67 ty *addr() { return &v[0]; } int size() { return sizeof(v); } \
70 // variable array definitions
71 #define varray_def(ty,n) \
72 class t_##n { public: char *v; int l; t_##n() {} \
73 t_##n(const char *i, int sz) { v = (char *)i; l = sz; } \
74 t_##n(const unsigned char *i, int sz) { v = (char *)i; l = sz; } \
75 ty *addr() { return (ty *)v; } int size() { return l; } \
78 // string array definitions
79 #define sarray_def(ty,n) \
80 class t_##n { public: char *v; int l; t_##n() {} \
81 t_##n(const char *i, int sz) { v = (char *)i; l = sz; } \
82 t_##n(const unsigned char *i, int sz) { v = (char *)i; l = sz; } \
83 t_##n(const char *i) { t_##n(i,strlen(i)+1); } \
84 t_##n(const unsigned char *i) { t_##n(i,strlen(v)+1); } \
85 ty *addr() { return (ty *)v; } int size() { return l; } \
89 #define basic_ref(ty,n) \
90 ty *_##n() { return (*this)->v_##n.addr(); } \
91 ty n() { return *_##n(); } \
92 void n(ty i) { _wr(); *_##n() = i; } \
93 int size_##n() { return (*this)->v_##n.size(); } \
96 #define array_ref(ty,n,l) \
97 ty *_##n() { return (*this)->v_##n.addr(); } \
98 ty (&n())[l] { return *(ty (*)[l])_##n(); } \
99 void n(const ty *i,int m) { _wr(); if( m > 0 ) memcpy(n(),i,m); } \
100 void n(const ty *i) { n(i,(*this)->v_##n.size()); } \
101 int size_##n() { return (*this)->v_##n.size(); } \
103 //variable array type ref
104 #define varray_ref(ty,n) \
105 ty *_##n() { return (ty *)addr((*this)->v_##n); } \
106 ty *_##n(int sz) { size((*this)->v_##n, sz); \
107 return sz > 0 ? (ty *)addr_wr((*this)->v_##n) : 0; } \
108 ty (&n())[] { return *(ty (*)[])_##n(); } \
109 int n(const ty *v, int sz) { ty *vp=_##n(sz); \
110 if( vp && sz > 0 ) memcpy(vp, v, sz); return 0; } \
111 int size_##n() { return (*this)->v_##n.size(); } \
113 //string array type ref
114 #define sarray_ref(ty,n) \
115 ty *_##n() { return (ty *)addr((*this)->v_##n); } \
116 ty *_##n(int sz) { size((*this)->v_##n, sz); \
117 return sz > 0 ? (ty *)addr_wr((*this)->v_##n) : 0; } \
118 ty (&n())[] { return *(ty (*)[])_##n(); } \
119 int n(const ty *v, int sz) { ty *vp=_##n(sz); \
120 if( vp && sz > 0 ) memcpy(vp, v, sz); return 0; } \
121 int n(const char *v) { return n((ty *)v,strlen(v)+1); } \
122 int n(const unsigned char *v) { return n((const char *)v); } \
123 int size_##n() { return (*this)->v_##n.size(); } \
128 #define DEBUG_TIMESTAMPS
129 #define DBBUG_ERR 0x00000001
130 #define DBBUG_FAIL 0x00000002
132 //#define CHK 1 ? 0 :
149 int root_magic; // info_magic label
150 int root_info_size; // root_info blob size
154 ioAddr root_info_addr;
155 ioAddr last_info_addr;
156 transId transaction_id; // current transaction
157 ioAddr file_size; // current file size
158 pageId freePages; // free page table page list
159 int indeciesUsed; // number of active indecies
160 int pageTableUsed; // number of active pages
165 int shm_init, no_shm;
167 static void *get_mem8_t(int id);
168 static void *new_mem8_t(int size, int &id);
169 static int del_mem8_t(const void *vp, int id);
170 static void *get_shm8_t(int id);
171 static void *new_shm8_t(int size, int &id);
172 static int del_shm8_t(const void *vp, int id);
173 void *(*get_mem)(int id);
174 void *(*new_mem)(int size, int &id);
175 int (*del_mem)(const void *vp, int id);
178 uint8_t *get_uint8_t(int id, int pg=-1);
179 uint8_t *new_uint8_t(int size, int &id, int pg=-1);
180 int del_uint8_t(const void *vp, int id=-1, int pg=-1);
183 typedef int (*CmprFn)(char *,char *);
184 static int cmprFrSt(char *a, char *b);
185 static int cmprAdSt(char *a, char *b);
186 static int cmprFrSp(char *a, char *b);
187 static int cmprAdSp(char *a, char *b);
188 static int cmprOIds(char *a, char *b);
189 static int cmprStr(char *a, char *b);
190 static int cmprKey(char *a, char *b);
191 static int cmprLast(char *a, char *b);
192 static CmprFn cmprFns[];
193 typedef void (*errCallback)(Db *db, int v);
194 static const pageId NIL=-1, DDONE=-2;
212 idxId = 0, nmSz = 32,
214 keyLT=-2, keyLE=-1, keyEQ=0, keyGE=1, keyGT=2,
223 static void zincr(volatile int &v) { /* atomic(++v) */
224 asm ( " lock incl %1\n" : "+m" (v) :: );
226 static void zdecr(volatile int &v) { /* atomic(--v) */
227 asm ( " lock decl %1\n" : "+m" (v) :: );
229 static char tdecr(volatile int &v) {
230 char ret; /* ret = atomic(--loc >= 0 ? 1 : 0) */
231 asm ( " lock decl %1\n setge %0\n" : "=r" (ret), "+m" (v) :: );
234 static char tincr(volatile int &v) {
235 char ret; /* ret = atomic(++loc > 0 ? 1 : 0) */
236 asm ( " lock incl %1\n setg %0\n" : "=r" (ret), "+m" (v) :: );
239 static int zcmpxchg(int old, int val, volatile int &v) {
241 asm volatile( " lock\n cmpxchgl %2,%1\n"
242 : "+a" (ret), "+m" (v) : "r" (val) : "memory" );
245 static int zxchg(int val, volatile int &v) {
246 asm volatile( " xchgl %0,%1\n"
247 : "+r" (val), "+m" (v) :: "memory" );
250 static int zadd(int n, volatile int &v) {
252 do { val = (old=v)+n; mod = zcmpxchg(old,val,v);
253 } while( mod != old );
256 static void zmfence() {
257 asm volatile ( " mfence\n" ::: "memory" );
265 #define ZLOCK_INIT zzlock_t()
269 int zfutex(int op, int val, timespec *time=0) {
270 return syscall(SYS_futex,&loc,op,val,time,0,0);
275 int zwake(int nwakeups);
276 int zwait(int val, timespec *ts=0);
277 int zwait() { return zwait(loc); }
278 zloc_t() : loc(-1) {}
282 class zlock_t : zloc_t {
286 void *vp; asm ("movq %%fs:%c1,%q0" : "=r" (vp) : "i" (16));
288 void *vp; asm ("mov %%fs:%c1,%q0" : "=r" (vp) : "i" (16));
293 friend class zblock_t;
294 friend class zrwlock_t;
296 int zunlock(int nwakeups=1);
300 int v, ret = unlikely( (v=zcmpxchg(-1,0,loc)) >= 0 ) ? zlock(v) : 0;
305 if( unlikely(loc < 0) ) { return zemsg1(); }
307 int v, ret = unlikely( (v=zcmpxchg(0,-1,loc)) != 0 ) ? zunlock() : 0;
310 zlock_t() { owner = 0; }
314 class zblock_t : zlock_t {
316 void block() { loc = 0; zwait(0); }
317 void unblock() { loc = -1; zwake(INT_MAX); }
322 class zrwlock_t : zloc_t {
327 void enter() { zincr(loc); if( unlikely( lk.loc >= 0 ) ) zenter(); }
328 void leave() { if( unlikely( !tdecr(loc) ) ) zleave(); }
331 int locked() { return loc >= 0 ? 0 : lk.loc >= 0 ? 1 : -1; }
332 int blocked() { return lk.loc >= 0 ? 1 : 0; }
338 #define ZLOCK_INIT { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER }
342 pthread_mutex_t zlock;
344 void lock() { pthread_mutex_lock(&zlock); }
345 void unlock() { pthread_mutex_unlock(&zlock); }
346 zlock_t() { pthread_mutex_init(&zlock, 0); }
347 ~zlock_t() { pthread_mutex_destroy(&zlock); }
351 pthread_mutex_t zblock;
355 pthread_mutex_init(&zblock, 0);
356 pthread_cond_init(&cond, 0);
359 pthread_mutex_destroy(&zblock);
360 pthread_cond_destroy(&cond);
363 pthread_mutex_lock(&zblock);
364 pthread_cond_wait(&cond, &zblock);
365 pthread_mutex_unlock(&zblock);
367 void unblock() { pthread_cond_broadcast(&cond); }
370 class zrwlock_t : zlock_t {
371 volatile int blocking, users;
374 void wait() { pthread_cond_wait(&cond, &zlock); }
375 void wake() { pthread_cond_signal(&cond); }
377 void enter() { lock();
378 while( blocking ) { unlock(); lk.lock(); lk.unlock(); lock(); }
381 void leave() { lock(); if( !--users && blocking ) wake(); unlock(); }
382 void write_enter() { lk.lock(); blocking = 1;
383 lock(); while( users ) wait(); unlock();
385 void write_leave() { blocking = 0; lk.unlock(); }
386 int count() { return users; }
387 int locked() { return users ? 0 : blocking ? 1 : -1; }
388 int blocked() { return blocking; }
390 zrwlock_t() { pthread_cond_init(&cond, 0); users = 0; blocking = 0; }
391 ~zrwlock_t() { pthread_cond_destroy(&cond); }
399 locked(zlock_t &l) : lk(l) { lk.lock(); }
400 ~locked() { lk.unlock(); }
406 read_locked(zrwlock_t &l) : rwlk(l) { rwlk.enter(); }
407 ~read_locked() { rwlk.leave(); }
410 class write_blocked {
413 write_blocked(zrwlock_t &l) : rwlk(l) { rwlk.write_enter(); }
414 ~write_blocked() { rwlk.write_leave(); }
420 db_magic=0x00624474, // tDb
421 idx_magic=0x00786469, // idx
422 info_magic=0x6f666e69, // info
423 root_magic=0x746f6f72, // root
424 page_magic=0x6770, // pg
425 entity_magic = 0x6d65, // em
427 root_info_extra_pages = 2,
428 idxNil=0, idxBin=1, idxStr=2,
429 opDelete=-1, opFind=0, opInsert=1,
430 pg_unknown=0, pg_root_info=1, pg_free=2,
431 pg_entity=0x0100, pg_index=0x1000,
432 max_entity_type = pg_index-pg_entity-1,
433 max_index_type = 0x10000-pg_index-1,
434 min_heap_allocation = 32,
435 freeStoreIdx = 0, addrStoreIdx = 1,
436 freeSpaceIdx = 2, addrSpaceIdx = 3,
437 entityIdIdx = 4, entityNmIdx = 5, usrIdx = 6,
438 fl_wr=1, fl_rd=2, fl_new=4, fl_free=8,
440 defaultStoreBlockSize = 8192,
441 defaultPageTableHunkSize = 8192,
442 defaultIndexTableHunkSize = 4096,
443 defaultBinaryBlockSize = 16384,
444 defaultStringBlockSize = 4096,
445 defaultEntityPageSize = 65536,
450 errCallback err_callback;
452 transId active_transaction;
456 static const char *errMsgs[];
457 static void dmsg(int msk, const char *msg,...);
458 int _err_(int v,const char *fn,int ln);
459 int _fail_(int v,const char *fn,int ln);
460 #define err_(v) _err_(v,__func__,__LINE__)
461 #define fail_(v) _fail_(v,__func__,__LINE__)
463 static void dmsg(int msk, const char *msg,...) {}
464 int _err_(int v) { error(v); return v; }
465 int _fail_(int v) { return v; }
466 #define err_(v) _err_(v)
467 #define fail_(v) _fail_(v)
470 #define Err(v) return err_(v)
471 #define Fail(v) return fail_(v)
473 #define if_ret(fn) do{ int _ret; \
474 if(unlikely((_ret=(fn))<0)) return _ret; \
476 #define if_err(fn) do{ int _ret; \
477 if(unlikely((_ret=(fn))<0)) Err(_ret); \
479 #define if_fail(fn) do{ int _ret; \
480 if(unlikely((_ret=(fn))<0)) Fail(_ret); \
486 int owner, last_owner;
487 int info_key, info_id;
491 zlock_t infoLk; // lock dbinfo up to here
492 zrwlock_t dbRwLk; // global lock
493 zrwlock_t pgTblLk;// pageTable realloc
494 zlock_t pgAlLk; // new page pagesUsed/pagesAllocated
495 zlock_t pgLdLk; // pageLoad
496 zlock_t blkAlLk; // blockAllocate/Free
497 zlock_t objAlLk; // objectAllocate/Free
498 zrwlock_t rw_locks[max_entity_type];
500 DbInfo(int pid, int key, int id);
502 int new_info(int key);
503 int get_info(int key);
508 int attach_rw(int zrw) { return zrw ? attach_wr() : attach_rd(); }
515 unsigned short magic;
520 Page *get_Page(pageId pid) volatile { return pageTable[pid]; }
521 void set_Page(pageId pid, Page *pp) { pageTable[pid] = pp; }
522 //Page *get_page(pageId pid) { blocked by(pgTblk); return get_Page(pid); }
523 Page *get_page(pageId pid); // locked pageTable access
525 static pageId getPageId(unsigned char *&bp) {
526 int i = sizeof(pageId); pageId id;
527 for( id = *bp++; --i > 0; id |= *bp++ ) id <<= 8;
530 static void putPageId(unsigned char *&bp, pageId id) {
531 int i = sizeof(pageId) * 8;
532 while( (i -= 8) >= 0 ) *bp++ = id >> i;
534 static pageId readPageId(char *cp) {
535 unsigned char *bp = (unsigned char *)cp;
536 return getPageId(bp);
538 static void writePageId(char *cp, pageId id) {
539 unsigned char *bp = (unsigned char *)cp;
543 class keyBlock : public pagePrefix {
544 char rightLink[sizeof(pageId)];
546 int right_link() { return readPageId(&rightLink[0]); }
547 void right_link(pageId id) { writePageId(&rightLink[0],id); }
550 static int defaultBlockSizes[];
552 class IndexTypeInfo {
555 int type; /* type of index */
556 char name[nmSz]; /* index string identifier */
559 class IndexBaseType : public IndexTypeInfo {
561 IndexBaseType(int typ);
564 class IndexRecdInfo {
566 int idx; /* index in db->indecies[] */
567 int keySz, dataSz; /* sizeof key/data fields in bytes */
568 pageId rootPageId; /* index root page ID */
569 pageId rightHandSide; /* the right hand side of the tree for this index */
570 pageId freeBlocks; /* free index page list */
571 unsigned int blockSize; /* size of new index blocks */
573 long count; /* index population count */
576 class IndexBaseRecd : public IndexRecdInfo {
578 IndexBaseRecd(int typ, int zidx, int ksz, int dsz);
581 class IndexBaseStorage;
582 class IndexBaseInfo : public IndexTypeInfo, public IndexRecdInfo {
584 operator IndexBaseStorage *() { return (IndexBaseStorage *)this; }
587 class IndexBaseStorage : public IndexBaseInfo {
589 IndexBaseStorage(int typ, int zidx, int ksz, int dsz);
590 IndexBaseStorage() {}
591 ~IndexBaseStorage() {}
595 IndexBaseStorage *st;
597 Db *db; /* owner db */
598 pgRef lastAccess, lastFind; /* last operational access/find location */
599 pgRef lastInsert, lastDelete; /* last operational insert/delete location */
600 pgRef lastNext; /* last operational next location */
601 int kdSz; /* keySz + dataSz */
602 int lastOp; /* last operation, delete=-1/find=0/insert=1 */
603 int cInsCount; /* number of consecutive insertions */
604 int cFindCount; /* number of consecutive finds */
605 int cDelCount; /* number of consecutive deletions */
608 virtual int keyMap(pageId s, int(IndexBase::*fn)(pageId id)) = 0;
609 virtual int keyCopy(pageId s, IndexBase *ib) = 0;
610 int blockAllocate(pageId &pid, keyBlock *&bp);
611 int blockAllocate(pageId &pid, keyBlock *&bp, Page *&pp, char *&cp) {
612 pp = 0; cp = 0; if_err( blockAllocate(pid, bp) );
613 pp = db->get_page(pid); cp = (char *)(bp + 1);
616 int blockFree(pageId pid);
617 int blockRelease(pageId pid);
618 int deleteFreeBlocks();
619 int chkLast(pgRef &last, int &count);
620 void chkLastInsert();
621 void chkLastDelete();
625 int _err_(int v,const char *fn,int ln) { return db->_err_(v,fn,ln); }
626 int _fail_(int v,const char *fn,int ln) { return db->_fail_(v,fn,ln); }
628 int _err_(int v) { return db->_err_(v); }
629 int _fail_(int v) { return db->_fail_(v); }
631 virtual int Locate(int op,void *key,CmprFn cmpr,void *rtnKey,void *rtnData) = 0;
632 int Locate(int op,void *key,void *rtnKey,void *rtnData) {
633 return Locate(op,key,0,rtnKey,rtnData);
635 virtual int Find(void *key,void *rtnData) = 0;
636 virtual int Insert(void *key,void *data) = 0;
637 virtual int Delete(void *key) = 0;
638 virtual int First(void *rtnKey,void *rtnData) = 0;
639 virtual int Last(void *rtnKey,void *rtnData) = 0;
640 virtual int Modify(void *key,void *recd) = 0;
641 virtual int Next(pgRef &loc,void *rtnKey,void *rtnData) = 0;
642 int First(pgRef &loc,void *rtnKey,void *rtnData) {
643 if_fail( First(rtnKey, rtnData) );
647 int Next(void *rtnKey,void *rtnData) {
648 if( lastNext.id < 0 ) Fail(errInvalid);
649 return Next(lastNext,rtnKey,rtnData);
651 long Count() { return st->count; }
655 int NextLoc(pgRef &loc) { loc = lastNext; return 0; }
656 IndexBase(Db *zdb, int typ, int zidx, int ksz, int dsz);
657 IndexBase(Db *zdb, IndexBaseStorage &d);
658 virtual ~IndexBase();
661 int indeciesAllocated, indecies_sz;
663 class IndexBinaryStorage;
664 class IndexBinaryInfo {
666 operator IndexBinaryStorage *() { return (IndexBinaryStorage *)this; }
670 class IndexBinaryStorage : public IndexBinaryInfo {
672 IndexBinaryStorage(int cmprId) { this->cmprId = cmprId; }
673 IndexBinaryStorage() {}
674 ~IndexBinaryStorage() {}
677 class IndexBinary : public IndexBase {
678 IndexBinaryStorage *bst;
680 CmprFn compare; /* the key compare function type */
681 int relationship; /* key relation in keyLT..keyGT */
682 char *key; /* pointer to key argument */
683 int keyInterior; /* last insert interior/exterior */
684 int idf; /* interior delete flag */
685 char *iky, *tky; /* search/promoted temp key storage */
688 int keyMap(pageId s, int(IndexBase::*fn)(pageId id));
689 int keyCopy(pageId s, IndexBase *ib);
690 int keyBlockUnderflow(int &t,keyBlock *lbb,pageId p,keyBlock *pbb,int pi);
691 void makeKey(char *cp,char *key,int l,char *recd,int n);
692 void setLastKey(pageId s,pageId u,int k);
693 int keyLocate(pageId s, CmprFn cmpr);
694 int chkNext(pgRef &loc, char *&kp);
695 int keyNext(pgRef &loc, char *kp);
696 int chkFind(char *key, pgRef *last);
697 int keyFind(pageId s);
698 int chkInsert(void *key,void *data);
699 int keyInsert(pageId s, pageId &t);
700 int keyDelete(int &t,void *kp,pageId s,pageId p,keyBlock *pbb,int pi);
701 int keyFirst(pageId s);
702 int keyLast(pageId s);
704 int Locate(int op,void *key,CmprFn cmpr,void *rtnKey,void *rtnData);
705 int Find(void *key,void *rtnData);
706 int Insert(void *key,void *data);
707 int Delete(void *key);
708 int First(void *rtnKey,void *rtnData);
709 int Last(void *rtnKey,void *rtnData);
710 int Modify(void *key,void *recd);
711 int Next(pgRef &loc,void *rtnKey,void *rtnData);
712 int Next(void *rtnKey,void *rtnData) {
713 return IndexBase::Next(rtnKey,rtnData);
716 char *ikey() { return iky; }
717 char *tkey() { return tky; }
719 IndexBinary(Db *zdb, int zidx, int ksz, int dsz, CmprFn cmpr);
720 IndexBinary(Db *zdb, IndexBaseStorage *b, IndexBinaryStorage *d);
721 IndexBinary(IndexBase *ib, IndexBaseStorage *b, IndexBinaryStorage *d);
724 friend class IndexBinary;
726 class IndexStringStorage;
727 class IndexStringInfo {
728 char dummy; // compiler needs this for some reason
730 operator IndexStringStorage *() { return (IndexStringStorage *)this; }
733 class IndexStringStorage : public IndexStringInfo {
735 IndexStringStorage() {}
736 ~IndexStringStorage() {}
739 class IndexString : public IndexBase {
740 IndexStringStorage *sst;
742 static int ustrcmp(unsigned char *a, unsigned char *b) {
743 return strncmp((char *)a,(char *)b,keysz);
745 static void ustrcpy(unsigned char *a, unsigned char *b) {
746 strncpy((char *)a,(char *)b,keysz);
748 static void umemmove(unsigned char *&a, unsigned char *b, int n) {
749 memmove(a,b,n); a += n;
751 static int kpress(unsigned char *kp, unsigned char *lp, unsigned char *cp);
752 int split(int n, int i, pageId s, pageId &l, pageId r);
754 int keyMap(pageId s, int(IndexBase::*fn)(pageId id));
755 int keyCopy(pageId s, IndexBase *ib);
756 int chkInsert(void *key,void *data);
757 int keyInsert(pageId &t, pageId s);
758 int keyFirst(pageId s);
759 int keyLast(pageId s);
760 int keyLocate(pageId s,int &t, CmprFn cmpr);
761 int chkFind(char *key, pgRef *last, unsigned char *lkey, unsigned char *lky=0);
763 int chkNext(pgRef &loc);
764 int keyNext(pgRef &loc);
765 int keyUnderflow(pageId s, pageId &t, int k);
766 int keyOverflow(pageId s, pageId &t, int k, int o);
767 int keyRemap(pageId s, pageId &t, int k, int o);
768 int keyDelete(pageId s, pageId &t);
770 unsigned char lastAccKey[keysz], lastFndKey[keysz];
771 unsigned char lastInsKey[keysz], lastDelKey[keysz];
772 unsigned char lastNxtKey[keysz];
773 unsigned char *tky, *dky; // dataSz+keysz+1
774 unsigned char *tbfr; // 3*allocated
775 unsigned char key[keysz]; // key in use
776 int idf; /* interior delete flag */
777 int relationship; /* key relation in keyLT..keyGT */
779 int Locate(int op,void *key,CmprFn cmpr,void *rtnKey,void *rtnData);
780 int Find(void *key,void *rtnData);
781 int Insert(void *key,void *data);
782 int Delete(void *key);
783 int First(void *rtnKey,void *rtnData);
784 int Last(void *rtnKey,void *rtnData);
785 int Modify(void *key,void *recd);
786 int Next(pgRef &loc,void *rtnKey,void *rtnData);
787 int Next(void *rtnKey,void *rtnData) {
788 return IndexBase::Next(rtnKey,rtnData);
792 IndexString(Db *zdb, int zidx, int dsz);
793 IndexString(Db *zdb, IndexBaseStorage *b, IndexStringStorage *d);
794 IndexString(IndexBase *ib, IndexBaseStorage *b, IndexStringStorage *d);
797 friend class IndexString;
799 class IndexBinaryData : public IndexBaseInfo, public IndexBinaryInfo {};
800 class IndexStringData : public IndexBaseInfo, public IndexStringInfo {};
807 IndexInfo *index_info; /* image for index storage */
811 unsigned short magic;
816 class freeStoreRecord {
822 class addrStoreRecord {
828 class freeSpaceRecord {
835 class addrSpaceRecord {
846 int cacheFlush(Db *db);
847 int Get(Db *db,int &size, pgRef &ref);
848 int Load(Db *db, pageId id, int ofs, int sz);
849 void init() { loc.id = NIL; loc.offset = 0; avail = 0; }
850 void init(pageId id, int ofs, int sz) {
851 loc.id = id; loc.offset = ofs; avail = sz;
855 return alloc_cache.cacheFlush(this);
857 int cache_all_flush();
862 unsigned int allocated;
863 unsigned short flags;
875 int chk_flags(int fl) { return flags & fl; }
876 int set_flags(int fl) { return flags |= fl; }
877 int clr_flags(int fl) { return flags &= ~fl; }
886 // PageStorage access
887 int iused() { return st->used-sizeof(keyBlock); }
888 void iused(int v) { st->used = v+sizeof(keyBlock); }
889 int iallocated() { return st->allocated-sizeof(keyBlock); }
890 void iallocated(int v) { st->allocated = v+sizeof(keyBlock); }
891 PageStorage *operator ->() { return st; }
893 Page(PageStorage &d) { st = &d; init(); }
897 int pageTableAllocated, page_table_sz;
901 int pageTableHunkSize;
902 int indexTableHunkSize;
911 int cfnAllocated, cfnUsed;
912 undoData() : cfn(0), cfnAllocated(0), cfnUsed(0) {}
913 ~undoData() { delete [] cfn; cfnAllocated = cfnUsed = 0; }
918 ioAddr file_position;
920 char *bfr, *lmt, *inp;
924 int write_bfr(char *dp, int sz);
930 inline static unsigned int on_bits(unsigned int n) {
931 n = (n & 0x55555555) + ((n >> 1) & 0x55555555);
932 n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
933 n = (n & 0x0f0f0f0f) + ((n >> 4) & 0x0f0f0f0f);
934 n += n >> 8; n += n >> 16; //ok, fldsz > 5 bits
939 inline static unsigned int low_bit(unsigned int n) {
943 // bit number of lowest on bit
944 inline static unsigned int low_bit_no(unsigned int n) {
945 return on_bits(low_bit(n) - 1);
948 // highest on bit, and all lower bits set
949 inline static unsigned int high_bit_mask(unsigned int n) {
950 n |= n >> 1; n |= n >> 2;
951 n |= n >> 4; n |= n >> 8;
957 inline static unsigned int high_bit(unsigned int n) {
958 unsigned m = high_bit_mask(n);
962 // bit number of highest on bit
963 inline static unsigned int high_bit_no(unsigned int n) {
964 return on_bits(high_bit_mask(n)) - 1;
968 static inline int cpu_aligned() {
970 asm volatile( "pushf\n" "pop %0\n" : "=rm" (flags) );
971 return (flags>>18) & 1;
977 uint64_t clip(int64_t v) { return v<0 ? 0 : (uint64_t)v>vmx ? vmx : v; }
979 enum { alignBits = 8, };
981 static void init(int v=-1) { aligned = v >= 0 ? v : cpu_aligned(); }
982 void put(uint64_t v, int n);
983 void putc(uint64_t v, int n) { put(clip(v), n); }
984 void iput(int v) { put(v, 8*sizeof(int)); }
985 void lput(int64_t v) { put(v, 8*sizeof(int64_t)); }
987 int iget() { return get(8*sizeof(int)); }
988 int64_t lget() { return get(8*sizeof(int64_t)); }
989 int pos() { return idx; }
990 void seek(int i) { idx = i; }
991 void init(uint8_t *bp) { bits = bp; idx = 0; vmx = 0; }
992 uint8_t *bfr() { return this->bits; }
993 void align() { idx = (idx+alignBits-1) & ~(alignBits-1); }
994 void *addr() { return &bits[idx/8]; }
995 void set_max(uint64_t v) { vmx = v; }
997 pack() : bits(0) { idx = 0; }
998 pack(uint8_t *bp) { init(bp); }
1008 static int64_t bit_size(int len, int w);
1009 static int64_t byte_size(int len, int w);
1010 static void build(uint8_t *kp, uint8_t *bp, int w, int len) {
1011 mediaKey key(kp, bp, w, len);
1013 static int64_t count(void *kp) {
1014 mediaKey *mkp = (mediaKey *)kp;
1015 return be64toh(mkp->cnt);
1017 static int64_t set_count(void *kp, int64_t v) {
1018 mediaKey *mkp = (mediaKey *)kp;
1019 return mkp->cnt = htobe64(v);
1021 static int64_t incr_count(void *kp, int64_t dv) {
1022 return set_count(kp, count(kp) + dv);
1024 static int64_t count1(uint8_t *kp);
1025 mediaKey(uint8_t *kp, uint8_t *bp, int w, int len);
1031 int w, len, dsz, psz, spos;
1032 int64_t cnt, *dat, **dp;
1033 void get_fields(int n, int k);
/* Adds up the successive rounded-up halvings of n for as long as the half is
 * greater than 1, then adds 1 for the final level. */
int dsize(int n) {
  int total = 0;
  for( int half = (n+1)/2; half > 1; half = (half+1)/2 )
    total += half;
  return total + 1;
}
1037 void load(uint8_t *kp);
1039 mediaLoad(uint8_t *bp);
1045 int w, len, dsz, psz, spos;
1046 int64_t acnt, bcnt, *adat, *bdat, **adp, **bdp;
/* Square of v as an unsigned 64-bit value.  The operand is converted to
 * unsigned before the multiply, so a square above INT64_MAX is computed
 * modulo 2^64 instead of overflowing a signed multiplication. */
uint64_t sqr(int64_t v) { uint64_t u = (uint64_t)v; return u*u; }
/* Total of the repeated rounded-up halves of n while each half exceeds 1,
 * plus 1 for the last level. */
int dsize(int n) {
  int sum = 1;
  int m = (n+1)/2;
  while( m > 1 ) {
    sum += m;
    m = (m+1)/2;
  }
  return sum;
}
1050 uint64_t chk_fields(int m, int k);
1051 int cmpr_fields(int m, int k);
1054 uint64_t chk(uint8_t *kp, uint64_t lmt=~0);
1055 int cmpr(uint8_t *kp, uint64_t lmt=~0);
1057 mediaCmpr(uint8_t *bp);
1064 typedef IndexBase *Index;
1065 int new_entity(Entity &entity, const char *nm, int sz);
1066 int get_entity(Entity &entity, const char *nm);
1067 int del_entity(Entity &entity);
1069 class Obj { /* per object storage base class */
1077 int size() { return len; }
1078 void init() { len = -1; loc.id = NIL; loc.offset = 0; }
1079 void del(Db *db) { len = -1; db->deallocate(loc); }
1081 typedef varObj Obj::*vRef;
1087 Obj *addr(pgRef &loc) {
1089 return loc.id < 0 || entity->db->addrRead(loc,op) ? 0 : (Obj *)op;
1091 Obj *addr_wr(pgRef &loc) {
1093 return loc.id < 0 || entity->db->addrWrite(loc,op) ? 0 : (Obj *)op;
1095 void _wr() { Page &pg = *entity->db->get_page(obj.id); pg->set_flags(fl_wr); }
1096 Obj *addr() { return addr(obj); }
1097 Obj *addr_wr() { return addr_wr(obj); }
1098 void *addr(varObj &vobj) { return addr(vobj.loc); }
1099 void *addr_wr(varObj &vobj) { return addr_wr(vobj.loc); }
1100 Obj *operator ->() { return (Obj *)addr(); }
1101 ObjectLoc(Entity *ep) : entity(ep) { obj.id = NIL; obj.offset = 0; }
1104 virtual int allocate(int sz=0) { return entity->allocate(*this,sz); }
1105 virtual int construct() { return entity->construct(*this); }
1106 virtual int destruct() { return entity->destruct(*this); }
1107 virtual int deallocate() { return entity->deallocate(*this); }
1108 virtual int insertCascade() { return 0; }
1109 virtual int insertProhibit() { return 0; }
1110 virtual int deleteCascade() { return 0; }
1111 virtual int deleteProhibit() { return 0; }
1112 virtual int modifyCascade() { return 0; }
1113 virtual int modifyProhibit() { return 0; }
1114 virtual int copy(ObjectLoc &dobj);
1115 int id() { ObjectLoc &oloc = *this; return oloc->id; }
1116 const int *_id() { ObjectLoc &oloc = *this; return &oloc->id; }
1117 int _id_size() { return sizeof(int); }
1118 int size(varObj &vobj) { return vobj.len; }
1119 int size(varObj &vobj, int sz);
1120 Index index(int i) { return entity->index(i); }
1124 int FindId(int id) { return index(idxId)->Find(&id,&obj); }
1125 int LocateId(int op, int id) { return index(idxId)->Locate(op,&id,0,&obj); }
1126 int FirstId() { return index(idxId)->First(0,&obj); }
1127 int LastId() { return index(idxId)->Last(0,&obj); }
1128 int NextId() { return index(idxId)->Next(0,&obj); }
1129 int FirstId(pgRef &loc) { return index(idxId)->First(loc,0,&obj); }
1130 int NextId(pgRef &loc) { return index(idxId)->Next(loc,0,&obj); }
1131 int NextLocId(pgRef &loc) { return index(idxId)->NextLoc(loc); }
1133 static int cmpr_char(const char *ap, int asz, const char *bp, int bsz);
1134 static int cmpr_uchar(const unsigned char *ap, int asz, const unsigned char *bp, int bsz);
1135 static int cmpr_short(const short *ap, int asz, const short *bp, int bsz);
1136 static int cmpr_ushort(const unsigned short *ap, int asz, const unsigned short *bp, int bsz);
1137 static int cmpr_int(const int *ap, int asz, const int *bp, int bsz);
1138 static int cmpr_uint(const unsigned int *ap, int asz, const unsigned int *bp, int bsz);
1139 static int cmpr_long(const long *ap, int asz, const long *bp, int bsz);
1140 static int cmpr_ulong(const unsigned long *ap, int asz, const unsigned long *bp, int bsz);
1141 static int cmpr_float(const float *ap, int asz, const float *bp, int bsz);
1142 static int cmpr_double(const double *ap, int asz, const double *bp, int bsz);
1144 static int cmpr_media(const unsigned char *ap, int asz, const unsigned char *bp, int bsz);
1148 int _err_(int v,const char *fn,int ln) { return entity->db->_err_(v,fn,ln); }
1149 int _fail_(int v,const char *fn,int ln) { return entity->db->_fail_(v,fn,ln); }
1151 int _err_(int v) { return entity->db->_err_(v); }
1152 int _fail_(int v) { return entity->db->_fail_(v); }
1155 int last(int idx,int (ObjectLoc::*ip)());
1156 int last(const char *nm,int (ObjectLoc::*ip)());
1157 unsigned int last(int idx,unsigned int (ObjectLoc::*ip)());
1158 unsigned int last(const char *nm,unsigned int (ObjectLoc::*ip)());
1166 Key(Index i, ObjectLoc &l, CmprFn c) : loc(l), idx(i), cmpr(c) {}
1167 Key(const char *nm, ObjectLoc &l, CmprFn c) : loc(l), cmpr(c) {
1168 idx = loc.entity->index(nm);
1170 operator void *() { return (void *)this; }
1172 int _err_(int v,const char *fn,int ln) { return loc._err_(v,fn,ln); }
1173 int _fail_(int v,const char *fn,int ln) { return loc._fail_(v,fn,ln); }
1175 int _err_(int v) { return loc._err_(v); }
1176 int _fail_(int v) { return loc._fail_(v); }
1180 class iKey : public Key {
1182 iKey(Index i, ObjectLoc &l, CmprFn c) : Key(i,l,c) {}
1183 iKey(const char *nm, ObjectLoc &l, CmprFn c) : Key(nm,l,c) {}
1184 int NextLoc(pgRef &pos) { return idx->NextLoc(pos); }
1186 int Locate(int op=keyGE);
1189 class rKey : public Key {
1191 rKey(Index i, ObjectLoc &l, CmprFn c) : Key(i,l,c) {}
1192 rKey(const char *nm, ObjectLoc &l, CmprFn c) : Key(nm,l,c) {}
1193 int NextLoc(pgRef &pos) { return idx->NextLoc(pos); }
1194 int First(); int First(pgRef &pos);
1195 int Next(); int Next(pgRef &pos);
1197 int Locate(int op=keyGE);
1201 class EntityObj : public Obj { /* entity storage */
1203 char name[nmSz]; /* string identifier */
1204 AllocCache alloc_cache; /* entity allocator cache */
1205 int maxId; /* highest ID value */
1206 int recdSz; /* record size in bytes */
1207 int count; /* number of records */
1208 int nidxs; /* index count */
1209 int indexs[1]; /* id/loc index */
1210 EntityObj(EntityObj &eobj, int eid);
1213 class EntityLoc : public ObjectLoc {
1215 EntityObj *operator ->() { return (EntityObj *)addr(); }
1216 EntityLoc(Entity *ep) : ObjectLoc(ep) {}
1219 EntityLoc &eloc = *this; _wr();
1220 return eloc->alloc_cache.cacheFlush(entity->db);
1225 typedef varObjRef *varObjs;
1230 varObjRef(varObjs &lp, vRef rp) : next(lp), ref(rp) {}
1241 operator zrwlock_t&() { return *rw_lock; }
1244 int _err_(int v,const char *fn,int ln) { return db->_err_(v,fn,ln); }
1245 int _fail_(int v,const char *fn,int ln) { return db->_fail_(v,fn,ln); }
1247 int _err_(int v) { return db->_err_(v); }
1248 int _fail_(int v) { return db->_fail_(v); }
1250 Entity(Db *const db) : db(db), ent(this), vobjs(0) {}
1253 int allocate(ObjectLoc &loc,int sz=0);
1254 int construct_(ObjectLoc &loc, int id);
1255 int construct(ObjectLoc &loc) { return construct_(loc,ent->maxId); }
1256 int destruct_(ObjectLoc &loc, int id);
1257 int destruct(ObjectLoc &loc) { return destruct_(loc, loc->id); }
// Declared only. get_index's compare function defaults to 0.
1258 int deallocate(ObjectLoc &loc);
1259 int get_index(const char *nm, CmprFn cmpr=0);
1260 int key_index(const char *nm) { return get_index(nm,Db::cmprKey); }
1261 Index index(int i) { return db->indecies[ent->indexs[i]]; }
// Look up an index via get_index(nm). A non-negative result is mapped through
// index(int); otherwise 0 is returned.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
1262 Index index(const char *nm) {
1263 int idx = get_index(nm);
1264 return idx >= 0 ? index(idx) : 0;
1266 int MaxId() { return ent->maxId; }
1267 int Count() { return ent->count; }
// Declared only; called by add_bindex/add_sindex with a freshly created index.
1268 int add_index(int idx);
// Creates an index with db->new_binary_index(nm,keySz,dataSz) and passes the
// result to add_index; both steps are wrapped in if_err.
// NOTE(review): the final lines of this function are not visible in this
// listing.
1269 int add_bindex(const char *nm,int keySz,int dataSz) {
1270 int idx = db->new_binary_index(nm,keySz,dataSz);
1271 if_err( idx ); if_err( add_index(idx) );
1274 int add_kindex(const char *nm) { return add_bindex(nm,0,sizeof(int)); }
// Creates an index with db->new_string_index(nm,dataSz) and passes the result
// to add_index; both steps are wrapped in if_err.
// NOTE(review): the final lines of this function are not visible in this
// listing.
1275 int add_sindex(const char *nm,int dataSz) {
1276 int idx = db->new_string_index(nm,dataSz);
1277 if_err( idx ); if_err( add_index(idx) );
// Declared only. NOTE(review): how del_index_ and del_index divide the work is
// not visible here.
1280 int del_index_(int idx);
1281 int del_index(int idx);
1282 int new_entity(const char *nm, int sz) { return db->new_entity(*this,nm,sz); }
1283 int get_entity(const char *nm) { return db->get_entity(*this,nm); }
1284 int del_entity() { return db->del_entity(*this); }
1285 void add_vref(vRef rp) { vobjs = new varObjRef(vobjs,rp); }
// Objects is a pointer to an ObjectList node.
1289 typedef ObjectList *Objects;
// Declared only.
1290 static void finit(Objects objects);
// Stores op in next and the address of o in obj.
1296 ObjectList(Objects op, ObjectLoc &o) : next(op), obj(&o) {}
// Declared only. pageLoad is what addrRead_/addrWrite_ call when a page is not
// yet usable.
1300 int new_entity_(Entity &entity, const char *nm, int sz);
1302 int findCmprFn(CmprFn fn);
1303 int pageLoad(pageId id);
// Resolve loc to an in-memory address.
// vp is first set to 0. If the page has no address, or chk_flags(fl_rd)
// reports set, pageLoad(loc.id) is run under if_err. vp then becomes the page
// address plus loc.offset plus mpsz.
// NOTE(review): the final lines of this function are not visible in this
// listing.
1304 int addrRead_(pgRef &loc, char *&vp, int mpsz=0) {
1305 Page &pg = *get_page(loc.id); vp = 0;
1306 if( unlikely( !pg.addr || pg->chk_flags(fl_rd) ) )
1307 if_err( pageLoad(loc.id) );
1308 vp = (char *)pg.addr+loc.offset+mpsz;
// Typed overloads: each reinterprets the caller's pointer variable as a char*
// and forwards to the char*& form, so the result is written straight into vp.
// NOTE(review): the closing brace of each overload is not visible in this
// listing.
1311 int addrRead_(pgRef &loc, keyBlock *&vp, int mpsz=0) {
1312 return addrRead_(loc,*(char**)&vp, mpsz);
1314 int addrRead_(pgRef &loc, allocPrefix *&vp, int mpsz=0) {
1315 return addrRead_(loc,*(char**)&vp, mpsz);
1317 int addrRead_(pgRef &loc, pagePrefix *&vp, int mpsz=0) {
1318 return addrRead_(loc,*(char**)&vp, mpsz);
// Same address resolution as addrRead_, with one extra step: the page gets
// fl_wr set before vp is computed.
// NOTE(review): the final lines of this function are not visible in this
// listing.
1320 int addrWrite_(pgRef &loc, char *&vp, int mpsz=0) {
1321 Page &pg = *get_page(loc.id); vp = 0;
1322 if( unlikely( !pg.addr || pg->chk_flags(fl_rd) ) )
1323 if_err( pageLoad(loc.id) );
1324 pg->set_flags(fl_wr);
1325 vp = (char *)pg.addr+loc.offset+mpsz;
// Typed overloads: each reinterprets the caller's pointer variable as a char*
// and forwards to the char*& form of addrWrite_.
// NOTE(review): the closing brace of each overload is not visible in this
// listing.
1328 int addrWrite_(pgRef &loc, keyBlock *&vp, int mpsz=0) {
1329 return addrWrite_(loc,*(char**)&vp, mpsz);
1331 int addrWrite_(pgRef &loc, allocPrefix *&vp, int mpsz=0) {
1332 return addrWrite_(loc,*(char**)&vp, mpsz);
1334 int addrWrite_(pgRef &loc, pagePrefix *&vp, int mpsz=0) {
1335 return addrWrite_(loc,*(char**)&vp, mpsz);
// Variants that pass sizeof(allocPrefix) as mpsz, so vp is offset by the size
// of one allocPrefix beyond loc.
// NOTE(review): the closing brace of each function is not visible in this
// listing.
1337 int addrRead(pgRef &loc, char *&vp) {
1338 return addrRead_(loc, vp, sizeof(allocPrefix));
1340 int addrWrite(pgRef &loc, char *&vp) {
1341 return addrWrite_(loc, vp, sizeof(allocPrefix));
// Object-heap and pgRef allocation routines; declared only.
// size is passed by reference in the allocate/get/new forms.
// NOTE(review): presumably size is adjusted by the callee — confirm.
1344 int objectHeapInsert(int sz,int pg,int off);
1345 int objectHeapDelete(int sz,int pg,int off);
1346 int objectAllocate(int typ, int &size, pgRef &loc,AllocCache &cache);
1347 int objectFree(pgRef &loc);
1348 int pgRefGet(int &size, pgRef &loc,AllocCache &cache);
1349 int pgRefNew(int &size, pgRef &lo,AllocCache &cache);
1350 int pgRefAllocate(int &size, pgRef &lo,AllocCache &cache);
// Store (ioAddr-addressed) allocation routines; declared only.
// Insert/Delete take a long size, while Get/New/Allocate take int& and
// ioAddr& (presumably outputs — confirm).
1352 int storeInsert(long size, ioAddr io_addr);
1353 int storeDelete(long size, ioAddr io_addr);
1354 int storeGet(int &size, ioAddr &io_addr);
1355 int storeNew(int &size, ioAddr &io_addr);
1356 int storeAllocate(int &size, ioAddr &io_addr);
1357 int storeFree(int size, ioAddr io_addr);
// Internal commit/open, page-table and index-table management; declared only.
// iopen's undo_save defaults to 1.
1359 int icommit(int force);
1364 int iopen(int undo_save=1);
1369 void del_page(int id);
1370 int alloc_pageTable(int sz);
1371 void free_page(int pid);
1372 int alloc_indecies(int n);
1374 void del_index(int idx);
// Two new_index overloads, distinguished by the info type of the last
// argument (IndexBinaryInfo vs IndexStringInfo).
1375 int new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexBinaryInfo *d);
1376 int new_index(IndexBase *&ibp, IndexBaseInfo *b, IndexStringInfo *d);
// Resolve the keyBlock at offset 0 of page pid.
// df == 0 goes through addrRead_; any other value goes through addrWrite_
// (which also sets fl_wr on the page).
// NOTE(review): the closing brace of this function is not visible in this
// listing.
1377 int indexRead(pageId pid, int df, keyBlock *&bp) {
1378 pgRef pg; pg.id = pid; pg.offset = 0;
1379 return !df ? addrRead_(pg,*(char**)&bp) : addrWrite_(pg,*(char**)&bp);
// As the three-argument indexRead, plus two extra outputs: pp receives the
// Page for pid and cp the address immediately after the keyBlock (bp + 1).
// pp and cp are zeroed before the read is attempted under if_err.
// NOTE(review): the final lines of this function are not visible in this
// listing.
1381 int indexRead(pageId pid,int df,keyBlock *&bp, Page *&pp, char *&cp) {
1382 pp = 0; cp = 0; if_err( indexRead(pid, df, bp) );
1383 pp = get_page(pid); cp = (char *)(bp + 1);
// Page and raw data I/O routines; declared only. pageDealloc's mode defaults
// to 1.
1386 void pageDealloc(Page &pg, int mode=1);
1387 int pageRead(Page &pg);
1388 int pageWrite(Page &pg);
1389 int seek_data(ioAddr io_addr);
1390 int size_data(char *dp, int sz);
1391 int read_data(char *dp, int sz);
1392 int write_data(char *dp, int sz);
1393 int write_zeros(ioAddr io_addr);
1394 int write_padding(ioAddr io_addr);
// Both take a pointer to a Db member function with the (char *dp,int sz)
// shape shared by size_data/read_data/write_data above.
1395 int readRootInfo(int(Db::*fn)(char *dp,int sz));
1396 int writeRootInfo(int(Db::*fn)(char *dp,int sz));
1397 ioAddr storeBlocks(ioAddr sz) { return (sz+storeBlockSize-1)/storeBlockSize; }
1398 ioAddr entityPages(ioAddr sz) { return (sz+entityPageSize-1)/entityPageSize; }
1399 int indeciesHunks(int sz) { return (sz+indexTableHunkSize-1)/indexTableHunkSize; }
1400 int pageTableHunks(int sz) { return (sz+pageTableHunkSize-1)/pageTableHunkSize; }
1401 int pagePrefixHunks(int sz) { return (sz+sizeof(pagePrefix)-1)/sizeof(pagePrefix); }
// Declared only; undo_save defaults to 1.
1407 int start_transaction(int undo_save=1);
1408 void enter() { db_info->dbRwLk.enter(); }
1409 void leave() { db_info->dbRwLk.leave(); }
1410 void write_enter() { db_info->dbRwLk.write_enter(); }
1411 void write_leave() { db_info->dbRwLk.write_leave(); }
1414 // 1:wr, 0:rd, -1:unlocked
1415 int is_locked() { return db_info->dbRwLk.locked(); }
1416 int is_blocked() { return db_info->dbRwLk.blocked(); }
// Declared only; zkey defaults to -1.
1419 int open(int zfd, int zkey=-1);
1422 int attach(int zrw, int zfd, int zkey);
1423 int attach(int zrw=0) { return attach(fd, key, zrw); }
// Public index API; declared only.
// The int r argument is presumably an index number as returned by
// new_*_index/get_index — confirm.
1426 int copy(Db *db, Objects objs);
// cmpr defaults to 0 for both creation and lookup.
1427 int new_binary_index(const char *nm, int ksz, int dsz, CmprFn cmpr=0);
1428 int new_string_index(const char *nm, int dsz);
1429 int get_index(const char *nm, CmprFn cmpr=0);
1430 long get_count(int r);
1431 int ins (int r, void *key, void *data);
1432 int del (int r, void *key);
// rtnData is optional (defaults to 0).
1433 int find (int r, void *key, void *rtnData=0);
1434 int locate(int r, int op, void *key, CmprFn cmpr, void *rtnKey, void *rtnData=0);
// Overload without a compare function: forwards to the six-argument locate
// with cmpr = 0.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
1435 int locate(int r, int op, void *key, void *rtnKey, void *rtnData=0) {
1436 return locate(r,op,key,0,rtnKey,rtnData);
// Index traversal and allocation entry points; declared only. rtnData is
// optional (defaults to 0) in first/last/next.
1438 int first (int r, void *key, void *rtnData=0);
1439 int last (int r, void *key, void *rtnData=0);
1440 int next (int r, void *key, void *rtnData=0);
1441 int nextloc(int r, pgRef &loc);
1442 int allocate(int typ, int size, pgRef &loc, AllocCache &cache);
// Overload without an explicit cache: forwards to the four-argument allocate
// using the member alloc_cache.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
1443 int allocate(int typ, int size, pgRef &loc) {
1444 return allocate(typ, size, loc, alloc_cache);
// Declared only; commit's force defaults to 0.
1446 int reallocate(int size, pgRef &loc, AllocCache &cache);
1447 int deallocate(pgRef &loc);
1448 int commit(int force=0);
1452 int transaction() { return !root_info ? -1 : root_info->transaction_id; }
1453 int64_t filesize() { return !root_info ? -1 : root_info->file_size; }
1454 int opened() { return fd>=0 ? 1 : 0; }
1455 void use_shm(int v) { no_shm = v ? 0 : 1; }
1456 int error() { return err_no; }
// Declared only: an error reporter taking a code and a message.
1458 void Error(int v,const char *msg);
// Declared only. NOTE(review): presumably dump/check diagnostics, going by the
// names — confirm.
1466 void admp(); void achk(); void fchk();
// Declared only; chk defaults to 1.
1469 void stats(int chk=1);