Skip to content

Commit

Permalink
cleaned up Book results & sorting & caching
Browse files Browse the repository at this point in the history
  • Loading branch information
amark committed Jan 17, 2024
1 parent 3688ba1 commit 203bd40
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 95 deletions.
62 changes: 29 additions & 33 deletions gun.js
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@
}
function got(word, page){
var b = page.book, l, has, a, i;
if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure.
if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure. // TOOD: BUG!!! Not actually, but if we want to do non-exact radix-like closest-word lookups on a page, we need to check limbo & potentially sort first.
// parseless may return -1 from actual value, so we may need to test both. // TODO: Double check? I think this is correct.
if(has && word == has.word){ return (b.all[word] = has).is }
if('string' != typeof has){ has = l[got.i = i+=1] }
Expand All @@ -208,14 +208,14 @@

function from(a, t, l){
if('string' != typeof a.from){ return a.from }
//(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])).toString = join; // slot
(l = a.from = slot(t = t||a.from||'')).toString = join;
//(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])); // slot
(l = a.from = slot(t = t||a.from||''));
return l;
}
function list(each){ each = each || function(x){return x}
// TODO: BUG!!! in limbo items need to get situated before calling this, if there are any. (obviously, we shouldn't do it again if limbo has previously been sorted).
var i = 0, l = from(this)||[], w, r = [], p = this.book.parse || function(){};
while(w = l[i++]){ r.push(each(slot(w)[1],p(w)||w,this)) }
var i = 0, l = sort(this), w, r = [], p = this.book.parse || function(){};
//while(w = l[i++]){ r.push(each(slot(w)[1], p(w)||w, this)) }
while(w = l[i++]){ r.push(each(this.get(w = w.word||p(w)||w), w, this)) } // TODO: BUG! PERF?
return r;
}

Expand All @@ -230,7 +230,7 @@
// MUST be an insert:
has = b.all[word] = {word: word, is: is, page: page, substring: subt, toString: tot};
page.first = (page.first < word)? page.first : word;
if(!page.limbo){ (page.limbo = []).toString = join }
if(!page.limbo){ (page.limbo = []) }
page.limbo.push(has);
b(word, is);
page.size += size(word) + size(is);
Expand All @@ -240,24 +240,18 @@

function split(p, b){ // TODO: use closest hash instead of half.
//console.time();
// TODO: BUG???? May need to do a SORTED merge with FROM.
var i = 0, L = p.limbo, tmp;
//while(tmp = L[i++]){ }
var L = p.limbo = sort(p.limbo), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp;
var L = sort(p), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp;
//console.timeEnd();
var next = {limbo: [], first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, nl = next.limbo;
nl.toString = join;
var next = {first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, f = next.from = [];
//console.time();
while(tmp = L[i++]){
nl.push(tmp);
f.push(tmp);
next.size += (tmp.is||'').length||1;
tmp.page = next;
}
//console.timeEnd();
//console.time();
p.limbo = p.limbo.slice(0, j);
//console.timeEnd(); console.time();
p.from = p.from.slice(0, j);
p.size -= next.size;
p.sort = 0;
b.list.splice(spot(next.first, b.list)+1, 0, next); // TODO: BUG! Make sure next.first is decoded text. // TODO: BUG! spot may need parse too?
//console.timeEnd();
if(b.split){ b.split(next, p) }
Expand Down Expand Up @@ -285,27 +279,29 @@
}
function sub(i,j){
	// Resolve the text this object is keyed by: prefer `first`, then `word`,
	// and only fall back to decoding the first entry of the `from` list.
	var key = this.first || this.word;
	if(!key){ key = B.decode((from(this)||'')[0]||''); }
	return key.substring(i,j);
}
function to(){
	// Memoized stringify: build the text once via text(this), then reuse `this.text`.
	var cached = this.text;
	if(!cached){ cached = text(this); }
	return this.text = cached;
}
function join(){ return this.join('|') }
function text(p){ var l = p.limbo; // TODO: BUG??? Shouldn't any stringify cause limbo to be reset?
if(!l){ return (typeof p.from == 'string')? (p.from||'')+'' : '|'+p.from+'|' }
if(!p.from){ return p.limbo = null, '|'+((l && sort(l).join('|'))||'')+'|' } // TODO: p.limbo should be reset each time we "flush".
return '|'+mix(l, from(p), p).join('|')+'|'; // commenting out this sub-portion of code fixed a more basic test, but will probably cause a bug with a FROM + MEMORY.
}
function mix(l, f, p){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push(
var j = 0, i;
function text(p){
	// Serialize a page to its '|'-delimited text form.
	// PERF: a page whose `from` is still a string is returned untouched.
	// Pending `limbo` entries are folded into `p.from` by sort(p) first, so
	// `p.from` must be read only AFTER that call.
	// TODO: BUG? What should an empty page mean: undefined, '', or '||'?
	if(p.limbo){ sort(p); }
	var from = p.from;
	if('string' == typeof from){ return from; }
	return '|' + (from || []).join('|') + '|';
}

function sort(p, l){
	// Normalize `p.from` to an array (a string form is parsed with slot()),
	// then, if there are pending entries, merge them with mix(p) and sort
	// the merged list by key.
	//
	// p: page object; its `from` is replaced by the array form as a side effect.
	// l: optional; only used to decide whether a merge is needed when
	//    `p.limbo` is empty. mix(p) itself is called without it.
	// Returns the array now stored in `p.from`; with nothing pending it is
	// returned without re-sorting.
	var f = p.from = ('string' == typeof p.from)? slot(p.from) : p.from||[];
	if(!(l = l || p.limbo)){ return f }
	return mix(p).sort(function(a,b){
		// Entries are either objects carrying `.word` or encoded strings.
		var x = a.word||B.decode(''+a), y = b.word||B.decode(''+b);
		// Return 0 on ties: a comparator that never reports equality is
		// inconsistent, which makes the resulting order implementation-defined.
		return (x < y)? -1 : (y < x)? 1 : 0;
	});
}
function mix(p, l){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push(
l = l || p.limbo || []; p.limbo = null;
var j = 0, i, f = p.from;
while(i = l[j++]){
if(got(i.word, p)){
f[got.i] = i;
f[got.i] = i; // TODO: Trick: allow for a GUN'S HAM CRDT hook here.
} else {
f.push(i);
}
}
return sort(f);
}
function sort(l){ //return l.sort();
return l.sort(function(a,b){
return (a.word||B.decode(''+a)) < (b.word||B.decode(''+b))? -1:1;
});
return f;
}

B.encode = function(d, s, u){ s = s || "|"; u = u || String.fromCharCode(32);
Expand Down
40 changes: 11 additions & 29 deletions rad.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
var log = opt.log || nope;

var has = (sT.RAD.has || (sT.RAD.has = {}))[opt.file];
if(has){ return has }
if(has){ return has } // TODO: BUG? Not reuses same instance?
var r = function rad(word, is, reply){ r.word = word;
if(!b){ start(word, is, reply); return r }
if(is === undefined || 'function' == typeof is){ // THIS IS A READ:
Expand Down Expand Up @@ -35,29 +35,18 @@
})
}

async function write(word, reply){
log('write() word', word);
function write(word, reply){
var p = b.page(word), tmp;
if(tmp = p.saving){ reply && tmp.push(reply); return } p.saving = [reply];
var S = +new Date; log(" writing", p.substring(), 'since last', S - p.saved, RAD.c, 'records', env.count++, 'mid-swap.');
if(tmp=p.saving){(reply||!tmp.length)&&(p.saving=tmp.concat(reply));return} // TODO: PERF! Rogowski points out concat is slow. BUG??? I HAVE NO clue how/why this if statement being called from recursion yet not set to 0.
p.saving = ('function' == typeof reply)? [reply] : reply || [];
get(p, function(err, disk){
if(err){ log("ERR! in write() get() cb ", err); return }
log(' get() - p.saving ', (p.saving || []).length);
if(p.from && disk){
log(" get() merge: p.from ", p.toString().slice(0, 40), " disk.length", disk?.length || 0);
}
if(err){ log("ERR! in write() get() cb ", err); return } // TODO: BUG!!! Unhandled, no callbacks called.
p.from = disk || p.from;
// p.list = p.text = p.from = 0;
// p.first = p.first.word || p.first;
tmp = p.saving; p.saving = [];
put(p, '' + p, function(err, ok){
env.count--; p.saved = +new Date; log(" ...wrote %d bytes in %dms", ('' + p).length, (p.saved = +new Date) - S);
// TODO: BUG: Confirmed! Only calls back first. Need to fix + use perf hack from old RAD.
put(p, ''+p, function(err, ok){
sT.each(tmp, function(cb){ cb && cb(err, ok) });
if(!p.saving.length){ p.saving = 0; return; } //p.saving = 0; // what?
// log({ tmp });
console.log("hm?", word, reply+'');
write(word, reply);
tmp = p.saving; p.saving = 0;
if(tmp.length){ write(word, tmp) }
});
}, p);
}
Expand All @@ -71,13 +60,13 @@
function get(file, cb){
var tmp;
if(!file){ return } // TODO: HANDLE ERROR!!
if(file.from){ cb(null, file.from); return } // IS THIS LINE SAFE? ADD TESTS!
if(file.from){ cb(null, file.from); return }
if(b&&1==b.list.length){ file.first = (file.first < '!')? file.first : '!'; } // TODO: BUG!!!! This cleanly makes for a common first file, but SAVING INVISIBLE ASCII KEYS IS COMPLETELY UNTESTED and guaranteed to have bugs/corruption issues.
if(tmp = put[file = fname(file)]){ cb(u, tmp.data); return }
if(tmp = get[file]){ tmp.push(cb); return } get[file] = [cb];
RAD.get(file, function(err, data){
tmp = get[file]||''; delete get[file];
var i = -1, f; while (f = tmp[++i]){ f(err, data) } // TODO: BUG! CPU SCHEDULE?
sT.each(tmp, function(cb){ cb && cb(err, data) });
}, opt);
};

Expand All @@ -104,7 +93,6 @@
return t;
}
b.split = function(next, page){
log("SPLIT!!!!", b.list.length);
put(' ', '' + b.list, function(err, ok){
if(err){ console.log("ERR!"); return }
// ??
Expand All @@ -117,12 +105,6 @@
//function fname(p){ return opt.file + '/' + ename(p.substring()) }
function fname(p){ return ename(p.substring()) }


function valid(word, is, reply){
if(is !== is){ reply(word +" cannot be NaN!"); return }
return true;
}

function valid(word, is, reply){
if(is !== is){ reply(word +" cannot be NaN!"); return }
return true;
Expand Down Expand Up @@ -210,7 +192,7 @@
cb(401)
}
RAD.get = async function(file, cb, opt){ get && get(file, cb, opt);
var t = (await (await fetch('http://localhost:8766/gun/1data/'+file)).text());
var t = (await (await fetch('http://localhost:8765/gun/authorsData/'+file)).text());
if('404' == t){ cb(); return }
cb(null, t);
}
Expand Down
62 changes: 29 additions & 33 deletions src/book.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function get(word){
}
function got(word, page){
var b = page.book, l, has, a, i;
if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure.
if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure. // TOOD: BUG!!! Not actually, but if we want to do non-exact radix-like closest-word lookups on a page, we need to check limbo & potentially sort first.
// parseless may return -1 from actual value, so we may need to test both. // TODO: Double check? I think this is correct.
if(has && word == has.word){ return (b.all[word] = has).is }
if('string' != typeof has){ has = l[got.i = i+=1] }
Expand All @@ -70,14 +70,14 @@ function spot(word, sorted, parse){ parse = parse || spot.no || (spot.no = funct

function from(a, t, l){
if('string' != typeof a.from){ return a.from }
//(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])).toString = join; // slot
(l = a.from = slot(t = t||a.from||'')).toString = join;
//(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])); // slot
(l = a.from = slot(t = t||a.from||''));
return l;
}
function list(each){ each = each || function(x){return x}
// TODO: BUG!!! in limbo items need to get situated before calling this, if there are any. (obviously, we shouldn't do it again if limbo has previously been sorted).
var i = 0, l = from(this)||[], w, r = [], p = this.book.parse || function(){};
while(w = l[i++]){ r.push(each(slot(w)[1],p(w)||w,this)) }
var i = 0, l = sort(this), w, r = [], p = this.book.parse || function(){};
//while(w = l[i++]){ r.push(each(slot(w)[1], p(w)||w, this)) }
while(w = l[i++]){ r.push(each(this.get(w = w.word||p(w)||w), w, this)) } // TODO: BUG! PERF?
return r;
}

Expand All @@ -92,7 +92,7 @@ function set(word, is){
// MUST be an insert:
has = b.all[word] = {word: word, is: is, page: page, substring: subt, toString: tot};
page.first = (page.first < word)? page.first : word;
if(!page.limbo){ (page.limbo = []).toString = join }
if(!page.limbo){ (page.limbo = []) }
page.limbo.push(has);
b(word, is);
page.size += size(word) + size(is);
Expand All @@ -102,24 +102,18 @@ function set(word, is){

function split(p, b){ // TODO: use closest hash instead of half.
//console.time();
// TODO: BUG???? May need to do a SORTED merge with FROM.
var i = 0, L = p.limbo, tmp;
//while(tmp = L[i++]){ }
var L = p.limbo = sort(p.limbo), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp;
var L = sort(p), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp;
//console.timeEnd();
var next = {limbo: [], first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, nl = next.limbo;
nl.toString = join;
var next = {first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, f = next.from = [];
//console.time();
while(tmp = L[i++]){
nl.push(tmp);
f.push(tmp);
next.size += (tmp.is||'').length||1;
tmp.page = next;
}
//console.timeEnd();
//console.time();
p.limbo = p.limbo.slice(0, j);
//console.timeEnd(); console.time();
p.from = p.from.slice(0, j);
p.size -= next.size;
p.sort = 0;
b.list.splice(spot(next.first, b.list)+1, 0, next); // TODO: BUG! Make sure next.first is decoded text. // TODO: BUG! spot may need parse too?
//console.timeEnd();
if(b.split){ b.split(next, p) }
Expand Down Expand Up @@ -147,27 +141,29 @@ function tot(){ var tmp = {};
}
function sub(i,j){
	// Resolve the text this object is keyed by: prefer `first`, then `word`,
	// and only fall back to decoding the first entry of the `from` list.
	var key = this.first || this.word;
	if(!key){ key = B.decode((from(this)||'')[0]||''); }
	return key.substring(i,j);
}
function to(){
	// Memoized stringify: build the text once via text(this), then reuse `this.text`.
	var cached = this.text;
	if(!cached){ cached = text(this); }
	return this.text = cached;
}
function join(){ return this.join('|') }
function text(p){ var l = p.limbo; // TODO: BUG??? Shouldn't any stringify cause limbo to be reset?
if(!l){ return (typeof p.from == 'string')? (p.from||'')+'' : '|'+p.from+'|' }
if(!p.from){ return p.limbo = null, '|'+((l && sort(l).join('|'))||'')+'|' } // TODO: p.limbo should be reset each time we "flush".
return '|'+mix(l, from(p), p).join('|')+'|'; // commenting out this sub-portion of code fixed a more basic test, but will probably cause a bug with a FROM + MEMORY.
}
function mix(l, f, p){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push(
var j = 0, i;
function text(p){
	// Serialize a page to its '|'-delimited text form.
	// PERF: a page whose `from` is still a string is returned untouched.
	// Pending `limbo` entries are folded into `p.from` by sort(p) first, so
	// `p.from` must be read only AFTER that call.
	// TODO: BUG? What should an empty page mean: undefined, '', or '||'?
	if(p.limbo){ sort(p); }
	var from = p.from;
	if('string' == typeof from){ return from; }
	return '|' + (from || []).join('|') + '|';
}

function sort(p, l){
	// Normalize `p.from` to an array (a string form is parsed with slot()),
	// then, if there are pending entries, merge them with mix(p) and sort
	// the merged list by key.
	//
	// p: page object; its `from` is replaced by the array form as a side effect.
	// l: optional; only used to decide whether a merge is needed when
	//    `p.limbo` is empty. mix(p) itself is called without it.
	// Returns the array now stored in `p.from`; with nothing pending it is
	// returned without re-sorting.
	var f = p.from = ('string' == typeof p.from)? slot(p.from) : p.from||[];
	if(!(l = l || p.limbo)){ return f }
	return mix(p).sort(function(a,b){
		// Entries are either objects carrying `.word` or encoded strings.
		var x = a.word||B.decode(''+a), y = b.word||B.decode(''+b);
		// Return 0 on ties: a comparator that never reports equality is
		// inconsistent, which makes the resulting order implementation-defined.
		return (x < y)? -1 : (y < x)? 1 : 0;
	});
}
function mix(p, l){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push(
l = l || p.limbo || []; p.limbo = null;
var j = 0, i, f = p.from;
while(i = l[j++]){
if(got(i.word, p)){
f[got.i] = i;
f[got.i] = i; // TODO: Trick: allow for a GUN'S HAM CRDT hook here.
} else {
f.push(i);
}
}
return sort(f);
}
function sort(l){ //return l.sort();
return l.sort(function(a,b){
return (a.word||B.decode(''+a)) < (b.word||B.decode(''+b))? -1:1;
});
return f;
}

B.encode = function(d, s, u){ s = s || "|"; u = u || String.fromCharCode(32);
Expand Down
19 changes: 19 additions & 0 deletions test/rad/book.js
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,25 @@ var names = ["Adalard","Adora","Aia","Albertina","Alfie","Allyn","Amabil","Ammam

});

describe('API usage checks', function(){
	var opt = {file: 'search'}
	var search = RAD(opt); // kept from the original setup; not used by the test below.
	var b = Book();
	// Plain synchronous test: nothing here is awaited, so no `async`/`done` is used.
	// With `async done => {...}` a failing expect() rejected the returned promise
	// and never reached done(), surfacing as a timeout instead of the assertion error.
	it('read results from in-memory data', function(){
		// First write is readable straight from memory.
		b('hello', '1data');
		var r = b.page('wat').read();
		expect(r).to.be.eql(['1data']);
		// Overwriting the same word replaces the value rather than appending.
		b('hello', '1dataZ');
		r = b.page('wat').read();
		expect(r).to.be.eql(['1dataZ']);
		// A new word is appended after the existing one in the read results.
		b('new', '2data');
		r = b.page('wat').read();
		expect(r).to.be.eql(['1dataZ','2data']);
	});

});

console.log("Performance Tests: 2023 Nov 12, 60M put/sec, 120M get/sec, 1M get/sec with splits.");

});
Expand Down

0 comments on commit 203bd40

Please sign in to comment.