@@ -177,11 +177,14 @@ const noElementMarker = Symbol();
177177const generationKey = Symbol ( ) ;
178178
179179// Some commonly used constants throughout the code.
180- const emptyNode = /* @__PURE__ */ makeNode ( 0 ) ;
180+ const emptyNode = /* @__PURE__ */ newNode ( 0 ) ;
181181const emptyDict = /* @__PURE__ */ new Dict ( 0 , emptyNode ) ;
182182const errorNil = /* @__PURE__ */ Result$Error ( undefined ) ;
183183
184- function makeNode ( generation ) {
184+ function makeNode ( generation , datamap , nodemap , data ) {
185+ // The order of fields is important, as they define the order `isEqual` will
186+ // compare our fields. Putting the bitmaps first means that equality can
187+ // early-out if the bitmaps are not equal.
185188 return {
186189 // A node is a high-arity (32 in practice) hybrid tree node.
187190 // Hybrid means that it stores data directly as well as pointers to child nodes.
@@ -196,8 +199,8 @@ function makeNode(generation) {
196199 // suffix (least significant bits first) encoding.
197200 // For example, if the last 5 bits of the hash are 01101 (decimal 13), the bit to check for
198201 // that value is the 13th bit.
199- datamap : 0 ,
200- nodemap : 0 ,
202+ datamap,
203+ nodemap,
201204 // The slots themselves are stored in a single contiguous array that contains
202205 // both direct k/v-pairs and child nodes.
203206 //
@@ -215,12 +218,17 @@ function makeNode(generation) {
215218 //
216219 // Children are stored in reverse order to avoid having to store or calculate an
217220 // "offset" value to skip over the direct children.
218- data : [ ] ,
221+ data,
219222 // The generation is used to track which nodes need to be copied during transient updates.
223+ // Using a symbol here makes `isEqual` ignore this field.
220224 [ generationKey ] : generation ,
221225 } ;
222226}
223227
228+ function newNode ( generation ) {
229+ return makeNode ( generation , 0 , 0 , [ ] ) ;
230+ }
231+
224232/**
225233 * Copies a node and its data array if it's from another generation, making it safe
226234 * to mutate the node.
@@ -230,14 +238,50 @@ function copyNode(node, generation) {
230238 return node ;
231239 }
232240
233- const { datamap, nodemap, data } = node ;
241+ const newData = node . data . slice ( 0 ) ;
242+ return makeNode ( generation , node . datamap , node . nodemap , newData ) ;
243+ }
234244
235- return {
236- datamap,
237- nodemap,
238- data : data . slice ( 0 ) ,
239- [ generationKey ] : generation ,
240- } ;
245+ /**
246+ * Copies a node if needed and sets a new value.
247+ */
248+ function copyAndSet ( node , generation , idx , val ) {
249+ if ( node . data [ idx ] === val ) {
250+ return node ;
251+ }
252+
253+ // Using copyNode is faster than a specialised implementation.
254+ node = copyNode ( node , generation ) ;
255+ node . data [ idx ] = val ;
256+ return node ;
257+ }
258+
259+ /**
260+ * Always copies the node, inserting a new key-value pair into the copy.
261+ */
262+ function copyAndInsertPair ( node , generation , bit , idx , key , val ) {
263+ const data = node . data ;
264+ const length = data . length ;
265+
266+ // the fastest way to insert a pair is to always copy.
267+ const newData = new Array ( length + 2 ) ;
268+
269+ let readIndex = 0 ;
270+ let writeIndex = 0 ;
271+
272+ while ( readIndex < idx ) newData [ writeIndex ++ ] = data [ readIndex ++ ] ;
273+ newData [ writeIndex ++ ] = key ;
274+ newData [ writeIndex ++ ] = val ;
275+ while ( readIndex < length ) newData [ writeIndex ++ ] = data [ readIndex ++ ] ;
276+
277+ return makeNode ( generation , node . datamap | bit , node . nodemap , newData ) ;
278+ }
279+
280+ function copyAndRemovePair ( node , generation , bit , idx ) {
281+ node = copyNode ( node , generation ) ;
282+ node . datamap ^= bit ;
283+ node . data . splice ( idx , 2 ) ;
284+ return node ;
241285}
242286
243287export function make ( ) {
@@ -267,13 +311,13 @@ export function has(dict, key) {
267311
268312function lookup ( node , key , hash ) {
269313 for ( let shift = 0 ; shift < 32 ; shift += bits ) {
270- const { data, datamap , nodemap } = node ;
314+ const data = node . data ;
271315 const bit = hashbit ( hash , shift ) ;
272316
273- if ( nodemap & bit ) {
317+ if ( node . nodemap & bit ) {
274318 // we found our hash inside the nodemap, so we can continue our search there.
275- node = data [ data . length - 1 - index ( nodemap , bit ) ] ;
276- } else if ( datamap & bit ) {
319+ node = data [ data . length - 1 - index ( node . nodemap , bit ) ] ;
320+ } else if ( node . datamap & bit ) {
277321 // we store this hash directly!
278322 //
279323 // this also means that there are no other values with the same
@@ -282,7 +326,7 @@ function lookup(node, key, hash) {
282326 // We still need to check if the key matches, but if it does we know for
283327 // sure that this is the correct value, and if it doesn't that we don't
284328 // contain the value in question.
285- const dataidx = Math . imul ( index ( datamap , bit ) , 2 ) ;
329+ const dataidx = Math . imul ( index ( node . datamap , bit ) , 2 ) ;
286330 return isEqual ( key , data [ dataidx ] ) ? data [ dataidx + 1 ] : noElementMarker ;
287331 } else {
288332 // if the hash bit is set in neither bitmap, we immediately know that
@@ -381,6 +425,9 @@ export function insert(dict, key, value) {
381425 globalTransient . size = dict . size ;
382426
383427 const root = doPut ( globalTransient , dict . root , key , value , getHash ( key ) , 0 ) ;
428+ if ( root === dict . root ) {
429+ return dict ;
430+ }
384431
385432 return new Dict ( globalTransient . size , root ) ;
386433}
@@ -398,89 +445,82 @@ export function put(key, value, transient) {
398445}
399446
400447function doPut ( transient , node , key , value , hash , shift ) {
401- node = copyNode ( node , transient . generation ) ;
402- const { data , datamap , nodemap } = node ;
448+ const data = node . data ;
449+ const generation = transient . generation ;
403450
404451 // 1. Overflow Node
405452 // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match.
406453 if ( shift > 32 ) {
407454 for ( let i = 0 ; i < data . length ; i += 2 ) {
408455 if ( isEqual ( key , data [ i ] ) ) {
409- data [ i + 1 ] = value ;
410- return node ;
456+ return copyAndSet ( node , generation , i + 1 , value ) ;
411457 }
412458 }
413459
414- data . push ( key , value ) ;
415460 transient . size += 1 ;
416-
417- return node ;
461+ return copyAndInsertPair ( node , generation , 0 , data . length , key , value ) ;
418462 }
419463
420464 const bit = hashbit ( hash , shift ) ;
421465
422466 // 2. Child Node
423467 // We have to check first if there is already a child node we have to traverse to.
424- if ( ( nodemap & bit ) !== 0 ) {
425- const nodeidx = data . length - 1 - index ( nodemap , bit ) ;
468+ if ( node . nodemap & bit ) {
469+ const nodeidx = data . length - 1 - index ( node . nodemap , bit ) ;
426470 const child = data [ nodeidx ] ;
427- data [ nodeidx ] = doPut ( transient , child , key , value , hash , shift + bits ) ;
428- return node ;
471+ const newChild = doPut ( transient , child , key , value , hash , shift + bits ) ;
472+ return copyAndSet ( node , generation , nodeidx , newChild ) ;
429473 }
430474
431475 // 3. New Data Node
432476 // No child node and no data node exists yet, so we can potentially just insert a new value.
433- const dataidx = Math . imul ( index ( datamap , bit ) , 2 ) ;
434- if ( ( datamap & bit ) === 0 ) {
435- node . datamap |= bit ;
436- data . splice ( dataidx , 0 , key , value ) ;
477+ const dataidx = Math . imul ( index ( node . datamap , bit ) , 2 ) ;
478+ if ( ( node . datamap & bit ) === 0 ) {
437479 transient . size += 1 ;
438-
439- return node ;
480+ return copyAndInsertPair ( node , generation , bit , dataidx , key , value ) ;
440481 }
441482
442483 // 4. Existing Data Node
443484 // We have a match that we can update, or remove.
444485 if ( isEqual ( key , data [ dataidx ] ) ) {
445- data [ dataidx + 1 ] = value ;
446- return node ;
486+ return copyAndSet ( node , generation , dataidx + 1 , value ) ;
447487 }
448488
449489 // 5. Collision
450490 // There is no child node, but a data node with the same hash, but with a different key.
451491 // To resolve this, we push both nodes down one level.
452- let child = makeNode ( transient . generation ) ;
453- child = doPut ( transient , child , key , value , hash , shift + bits ) ;
454-
455492 const otherKey = data [ dataidx ] ;
456- child = doPut (
457- transient ,
458- child ,
459- otherKey ,
460- data [ dataidx + 1 ] ,
461- getHash ( otherKey ) ,
462- shift + bits ,
463- ) ;
493+ const otherVal = data [ dataidx + 1 ] ;
494+ const otherHash = getHash ( otherKey ) ;
495+ const childShift = shift + bits ;
496+
497+ let child = emptyNode ;
498+ child = doPut ( transient , child , key , value , hash , childShift ) ;
499+ child = doPut ( transient , child , otherKey , otherVal , otherHash , childShift ) ;
500+
464501 // we inserted 2 elements, but implicitly deleted the one we pushed down from the datamap.
465502 transient . size -= 1 ;
466503
467- node . datamap ^= bit ;
468- node . nodemap |= bit ;
469-
470504 // remove the old data pair, and insert the new child node.
471- // because we remove 2 elements first, our indices are off-by-one!
472- // When calculating the nodeidx, we measure with the length including those
473- // 2 extra elements, but missing the one we haven't inserted yet, so we have
474- // to correct for both of these with (1-2) = -1
505+ const length = data . length ;
506+ const nodeidx = length - 1 - index ( node . nodemap , bit ) ;
475507
476- const nodeidx = data . length - 1 - index ( nodemap , bit ) ;
508+ // writing these loops in javascript instead of a combination of splices
509+ // turns out to be faster. Copying always turned out to be faster.
510+ const newData = new Array ( length - 1 ) ;
477511
478- data . splice ( dataidx , 2 ) ;
479- data . splice ( nodeidx - 1 , 0 , child ) ;
512+ let readIndex = 0 ;
513+ let writeIndex = 0 ;
480514
481- return node ;
482- }
515+ // [0..dataidx, skip 2 elements, ..nodeidx, newChild, ..rest]
516+ while ( readIndex < dataidx ) newData [ writeIndex ++ ] = data [ readIndex ++ ] ;
517+ readIndex += 2 ;
518+ while ( readIndex <= nodeidx ) newData [ writeIndex ++ ] = data [ readIndex ++ ] ;
519+ newData [ writeIndex ++ ] = child ;
520+ while ( readIndex < length ) newData [ writeIndex ++ ] = data [ readIndex ++ ] ;
483521
522+ return makeNode ( generation , node . datamap ^ bit , node . nodemap | bit , newData ) ;
523+ }
484524/**
485525 * Consume a transient, removing a key if it exists.
486526 * Returns a new transient.
@@ -491,70 +531,63 @@ export function remove(key, transient) {
491531}
492532
493533function doRemove ( transient , node , key , hash , shift ) {
494- const { data, datamap, nodemap } = node ;
534+ const data = node . data ;
535+ const generation = transient . generation ;
495536
496537 // 1. Overflow Node
497538 // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match.
498539 if ( shift > 32 ) {
499540 for ( let i = 0 ; i < data . length ; i += 2 ) {
500541 if ( isEqual ( key , data [ i ] ) ) {
501- node = copyNode ( node , transient . generation ) ;
502- node . data . splice ( i , 2 ) ;
503542 transient . size -= 1 ;
504- break ;
543+ return copyAndRemovePair ( node , generation , 0 , i ) ;
505544 }
506545 }
507546
508547 return node ;
509548 }
510549
511550 const bit = hashbit ( hash , shift ) ;
512- const nodeidx = data . length - 1 - index ( nodemap , bit ) ;
513- const dataidx = Math . imul ( index ( datamap , bit ) , 2 ) ;
551+ const dataidx = Math . imul ( index ( node . datamap , bit ) , 2 ) ;
514552
515553 // 2. Child Node
516554 // We have to check first if there is already a child node we have to traverse to.
517- if ( ( nodemap & bit ) !== 0 ) {
555+ if ( ( node . nodemap & bit ) !== 0 ) {
556+ const nodeidx = data . length - 1 - index ( node . nodemap , bit ) ;
557+
518558 const oldChild = data [ nodeidx ] ;
519559 const newChild = doRemove ( transient , oldChild , key , hash , shift + bits ) ;
520- // no child entry found, we don't have to update this path.
521- if ( newChild === oldChild ) {
522- return node ;
523- }
524560
525561 // if the child still holds more than one entry, keep it as a child node; copyAndSet only copies this node when the child actually changed.
526- node = copyNode ( node , transient . generation ) ;
527562 if ( newChild . nodemap !== 0 || newChild . data . length > 2 ) {
528- node . data [ nodeidx ] = newChild ;
529- } else {
530- // this node only has a single data (k/v-pair) child.
531- // to restore the CHAMP invariant, we "pull" that pair up into ourselves.
532- // this ensures that every tree stays in its single optimal representation,
533- // and allows dicts to be structurally compared.
534- node . datamap |= bit ;
535- node . nodemap ^= bit ;
536- // NOTE: the order here is important to avoid mutation bugs!
537- // Remove the old child node, and insert the data pair into ourselves.
538- node . data . splice ( nodeidx , 1 ) ;
539- node . data . splice ( dataidx , 0 , newChild . data [ 0 ] , newChild . data [ 1 ] ) ;
563+ return copyAndSet ( node , generation , nodeidx , newChild ) ;
540564 }
541565
566+ // when writing, it looks like since we delete first it's not too bad.
567+ node = copyNode ( node , generation ) ;
568+ // this node only has a single data (k/v-pair) child.
569+ // to restore the CHAMP invariant, we "pull" that pair up into ourselves.
570+ // this ensures that every tree stays in its single optimal representation,
571+ // and allows dicts to be structurally compared.
572+ node . datamap |= bit ;
573+ node . nodemap ^= bit ;
574+ // NOTE: the order here is important to avoid mutation bugs!
575+ // Remove the old child node, and insert the data pair into ourselves.
576+ node . data . splice ( nodeidx , 1 ) ;
577+ node . data . splice ( dataidx , 0 , newChild . data [ 0 ] , newChild . data [ 1 ] ) ;
578+
542579 return node ;
543580 }
544581
545582 // 3. Data Node
546583 // There is no data entry here, or it is a prefix for a different key
547- if ( ( datamap & bit ) === 0 || ! isEqual ( key , data [ dataidx ] ) ) {
584+ if ( ( node . datamap & bit ) === 0 || ! isEqual ( key , data [ dataidx ] ) ) {
548585 return node ;
549586 }
550587
551588 // we found a data entry that we can delete.
552- node = copyNode ( node , transient . generation ) ;
553- node . data . splice ( dataidx , 2 ) ;
554- node . datamap ^= bit ;
555589 transient . size -= 1 ;
556-
557- return node ;
590+ return copyAndRemovePair ( node , generation , bit , dataidx ) ;
558591}
559592
560593export function update_with ( key , fun , value , transient ) {
@@ -578,9 +611,10 @@ export function map(dict, fun) {
578611
579612 while ( queue . length ) {
580613 // order doesn't matter, so we can use push/pop for faster array usage.
581- const { data, datamap } = queue . pop ( ) ;
614+ const node = queue . pop ( ) ;
615+ const data = node . data ;
582616 // every node contains popcount(datamap) direct entries
583- const edgesStart = Math . imul ( popcount ( datamap ) , 2 ) ;
617+ const edgesStart = Math . imul ( popcount ( node . datamap ) , 2 ) ;
584618 for ( let i = 0 ; i < edgesStart ; i += 2 ) {
585619 // we copied the node while queueing it, so direct mutation here is safe.
586620 data [ i + 1 ] = fun ( data [ i ] , data [ i + 1 ] ) ;
@@ -601,9 +635,10 @@ export function fold(dict, state, fun) {
601635
602636 while ( queue . length ) {
603637 // order doesn't matter, so we can use push/pop for faster array usage.
604- const { data, datamap } = queue . pop ( ) ;
638+ const node = queue . pop ( ) ;
639+ const data = node . data ;
605640 // every node contains popcount(datamap) direct entries
606- const edgesStart = Math . imul ( popcount ( datamap ) , 2 ) ;
641+ const edgesStart = Math . imul ( popcount ( node . datamap ) , 2 ) ;
607642 for ( let i = 0 ; i < edgesStart ; i += 2 ) {
608643 state = fun ( state , data [ i ] , data [ i + 1 ] ) ;
609644 }
0 commit comments