diff --git a/.gitignore b/.gitignore index 7514d55cc3c9a..291a63cdeef92 100644 --- a/.gitignore +++ b/.gitignore @@ -64,4 +64,5 @@ testfixtures_shared/ .ci/jobs/ # build files generated -doc-tools/missing-doclet/bin/ \ No newline at end of file +doc-tools/missing-doclet/bin/ +server/src/main/java/org/opensearch/indices/KLSPerformanceTest.java diff --git a/server/build.gradle b/server/build.gradle index 2c1b247d46a0e..d81f6df99c335 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -161,6 +161,9 @@ dependencies { api "org.ehcache:ehcache:${versions.ehcache}" api "org.slf4j:slf4j-api:${versions.slf4j}" + // roaring bitmaps + api 'org.roaringbitmap:RoaringBitmap:0.9.49' + runtimeOnly 'org.roaringbitmap:shims:0.9.49' testImplementation(project(":test:framework")) { // tests use the locally compiled version of server diff --git a/server/licenses/RoaringBitmap-0.9.49.jar.sha1 b/server/licenses/RoaringBitmap-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..919a73c074b6a --- /dev/null +++ b/server/licenses/RoaringBitmap-0.9.49.jar.sha1 @@ -0,0 +1 @@ +b45b49c1ec5c5fc48580412d0ca635e1833110ea \ No newline at end of file diff --git a/server/licenses/RoaringBitmap-LICENSE.txt b/server/licenses/RoaringBitmap-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/server/licenses/RoaringBitmap-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/RoaringBitmap-NOTICE.txt b/server/licenses/RoaringBitmap-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/licenses/shims-0.9.49.jar.sha1 b/server/licenses/shims-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..9e76614ca5207 --- /dev/null +++ b/server/licenses/shims-0.9.49.jar.sha1 @@ -0,0 +1 @@ +8bd7794fbdaa9536354dd2d8d961d9503beb9460 \ No newline at end of file diff --git a/server/licenses/shims-LICENSE.txt b/server/licenses/shims-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/server/licenses/shims-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/shims-NOTICE.txt b/server/licenses/shims-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java b/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java deleted file mode 100644 index 55ffe22c2a339..0000000000000 --- a/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.common.cache.tier; - -import org.opensearch.core.common.bytes.BytesArray; -import org.opensearch.core.common.bytes.BytesReference; - -import java.io.IOException; -import java.util.Arrays; - -public class BytesReferenceSerializer implements Serializer { - // This class does not get passed to ehcache itself, so it's not required that classes match after deserialization. - - public BytesReferenceSerializer() {} - @Override - public byte[] serialize(BytesReference object) { - return BytesReference.toBytes(object); - } - - @Override - public BytesReference deserialize(byte[] bytes) { - return new BytesArray(bytes); - } - - @Override - public boolean equals(BytesReference object, byte[] bytes) { - return Arrays.equals(serialize(object), bytes); - } -} diff --git a/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java b/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java index fad0c5b1f8552..1fcdd6963794a 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java @@ -14,6 +14,7 @@ import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.RemovalReason; +import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore; import org.opensearch.common.metrics.CounterMetric; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; @@ -42,6 +43,7 @@ import org.ehcache.event.EventType; import org.ehcache.expiry.ExpiryPolicy; import org.ehcache.impl.config.store.disk.OffHeapDiskStoreConfiguration; +import org.opensearch.core.common.unit.ByteSizeValue; /** * @param The key type of cache entries @@ -92,6 +94,7 @@ public class EhCacheDiskCachingTier implements DiskCachingTier { // Defines how many segments the disk cache is separated into. Higher number achieves greater concurrency but // will hold that many file pointers. public final Setting DISK_SEGMENTS; + private final RBMIntKeyLookupStore keystore; private final Serializer keySerializer; private final Serializer valueSerializer; @@ -124,6 +127,11 @@ private EhCacheDiskCachingTier(Builder builder) { close(); cacheManager = buildCacheManager(); this.cache = buildCache(Duration.ofMillis(expireAfterAccess.getMillis()), builder); + + // IndicesRequestCache gets 1%, of which we allocate 5% to the keystore = 0.05% + // TODO: how do we change this automatically based on INDICES_CACHE_QUERY_SIZE setting? + Setting keystoreSizeSetting = Setting.memorySizeSetting(builder.settingPrefix + ".tiered.disk.keystore_size", "0.05%"); + this.keystore = new RBMIntKeyLookupStore(keystoreSizeSetting.get(this.settings).getBytes()); } private PersistentCacheManager buildCacheManager() { @@ -193,12 +201,16 @@ private CacheEventListenerConfigurationBuilder getListenerConfiguration(Builder< @Override public V get(K key) { - return valueSerializer.deserialize(cache.get(key)); + if (keystore.contains(key.hashCode())) { // Check in-memory store of key hashes to avoid unnecessary disk seek + return valueSerializer.deserialize(cache.get(key)); + } + return null; } @Override public void put(K key, V value) { cache.put(key, valueSerializer.serialize(value)); + keystore.add(key.hashCode()); } @Override @@ -211,6 +223,7 @@ public V computeIfAbsent(K key, TieredCacheLoader loader) throws Exception public void invalidate(K key) { // There seems to be a thread leak issue while calling this and then closing cache. cache.remove(key); + keystore.remove(key.hashCode()); } @Override @@ -227,6 +240,7 @@ public void setRemovalListener(RemovalListener removalListener) { @Override public void invalidateAll() { // Clear up files. + keystore.clear(); } @Override diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java new file mode 100644 index 0000000000000..dc2b7a4ba1234 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java @@ -0,0 +1,132 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.common.cache.tier.keystore; + +/** + * An interface for objects that hold an in-memory record of hashes of keys in the disk cache. + * These objects have some internal data structure which stores some transformation of added + * int values. The internal representations may have collisions. Example transformations include a modulo + * or -abs(value), or some combination. + */ +public interface KeyLookupStore { + + /** + * Transforms the input value into the internal representation for this keystore + * and adds it to the internal data structure. + * @param value The value to add. + * @return true if the value was added, false if it wasn't added because of a + * collision or if it was already present. + */ + boolean add(T value); + + /** + * Checks if the transformation of the value is in the keystore. + * @param value The value to check. + * @return true if the value was found, false otherwise. Due to collisions, false positives are + * possible, but there should be no false negatives unless forceRemove() is called. + */ + boolean contains(T value); + + /** + * Returns the transformed version of the input value, that would be used to stored it in the keystore. + * This transformation should be always be the same for a given instance. + * @param value The value to transform. + * @return The transformed value. + */ + T getInternalRepresentation(T value); + + /** + * Attempts to safely remove a value from the internal structure, maintaining the property that contains(value) + * will never return a false negative. If removing would lead to a false negative, the value won't be removed. + * Classes may not implement safe removal. + * @param value The value to attempt to remove. + * @return true if the value was removed, false if it wasn't. + */ + boolean remove(T value); + + /** + * Returns the number of distinct values stored in the internal data structure. + * Does not count values which weren't successfully added due to collisions. + * @return The number of values + */ + int getSize(); + + /** + * Returns the number of times add() has been run, including unsuccessful attempts. + * @return The number of adding attempts. + */ + int getAddAttempts(); + + /** + * Returns the number of times add() has returned false due to a collision. + * @return The number of collisions. + */ + int getCollisions(); + + /** + * Checks if two values would collide after being transformed by this store's transformation. + * @param value1 The first value to compare. + * @param value2 The second value to compare. + * @return true if the transformations are equal, false otherwise. + */ + boolean isCollision(T value1, T value2); + + /** + * Returns an estimate of the store's memory usage. + * @return The memory usage + */ + long getMemorySizeInBytes(); + + /** + * Returns the cap for the store's memory usage. + * @return The cap, in bytes + */ + long getMemorySizeCapInBytes(); + + /** + * Returns whether the store is at memory capacity and can't accept more entries + */ + boolean isFull(); + + /** + * Deletes the internal data structure and regenerates it from the values passed in. + * Also resets all stats related to adding. + * @param newValues The keys that should be in the reset structure. + */ + void regenerateStore(T[] newValues); + + /** + * Deletes all keys and resets all stats related to adding. + */ + void clear(); +} diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java new file mode 100644 index 0000000000000..ab3055a81d4c9 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.cache.tier.keystore; + +import org.opensearch.common.metrics.CounterMetric; + +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A stats holder for use in KeyLookupStore implementations. + * Getters should be exposed by the KeyLookupStore which uses it. + */ +public class KeyStoreStats { + protected CounterMetric size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected AtomicBoolean atCapacity; + protected CounterMetric numRemovalAttempts; + protected CounterMetric numSuccessfulRemovals; + + protected KeyStoreStats(long memSizeCapInBytes) { + this.size = new CounterMetric(); + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.memSizeCapInBytes = memSizeCapInBytes; + this.atCapacity = new AtomicBoolean(false); + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } +} diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java new file mode 100644 index 0000000000000..e690deae7b521 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java @@ -0,0 +1,365 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.common.cache.tier.keystore; + +import org.opensearch.common.metrics.CounterMetric; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.roaringbitmap.RoaringBitmap; + +/** + * This class implements KeyLookupStore using a roaring bitmap with a modulo applied to values. + * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^28. + * It also maintains a hash set of values which have had collisions. Values which haven't had collisions can be + * safely removed from the store. The fraction of collided values should be low, + * about 0.5% for a store with 10^7 values and a modulo of 2^28. + * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap. + */ +public class RBMIntKeyLookupStore implements KeyLookupStore { + /** + * An enum representing modulo values for use in the keystore + */ + public enum KeystoreModuloValue { + NONE(0), // No modulo applied + TWO_TO_THIRTY_ONE((int) Math.pow(2, 31)), + TWO_TO_TWENTY_NINE((int) Math.pow(2, 29)), // recommended value + TWO_TO_TWENTY_EIGHT((int) Math.pow(2, 28)), + TWO_TO_TWENTY_SIX((int) Math.pow(2, 26)); + + private final int value; + + private KeystoreModuloValue(int value) { + this.value = value; + } + + public int getValue() { + return this.value; + } + } + + protected final int modulo; + protected final int modulo_bitmask; + // Since our modulo is always a power of two we can optimize it by ANDing with a particular bitmask + KeyStoreStats stats; + protected RoaringBitmap rbm; + private HashMap collidedIntCounters; + private HashMap> removalSets; + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + protected final Lock readLock = lock.readLock(); + protected final Lock writeLock = lock.writeLock(); + private long mostRecentByteEstimate; + protected final int REFRESH_SIZE_EST_INTERVAL = 10000; + // Refresh size estimate every X new elements. Refreshes use the RBM's internal size estimator, which takes ~0.01 ms, + // so we don't want to do it on every get(), and it doesn't matter much if there are +- 10000 keys in this store + // in terms of storage impact + + // Default constructor sets modulo = 2^28 + public RBMIntKeyLookupStore(long memSizeCapInBytes) { + this(KeystoreModuloValue.TWO_TO_TWENTY_EIGHT, memSizeCapInBytes); + } + + public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) { + this.modulo = moduloValue.getValue(); + if (modulo > 0) { + this.modulo_bitmask = modulo - 1; // keep last log_2(modulo) bits + } else { + this.modulo_bitmask = -1; // -1 in twos complement is all ones -> includes all bits -> same as no modulo + } + this.stats = new KeyStoreStats(memSizeCapInBytes); + this.rbm = new RoaringBitmap(); + this.collidedIntCounters = new HashMap<>(); + this.removalSets = new HashMap<>(); + this.mostRecentByteEstimate = 0L; + } + + private int transform(int value) { + return value & modulo_bitmask; + } + + private void handleCollisions(int transformedValue) { + stats.numCollisions.inc(); + CounterMetric numCollisions = collidedIntCounters.get(transformedValue); + if (numCollisions == null) { // First time the transformedValue has had a collision + numCollisions = new CounterMetric(); + numCollisions.inc(2); + collidedIntCounters.put(transformedValue, numCollisions); // Initialize the number of colliding keys to 2 + } else { + numCollisions.inc(); + } + } + + @Override + public boolean add(Integer value) { + if (value == null) { + return false; + } + stats.numAddAttempts.inc(); + + if (getSize() % REFRESH_SIZE_EST_INTERVAL == 0) { + mostRecentByteEstimate = getMemorySizeInBytes(); + } + if (getMemorySizeCapInBytes() > 0 && mostRecentByteEstimate > getMemorySizeCapInBytes()) { + stats.atCapacity.set(true); + return false; + } + int transformedValue = transform(value); + + writeLock.lock(); + try { + boolean alreadyContained; + // saves calling transform() an additional time + alreadyContained = rbm.contains(transformedValue); + if (!alreadyContained) { + rbm.add(transformedValue); + stats.size.inc(); + return true; + } + // If the value is already pending removal, take it out of the removalList + HashSet removalSet = removalSets.get(transformedValue); + if (removalSet != null) { + removalSet.remove(value); + // Don't increment the counter - this is handled by handleCollisions() later + if (removalSet.isEmpty()) { + removalSets.remove(transformedValue); + } + } + + handleCollisions(transformedValue); + return false; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean contains(Integer value) { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + return rbm.contains(transformedValue); + } finally { + readLock.unlock(); + } + } + + @Override + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); + } + + /** + * Attempts to remove a value from the keystore. WARNING: Removing keys which have not been added to the keystore + * may cause undefined behavior, including future false negatives!! + * @param value The value to attempt to remove. + * @return true if the value was removed, false otherwise + */ + @Override + public boolean remove(Integer value) { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + if (!rbm.contains(transformedValue)) { // saves additional transform() call + return false; + } + // move below into write lock + stats.numRemovalAttempts.inc(); + } finally { + readLock.unlock(); + } + writeLock.lock(); + try { + CounterMetric numCollisions = collidedIntCounters.get(transformedValue); + if (numCollisions != null) { + // This transformed value has had a collision before + HashSet removalSet = removalSets.get(transformedValue); + if (removalSet == null) { + // First time a removal has been attempted for this transformed value + HashSet newRemovalSet = new HashSet<>(); + newRemovalSet.add(value); // Add the key value, not the transformed value, to the list of attempted removals for this transformedValue + removalSets.put(transformedValue, newRemovalSet); + numCollisions.dec(); + } else { + if (removalSet.contains(value)) { + return false; // We have already attempted to remove this value. Do nothing + } + removalSet.add(value); + numCollisions.dec(); + // If numCollisions has reached zero, we can safely remove all values in removalList + if (numCollisions.count() == 0) { + removeFromRBM(transformedValue); + collidedIntCounters.remove(transformedValue); + removalSets.remove(transformedValue); + return true; + } + } + return false; + } + // Otherwise, there's not been a collision for this transformedValue, so we can safely remove + removeFromRBM(transformedValue); + return true; + } finally { + writeLock.unlock(); + } + } + + // Helper fn for remove() + private void removeFromRBM(int transformedValue) { + rbm.remove(transformedValue); + stats.size.dec(); + stats.numSuccessfulRemovals.inc(); + } + + @Override + public int getSize() { + readLock.lock(); + try { + return (int) stats.size.count(); + } finally { + readLock.unlock(); + } + } + + @Override + public int getAddAttempts() { + return (int) stats.numAddAttempts.count(); + } + + @Override + public int getCollisions() { + return (int) stats.numCollisions.count(); + } + + @Override + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } + return transform(value1) == transform(value2); + } + + static double getRBMSizeMultiplier(int numEntries, int modulo) { + double effectiveModulo = (double) modulo / 2; + /* This model was created when we used % operator to calculate modulo. This has range (-modulo, modulo). + Now we have optimized to use a bitmask, which has range [0, modulo). So the number of possible values stored + is halved. */ + if (modulo == 0) { + effectiveModulo = Math.pow(2, 32); + } + double x = Math.log10((double) numEntries / effectiveModulo); + if (x < -5) { + return 7.0; + } + if (x < -2.75) { + return -2.5 * x - 5.5; + } + if (x <= 0) { + return -3.0 / 22.0 * x + 1; + } + return 1; + } + + @Override + public long getMemorySizeInBytes() { + double multiplier = getRBMSizeMultiplier((int) stats.size.count(), modulo); + return (long) (rbm.getSizeInBytes() * multiplier); + } + + @Override + public long getMemorySizeCapInBytes() { + return stats.memSizeCapInBytes; + } + + @Override + public boolean isFull() { + return stats.atCapacity.get(); + } + + @Override + public void regenerateStore(Integer[] newValues) { + rbm.clear(); + collidedIntCounters = new HashMap<>(); + removalSets = new HashMap<>(); + stats.size = new CounterMetric(); + stats.numAddAttempts = new CounterMetric(); + stats.numCollisions = new CounterMetric(); + stats.guaranteesNoFalseNegatives = true; + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } + } + } + + @Override + public void clear() { + regenerateStore(new Integer[] {}); + } + + public int getNumRemovalAttempts() { + return (int) stats.numRemovalAttempts.count(); + } + + public int getNumSuccessfulRemovals() { + return (int) stats.numSuccessfulRemovals.count(); + } + + public boolean valueHasHadCollision(Integer value) { + if (value == null) { + return false; + } + return collidedIntCounters.containsKey(transform(value)); + } + + CounterMetric getNumCollisionsForValue(int value) { // package private for testing + return collidedIntCounters.get(transform(value)); + } + + HashSet getRemovalSetForValue(int value) { + return removalSets.get(transform(value)); + } +} diff --git a/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java b/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java deleted file mode 100644 index 2fc9c7cbb2756..0000000000000 --- a/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.common.cache.tier; - -import org.opensearch.common.Randomness; -import org.opensearch.common.bytes.ReleasableBytesReference; -import org.opensearch.common.util.BigArrays; -import org.opensearch.common.util.PageCacheRecycler; -import org.opensearch.core.common.bytes.BytesArray; -import org.opensearch.core.common.bytes.BytesReference; -import org.opensearch.core.common.bytes.CompositeBytesReference; -import org.opensearch.core.common.util.ByteArray; -import org.opensearch.test.OpenSearchTestCase; - -import java.util.Random; - -public class BytesReferenceSerializerTests extends OpenSearchTestCase { - public void testEquality() throws Exception { - BytesReferenceSerializer ser = new BytesReferenceSerializer(); - // Test that values are equal before and after serialization, for each implementation of BytesReference. - byte[] bytesValue = new byte[1000]; - Random rand = Randomness.get(); - rand.nextBytes(bytesValue); - - BytesReference ba = new BytesArray(bytesValue); - byte[] serialized = ser.serialize(ba); - assertTrue(ser.equals(ba, serialized)); - BytesReference deserialized = ser.deserialize(serialized); - assertEquals(ba, deserialized); - - BytesReference cbr = CompositeBytesReference.of(new BytesArray(bytesValue), new BytesArray(bytesValue)); - serialized = ser.serialize(cbr); - assertTrue(ser.equals(cbr, serialized)); - deserialized = ser.deserialize(serialized); - assertEquals(cbr, deserialized); - - // We need the PagedBytesReference to be larger than the page size (16 KB) in order to actually create it - byte[] pbrValue = new byte[PageCacheRecycler.PAGE_SIZE_IN_BYTES * 2]; - rand.nextBytes(pbrValue); - ByteArray arr = BigArrays.NON_RECYCLING_INSTANCE.newByteArray(pbrValue.length); - arr.set(0L, pbrValue, 0, pbrValue.length); - assert !arr.hasArray(); - BytesReference pbr = BytesReference.fromByteArray(arr, pbrValue.length); - serialized = ser.serialize(pbr); - assertTrue(ser.equals(pbr, serialized)); - deserialized = ser.deserialize(serialized); - assertEquals(pbr, deserialized); - - BytesReference rbr = new ReleasableBytesReference(new BytesArray(bytesValue), ReleasableBytesReference.NO_OP); - serialized = ser.serialize(rbr); - assertTrue(ser.equals(rbr, serialized)); - deserialized = ser.deserialize(serialized); - assertEquals(rbr, deserialized); - } -} diff --git a/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java b/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java index e6222a9065f94..0f7bf51b65546 100644 --- a/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java +++ b/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java @@ -65,41 +65,6 @@ public void testBasicGetAndPut() throws IOException { } } - public void testBasicGetAndPutBytesReference() throws Exception { - Settings settings = Settings.builder().build(); - try (NodeEnvironment env = newNodeEnvironment(settings)) { - EhCacheDiskCachingTier ehCacheDiskCachingTier = new EhCacheDiskCachingTier.Builder() - .setKeyType(String.class) - .setValueType(BytesReference.class) - .setExpireAfterAccess(TimeValue.MAX_VALUE) - .setSettings(settings) - .setThreadPoolAlias("ehcacheTest") - .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES * 2) // bigger so no evictions happen - .setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache") - .setSettingPrefix(SETTING_PREFIX) - .setKeySerializer(new StringSerializer()) - .setValueSerializer(new BytesReferenceSerializer()) - .build(); - int randomKeys = randomIntBetween(10, 100); - int valueLength = 1000; - Random rand = Randomness.get(); - Map keyValueMap = new HashMap<>(); - for (int i = 0; i < randomKeys; i++) { - byte[] valueBytes = new byte[valueLength]; - rand.nextBytes(valueBytes); - keyValueMap.put(UUID.randomUUID().toString(), new BytesArray(valueBytes)); - } - for (Map.Entry entry : keyValueMap.entrySet()) { - ehCacheDiskCachingTier.put(entry.getKey(), entry.getValue()); - } - for (Map.Entry entry : keyValueMap.entrySet()) { - BytesReference value = ehCacheDiskCachingTier.get(entry.getKey()); - assertEquals(entry.getValue(), value); - } - ehCacheDiskCachingTier.close(); - } - } - public void testConcurrentPut() throws Exception { Settings settings = Settings.builder().build(); try (NodeEnvironment env = newNodeEnvironment(settings)) { diff --git a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java new file mode 100644 index 0000000000000..d9b1ece1310ca --- /dev/null +++ b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java @@ -0,0 +1,414 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.common.cache.tier.keystore; + +import org.opensearch.common.Randomness; +import org.opensearch.common.metrics.CounterMetric; +import org.opensearch.test.OpenSearchTestCase; +import org.roaringbitmap.RoaringBitmap; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; + +public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { + + final int BYTES_IN_MB = 1048576; + public void testInit() { + long memCap = 100 * BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(memCap); + assertEquals(0, kls.getSize()); + assertEquals(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_EIGHT.getValue(), kls.modulo); + assertEquals(memCap, kls.getMemorySizeCapInBytes()); + } + + public void testTransformationLogic() throws Exception { + int modulo = (int) Math.pow(2, 29); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + int offset = 3; + for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design + int posValue = i * modulo + offset; + kls.add(posValue); + assertEquals(offset, (int) kls.getInternalRepresentation(posValue)); + int negValue = -(i * modulo + offset); + kls.add(negValue); + assertEquals(modulo - offset, (int) kls.getInternalRepresentation(negValue)); + } + assertEquals(2, kls.getSize()); + int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; + for (int value : testVals) { + assertTrue(kls.getInternalRepresentation(value) < modulo); + assertTrue(kls.getInternalRepresentation(value) >= 0); + } + RBMIntKeyLookupStore no_modulo_kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L); + Random rand = Randomness.get(); + for (int i = 0; i < 100; i++) { + int val = rand.nextInt(); + assertEquals(val, (int) no_modulo_kls.getInternalRepresentation(val)); + } + } + + public void testContains() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + RBMIntKeyLookupStore noModuloKls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L); + for (int i = 0; i < kls.REFRESH_SIZE_EST_INTERVAL + 1000; i++) { + // set upper bound > number of elements to trigger a size check, ensuring we test that too + kls.add(i); + assertTrue(kls.contains(i)); + noModuloKls.add(i); + assertTrue(noModuloKls.contains(i)); + } + } + + public void testAddingStatsGetters() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + kls.add(15); + kls.add(-15); + assertEquals(2, kls.getAddAttempts()); + assertEquals(0, kls.getCollisions()); + + int offset = 1; + for (int i = 0; i < 10; i++) { + kls.add(i * moduloValue.getValue() + offset); + } + assertEquals(12, kls.getAddAttempts()); + assertEquals(9, kls.getCollisions()); + } + + public void testRegenerateStore() throws Exception { + int numToAdd = 10000000; + Random rand = Randomness.get(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + assertEquals(numToAdd, kls.getSize()); + Integer[] newVals = new Integer[1000]; // margin accounts for collisions + for (int j = 0; j < newVals.length; j++) { + newVals[j] = rand.nextInt(); + } + kls.regenerateStore(newVals); + assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions + + // test clear() + kls.clear(); + assertEquals(0, kls.getSize()); + } + + public void testAddingDuplicates() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0L); + int numToAdd = 4820411; + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(577); + } + assertEquals(numToAdd, kls.getSize()); + } + + public void testMemoryCapBlocksAdd() throws Exception { + // Now that we're using a modified version of rbm.getSizeInBytes(), which doesn't provide an inverse function, + // we have to test filling just an RBM with random test values first so that we can get the resulting memory cap limit + // to use with our modified size estimate. + // This is much noisier so the precision is lower. + + // It is necessary to use randomly distributed integers for both parts of this test, as we would do with hashes in the cache, + // as that's what our size estimator is designed for. + // If we add a run of integers, our size estimator is not valid, especially for small RBMs. + + int[] maxEntriesArr = new int[] { 1342000, 100000, 3000000}; + long[] rbmReportedSizes = new long[4]; + Random rand = Randomness.get(); + for (int j = 0; j < maxEntriesArr.length; j++) { + RoaringBitmap rbm = new RoaringBitmap(); + for (int i = 0; i < maxEntriesArr[j]; i++) { + rbm.add(rand.nextInt()); + } + rbmReportedSizes[j] = rbm.getSizeInBytes(); + } + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE; + for (int i = 0; i < maxEntriesArr.length; i++) { + double multiplier = RBMIntKeyLookupStore.getRBMSizeMultiplier(maxEntriesArr[i], moduloValue.getValue()); + long memSizeCapInBytes = (long) (rbmReportedSizes[i] * multiplier); + //long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModuloValue(maxEntries, moduloValue); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes); + for (int j = 0; j < maxEntriesArr[i] + 5000; j++) { + kls.add(rand.nextInt()); + } + assertTrue(Math.abs(maxEntriesArr[i] - kls.getSize()) < (double) maxEntriesArr[i] / 10); + } + } + + public void testConcurrency() throws Exception { + Random rand = Randomness.get(); + for (int j = 0; j < 5; j++) { // test with different numbers of threads + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + int numThreads = rand.nextInt(50) + 1; + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + // In this test we want to add the first 200K numbers and check they're all correctly there. + // We do some duplicates too to ensure those aren't incorrectly added. + int amountToAdd = 200000; + ArrayList> wasAdded = new ArrayList<>(amountToAdd); + ArrayList> duplicatesWasAdded = new ArrayList<>(); + for (int i = 0; i < amountToAdd; i++) { + wasAdded.add(null); + } + for (int i = 0; i < amountToAdd; i++) { + final int val = i; + Future fut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + wasAdded.set(val, fut); + if (val % 1000 == 0) { + // do a duplicate add + Future duplicateFut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + duplicatesWasAdded.add(duplicateFut); + } + } + int originalAdds = 0; + int duplicateAdds = 0; + for (Future fut : wasAdded) { + if (fut.get()) { + originalAdds++; + } + } + for (Future duplicateFut : duplicatesWasAdded) { + if (duplicateFut.get()) { + duplicateAdds++; + } + } + for (int i = 0; i < amountToAdd; i++) { + assertTrue(kls.contains(i)); + } + assertEquals(amountToAdd, originalAdds + duplicateAdds); + assertEquals(amountToAdd, kls.getSize()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); + executor.shutdown(); + } + } + + public void testRemoveNoCollisions() throws Exception { + long memCap = 100L * BYTES_IN_MB; + int numToAdd = 195000; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, memCap); + // there should be no collisions for sequential positive numbers up to modulo + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + for (int i = 0; i < 1000; i++) { + assertTrue(kls.remove(i)); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + assertEquals(numToAdd - 1000, kls.getSize()); + } + + public void testRemoveWithCollisions() throws Exception { + int modulo = (int) Math.pow(2, 26); + long memCap = 100L * BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX, memCap); + for (int i = 0; i < 10; i++) { + kls.add(i); + if (i % 2 == 1) { + kls.add(-i); + assertFalse(kls.valueHasHadCollision(i)); + kls.add(i + modulo); + assertTrue(kls.valueHasHadCollision(i)); + } else { + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(15, kls.getSize()); + for (int i = 0; i < 10; i++) { + boolean didRemove = kls.remove(i); + if (i % 2 == 1) { + // we expect a collision with i + modulo, so we can't remove + assertFalse(didRemove); + assertTrue(kls.contains(i)); + // but we should be able to remove -i + boolean didRemoveNegative = kls.remove(-i); + assertTrue(didRemoveNegative); + assertFalse(kls.contains(-i)); + } else { + // we expect no collision + assertTrue(didRemove); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(5, kls.getSize()); + int offset = 12; + kls.add(offset); + for (int j = 1; j < 5; j++) { + kls.add(offset + j * modulo); + } + assertEquals(6, kls.getSize()); + assertFalse(kls.remove(offset + modulo)); + assertTrue(kls.valueHasHadCollision(offset + 15 * modulo)); + assertTrue(kls.contains(offset + 17 * modulo)); + } + + public void testNullInputs() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + assertFalse(kls.add(null)); + assertFalse(kls.contains(null)); + assertEquals(0, (int) kls.getInternalRepresentation(null)); + assertFalse(kls.remove(null)); + assertFalse(kls.isCollision(null, null)); + assertEquals(0, kls.getAddAttempts()); + Integer[] newVals = new Integer[] { 1, 17, -2, null, -4, null }; + kls.regenerateStore(newVals); + assertEquals(4, kls.getSize()); + } + + public void testRemovalLogic() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + int modulo = moduloValue.getValue(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + + // Test standard sequence: add K1, K2, K3 which all transform to C, then: + // Remove K3 + // Remove K2, re-add it, re-remove it twice (duplicate should do nothing) + // Remove K1, which should finally actually remove everything + int c = -42; + int k1 = c + modulo; + int k2 = c + 2 * modulo; + int k3 = c + 3 * modulo; + kls.add(k1); + assertTrue(kls.contains(k1)); + assertTrue(kls.contains(k3)); + kls.add(k2); + CounterMetric numCollisions = kls.getNumCollisionsForValue(k2); + assertNotNull(numCollisions); + assertEquals(2, numCollisions.count()); + kls.add(k3); + assertEquals(3, numCollisions.count()); + assertEquals(1, kls.getSize()); + + boolean removed = kls.remove(k3); + assertFalse(removed); + HashSet removalSet = kls.getRemovalSetForValue(k3); + assertEquals(1, removalSet.size()); + assertTrue(removalSet.contains(k3)); + assertEquals(2, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + kls.add(k2); + assertEquals(1, removalSet.size()); + assertFalse(removalSet.contains(k2)); + assertEquals(2, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k1); + assertTrue(removed); + assertNull(kls.getRemovalSetForValue(k1)); + assertNull(kls.getNumCollisionsForValue(k1)); + assertEquals(0, kls.getSize()); + } + + public void testRemovalLogicWithHashCollision() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + int modulo = moduloValue.getValue(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + + // Test adding K1 twice (maybe two keys hash to K1), then removing it twice. + // We expect it to be unable to remove the last one, but there should be no false negatives. + int c = 77; + int k1 = c + modulo; + int k2 = c + 2 * modulo; + kls.add(k1); + kls.add(k2); + CounterMetric numCollisions = kls.getNumCollisionsForValue(k1); + assertEquals(2, numCollisions.count()); + kls.add(k1); + assertEquals(3, numCollisions.count()); + + boolean removed = kls.remove(k1); + assertFalse(removed); + HashSet removalSet = kls.getRemovalSetForValue(k1); + assertTrue(removalSet.contains(k1)); + assertEquals(2, numCollisions.count()); + + removed = kls.remove(k2); + assertFalse(removed); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + + removed = kls.remove(k1); + assertFalse(removed); + assertTrue(removalSet.contains(k1)); + assertEquals(1, numCollisions.count()); + assertTrue(kls.contains(k1)); + assertTrue(kls.contains(k2)); + } +}