diff --git a/src/MiniSearch.test.js b/src/MiniSearch.test.js index 5482104c..f914f5b9 100644 --- a/src/MiniSearch.test.js +++ b/src/MiniSearch.test.js @@ -759,6 +759,76 @@ describe('MiniSearch', () => { }) }) + describe('addFields', () => { + it('add fields to an existing document', () => { + const options = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] } + const ms = new MiniSearch(options) + const other = new MiniSearch(options) + + ms.add({ id: 1, text: 'Some quite interesting stuff' }) + ms.addFields(1, { author: 'Al et. al.', n: 5 }) + + other.add({ id: 1, text: 'Some quite interesting stuff', author: 'Al et. al.', n: 5 }) + + expect(ms).toEqual(other) + }) + + it('throws an error if the document did not exist', () => { + const ms = new MiniSearch({ fields: ['text'] }) + expect(() => { + ms.addFields(1, { text: 'hello' }) + }).toThrow('MiniSearch: no document with ID 1') + }) + + it('throws an error if adding a field that already exists', () => { + const ms = new MiniSearch({ fields: ['text'] }) + ms.add({ id: 1, text: 'Some interesting stuff' }) + expect(() => { + ms.addFields(1, { text: 'hello' }) + }).toThrow('MiniSearch: field text already exists on document with ID 1') + }) + }) + + describe('removeFields', () => { + it('removes fields to an existing document', () => { + const options = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] } + const ms = new MiniSearch(options) + const other = new MiniSearch(options) + + ms.add({ id: 1, text: 'Some quite interesting stuff', author: 'Al et. al.', n: 5 }) + ms.add({ id: 2, text: 'Lalala', author: 'Someone', n: 3 }) + ms.removeFields(1, { text: 'Some quite interesting stuff', n: 5 }) + + other.add({ id: 1, author: 'Al et. al.' 
}) + other.add({ id: 2, text: 'Lalala', author: 'Someone', n: 3 }) + + expect(ms).toEqual(other) + }) + + it('throws an error if the document did not exist', () => { + const ms = new MiniSearch({ fields: ['text'] }) + expect(() => { + ms.removeFields(1, { text: 'hello' }) + }).toThrow('MiniSearch: no document with ID 1') + }) + + it('throws an error if removing a field that did not exist', () => { + const ms = new MiniSearch({ fields: ['text', 'author'] }) + ms.addAll([ + { id: 1, author: 'Al et. al.' }, + { id: 2 } + ]) + + expect(() => { + ms.removeFields(1, { text: 'Some interesting stuff' }) + }).toThrow('MiniSearch: field text does not exist on document with ID 1') + + expect(() => { + ms.removeFields(2, { author: 'Someone' }) + }).toThrow('MiniSearch: field author does not exist on document with ID 2') + }) + }) + describe('vacuum', () => { it('cleans up discarded documents from the index', async () => { const ms = new MiniSearch({ fields: ['text'], storeFields: ['text'] }) diff --git a/src/MiniSearch.ts b/src/MiniSearch.ts index 7350b9c4..71141680 100644 --- a/src/MiniSearch.ts +++ b/src/MiniSearch.ts @@ -588,7 +588,7 @@ export default class MiniSearch { this._enqueuedVacuum = null this._enqueuedVacuumConditions = defaultVacuumConditions - this.addFields(this._options.fields) + this.addFieldIds(this._options.fields) } /** @@ -597,8 +597,9 @@ export default class MiniSearch { * @param document The document to be indexed */ add (document: T): void { - const { extractField, tokenize, processTerm, fields, idField } = this._options + const { extractField, idField } = this._options const id = extractField(document, idField) + if (id == null) { throw new Error(`MiniSearch: document does not have ID field "${idField}"`) } @@ -610,6 +611,52 @@ export default class MiniSearch { const shortDocumentId = this.addDocumentId(id) this.saveStoredFields(shortDocumentId, document) + this.addToIndex(shortDocumentId, document, true) + } + + /** + * Adds some fields to an 
existing document + * + * The added fields should not be already present on the document, or an error + * will be thrown. + * + * ## Example: + * + * const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] }) + * + * miniSearch.add({ id: 1, title: 'Neuromancer' }) + * + * miniSearch.addFields(1, { + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * // The above is equivalent to: + * miniSearch.add({ + * id: 1, + * title: 'Neuromancer', + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * @param id The document ID + * @param toAdd The fields to add + */ + addFields (id: any, toAdd: T): void { + const shortDocumentId = this._idToShortId.get(id) + + if (shortDocumentId == null) { + throw new Error(`MiniSearch: no document with ID ${id}`) + } + + this.saveStoredFields(shortDocumentId, toAdd) + + this.addToIndex(shortDocumentId, toAdd, false) + } + + private addToIndex (shortDocumentId: number, document: T, added: boolean) { + const { extractField, tokenize, processTerm, fields } = this._options + for (const field of fields) { const fieldValue = extractField(document, field) if (fieldValue == null) continue @@ -617,8 +664,13 @@ export default class MiniSearch { const tokens = tokenize(fieldValue.toString(), field) const fieldId = this._fieldIds[field] - const uniqueTerms = new Set(tokens).size - this.addFieldLength(shortDocumentId, fieldId, this._documentCount - 1, uniqueTerms) + const uniqueTerms = new Set(tokens) + uniqueTerms.delete('') + + if (this._fieldLength.get(shortDocumentId)?.[fieldId] != null) { + throw new Error(`MiniSearch: field ${field} already exists on document with ID ${this._documentIds.get(shortDocumentId)}`) + } + this.addFieldLength(shortDocumentId, fieldId, this._documentCount, uniqueTerms.size, added) for (const term of tokens) { const processedTerm = processTerm(term, 
field) @@ -689,7 +741,7 @@ export default class MiniSearch { * @param document The document to be removed */ remove (document: T): void { - const { tokenize, processTerm, extractField, fields, idField } = this._options + const { extractField, idField } = this._options const id = extractField(document, idField) if (id == null) { @@ -702,6 +754,76 @@ export default class MiniSearch { throw new Error(`MiniSearch: cannot remove document with ID ${id}: it is not in the index`) } + this.removeFromIndex(shortId, document, true) + + this._storedFields.delete(shortId) + this._documentIds.delete(shortId) + this._idToShortId.delete(id) + this._fieldLength.delete(shortId) + this._documentCount -= 1 + } + + /** + * Removes some fields from an existing document + * + * The original fields to be removed must be provided as the second argument. + * The removed fields should be present on the document, or an error will be + * thrown. + * + * Note: removing _all_ the fields in a document with `removeFields` is + * different from removing the whole document with [[MiniSearch.remove]] or + * [[MiniSearch.discard]]. The difference in the first case is that the + * document is still counted in [[MiniSearch.documentCount]], even if it is + * practically not searchable anymore. 
+ * + * ## Example: + * + * const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] }) + * + * miniSearch.add({ + * id: 1, + * title: 'Neuromancer', + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * miniSearch.removeFields(1, { + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * // The above is equivalent to: + * miniSearch.add({ + * id: 1, + * title: 'Neuromancer' + * }) + * + * @param id The document ID + * @param toRemove The fields to remove + */ + removeFields (id: any, toRemove: T) { + const { storeFields, extractField } = this._options + const shortDocumentId = this._idToShortId.get(id) + + if (shortDocumentId == null) { + throw new Error(`MiniSearch: no document with ID ${id}`) + } + + this.removeFromIndex(shortDocumentId, toRemove, false) + + const storedFields = this._storedFields.get(shortDocumentId) + + for (const fieldName of storeFields) { + const fieldValue = extractField(toRemove, fieldName) + if (storedFields != null && fieldValue !== undefined) { + delete storedFields[fieldName] + } + } + } + + private removeFromIndex (shortId: number, document: T, removed: boolean) { + const { tokenize, processTerm, extractField, fields } = this._options + for (const field of fields) { const fieldValue = extractField(document, field) if (fieldValue == null) continue @@ -709,8 +831,13 @@ export default class MiniSearch { const tokens = tokenize(fieldValue.toString(), field) const fieldId = this._fieldIds[field] - const uniqueTerms = new Set(tokens).size - this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms) + const uniqueTerms = new Set(tokens) + uniqueTerms.delete('') + + if (this._fieldLength.get(shortId)?.[fieldId] == null) { + throw new Error(`MiniSearch: field ${field} does not exist on document with ID ${this._documentIds.get(shortId)}`) + } + 
this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms.size, removed) for (const term of tokens) { const processedTerm = processTerm(term, field) @@ -723,12 +850,6 @@ export default class MiniSearch { } } } - - this._storedFields.delete(shortId) - this._documentIds.delete(shortId) - this._idToShortId.delete(id) - this._fieldLength.delete(shortId) - this._documentCount -= 1 } /** @@ -1706,7 +1827,7 @@ export default class MiniSearch { /** * @ignore */ - private addFields (fields: string[]): void { + private addFieldIds (fields: string[]): void { for (let i = 0; i < fields.length; i++) { this._fieldIds[fields[i]] = i } @@ -1715,26 +1836,32 @@ export default class MiniSearch { /** * @ignore */ - private addFieldLength (documentId: number, fieldId: number, count: number, length: number): void { + private addFieldLength (documentId: number, fieldId: number, count: number, length: number, added: boolean): void { let fieldLengths = this._fieldLength.get(documentId) if (fieldLengths == null) this._fieldLength.set(documentId, fieldLengths = []) + const n = added ? 1 : 0 + fieldLengths[fieldId] = length const averageFieldLength = this._avgFieldLength[fieldId] || 0 - const totalFieldLength = (averageFieldLength * count) + length - this._avgFieldLength[fieldId] = totalFieldLength / (count + 1) + const totalFieldLength = (averageFieldLength * (count - n)) + length + this._avgFieldLength[fieldId] = totalFieldLength / count } /** * @ignore */ - private removeFieldLength (documentId: number, fieldId: number, count: number, length: number): void { + private removeFieldLength (documentId: number, fieldId: number, count: number, length: number, removed: boolean = true): void { + const fieldLengths = this._fieldLength.get(documentId) + delete fieldLengths?.[fieldId] + if (count === 1) { this._avgFieldLength[fieldId] = 0 return } + const n = removed ? 
1 : 0 const totalFieldLength = (this._avgFieldLength[fieldId] * count) - length - this._avgFieldLength[fieldId] = totalFieldLength / (count - 1) + this._avgFieldLength[fieldId] = totalFieldLength / (count - n) } /**