From e0ff658bc06877168d5ad5ab46ce4ab4e5afb112 Mon Sep 17 00:00:00 2001 From: skbeh <60107333+skbeh@users.noreply.github.com> Date: Mon, 19 Aug 2024 13:19:24 +0000 Subject: [PATCH] Add unicode block and unicode script support to unicode check Allow user to block types of characters, so `defaultCharTest` is no longer needed. Also fix support for non-BMP characters by matching code points instead of `char`. --- .../api/checks/UnicodeCheck.java | 211 +++++++++++++----- .../common/configuration/Checks.java | 145 +++++++++--- .../plugin/modules/ProviderModule.java | 22 +- .../modules/checks/UnicodeTest.java | 68 +++--- 4 files changed, 327 insertions(+), 119 deletions(-) diff --git a/api/src/main/java/io/github/_4drian3d/chatregulator/api/checks/UnicodeCheck.java b/api/src/main/java/io/github/_4drian3d/chatregulator/api/checks/UnicodeCheck.java index 3060749..9e10252 100644 --- a/api/src/main/java/io/github/_4drian3d/chatregulator/api/checks/UnicodeCheck.java +++ b/api/src/main/java/io/github/_4drian3d/chatregulator/api/checks/UnicodeCheck.java @@ -1,79 +1,91 @@ package io.github._4drian3d.chatregulator.api.checks; import io.github._4drian3d.chatregulator.api.InfractionPlayer; -import io.github._4drian3d.chatregulator.api.annotations.Required; import io.github._4drian3d.chatregulator.api.enums.ControlType; import io.github._4drian3d.chatregulator.api.enums.DetectionMode; import io.github._4drian3d.chatregulator.api.enums.InfractionType; import io.github._4drian3d.chatregulator.api.result.CheckResult; import net.kyori.adventure.builder.AbstractBuilder; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; -import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.function.Predicate; +import java.util.stream.Collectors; -import static io.github._4drian3d.chatregulator.api.utils.Commands.SPACE; import static java.util.Objects.requireNonNull; +import static java.util.Objects.requireNonNullElse; /** * Check for 
invalid characters */ public final class UnicodeCheck implements Check { - private final char[] chars; - private final ControlType control; - private final Predicate charPredicate; - - private UnicodeCheck(char[] chars, ControlType control, DetectionMode mode) { - this.chars = chars; - this.control = control; - if (chars == null) { - this.charPredicate = UnicodeCheck::defaultCharTest; - } else { - this.charPredicate = (mode == DetectionMode.BLACKLIST) - ? c -> defaultCharTest(c) || charTest(c) - : c -> defaultCharTest(c) && !charTest(c); - } + private final @NotNull Set chars; + private final ControlType charControl; + private final Predicate charPredicate; + + private final @NotNull Set blocks; + private final ControlType blockControl; + private final Predicate blockPredicate; + + private final @NotNull Set scripts; + private final ControlType scriptControl; + private final Predicate scriptPredicate; + + private UnicodeCheck(Integer @NotNull [] chars, ControlType charControl, DetectionMode charMode, + Character.UnicodeBlock @NotNull [] blocks, ControlType blockControl, DetectionMode blockMode, + Character.UnicodeScript @NotNull [] scripts, ControlType scriptControl, DetectionMode scriptMode) { + this.chars = Set.of(chars); + this.charControl = charControl; + this.charPredicate = (charMode == DetectionMode.BLACKLIST) ? this.chars::contains : Predicate.not(this.chars::contains); + + this.blocks = Set.of(blocks); + this.blockControl = blockControl; + final Predicate blockPredicate = codePoint -> this.blocks.contains(Character.UnicodeBlock.of(codePoint)); + this.blockPredicate = (blockMode == DetectionMode.BLACKLIST) ? blockPredicate : blockPredicate.negate(); + + this.scripts = Set.of(scripts); + this.scriptControl = scriptControl; + final Predicate scriptPredicate = codePoint -> this.scripts.contains(Character.UnicodeScript.of(codePoint)); + this.scriptPredicate = (scriptMode == DetectionMode.BLACKLIST) ? 
scriptPredicate : scriptPredicate.negate(); } - public static boolean defaultCharTest(char c) { - if (c <= '¿') { - return false; + @Override + public @NotNull CheckResult check(@NotNull InfractionPlayer player, final @NotNull String string) { + final List codePointList = requireNonNull(string).codePoints().boxed().collect(Collectors.toList()); + boolean replaced = false; + + if (this.charControl == ControlType.BLOCK) { + if (codePointList.stream().anyMatch(this.charPredicate)) { + return CheckResult.denied(type()); + } + } else { + replaced |= codePointList.removeIf(this.charPredicate); } - return !(c <= 'þ'); - } - private boolean charTest(final char c) { - for (final char character : this.chars) { - if (character == c) { - return true; + if (this.blockControl == ControlType.BLOCK) { + if (codePointList.stream().anyMatch(this.blockPredicate)) { + return CheckResult.denied(type()); } + } else { + replaced |= codePointList.removeIf(this.blockPredicate); } - return false; - } - @Override - public @NotNull CheckResult check(@NotNull InfractionPlayer player, final @NotNull String string) { - final char[] charArray = requireNonNull(string).toCharArray(); - final Set results = new HashSet<>(charArray.length); - - for (final char character : charArray) { - if (charPredicate.test(character)) { - if (control == ControlType.BLOCK) { - return CheckResult.denied(type()); - } - results.add(character); + if (this.scriptControl == ControlType.BLOCK) { + if (codePointList.stream().anyMatch(this.scriptPredicate)) { + return CheckResult.denied(type()); } + } else { + replaced |= codePointList.removeIf(this.scriptPredicate); } - if (results.isEmpty()) { - return CheckResult.allowed(); + if (replaced) { + return CheckResult.modified(type(), codePointList.stream() + .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) + .toString()); } else { - String replaced = string; - for (final char character : results) { - replaced = replaced.replace(character, 
SPACE); - } - return CheckResult.modified(type(), replaced); + return CheckResult.allowed(); } } @@ -95,46 +107,125 @@ public static UnicodeCheck.Builder builder() { * Unicode Check Builder */ public static class Builder implements AbstractBuilder { - private char[] chars; - private ControlType control = ControlType.REPLACE; - private DetectionMode mode = DetectionMode.BLACKLIST; + private Integer @Nullable [] chars; + private ControlType charControl = ControlType.REPLACE; + private DetectionMode charMode = DetectionMode.BLACKLIST; + + private Character.UnicodeBlock @Nullable [] blocks; + private ControlType blockControl = ControlType.REPLACE; + private DetectionMode blockMode = DetectionMode.BLACKLIST; + + private Character.UnicodeScript @Nullable [] scripts; + private ControlType scriptControl = ControlType.REPLACE; + private DetectionMode scriptMode = DetectionMode.BLACKLIST; private Builder() { } /** - * Set the blocked characters + * Set the characters to check * * @param chars the characters * @return this */ - public Builder characters(final char @NotNull ... chars) { + public Builder characters(final @NotNull Integer @NotNull ... chars) { this.chars = chars; return this; } /** - * Set if the check can replace the infraction + * Set if the character check can replace the infraction + * + * @param control the control type + * @return this + */ + public Builder charControlType(final @NotNull ControlType control) { + this.charControl = control; + return this; + } + + /** + * Set only allowing or denying the characters + * + * @param mode the detection mode + * @return this + */ + public Builder charDetectionMode(final @NotNull DetectionMode mode) { + this.charMode = mode; + return this; + } + + /** + * Set the unicode blocks to check + * + * @param blocks the unicode blocks + * @return this + */ + public Builder blocks(final @NotNull Character.UnicodeBlock @NotNull ... 
blocks) { + this.blocks = blocks; + return this; + } + + /** + * Set if the unicode block check can replace the infraction * * @param control the control type * @return this */ - @Required - public Builder controlType(final @NotNull ControlType control) { - this.control = control; + public Builder blockControlType(final @NotNull ControlType control) { + this.blockControl = control; return this; } - @Required - public Builder detectionMode(final @NotNull DetectionMode mode) { - this.mode = mode; + /** + * Set only allowing or denying the unicode blocks + * + * @param mode the detection mode + * @return this + */ + public Builder blockDetectionMode(final @NotNull DetectionMode mode) { + this.blockMode = mode; + return this; + } + + /** + * Set the unicode scripts to check + * + * @param scripts the unicode scripts + * @return this + */ + public Builder scripts(final @NotNull Character.UnicodeScript @NotNull ... scripts) { + this.scripts = scripts; + return this; + } + + /** + * Set if the unicode script check can replace the infraction + * + * @param control the control type + * @return this + */ + public Builder scriptControlType(final @NotNull ControlType control) { + this.scriptControl = control; + return this; + } + + /** + * Set only allowing or denying the unicode scripts + * + * @param mode the detection mode + * @return this + */ + public Builder scriptDetectionMode(final @NotNull DetectionMode mode) { + this.scriptMode = mode; return this; } @Override public @NotNull UnicodeCheck build() { - requireNonNull(control); - return new UnicodeCheck(chars, control, mode); + return new UnicodeCheck(requireNonNullElse(chars, new Integer[]{}), charControl, charMode, + requireNonNullElse(blocks, new Character.UnicodeBlock[]{}), blockControl, blockMode, + requireNonNullElse(scripts, new Character.UnicodeScript[]{}), scriptControl, scriptMode); } } diff --git a/common/src/main/java/io/github/_4drian3d/chatregulator/common/configuration/Checks.java 
b/common/src/main/java/io/github/_4drian3d/chatregulator/common/configuration/Checks.java index 9a8d8c7..d1ae283 100644 --- a/common/src/main/java/io/github/_4drian3d/chatregulator/common/configuration/Checks.java +++ b/common/src/main/java/io/github/_4drian3d/chatregulator/common/configuration/Checks.java @@ -1,11 +1,13 @@ package io.github._4drian3d.chatregulator.common.configuration; import io.github._4drian3d.chatregulator.api.enums.*; +import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.spongepowered.configurate.objectmapping.ConfigSerializable; import org.spongepowered.configurate.objectmapping.meta.Comment; import org.spongepowered.configurate.objectmapping.meta.Setting; +import java.util.Arrays; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -287,70 +289,78 @@ public static class Commands extends CommandsConfig {} } @ConfigSerializable - public static class Unicode implements Toggleable, Warning, Executable, Controllable { - @Comment("Enable the Unicode Module") + public static class Unicode implements Toggleable, Warning, Executable { + @Comment("Enable the unicode module") private boolean enabled = true; @Comment(""" - Sets the form of warning - Available options: TITLE, ACTIONBAR, MESSAGE""") + Sets the form of warning + Available options: TITLE, ACTIONBAR, MESSAGE""") @Setting(value = "warning-type") private WarningType warningType = WarningType.MESSAGE; - @Comment(""" - Sets the control format - Available options: BLOCK, REPLACE""") - @Setting(value = "control-type") - private ControlType controlType = ControlType.BLOCK; - @Comment("Commands to be executed in the unicode module") private Unicode.Commands commands = new Unicode.Commands(); - @Comment("Additional Characters to allow") + @Comment("Additional characters to check") private Chars additionalChars = new Chars(); + @Comment("Additional unicode blocks to check") + private Blocks additionalBlocks = new Blocks(); + + @Comment("Additional 
unicode scripts to check") + private Scripts additionalScripts = new Scripts(); + @Override - public boolean enabled(){ + public boolean enabled() { return this.enabled; } @Override - public WarningType getWarningType(){ + public WarningType getWarningType() { return this.warningType; } @Override - public CommandsConfig getCommandsConfig(){ + public CommandsConfig getCommandsConfig() { return this.commands; } - @Override - public ControlType getControlType() { - return controlType; - } - public Chars additionalChars() { return this.additionalChars; } + public Blocks additionalBlocks() { + return this.additionalBlocks; + } + + public Scripts additionalScripts() { + return this.additionalScripts; + } + @ConfigSerializable public static class Commands extends CommandsConfig {} @ConfigSerializable - public static class Chars implements Toggleable { + public static class Chars implements Toggleable, Controllable { @Comment("Enables extra character check") private boolean enabled = false; @Comment("Sets the additional characters to check") - private char[] chars = {'ç'}; + private String[] chars = {"ç"}; @Comment(""" - Sets character checking mode - Modes Available: - BLACKLIST: If one of the configured characters is detected, the check will be activated as an illegal character - WHITELIST: If a character is detected as illegal but is within the configured characters, its detection as an illegal character will be skipped""") + Sets the control format + Available options: BLOCK, REPLACE""") + @Setting(value = "control-type") + private ControlType controlType = ControlType.BLOCK; + @Comment(""" + Sets character checking mode + Modes Available: + BLACKLIST: Deny characters that are within the configured characters + WHITELIST: Only allow characters that are within the configured characters""") private DetectionMode mode = DetectionMode.BLACKLIST; - public char[] chars() { - return this.chars; + public Integer[] chars() { + return Arrays.stream(this.chars).map(string -> 
string.codePointAt(0)).toArray(Integer[]::new); } @Override @@ -358,6 +368,87 @@ public boolean enabled() { return this.enabled; } + @Override + public ControlType getControlType() { + return controlType; + } + + public DetectionMode detectionMode() { + return this.mode; + } + + } + + @ConfigSerializable + public static class Blocks implements Toggleable, Controllable { + @Comment("Enables extra unicode block check") + private boolean enabled = false; + @Comment("Sets the additional unicode blocks to check") + private @NotNull String[] blocks = {Character.UnicodeBlock.PRIVATE_USE_AREA.toString()}; + @Comment(""" + Sets the control format + Available options: BLOCK, REPLACE""") + @Setting(value = "control-type") + private ControlType controlType = ControlType.BLOCK; + @Comment(""" + Sets unicode block checking mode + Modes Available: + BLACKLIST: Deny characters that are within one of the configured unicode blocks + WHITELIST: Only allow characters that are within one of the configured unicode blocks""") + private DetectionMode mode = DetectionMode.BLACKLIST; + + public @NotNull Character.UnicodeBlock[] blocks() { + return Arrays.stream(this.blocks).map(Character.UnicodeBlock::forName).toArray(Character.UnicodeBlock[]::new); + } + + @Override + public boolean enabled() { + return this.enabled; + } + + @Override + public ControlType getControlType() { + return controlType; + } + + public DetectionMode detectionMode() { + return this.mode; + } + + } + + @ConfigSerializable + public static class Scripts implements Toggleable, Controllable { + @Comment("Enables extra unicode script check") + private boolean enabled = false; + @Comment("Sets the additional unicode scripts to check") + private @NotNull String[] scripts = {Character.UnicodeScript.UNKNOWN.toString()}; + @Comment(""" + Sets the control format + Available options: BLOCK, REPLACE""") + @Setting(value = "control-type") + private ControlType controlType = ControlType.BLOCK; + @Comment(""" + Sets unicode script 
checking mode + Modes Available: + BLACKLIST: Deny characters that are within one of the configured unicode scripts + WHITELIST: Only allow characters that are within one of the configured unicode scripts""") + private DetectionMode mode = DetectionMode.BLACKLIST; + + public @NotNull Character.UnicodeScript[] scripts() { + return Arrays.stream(this.scripts).map(Character.UnicodeScript::valueOf).toArray(Character.UnicodeScript[]::new); + } + + @Override + public boolean enabled() { + return this.enabled; + } + + @Override + public ControlType getControlType() { + return controlType; + } + public DetectionMode detectionMode() { return this.mode; } diff --git a/plugin/src/main/java/io/github/_4drian3d/chatregulator/plugin/modules/ProviderModule.java b/plugin/src/main/java/io/github/_4drian3d/chatregulator/plugin/modules/ProviderModule.java index 5686fc9..8fd8a09 100644 --- a/plugin/src/main/java/io/github/_4drian3d/chatregulator/plugin/modules/ProviderModule.java +++ b/plugin/src/main/java/io/github/_4drian3d/chatregulator/plugin/modules/ProviderModule.java @@ -176,15 +176,23 @@ private CheckProvider unicode(final ConfigurationContainer final InfractionPlayerImpl infractionPlayer = (InfractionPlayerImpl) player; final Checks.Unicode config = configurationContainer.get().getUnicodeConfig(); if (infractionPlayer.isAllowed(InfractionType.UNICODE) && config.enabled()) { + UnicodeCheck.Builder builder = UnicodeCheck.builder(); if (config.additionalChars().enabled()) { - return UnicodeCheck.builder() - .characters(config.additionalChars().chars()) - .detectionMode(config.additionalChars().detectionMode()) - .controlType(config.getControlType()) - .build(); - } else { - return UnicodeCheck.builder().build(); + builder = builder.characters(config.additionalChars().chars()) + .charControlType(config.additionalChars().getControlType()) + .charDetectionMode(config.additionalChars().detectionMode()); } + if (config.additionalBlocks().enabled()) { + builder = 
builder.blocks(config.additionalBlocks().blocks()) + .blockControlType(config.additionalBlocks().getControlType()) + .blockDetectionMode(config.additionalBlocks().detectionMode()); + } + if (config.additionalScripts().enabled()) { + builder = builder.scripts(config.additionalScripts().scripts()) + .scriptControlType(config.additionalScripts().getControlType()) + .scriptDetectionMode(config.additionalScripts().detectionMode()); + } + return builder.build(); } return null; }; diff --git a/plugin/src/test/java/io/github/_4drian3d/chatregulator/modules/checks/UnicodeTest.java b/plugin/src/test/java/io/github/_4drian3d/chatregulator/modules/checks/UnicodeTest.java index 755d5b8..16b674d 100644 --- a/plugin/src/test/java/io/github/_4drian3d/chatregulator/modules/checks/UnicodeTest.java +++ b/plugin/src/test/java/io/github/_4drian3d/chatregulator/modules/checks/UnicodeTest.java @@ -15,13 +15,34 @@ class UnicodeTest { @Test - @DisplayName("Illegal Check") - void illegalTest() { + @DisplayName("Character Check") + void character() { + String illegal = "ñn't"; + String expected = "n't"; + + var builder = UnicodeCheck.builder() + .characters((int) 'ñ') + .charDetectionMode(DetectionMode.BLACKLIST); + + assertTrue(builder.charControlType(ControlType.BLOCK) + .build().check(TestsUtils.dummyPlayer(), illegal).isDenied()); + + CheckResult.ReplaceCheckResult replaceResult = assertInstanceOf(CheckResult.ReplaceCheckResult.class, + builder.charControlType(ControlType.REPLACE).build().check(TestsUtils.dummyPlayer(), illegal)); + assertTrue(replaceResult.shouldModify()); + assertEquals(expected, replaceResult.replaced()); + } + + @Test + @DisplayName("Unicode Block Check") + void blockTest() { String illegal = "ƕƘaea"; - String expected = " aea"; + String expected = "aea"; UnicodeCheck check = UnicodeCheck.builder() - .controlType(ControlType.REPLACE) + .blocks(Character.UnicodeBlock.LATIN_EXTENDED_B) + .blockControlType(ControlType.REPLACE) + 
.blockDetectionMode(DetectionMode.BLACKLIST) .build(); CheckResult result = check.check(TestsUtils.dummyPlayer(), illegal); @@ -32,36 +53,33 @@ void illegalTest() { } @Test - @DisplayName("Custom Check") - void custom() { - String illegal = "ñn't"; + @DisplayName("Unicode Script Check") + void scriptTest() { + String illegal = "\uD83D\uDE04\u2182#\u21D4\u2CC3\u250E\u23E9\u28BD\u25D7"; + String expected = "\u2182\u2CC3\u28BD"; - var result = UnicodeCheck.builder() - .characters('ñ') - .controlType(ControlType.BLOCK) - .detectionMode(DetectionMode.BLACKLIST) - .build() - .check(TestsUtils.dummyPlayer(), illegal); + UnicodeCheck check = UnicodeCheck.builder() + .scripts(Character.UnicodeScript.COMMON) + .scriptControlType(ControlType.REPLACE) + .scriptDetectionMode(DetectionMode.BLACKLIST) + .build(); + CheckResult result = check.check(TestsUtils.dummyPlayer(), illegal); - assertTrue(result.isDenied()); + assertTrue(result.shouldModify()); + + CheckResult.ReplaceCheckResult replaceResult = assertInstanceOf(CheckResult.ReplaceCheckResult.class, result); + assertEquals(expected, replaceResult.replaced()); } @ParameterizedTest @ValueSource(strings = {"todos los años", "ñandu hahahaha"}) void builderTest(String msg) { - UnicodeCheck.Builder builder = UnicodeCheck.builder() - .characters('ñ') - .controlType(ControlType.BLOCK); + UnicodeCheck.Builder builder = UnicodeCheck.builder().charControlType(ControlType.BLOCK); - assertTrue(builder.detectionMode(DetectionMode.BLACKLIST).build() + assertTrue(builder.characters((int) 'ñ').charDetectionMode(DetectionMode.BLACKLIST).build() .check(TestsUtils.dummyPlayer(), msg).isDenied()); - assertFalse(builder.detectionMode(DetectionMode.WHITELIST).build() + assertFalse(builder.characters("dhanolstuñ ".chars().boxed().toArray(Integer[]::new)) + .charDetectionMode(DetectionMode.WHITELIST).build() .check(TestsUtils.dummyPlayer(), msg).isDenied()); } - - @ParameterizedTest - @ValueSource(chars = {'a', 'h', 'b', 'g', 'e', 'd', 'l'}) - void 
testDefaultCharMethod(char character) { - assertFalse(UnicodeCheck.defaultCharTest(character)); - } }