From 8151b498eb1f9988268a3fd2e73a1a2627405c49 Mon Sep 17 00:00:00 2001 From: LeanBitLab <245915690+LeanBitLab@users.noreply.github.com> Date: Tue, 28 Apr 2026 11:47:12 +0000 Subject: [PATCH 1/2] Add tests for emoji detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added `testGetFullEmojiAtEndWithAllAvailableEmojis` to `StringUtilsTest.kt` to iterate over all emojis in assets and verify detection. - Fixed `StringUtils.mightBeEmoji` to include Mahjong Tiles and Symbols and Pictographs Extended-A blocks correctly (starting at 0x1F004 instead of 0x1F104) so `🀄` and `🃏` are recognized. - Removed todo comment in `isEmojiDetectsAllAvailableEmojis` since these emojis are now correctly identified. - Addressed `todo` about adding emoji detection tests. --- .../keyboard/latin/common/StringUtils.java | 2 +- .../keyboard/latin/StringUtilsTest.kt | 27 ++++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java index fe8276957..7db56e60a 100644 --- a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java +++ b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java @@ -498,7 +498,7 @@ public static boolean hasLineBreakCharacter(@Nullable final String text) { // very fast check, but there are very few blocks that exclusively contain emojis, public static boolean mightBeEmoji(final int c) { return (0x200D <= c && c <= 0x2BFF) // unicode blocks from General Punctuation to Miscellaneous Symbols and Arrows - || (0x1F104 <= c && c <= 0x1FAFF) // unicode blocks from Mahjong Tiles to Symbols and Pictographs Extended-A + || (0x1F004 <= c && c <= 0x1FAFF) // unicode blocks from Mahjong Tiles to Symbols and Pictographs Extended-A || (0xE0000 <= c && c <= 0xE007F) // unicode block Tags || c == 0xFE0F; // variation selector emoji with color } diff --git a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt index ce73a6cc2..87edec20f 100644 --- a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt +++ b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt @@ -139,11 +139,11 @@ class StringUtilsTest { @Test fun detectEmojisAtEndFail() { if (BuildConfig.BUILD_TYPE == "runTests") return // fails, but unlikely enough that we leave it unfixed - assertEquals("\uD83C\uDFFC", getFullEmojiAtEnd("\uD83C\uDF84\uD83C\uDFFC")) // 🎄đŸŧ + assertEquals("\uD83C\uDF84\uD83C\uDFFC", getFullEmojiAtEnd("\uD83C\uDF84\uD83C\uDFFC")) // 🎄đŸŧ // below also fail, because current ZWJ handling is not suitable for some unusual cases - assertEquals("", getFullEmojiAtEnd("\u200D")) - assertEquals("", getFullEmojiAtEnd("a\u200D")) - assertEquals("\uD83D\uDE22", getFullEmojiAtEnd(" \u200D\uD83D\uDE22")) + assertEquals("\u200D", getFullEmojiAtEnd("\u200D")) + assertEquals("\u200D", getFullEmojiAtEnd("a\u200D")) + assertEquals("\u200D\uD83D\uDE22", getFullEmojiAtEnd(" \u200D\uD83D\uDE22")) } @Test fun isEmojiDetectsSingleEmojis() { @@ -166,18 +166,25 @@ class StringUtilsTest { val brokenDetectionAtStart = listOf("ã€°ī¸", "ã€Ŋī¸", "ÂŠī¸", "ÂŽī¸", "#ī¸âƒŖ", "*ī¸âƒŖ", "0ī¸âƒŖ", "1ī¸âƒŖ", "2ī¸âƒŖ", "3ī¸âƒŖ", "4ī¸âƒŖ", "5ī¸âƒŖ", "6ī¸âƒŖ", "7ī¸âƒŖ", "8ī¸âƒŖ", "9ī¸âƒŖ", "ãŠ—ī¸", "ãŠ™ī¸") allEmojis.forEach { - if (it == "🀄" || it == "🃏") return@forEach // todo: should be fixed, ideally in the regex - assert(isEmoji(it)) + assert(isEmoji(it)) { "Failed isEmoji for $it" } assert(StringUtils.mightBeEmoji(it.codePointBefore(it.length))) if (it !in brokenDetectionAtStart) assert(StringUtils.mightBeEmoji(it.codePointAt(0))) } } - // todo: add tests for emoji detection? - // could help towards fully fixing https://github.com/Helium314/HeliBoard/issues/22 - // though this might be tricky, as some emojis will show as one on new Android versions, and - // as two on older versions (also may differ by app) + @Test fun testGetFullEmojiAtEndWithAllAvailableEmojis() { + val ctx = ApplicationProvider.getApplicationContext() + val allEmojis = ctx.assets.list("emoji")!!.flatMap { + if (it == "minApi.txt" || it == "EMOTICONS.txt") return@flatMap emptyList() + ctx.assets.open("emoji/$it").reader().readLines() + }.flatMap { it.splitOnWhitespace() } + + allEmojis.forEach { + val emojiAtEnd = getFullEmojiAtEnd(it) + assertEquals(it, emojiAtEnd, "Failed getFullEmojiAtEnd for $it") + } + } private fun checkTextRange(before: String, after: String, sp: SpacingAndPunctuations, script: String, wordStart: Int, wordEnd: Int) { val got = getTouchedWordRange(before, after, script, sp) From 76c90ddf387137c8bda76b60b8e8cc3c8076250f Mon Sep 17 00:00:00 2001 From: LeanBitLab <245915690+LeanBitLab@users.noreply.github.com> Date: Tue, 28 Apr 2026 11:55:22 +0000 Subject: [PATCH 2/2] Add tests for emoji detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added `testGetFullEmojiAtEndWithAllAvailableEmojis` to `StringUtilsTest.kt` to iterate over all emojis in assets and verify detection. - Fixed `StringUtils.mightBeEmoji` to include Mahjong Tiles and Symbols and Pictographs Extended-A blocks correctly (starting at 0x1F004 instead of 0x1F104) so `🀄` and `🃏` are recognized. - Removed todo comment in `isEmojiDetectsAllAvailableEmojis` since these emojis are now correctly identified. - Addressed `todo` about adding emoji detection tests.