diff --git a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java
index fe8276957..7db56e60a 100644
--- a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java
+++ b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.java
@@ -498,7 +498,7 @@ public static boolean hasLineBreakCharacter(@Nullable final String text) {
     // very fast check, but there are very few blocks that exclusively contain emojis,
     public static boolean mightBeEmoji(final int c) {
         return (0x200D <= c && c <= 0x2BFF) // unicode blocks from General Punctuation to Miscellaneous Symbols and Arrows
-                || (0x1F104 <= c && c <= 0x1FAFF) // unicode blocks from Mahjong Tiles to Symbols and Pictographs Extended-A
+                || (0x1F004 <= c && c <= 0x1FAFF) // unicode blocks from Mahjong Tiles to Symbols and Pictographs Extended-A
                 || (0xE0000 <= c && c <= 0xE007F) // unicode block Tags
                 || c == 0xFE0F; // variation selector emoji with color
     }
diff --git a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
index ce73a6cc2..87edec20f 100644
--- a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
+++ b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
@@ -139,11 +139,11 @@ class StringUtilsTest {
     @Test fun detectEmojisAtEndFail() {
         if (BuildConfig.BUILD_TYPE == "runTests") return
         // fails, but unlikely enough that we leave it unfixed
-        assertEquals("\uD83C\uDFFC", getFullEmojiAtEnd("\uD83C\uDF84\uD83C\uDFFC")) // 🎄🏼
+        assertEquals("\uD83C\uDF84\uD83C\uDFFC", getFullEmojiAtEnd("\uD83C\uDF84\uD83C\uDFFC")) // 🎄🏼
         // below also fail, because current ZWJ handling is not suitable for some unusual cases
-        assertEquals("", getFullEmojiAtEnd("\u200D"))
-        assertEquals("", getFullEmojiAtEnd("a\u200D"))
-        assertEquals("\uD83D\uDE22", getFullEmojiAtEnd(" \u200D\uD83D\uDE22"))
+        assertEquals("\u200D", getFullEmojiAtEnd("\u200D"))
+        assertEquals("\u200D", getFullEmojiAtEnd("a\u200D"))
+        assertEquals("\u200D\uD83D\uDE22", getFullEmojiAtEnd(" \u200D\uD83D\uDE22"))
     }
 
     @Test fun isEmojiDetectsSingleEmojis() {
@@ -166,18 +166,26 @@ class StringUtilsTest {
         val brokenDetectionAtStart = listOf("〰️", "〽️", "©️", "®️", "#️⃣", "*️⃣", "0️⃣", "1️⃣", "2️⃣",
             "3️⃣", "4️⃣", "5️⃣", "6️⃣", "7️⃣", "8️⃣", "9️⃣", "㊗️", "㊙️")
         allEmojis.forEach {
-            if (it == "🀄" || it == "🃏") return@forEach // todo: should be fixed, ideally in the regex
-            assert(isEmoji(it))
+            assert(isEmoji(it)) { "Failed isEmoji for $it" }
             assert(StringUtils.mightBeEmoji(it.codePointBefore(it.length)))
             if (it !in brokenDetectionAtStart)
                 assert(StringUtils.mightBeEmoji(it.codePointAt(0)))
         }
     }
 
-    // todo: add tests for emoji detection?
-    // could help towards fully fixing https://github.com/Helium314/HeliBoard/issues/22
-    // though this might be tricky, as some emojis will show as one on new Android versions, and
-    // as two on older versions (also may differ by app)
+    // every entry of the emoji asset lists (minApi.txt and EMOTICONS.txt excluded) must come back unchanged from getFullEmojiAtEnd
+    @Test fun testGetFullEmojiAtEndWithAllAvailableEmojis() {
+        val ctx = ApplicationProvider.getApplicationContext<android.content.Context>()
+        val allEmojis = ctx.assets.list("emoji")!!.flatMap {
+            if (it == "minApi.txt" || it == "EMOTICONS.txt") return@flatMap emptyList()
+            ctx.assets.open("emoji/$it").reader().readLines()
+        }.flatMap { it.splitOnWhitespace() }
+
+        allEmojis.forEach {
+            val emojiAtEnd = getFullEmojiAtEnd(it)
+            assertEquals(it, emojiAtEnd, "Failed getFullEmojiAtEnd for $it")
+        }
+    }
 
     private fun checkTextRange(before: String, after: String, sp: SpacingAndPunctuations, script: String, wordStart: Int, wordEnd: Int) {
         val got = getTouchedWordRange(before, after, script, sp)