From ed9129f294cdb0274eec9c7c46e49d34629628f7 Mon Sep 17 00:00:00 2001 From: Josef Haider Date: Thu, 23 Apr 2026 12:49:53 +0200 Subject: [PATCH] Adjust ICU4J call sites for now build-time initialized parts of ICU4J. --- .../oracle/graal/python/PythonLanguage.java | 13 +++++ .../modules/re/SREModuleBuiltins.java | 12 ++--- .../builtins/objects/str/StringBuiltins.java | 47 ++++++++++--------- .../builtins/objects/str/StringUtils.java | 42 +---------------- 4 files changed, 42 insertions(+), 72 deletions(-) diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java index 19bd3d0c2e..38c7969fce 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java @@ -55,6 +55,7 @@ import org.graalvm.options.OptionKey; import org.graalvm.options.OptionValues; import org.graalvm.polyglot.SandboxPolicy; +import org.graalvm.shadowed.com.ibm.icu.text.CaseMap; import com.oracle.graal.python.annotations.PythonOS; import com.oracle.graal.python.builtins.Python3Core; @@ -981,6 +982,7 @@ public long cacheKeyForBytecode(byte[] code) { @CompilationFinal private Object cachedTRegexLineBreakRegex; public Object getCachedTRegexLineBreakRegex(Node location, PythonContext context) { + CompilerAsserts.partialEvaluationConstant(this); if (cachedTRegexLineBreakRegex == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); cachedTRegexLineBreakRegex = context.getEnv().parseInternal(LINEBREAK_REGEX_SOURCE).call(location); @@ -988,6 +990,17 @@ public Object getCachedTRegexLineBreakRegex(Node location, PythonContext context return cachedTRegexLineBreakRegex; } + @CompilationFinal private CaseMap.Title cachedICUTitleCaser; + + public CaseMap.Title getCachedICUTitleCaser() { + CompilerAsserts.partialEvaluationConstant(this); + if (cachedICUTitleCaser == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + cachedICUTitleCaser = CaseMap.toTitle().wholeString().noBreakAdjustment(); + } + return cachedICUTitleCaser; + } + @Override protected boolean isThreadAccessAllowed(Thread thread, boolean singleThreaded) { if (singleThreaded) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/SREModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/SREModuleBuiltins.java index 54773ad15d..c367ac7e35 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/SREModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/SREModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -158,9 +158,8 @@ protected ArgumentClinicProvider getArgumentClinic() { } @Specialization - @TruffleBoundary static boolean isCased(Object module, int codepoint) { - return codepoint < 128 && UCharacter.isLetter(codepoint); + return 'A' <= codepoint && codepoint <= 'Z' || 'a' <= codepoint && codepoint <= 'z'; } } @@ -175,13 +174,8 @@ protected ArgumentClinicProvider getArgumentClinic() { } @Specialization - @TruffleBoundary static int toLower(Object module, int codepoint) { - if (codepoint >= 128) { - return codepoint; - } - - return UCharacter.toLowerCase(codepoint); + return 'A' <= codepoint && codepoint <= 'Z' ? codepoint | 0x20 : codepoint; } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java index 07089f6bf8..212aadf93b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java @@ -107,8 +107,8 @@ import com.oracle.graal.python.builtins.objects.slice.SliceNodes.CoerceToIntSlice; import com.oracle.graal.python.builtins.objects.slice.SliceNodes.ComputeIndices; import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.FormatNodeClinicProviderGen; -import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.SplitNodeClinicProviderGen; import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.SplitLinesNodeClinicProviderGen; +import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.SplitNodeClinicProviderGen; import com.oracle.graal.python.builtins.objects.str.StringNodes.CastToJavaStringCheckedNode; import com.oracle.graal.python.builtins.objects.str.StringNodes.CastToTruffleStringChecked0Node; import com.oracle.graal.python.builtins.objects.str.StringNodes.CastToTruffleStringChecked1Node; @@ -171,7 +171,6 @@ import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached; @@ -1132,43 +1131,36 @@ static TruffleString doGeneric(VirtualFrame frame, Object self, Object table, @GenerateNodeFactory public abstract static class CapitalizeNode extends PythonUnaryBuiltinNode { - @CompilationFinal private static CaseMap.Title titlecaser; - @Specialization static TruffleString capitalize(TruffleString self, + @Bind PythonLanguage language, @Cached TruffleString.ToJavaStringNode toJavaStringNode, @Shared("js2ts") @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { if (self.isEmpty()) { return T_EMPTY_STRING; } else { - return fromJavaStringNode.execute(capitalizeImpl(toJavaStringNode.execute(self)), TS_ENCODING); + return fromJavaStringNode.execute(capitalizeImpl(language.getCachedICUTitleCaser(), toJavaStringNode.execute(self)), TS_ENCODING); } } @Specialization static TruffleString doGeneric(Object self, @Bind Node inliningTarget, + @Bind PythonLanguage language, @Cached CastToJavaStringCheckedNode castToJavaStringNode, @Shared("js2ts") @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { String s = castToJavaStringNode.cast(inliningTarget, self, ErrorMessages.REQUIRES_STR_OBJECT_BUT_RECEIVED_P, "capitalize", self); if (s.isEmpty()) { return T_EMPTY_STRING; } - return fromJavaStringNode.execute(capitalizeImpl(s), TS_ENCODING); - } - - private static String capitalizeImpl(String str) { - if (titlecaser == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - titlecaser = CaseMap.toTitle().wholeString().noBreakAdjustment(); - } - return apply(str); + return fromJavaStringNode.execute(capitalizeImpl(language.getCachedICUTitleCaser(), s), TS_ENCODING); } @TruffleBoundary - private static String apply(String str) { - return titlecaser.apply(Locale.ROOT, null, str); + private static String capitalizeImpl(CaseMap.Title titleCaser, String str) { + return titleCaser.apply(Locale.ROOT, null, str); } + } // str.partition @@ -2235,8 +2227,8 @@ static TruffleString doGeneric(VirtualFrame frame, Object selfObj, Object widthO abstract static class TitleNode extends PythonUnaryClinicBuiltinNode { @Specialization - @TruffleBoundary static TruffleString doString(TruffleString self, + @Bind PythonLanguage language, @Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode, @Cached TruffleStringIterator.NextNode nextNode, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @@ -2251,27 +2243,36 @@ static TruffleString doString(TruffleString self, int end = 0; while (it.hasNext()) { final int cp = nextNode.execute(it, TS_ENCODING); - if (!UCharacter.isLowerCase(cp) && !UCharacter.isUpperCase(cp)) { + if (notUpperOrLowerCase(cp)) { if (start == end) { appendCodePointNode.execute(sb, cp, 1, true); } else { - appendSegment(self, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end); + appendSegment(self, language, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end); } start = end + 1; } end++; } if (start != end) { - appendSegment(self, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end - 1); + appendSegment(self, language, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end - 1); } return toStringNode.execute(sb); } - private static void appendSegment(TruffleString self, TruffleStringBuilder.AppendStringNode appendStringNode, TruffleString.SubstringNode substringNode, + private static void appendSegment(TruffleString self, PythonLanguage language, TruffleStringBuilder.AppendStringNode appendStringNode, TruffleString.SubstringNode substringNode, TruffleString.ToJavaStringNode toJavaStringNode, TruffleString.FromJavaStringNode fromJavaStringNode, TruffleStringBuilderUTF32 sb, int start, int end) { TruffleString segment = substringNode.execute(self, start, end - start + 1, TS_ENCODING, true); - String titleSegment = UCharacter.toTitleCase(Locale.ROOT, toJavaStringNode.execute(segment), null); - appendStringNode.execute(sb, fromJavaStringNode.execute(titleSegment, TS_ENCODING)); + appendStringNode.execute(sb, fromJavaStringNode.execute(applyTitleCase(language.getCachedICUTitleCaser(), toJavaStringNode.execute(segment)), TS_ENCODING)); + } + + @TruffleBoundary + private static boolean notUpperOrLowerCase(int cp) { + return !UCharacter.isULowercase(cp) && !UCharacter.isUUppercase(cp); + } + + @TruffleBoundary + private static String applyTitleCase(CaseMap.Title titleCaser, String s) { + return titleCaser.apply(Locale.ROOT, null, s); } @Override diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java index 867f8e9652..298da48ed4 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java @@ -48,7 +48,6 @@ import java.util.List; import java.util.Locale; -import org.graalvm.nativeimage.ImageInfo; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacterCategory; import org.graalvm.shadowed.com.ibm.icu.lang.UProperty; @@ -221,17 +220,6 @@ public static TruffleString strip(TruffleString str, TruffleString chars, StripK @TruffleBoundary public static boolean isPrintable(int codepoint) { - if (ImageInfo.inImageBuildtimeCode()) { - // Executing ICU4J at image build time causes issues with runtime/build time - // initialization - assert codepoint < 0x100; - return codepoint >= 32; - } - return isPrintableICU(codepoint); - } - - @TruffleBoundary - private static boolean isPrintableICU(int codepoint) { // ICU's definition of printability is different from CPython, so we cannot use // UCharacter.isPrintable int category = UCharacter.getType(codepoint); @@ -252,32 +240,16 @@ private static boolean isPrintableICU(int codepoint) { @TruffleBoundary public static String toLowerCase(String self) { - if (ImageInfo.inImageBuildtimeCode()) { - // Avoid initializing ICU4J in image build - return self.toLowerCase(); - } return UCharacter.toLowerCase(Locale.ROOT, self); } @TruffleBoundary public static String toUpperCase(String str) { - if (ImageInfo.inImageBuildtimeCode()) { - // Avoid initializing ICU4J in image build - return str.toUpperCase(); - } return UCharacter.toUpperCase(Locale.ROOT, str); } @TruffleBoundary public static boolean isAlnum(int codePoint) { - if (ImageInfo.inImageBuildtimeCode()) { - // Avoid initializing ICU4J in image build - return Character.isLetterOrDigit(codePoint); - } - return isAlnumICU(codePoint); - } - - private static boolean isAlnumICU(int codePoint) { if (UCharacter.isLetter(codePoint) || UCharacter.isDigit(codePoint) || UCharacter.hasBinaryProperty(codePoint, UProperty.NUMERIC_TYPE)) { return true; } @@ -335,22 +307,12 @@ static boolean doString(TruffleString str, @TruffleBoundary static boolean isIdentifierStart(int codePoint) { - if (ImageInfo.inImageBuildtimeCode()) { - // Avoid initializing ICU4J at image build time - return Character.isUnicodeIdentifierStart(codePoint); - } else { - return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_START); - } + return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_START); } @TruffleBoundary static boolean isIdentifierPart(int codePoint) { - if (ImageInfo.inImageBuildtimeCode()) { - // Avoid initializing ICU4J at image build time - return Character.isUnicodeIdentifierPart(codePoint); - } else { - return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_CONTINUE); - } + return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_CONTINUE); } }