Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.graalvm.options.OptionKey;
import org.graalvm.options.OptionValues;
import org.graalvm.polyglot.SandboxPolicy;
import org.graalvm.shadowed.com.ibm.icu.text.CaseMap;

import com.oracle.graal.python.annotations.PythonOS;
import com.oracle.graal.python.builtins.Python3Core;
Expand Down Expand Up @@ -981,13 +982,25 @@ public long cacheKeyForBytecode(byte[] code) {
@CompilationFinal private Object cachedTRegexLineBreakRegex;

/**
 * Returns the line-break regex object, creating it on first use.
 *
 * The object is obtained by parsing {@code LINEBREAK_REGEX_SOURCE} through the environment of
 * the given context and calling the result; it is then kept in
 * {@code cachedTRegexLineBreakRegex} for subsequent calls.
 *
 * @param location node handed to the call that produces the regex object
 * @param context context whose environment is used to parse the regex source
 * @return the cached regex object
 */
public Object getCachedTRegexLineBreakRegex(Node location, PythonContext context) {
    CompilerAsserts.partialEvaluationConstant(this);
    Object regex = cachedTRegexLineBreakRegex;
    if (regex != null) {
        return regex;
    }
    // Slow path: leave compiled code and invalidate before writing the cached field.
    CompilerDirectives.transferToInterpreterAndInvalidate();
    regex = context.getEnv().parseInternal(LINEBREAK_REGEX_SOURCE).call(location);
    cachedTRegexLineBreakRegex = regex;
    return regex;
}

@CompilationFinal private CaseMap.Title cachedICUTitleCaser;

/**
 * Returns the cached ICU title-caser, building it lazily on first use.
 *
 * The caser is created with {@code CaseMap.toTitle().wholeString().noBreakAdjustment()} and
 * stored in {@code cachedICUTitleCaser}.
 *
 * @return the cached {@link CaseMap.Title} instance
 */
public CaseMap.Title getCachedICUTitleCaser() {
    CompilerAsserts.partialEvaluationConstant(this);
    CaseMap.Title caser = cachedICUTitleCaser;
    if (caser == null) {
        // Slow path: leave compiled code and invalidate before writing the cached field.
        CompilerDirectives.transferToInterpreterAndInvalidate();
        caser = CaseMap.toTitle().wholeString().noBreakAdjustment();
        cachedICUTitleCaser = caser;
    }
    return caser;
}

@Override
protected boolean isThreadAccessAllowed(Thread thread, boolean singleThreaded) {
if (singleThreaded) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
Expand Down Expand Up @@ -158,9 +158,8 @@ protected ArgumentClinicProvider getArgumentClinic() {
}

// Reports whether the code point is a cased character, restricted to ASCII letters.
// NOTE(review): two return statements appear back to back here; this looks like the
// pre-change (ICU-based) and post-change (plain range check) lines of a diff hunk shown
// without +/- markers. Confirm against the merged file which one survives.
@Specialization
@TruffleBoundary
static boolean isCased(Object module, int codepoint) {
// ICU variant: a letter below code point 128.
return codepoint < 128 && UCharacter.isLetter(codepoint);
// Range-check variant: 'A'..'Z' or 'a'..'z' (&& binds tighter than ||).
return 'A' <= codepoint && codepoint <= 'Z' || 'a' <= codepoint && codepoint <= 'z';
}
}

Expand All @@ -175,13 +174,8 @@ protected ArgumentClinicProvider getArgumentClinic() {
}

// Maps an ASCII upper-case letter to its lower-case form; other code points are returned
// unchanged.
// NOTE(review): the body shows both an ICU-based implementation and a bit-twiddling one;
// this looks like the removed and added sides of a diff hunk without +/- markers. Confirm
// against the merged file which one survives.
@Specialization
@TruffleBoundary
static int toLower(Object module, int codepoint) {
// ICU variant: code points from 128 upwards pass through untouched.
if (codepoint >= 128) {
return codepoint;
}

return UCharacter.toLowerCase(codepoint);
// Bitwise variant: setting bit 0x20 turns 'A'..'Z' into 'a'..'z'.
return 'A' <= codepoint && codepoint <= 'Z' ? codepoint | 0x20 : codepoint;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@
import com.oracle.graal.python.builtins.objects.slice.SliceNodes.CoerceToIntSlice;
import com.oracle.graal.python.builtins.objects.slice.SliceNodes.ComputeIndices;
import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.FormatNodeClinicProviderGen;
import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.SplitNodeClinicProviderGen;
import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.SplitLinesNodeClinicProviderGen;
import com.oracle.graal.python.builtins.objects.str.StringBuiltinsClinicProviders.SplitNodeClinicProviderGen;
import com.oracle.graal.python.builtins.objects.str.StringNodes.CastToJavaStringCheckedNode;
import com.oracle.graal.python.builtins.objects.str.StringNodes.CastToTruffleStringChecked0Node;
import com.oracle.graal.python.builtins.objects.str.StringNodes.CastToTruffleStringChecked1Node;
Expand Down Expand Up @@ -171,7 +171,6 @@
import com.oracle.graal.python.runtime.object.PFactory;
import com.oracle.truffle.api.CompilerAsserts;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Bind;
import com.oracle.truffle.api.dsl.Cached;
Expand Down Expand Up @@ -1132,43 +1131,36 @@ static TruffleString doGeneric(VirtualFrame frame, Object self, Object table,
// Builtin node for capitalize: an empty input yields T_EMPTY_STRING, anything else is
// converted to a Java string, passed through an ICU CaseMap.Title with Locale.ROOT, and
// converted back to a TruffleString.
// NOTE(review): this span interleaves two versions of the class (a static `titlecaser`
// field with a one-argument capitalizeImpl, and a variant that takes the title-caser from
// PythonLanguage.getCachedICUTitleCaser()); it looks like a diff hunk without +/- markers.
// Confirm against the merged file which lines survive.
@GenerateNodeFactory
public abstract static class CapitalizeNode extends PythonUnaryBuiltinNode {

// Lazily initialized title-caser used by the one-argument capitalizeImpl below.
@CompilationFinal private static CaseMap.Title titlecaser;

// Fast path for receivers that are already TruffleStrings.
@Specialization
static TruffleString capitalize(TruffleString self,
@Bind PythonLanguage language,
@Cached TruffleString.ToJavaStringNode toJavaStringNode,
@Shared("js2ts") @Cached TruffleString.FromJavaStringNode fromJavaStringNode) {
if (self.isEmpty()) {
return T_EMPTY_STRING;
} else {
return fromJavaStringNode.execute(capitalizeImpl(toJavaStringNode.execute(self)), TS_ENCODING);
return fromJavaStringNode.execute(capitalizeImpl(language.getCachedICUTitleCaser(), toJavaStringNode.execute(self)), TS_ENCODING);
}
}

// Generic path: the receiver is first cast to a Java string; the cast is given the
// REQUIRES_STR_OBJECT_BUT_RECEIVED_P message for the failure case.
@Specialization
static TruffleString doGeneric(Object self,
@Bind Node inliningTarget,
@Bind PythonLanguage language,
@Cached CastToJavaStringCheckedNode castToJavaStringNode,
@Shared("js2ts") @Cached TruffleString.FromJavaStringNode fromJavaStringNode) {
String s = castToJavaStringNode.cast(inliningTarget, self, ErrorMessages.REQUIRES_STR_OBJECT_BUT_RECEIVED_P, "capitalize", self);
if (s.isEmpty()) {
return T_EMPTY_STRING;
}
return fromJavaStringNode.execute(capitalizeImpl(s), TS_ENCODING);
}

private static String capitalizeImpl(String str) {
if (titlecaser == null) {
// Deoptimize and invalidate before writing the @CompilationFinal field.
CompilerDirectives.transferToInterpreterAndInvalidate();
titlecaser = CaseMap.toTitle().wholeString().noBreakAdjustment();
}
return apply(str);
return fromJavaStringNode.execute(capitalizeImpl(language.getCachedICUTitleCaser(), s), TS_ENCODING);
}

// The ICU call is kept behind a TruffleBoundary.
@TruffleBoundary
private static String apply(String str) {
return titlecaser.apply(Locale.ROOT, null, str);
private static String capitalizeImpl(CaseMap.Title titleCaser, String str) {
return titleCaser.apply(Locale.ROOT, null, str);
}

}

// str.partition
Expand Down Expand Up @@ -2235,8 +2227,8 @@ static TruffleString doGeneric(VirtualFrame frame, Object selfObj, Object widthO
abstract static class TitleNode extends PythonUnaryClinicBuiltinNode {

@Specialization
@TruffleBoundary
static TruffleString doString(TruffleString self,
@Bind PythonLanguage language,
@Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode,
@Cached TruffleStringIterator.NextNode nextNode,
@Cached TruffleStringBuilder.AppendStringNode appendStringNode,
Expand All @@ -2251,27 +2243,36 @@ static TruffleString doString(TruffleString self,
int end = 0;
while (it.hasNext()) {
final int cp = nextNode.execute(it, TS_ENCODING);
if (!UCharacter.isLowerCase(cp) && !UCharacter.isUpperCase(cp)) {
if (notUpperOrLowerCase(cp)) {
if (start == end) {
appendCodePointNode.execute(sb, cp, 1, true);
} else {
appendSegment(self, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end);
appendSegment(self, language, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end);
}
start = end + 1;
}
end++;
}
if (start != end) {
appendSegment(self, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end - 1);
appendSegment(self, language, appendStringNode, substringNode, toJavaStringNode, fromJavaStringNode, sb, start, end - 1);
}
return toStringNode.execute(sb);
}

// Title-cases one segment of `self` and appends the result to `sb`. The segment starts at
// `start` and has length `end - start + 1`, i.e. `end` is inclusive.
// NOTE(review): two signature lines and two title-casing bodies are shown here (one using
// UCharacter.toTitleCase, one using the title-caser from PythonLanguage); this looks like a
// diff hunk without +/- markers. Confirm against the merged file which lines survive.
private static void appendSegment(TruffleString self, TruffleStringBuilder.AppendStringNode appendStringNode, TruffleString.SubstringNode substringNode,
private static void appendSegment(TruffleString self, PythonLanguage language, TruffleStringBuilder.AppendStringNode appendStringNode, TruffleString.SubstringNode substringNode,
TruffleString.ToJavaStringNode toJavaStringNode, TruffleString.FromJavaStringNode fromJavaStringNode, TruffleStringBuilderUTF32 sb, int start, int end) {
TruffleString segment = substringNode.execute(self, start, end - start + 1, TS_ENCODING, true);
String titleSegment = UCharacter.toTitleCase(Locale.ROOT, toJavaStringNode.execute(segment), null);
appendStringNode.execute(sb, fromJavaStringNode.execute(titleSegment, TS_ENCODING));
appendStringNode.execute(sb, fromJavaStringNode.execute(applyTitleCase(language.getCachedICUTitleCaser(), toJavaStringNode.execute(segment)), TS_ENCODING));
}

/**
 * Tells whether both ICU case checks fail for {@code cp}: it is reported neither by
 * {@code UCharacter.isULowercase} nor by {@code UCharacter.isUUppercase}.
 *
 * @param cp the code point to test
 * @return {@code true} if neither check accepts {@code cp}
 */
@TruffleBoundary
private static boolean notUpperOrLowerCase(int cp) {
    final boolean lowerOrUpper = UCharacter.isULowercase(cp) || UCharacter.isUUppercase(cp);
    return !lowerOrUpper;
}

/**
 * Runs the given ICU title-caser over {@code s} using {@link Locale#ROOT} and no break
 * iterator. Kept behind a {@code TruffleBoundary}.
 *
 * @param titleCaser the ICU title-caser to apply
 * @param s the text to convert
 * @return the converted text
 */
@TruffleBoundary
private static String applyTitleCase(CaseMap.Title titleCaser, String s) {
    final String titled = titleCaser.apply(Locale.ROOT, null, s);
    return titled;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
import java.util.List;
import java.util.Locale;

import org.graalvm.nativeimage.ImageInfo;
import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter;
import org.graalvm.shadowed.com.ibm.icu.lang.UCharacterCategory;
import org.graalvm.shadowed.com.ibm.icu.lang.UProperty;
Expand Down Expand Up @@ -221,17 +220,6 @@ public static TruffleString strip(TruffleString str, TruffleString chars, StripK

@TruffleBoundary
public static boolean isPrintable(int codepoint) {
if (ImageInfo.inImageBuildtimeCode()) {
// Executing ICU4J at image build time causes issues with runtime/build time
// initialization
assert codepoint < 0x100;
return codepoint >= 32;
}
return isPrintableICU(codepoint);
}

@TruffleBoundary
private static boolean isPrintableICU(int codepoint) {
// ICU's definition of printability is different from CPython, so we cannot use
// UCharacter.isPrintable
int category = UCharacter.getType(codepoint);
Expand All @@ -252,32 +240,16 @@ private static boolean isPrintableICU(int codepoint) {

// Lower-cases `self` with ICU's UCharacter.toLowerCase under Locale.ROOT.
// NOTE(review): the ImageInfo guard below (falling back to String.toLowerCase() while
// running image build-time code) appears to be on the removed side of this diff hunk.
// Confirm against the merged file whether it still exists.
@TruffleBoundary
public static String toLowerCase(String self) {
if (ImageInfo.inImageBuildtimeCode()) {
// Avoid initializing ICU4J in image build
return self.toLowerCase();
}
return UCharacter.toLowerCase(Locale.ROOT, self);
}

// Upper-cases `str` with ICU's UCharacter.toUpperCase under Locale.ROOT.
// NOTE(review): the ImageInfo guard below (falling back to String.toUpperCase() while
// running image build-time code) appears to be on the removed side of this diff hunk.
// Confirm against the merged file whether it still exists.
@TruffleBoundary
public static String toUpperCase(String str) {
if (ImageInfo.inImageBuildtimeCode()) {
// Avoid initializing ICU4J in image build
return str.toUpperCase();
}
return UCharacter.toUpperCase(Locale.ROOT, str);
}

@TruffleBoundary
public static boolean isAlnum(int codePoint) {
if (ImageInfo.inImageBuildtimeCode()) {
// Avoid initializing ICU4J in image build
return Character.isLetterOrDigit(codePoint);
}
return isAlnumICU(codePoint);
}

private static boolean isAlnumICU(int codePoint) {
if (UCharacter.isLetter(codePoint) || UCharacter.isDigit(codePoint) || UCharacter.hasBinaryProperty(codePoint, UProperty.NUMERIC_TYPE)) {
return true;
}
Expand Down Expand Up @@ -335,22 +307,12 @@ static boolean doString(TruffleString str,

// Tests whether `codePoint` has the Unicode XID_START binary property according to ICU.
// NOTE(review): the if/else with the Character.isUnicodeIdentifierStart fallback is
// followed by a further unconditional return; this looks like the removed and added sides
// of a diff hunk without +/- markers. Confirm against the merged file which lines survive.
@TruffleBoundary
static boolean isIdentifierStart(int codePoint) {
if (ImageInfo.inImageBuildtimeCode()) {
// Avoid initializing ICU4J at image build time
return Character.isUnicodeIdentifierStart(codePoint);
} else {
return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_START);
}
return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_START);
}

// Tests whether `codePoint` has the Unicode XID_CONTINUE binary property according to ICU.
// NOTE(review): the if/else with the Character.isUnicodeIdentifierPart fallback is
// followed by a further unconditional return; this looks like the removed and added sides
// of a diff hunk without +/- markers. Confirm against the merged file which lines survive.
@TruffleBoundary
static boolean isIdentifierPart(int codePoint) {
if (ImageInfo.inImageBuildtimeCode()) {
// Avoid initializing ICU4J at image build time
return Character.isUnicodeIdentifierPart(codePoint);
} else {
return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_CONTINUE);
}
return UCharacter.hasBinaryProperty(codePoint, UProperty.XID_CONTINUE);
}
}

Expand Down
Loading