From efc01d023a2d571b459ceaa7c45cd0c5b6bd19d7 Mon Sep 17 00:00:00 2001 From: Otmar Humbel Date: Wed, 11 Sep 2024 15:16:18 +0200 Subject: [PATCH 1/6] Ignore Eclipse *.launch configurations --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4feb914f9..4448aa892 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ .externalToolBuilders/* .project .pydevproject +*.launch # Netbeans files nbproject From c9379b35f8d0e11ab076f4f052d7f63b0e105c0a Mon Sep 17 00:00:00 2001 From: Otmar Humbel Date: Fri, 20 Sep 2024 11:55:03 +0200 Subject: [PATCH 2/6] Initial failing tests --- .../core/JavaLangStringConstructor.java | 14 +++ .../python/core/JavaLangStringProvider.java | 90 +++++++++++++++++++ .../python/core/PyUnicodeComparisonTest.java | 69 ++++++++++++++ .../org/python/core/PyUnicodeReplaceTest.java | 41 +++++++++ 4 files changed, 214 insertions(+) create mode 100644 tests/java/org/python/core/JavaLangStringConstructor.java create mode 100644 tests/java/org/python/core/JavaLangStringProvider.java create mode 100644 tests/java/org/python/core/PyUnicodeComparisonTest.java create mode 100644 tests/java/org/python/core/PyUnicodeReplaceTest.java diff --git a/tests/java/org/python/core/JavaLangStringConstructor.java b/tests/java/org/python/core/JavaLangStringConstructor.java new file mode 100644 index 000000000..b28720dd0 --- /dev/null +++ b/tests/java/org/python/core/JavaLangStringConstructor.java @@ -0,0 +1,14 @@ +package org.python.core; + +public final class JavaLangStringConstructor { + + private final String constructedValue; + + public JavaLangStringConstructor(String value) { + this.constructedValue = value; + } + + public String getConstructedValue() { + return constructedValue; + } +} diff --git a/tests/java/org/python/core/JavaLangStringProvider.java b/tests/java/org/python/core/JavaLangStringProvider.java new file mode 100644 index 000000000..18b6bb260 --- /dev/null +++ 
b/tests/java/org/python/core/JavaLangStringProvider.java @@ -0,0 +1,90 @@ +package org.python.core; + +public final class JavaLangStringProvider { + + private static final String SMALL_O_UMLAUT = "\u00F6"; + private static final String RIGHT_SINGLE_QUOTATION_MARK = "\u2019"; + + private static final String BEAUTIFUL = "sch" + SMALL_O_UMLAUT + "n"; + private static final String START_OF_BEAUTIFUL = "sch" + SMALL_O_UMLAUT; + private static final String END_OF_BEAUTIFUL = SMALL_O_UMLAUT + "n"; + + private static final String JEANNE_DARC = "Jeanne d" + RIGHT_SINGLE_QUOTATION_MARK + "Arc"; + private static final String START_OF_JEANNE_DARC = "Jeanne d" + RIGHT_SINGLE_QUOTATION_MARK + "A"; + private static final String END_OF_JEANNE_DARC = "d" + RIGHT_SINGLE_QUOTATION_MARK + "Arc"; + + private static final String BEAUTIFUL_JEANNE_DARC = BEAUTIFUL + "e" + JEANNE_DARC; + + /** + * Provides a single small o umlaut + */ + public static final String getSmallOUmlaut() { + return SMALL_O_UMLAUT; + } + + /** + * Provides the word 'beautiful' in German, using a small o umlaut + */ + public static final String getBeautiful() { + return BEAUTIFUL; + } + + /** + * Provides the start of 'beautiful' in German, using a small o umlaut + */ + public static final String getStartOfBeautiful() { + return START_OF_BEAUTIFUL; + } + + /** + * Provides the end of 'beautiful' in German, using a small o umlaut + */ + public static final String getEndOfBeautiful() { + return END_OF_BEAUTIFUL; + } + + /** + * Provides the word 'more beautiful' in German, using a small o umlaut + */ + public static final String getMoreBeautiful() { + return BEAUTIFUL + "er"; + } + + /** + * Provides the right single quotation mark + * + * @see "https://www.compart.com/en/unicode/U+2019" + */ + public static final String getRightSingleQuotationMark() { + return RIGHT_SINGLE_QUOTATION_MARK; + } + + /** + * Provides the name of Jeanne d'Arc, but using a right single quotation mark as apostrophe + */ + public static 
final String getJeanneDArc() { + return JEANNE_DARC; + } + + /** + * Provides the start of Jeanne d'Arc, including a right single quotation mark as apostrophe + */ + public static final String getStartOfJeanneDArc() { + return START_OF_JEANNE_DARC; + } + + /** + * Provides the end of Jeanne d'Arc, including a right single quotation mark as apostrophe + */ + public static final String getEndOfJeanneDArc() { + return END_OF_JEANNE_DARC; + } + + /** + * Provides beautiful Jeanne d'Arc, a mixture of Umlaut and Unicode + */ + public static final String getBeautifulJeanneDArc() { + return BEAUTIFUL_JEANNE_DARC; + } + +} diff --git a/tests/java/org/python/core/PyUnicodeComparisonTest.java b/tests/java/org/python/core/PyUnicodeComparisonTest.java new file mode 100644 index 000000000..02c30424f --- /dev/null +++ b/tests/java/org/python/core/PyUnicodeComparisonTest.java @@ -0,0 +1,69 @@ +package org.python.core; + +import static java.lang.String.format; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.python.core.JavaLangStringProvider.getBeautiful; + +import org.junit.Test; +import org.python.util.PythonInterpreter; + +public class PyUnicodeComparisonTest { + + // variable name + private static final String RESULT = "result"; + + private String compare() { + StringBuffer b = new StringBuffer(); + b.append("from org.python.core import JavaLangStringProvider\n"); + b.append("value = JavaLangStringProvider.getBeautiful()\n"); + b.append("result = False\n"); + b.append("if value == '%s':\n"); + b.append(" result = True\n"); + return format(b.toString(), getBeautiful()); + } + + + @Test + public void testCompare() { + try (PythonInterpreter interpreter = new PythonInterpreter()) { + interpreter.exec(compare()); + assertResultBoolean(true, interpreter); + } + } + + private String compareConstructed() { + StringBuffer b = new StringBuffer(); + b.append("from org.python.core import 
JavaLangStringConstructor\n"); + b.append("value = JavaLangStringConstructor('%s').getConstructedValue()\n"); + b.append("result = False\n"); + b.append("if value == '%s':\n"); + b.append(" result = True\n"); + return format(b.toString(), getBeautiful(), getBeautiful()); + } + + @Test + public void testCompareConstructed() { + try (PythonInterpreter interpreter = new PythonInterpreter()) { + interpreter.exec(compareConstructed()); + assertResultBoolean(true, interpreter); + } + } + + + private void assertResultBoolean(boolean expected, PythonInterpreter interpreter) { + Object resultObject = interpreter.get(RESULT); + if (resultObject instanceof PyBoolean) { + PyBoolean result = (PyBoolean) resultObject; + if (expected) { + assertTrue("result:", result.getBooleanValue()); + } else { + assertFalse("result:", result.getBooleanValue()); + } + } else { + fail("expected result to be PyBoolean but was " + resultObject.getClass().getName()); + } + } + +} diff --git a/tests/java/org/python/core/PyUnicodeReplaceTest.java b/tests/java/org/python/core/PyUnicodeReplaceTest.java new file mode 100644 index 000000000..bec4d0f5f --- /dev/null +++ b/tests/java/org/python/core/PyUnicodeReplaceTest.java @@ -0,0 +1,41 @@ +package org.python.core; + +import static java.lang.String.format; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.python.core.JavaLangStringProvider.getSmallOUmlaut; + +import org.junit.Test; +import org.python.util.PythonInterpreter; + +public class PyUnicodeReplaceTest { + + // variable name + private static final String RESULT = "result"; + + private String replace() { + StringBuffer b = new StringBuffer(); + b.append("from org.python.core import JavaLangStringProvider\n"); + b.append("result = JavaLangStringProvider.getBeautiful().replace('%s', 'oe')\n"); + return format(b.toString(), getSmallOUmlaut()); + } + + @Test + public void testReplace() { + try (PythonInterpreter interpreter = new 
PythonInterpreter()) { + interpreter.exec(replace()); + assertResultEquals("schoen", interpreter); + } + } + + private void assertResultEquals(String expected, PythonInterpreter interpreter) { + Object resultObject = interpreter.get(RESULT); + if (resultObject instanceof PyString) { + PyString result = (PyString) resultObject; + assertEquals(expected, result.getString()); + } else { + fail("expected result to be PyString but was " + resultObject.getClass().getName()); + } + } + +} From 99756b53c435e09cf0f389b2755f651778ec124f Mon Sep 17 00:00:00 2001 From: Otmar Humbel Date: Fri, 27 Sep 2024 14:14:22 +0200 Subject: [PATCH 3/6] Improve the failure message --- tests/java/org/python/core/PyUnicodeComparisonTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/java/org/python/core/PyUnicodeComparisonTest.java b/tests/java/org/python/core/PyUnicodeComparisonTest.java index 02c30424f..f200e4cd9 100644 --- a/tests/java/org/python/core/PyUnicodeComparisonTest.java +++ b/tests/java/org/python/core/PyUnicodeComparisonTest.java @@ -57,9 +57,9 @@ private void assertResultBoolean(boolean expected, PythonInterpreter interpreter if (resultObject instanceof PyBoolean) { PyBoolean result = (PyBoolean) resultObject; if (expected) { - assertTrue("result:", result.getBooleanValue()); + assertTrue("expected result to be True, but was False", result.getBooleanValue()); } else { - assertFalse("result:", result.getBooleanValue()); + assertFalse("expected result to be False, but was True", result.getBooleanValue()); } } else { fail("expected result to be PyBoolean but was " + resultObject.getClass().getName()); From 58d568b374674115ba54f2f3d36383c9e7b2f24c Mon Sep 17 00:00:00 2001 From: Otmar Humbel Date: Fri, 27 Sep 2024 15:58:13 +0200 Subject: [PATCH 4/6] Make charsFitWidth() publicly available, fix javadoc --- src/org/python/core/PyString.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/org/python/core/PyString.java 
b/src/org/python/core/PyString.java index 1f9700cfd..d884543c3 100644 --- a/src/org/python/core/PyString.java +++ b/src/org/python/core/PyString.java @@ -114,10 +114,10 @@ public PyString(char c) { * 2<sup>width</sup>-1. We use this to test for "byte-like" or ASCII. * * @param s string to test - * @param width number of bits within which each character must fit (<16) + * @param width number of bits within which each character must fit (&lt;16) * @return true if and only if every character has a code less than 2^width */ - static boolean charsFitWidth(String s, int width) { + public static boolean charsFitWidth(String s, int width) { final int N = s.length(); From 18bd6aa08d4d0b4da97d4013a79096dcaf5c55cf Mon Sep 17 00:00:00 2001 From: Otmar Humbel Date: Fri, 27 Sep 2024 15:58:57 +0200 Subject: [PATCH 5/6] Enforce unicode if the string does not fit into a real PyString --- src/org/python/antlr/GrammarActions.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/org/python/antlr/GrammarActions.java b/src/org/python/antlr/GrammarActions.java index fc268e33d..dbcfc20e2 100644 --- a/src/org/python/antlr/GrammarActions.java +++ b/src/org/python/antlr/GrammarActions.java @@ -447,7 +447,12 @@ class StringPair { StringPair(String s, boolean unicode) { this.s = s; - this.unicode = unicode; + // enforce unicode if the string does not fit into a real PyString + if (!PyString.charsFitWidth(s, 7)) { + this.unicode = true; + } else { + this.unicode = unicode; + } } String getString() { return s; From 30b36e62b0b06495f3e7ac80b7288db80e7e9f4f Mon Sep 17 00:00:00 2001 From: Otmar Humbel Date: Thu, 17 Apr 2025 13:18:10 +0200 Subject: [PATCH 6/6] Another try to fix the dilemma - sadly without success --- src/org/python/antlr/GrammarActions.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/org/python/antlr/GrammarActions.java b/src/org/python/antlr/GrammarActions.java index dbcfc20e2..9f98e98e0 100644 --- 
a/src/org/python/antlr/GrammarActions.java +++ b/src/org/python/antlr/GrammarActions.java @@ -445,13 +445,12 @@ class StringPair { private String s; private boolean unicode; - StringPair(String s, boolean unicode) { + StringPair(String s, boolean unicode, String encoding) { this.s = s; - // enforce unicode if the string does not fit into a real PyString - if (!PyString.charsFitWidth(s, 7)) { + this.unicode = unicode; + // in case of no encoding, enforce unicode if the string does not fit into a real PyString + if (encoding == null && !unicode && !PyString.charsFitWidth(s, 7)) { this.unicode = true; - } else { - this.unicode = unicode; } } String getString() { @@ -539,7 +538,7 @@ StringPair extractString(Token t, String encoding, boolean unicodeLiterals) { // Plain unicode: already decoded, just handle escapes string = PyString.decode_UnicodeEscape(string, start, end, "strict", ustring); } - return new StringPair(string, ustring); + return new StringPair(string, ustring, encoding); } Token extractStringToken(List s) {