diff --git a/eclipse-scout-core/src/encoder/PlainTextEncoder.ts b/eclipse-scout-core/src/encoder/PlainTextEncoder.ts index f24a7d13b8a..4552e169ab7 100644 --- a/eclipse-scout-core/src/encoder/PlainTextEncoder.ts +++ b/eclipse-scout-core/src/encoder/PlainTextEncoder.ts @@ -17,7 +17,8 @@ export interface PlainTextEncoderOptions { compact?: boolean; /** - * Calls string.trim(). White space at the beginning and the end of the text gets removed. Default is false. + * Calls string.trim(). Empty lines at the beginning and the end of the text get removed. Default is false. + * Spaces are always removed. */ trim?: boolean; diff --git a/eclipse-scout-core/test/encoder/PlainTextEncoderSpec.ts b/eclipse-scout-core/test/encoder/PlainTextEncoderSpec.ts index a58c93b068b..c4d6c27cbab 100644 --- a/eclipse-scout-core/test/encoder/PlainTextEncoderSpec.ts +++ b/eclipse-scout-core/test/encoder/PlainTextEncoderSpec.ts @@ -15,6 +15,7 @@ describe('PlainTextEncoder', () => { let encoder = new PlainTextEncoder(); it('converts HTML to plain text', () => { + expect(encoder.encode(null)).toBe(null); expect(encoder.encode('')).toBe(''); let htmlText = 'hello'; @@ -113,6 +114,15 @@ describe('PlainTextEncoder', () => { it('removes leading and trailing newlines if configured', () => { let htmlText = '\n\nHello!\n\n'; + expect(encoder.encode(htmlText, {trim: false})).toBe('\n\nHello!\n\n'); + expect(encoder.encode(htmlText, {trim: true})).toBe('Hello!'); + + htmlText = '\n\n Hello! \n\n'; + expect(encoder.encode(htmlText, {trim: false})).toBe('\n\nHello!\n\n'); + expect(encoder.encode(htmlText, {trim: true})).toBe('Hello!'); + + htmlText = ' \n \n Hello! \n \n '; + expect(encoder.encode(htmlText, {trim: false})).toBe('\n\nHello!\n\n'); expect(encoder.encode(htmlText, {trim: true})).toBe('Hello!'); }); @@ -259,4 +269,23 @@ describe('PlainTextEncoder', () => { htmlText = '<\tabc attr=\'someText\'>'; expect(encoder.removeAttributeValues(htmlText)).toBe('<\tabc attr=\'someText\'>'); }); + + it('trims lines, but preserves other white-space', () => { + expect(encoder.encode('hello')).toBe('hello'); + expect(encoder.encode('one\ntwo')).toBe('one\ntwo'); + expect(encoder.encode('one\r\ntwo')).toBe('one\ntwo'); + expect(encoder.encode('one\rtwo')).toBe('one\ntwo'); + expect(encoder.encode('one two')).toBe('one two'); + expect(encoder.encode('one   two')).toBe('one\u00a0\u00a0 two'); + expect(encoder.encode('one two')).toBe('one \t two'); + expect(encoder.encode(' one \n two \n three ')).toBe('one\ntwo\nthree'); + expect(encoder.encode('a\n  b\n    c')).toBe('a\n\u00A0\u00A0b\n\u00A0\u00A0\u00A0\u00A0c'); + }); + + it('decodes special characters', () => { + expect(encoder.encode('&amp;')).toBe('&'); + expect(encoder.encode('&&amp;amp;')).toBe('&&amp;'); + expect(encoder.encode('Hellö!')).toBe('Hellö!'); + expect(encoder.encode('a<br>b')).toBe('a
b'); + }); }); diff --git a/org.eclipse.scout.rt.platform.test/src/test/java/org/eclipse/scout/rt/platform/html/HtmlEntitiesTest.java b/org.eclipse.scout.rt.platform.test/src/test/java/org/eclipse/scout/rt/platform/html/HtmlEntitiesTest.java new file mode 100644 index 00000000000..f4fce4b36d9 --- /dev/null +++ b/org.eclipse.scout.rt.platform.test/src/test/java/org/eclipse/scout/rt/platform/html/HtmlEntitiesTest.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2010, 2025 BSI Business Systems Integration AG + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + */ +package org.eclipse.scout.rt.platform.html; + +import static org.junit.Assert.*; + +import org.eclipse.scout.rt.platform.BEANS; +import org.eclipse.scout.rt.testing.platform.runner.PlatformTestRunner; +import org.junit.Test; +import org.junit.runner.RunWith; + +@SuppressWarnings({"ConcatenationWithEmptyString", "SpellCheckingInspection", "TextBlockMigration"}) +@RunWith(PlatformTestRunner.class) +public class HtmlEntitiesTest { + + @Test + public void testUnescapeAll_Empty() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + assertNull(entities.unescapeAll(null)); + assertEquals("", entities.unescapeAll("")); + assertEquals(" ", entities.unescapeAll(" ")); + } + + @Test + public void testUnescapeAll_Named() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + assertEquals("ß", entities.unescapeAll("ß")); + assertEquals("Ü", entities.unescapeAll("Ü")); + assertEquals("&", entities.unescapeAll("&")); + assertEquals("A&&Z", entities.unescapeAll("A&&Z")); + assertEquals("ä", entities.unescapeAll("&auml;")); + assertEquals("auml;", entities.unescapeAll("auml;")); + assertEquals("drag&drop", entities.unescapeAll("drag&drop")); + } + + @Test + public void testUnescapeAll_NumericDecimal() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + assertEquals("'", entities.unescapeAll("'")); + assertEquals("€", entities.unescapeAll("€")); + assertEquals("A€€Z", entities.unescapeAll("A€€Z")); + } + + @Test + public void testUnescapeAll_NumericHex() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + assertEquals("B", entities.unescapeAll("B")); + assertEquals("B", entities.unescapeAll("B")); + assertEquals("?", entities.unescapeAll("?")); + assertEquals("?", entities.unescapeAll("?")); + assertEquals("🦕", entities.unescapeAll("🦕")); + assertEquals("A🦕🦕Z", entities.unescapeAll("A🦕🦕Z")); + } + + @Test + public void testUnescapeAll_ComplexStrings() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + assertEquals("text'text", entities.unescapeAll("text'text")); + assertEquals("'€", entities.unescapeAll("'€")); + assertEquals("/ˈʊmlaʊt/", entities.unescapeAll("/ˈʊmlaʊt/")); + assertEquals("Kühlflüssigkeitsüberlaufbehälter", entities.unescapeAll("Kühlflüssigkeitsüberlaufbehälter")); + assertEquals(" [Viele Kühe machen Mühe!] ", entities.unescapeAll(" [Viele Kühe machen Mühe!] ")); + assertEquals("🤸🏾‍♀️", entities.unescapeAll("🤸🏾‍♀️")); + assertEquals("🏴‍☠️", entities.unescapeAll("🏴‍☠️")); + assertEquals("" + + "Face with Tears of Joy Emoji: \uD83D\uDE02\n" + + "Party Popper Emoji: \uD83C\uDF89\n" + + "Man Technologist: Medium-light Skin Tone: \uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBB", + entities.unescapeAll("" + + "Face with Tears of Joy Emoji: 😂\n" + + "Party Popper Emoji: 🎉\n" + + "Man Technologist: Medium-light Skin Tone: 👨🏼‍💻")); + assertEquals("
Zürich

", entities.unescapeAll("
Zürich

")); + assertEquals("Qu’est-ce que c’est?", entities.unescapeAll("Qu’est-ce que c’est?")); + } + + @Test + public void testUnescapeAll_Invalid() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + assertEquals("ä &doesNotExist; é", entities.unescapeAll("ä &doesNotExist; é")); // invalid entity name + assertEquals("a   b", entities.unescapeAll("a   b")); // not terminated by ';' + assertEquals("drag&drop", entities.unescapeAll("drag&drop")); // not an entity name + assertEquals("drag & drop", entities.unescapeAll("drag & drop")); // not an entity name + assertEquals("& auml;", entities.unescapeAll("& auml;")); // space after '&' + assertEquals("ä ;", entities.unescapeAll("ä ;")); // space before ';' + assertEquals("ä,", entities.unescapeAll("ä,")); // not terminated by ';' + assertEquals("�", entities.unescapeAll("�")); // not a valid code point + assertEquals("&39;", entities.unescapeAll("&39;")); // missing '#' + assertEquals("&#a0;", entities.unescapeAll("&#a0;")); // missing 'x' + } + + @Test + public void testDecodeNamedCharacterReference() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + + assertEquals("ä", entities.decodeNamedCharacterReference("ä")); + assertEquals("'", entities.decodeNamedCharacterReference("'")); + + assertNull(entities.decodeNamedCharacterReference(null)); + assertNull(entities.decodeNamedCharacterReference("")); + assertNull(entities.decodeNamedCharacterReference("&doesNotExist;")); + assertNull(entities.decodeNamedCharacterReference("äü")); + assertNull(entities.decodeNamedCharacterReference("auml")); + assertNull(entities.decodeNamedCharacterReference("'")); + } + + @Test + public void testDecodeNumericCharacterReference() { + final HtmlEntities entities = BEANS.get(HtmlEntities.class); + + assertEquals("ä", entities.decodeNumericCharacterReference("ä")); + assertEquals("ä", entities.decodeNumericCharacterReference("ä")); + assertEquals("'", entities.decodeNumericCharacterReference("'")); + assertEquals("'", entities.decodeNumericCharacterReference("'")); + + assertNull(entities.decodeNumericCharacterReference(null)); + assertNull(entities.decodeNumericCharacterReference("")); + assertNull(entities.decodeNumericCharacterReference("&doesNotExist;")); + assertNull(entities.decodeNumericCharacterReference("ä'")); + assertNull(entities.decodeNumericCharacterReference("#39;")); + assertNull(entities.decodeNumericCharacterReference("&#E4;")); + assertNull(entities.decodeNumericCharacterReference("&#hello;")); + assertNull(entities.decodeNumericCharacterReference("�")); + } +} diff --git a/org.eclipse.scout.rt.platform.test/src/test/java/org/eclipse/scout/rt/platform/html/HtmlHelperTest.java b/org.eclipse.scout.rt.platform.test/src/test/java/org/eclipse/scout/rt/platform/html/HtmlHelperTest.java index dad3aa36793..26381a45169 100644 --- a/org.eclipse.scout.rt.platform.test/src/test/java/org/eclipse/scout/rt/platform/html/HtmlHelperTest.java +++ b/org.eclipse.scout.rt.platform.test/src/test/java/org/eclipse/scout/rt/platform/html/HtmlHelperTest.java @@ -20,6 +20,7 @@ /** * @since 5.2 */ +@SuppressWarnings({"SpellCheckingInspection", "ConcatenationWithEmptyString", "TextBlockMigration", "UnnecessaryUnicodeEscape"}) @RunWith(PlatformTestRunner.class) public class HtmlHelperTest { @@ -40,12 +41,13 @@ public void testToPlainText() { assertEquals("one two", helper.toPlainText("one\r\ntwo")); assertEquals("onetwo", helper.toPlainText("one\rtwo")); assertEquals("hell<", helper.toPlainText("hell<")); - assertEquals("one two", helper.toPlainText("one   two")); - assertEquals("hellö", helper.toPlainText("hellö")); // [?] not all entities are replaced + assertEquals("hellö", helper.toPlainText("hellö")); assertEquals("one\ttwo", helper.toPlainText("one two")); assertEquals("one \t two", helper.toPlainText("one two")); assertEquals("one\ttwo", helper.toPlainText("one" + StringUtility.HTML_ENCODED_TAB + "two")); - assertEquals("one two", helper.toPlainText("one    two")); // HTML5 spec allows for mixed case hex values. + assertEquals("one two", helper.toPlainText("one two")); + assertEquals("one\u00A0\u00A0 two", helper.toPlainText("one   two")); + assertEquals("one\u00A0\u00A0\u00A0\u00A0two", helper.toPlainText("one    two")); // HTML5 spec allows for mixed case hex values. assertEquals("one\t\ttwo", helper.toPlainText("one two")); // HTML5 spec allows for mixed case hex values. assertEquals("Unterraschungsfeier", helper.toPlainText("
Unterraschungsfeier
")); // Formating tags within a single word. assertEquals("Header 1\nHeader 2", helper.toPlainText("

Header 1

Header 2

")); // Headers @@ -71,15 +73,27 @@ public void testToPlainText() { assertEquals("one & two", helper.toPlainText("one & two")); assertEquals("one & two", helper.toPlainText("one & two")); assertEquals("one & two\nthree", helper.toPlainText("one & twothree")); // [?] invalid , has no end tag + assertEquals("&", helper.toPlainText("&amp;")); + assertEquals("ä", helper.toPlainText("&auml;")); + assertEquals("B", helper.toPlainText("&#x42;")); assertEquals("three", helper.toPlainText("one & twothree")); assertEquals("Unterraschungsfeier", helper.toPlainText("
Unterraschungsfeier
")); + assertEquals("Z1\nZ2", helper.toPlainText("

Z1

Z2

")); + assertEquals("Guten Tag\n\n\u00A0\n\nZeile 2", helper.toPlainText("

Guten Tag

 

Zeile 2

")); + assertEquals("’", helper.toPlainText("’")); + assertEquals("+", helper.toPlainText("+")); + assertEquals("+", helper.toPlainText("+")); + assertEquals("+", helper.toPlainText("+")); + assertEquals("+s;", helper.toPlainText("+s;")); // Line breaks assertEquals("a\nb", helper.toPlainText("a
b")); assertEquals("a\nb", helper.toPlainText("a
b")); assertEquals("a\nb", helper.toPlainText("a
b")); - assertEquals("a \nb", helper.toPlainText("a 
b")); + assertEquals("a\u00A0\nb", helper.toPlainText("a 
b")); + assertEquals("a \u00A0\nb", helper.toPlainText("a  
b ")); + assertEquals("a\n\u00A0\u00A0b\n\u00A0\u00A0\u00A0\u00A0c", helper.toPlainText("a
  b
    c")); assertEquals("line", helper.toPlainText("
line")); // [?] assertEquals("line1\nx\nline2", helper.toPlainText("

line1
\nx

line2

")); assertEquals("line1 x\nline2", helper.toPlainText("
line1\nx
line2
")); // [?] diff --git a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample.html b/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample.html deleted file mode 100644 index b086dca8476..00000000000 --- a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample.html +++ /dev/null @@ -1,241 +0,0 @@ - - - - - - -Lorem - Ipsum - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent
Nam: * Tempor
Claritas: * Voluptaria/vide
Facilisi: * Te mel modo meis ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

-Est ea phaedrum , mea tation voluptatum contentiones no.
-
-Nec te omittam qualisque, delectus periculis argumentum est no. -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent

Nam: *

Tempor
Claritas: *

Voluptaria

/vide
Facilisi: * Te mel

modo meis

ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

Lorem: -http://www.eclipse.org/legal/

- - diff --git a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_bodyAndHeadTagMissing.html b/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_bodyAndHeadTagMissing.html deleted file mode 100644 index d2b2cd8501d..00000000000 --- a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_bodyAndHeadTagMissing.html +++ /dev/null @@ -1,237 +0,0 @@ - - - - - -Lorem - Ipsum - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent
Nam: * Tempor
Claritas: * Voluptaria/vide
Facilisi: * Te mel modo meis ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

-Est ea phaedrum , mea tation voluptatum contentiones no.
-
-Nec te omittam qualisque, delectus periculis argumentum est no. -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent

Nam: *

Tempor
Claritas: *

Voluptaria

/vide
Facilisi: * Te mel

modo meis

ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

Lorem: -http://www.eclipse.org/legal/

- diff --git a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_bodyTagMissing.html b/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_bodyTagMissing.html deleted file mode 100644 index 3d99a904c6a..00000000000 --- a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_bodyTagMissing.html +++ /dev/null @@ -1,239 +0,0 @@ - - - - - - -Lorem - Ipsum - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent
Nam: * Tempor
Claritas: * Voluptaria/vide
Facilisi: * Te mel modo meis ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

-Est ea phaedrum , mea tation voluptatum contentiones no.
-
-Nec te omittam qualisque, delectus periculis argumentum est no. -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent

Nam: *

Tempor
Claritas: *

Voluptaria

/vide
Facilisi: * Te mel

modo meis

ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

Lorem: -http://www.eclipse.org/legal/

- diff --git a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_headTagMissing.html b/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_headTagMissing.html deleted file mode 100644 index 5d01ec41166..00000000000 --- a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlExample_headTagMissing.html +++ /dev/null @@ -1,239 +0,0 @@ - - - - - -Lorem - Ipsum - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent
Nam: * Tempor
Claritas: * Voluptaria/vide
Facilisi: * Te mel modo meis ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

-Est ea phaedrum , mea tation voluptatum contentiones no.
-
-Nec te omittam qualisque, delectus periculis argumentum est no. -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Ut wisi enim ad minim veniam
TypiHabent

Nam: *

Tempor
Claritas: *

Voluptaria

/vide
Facilisi: * Te mel

modo meis

ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu.
-
-Sonet aliquam suscipit
-Justo putent usu
-Ex. Quidam
 
Luptatum
QuodsiMenandri
Explicari: * Nonumes
Rationibus: * Constituam
Noster: * Pri
Posse / Facer: Iisque has 26
MEA: 1598
Malis: Perpetua Duo
Utamur: Graece
Cum: * 0265134987
Ridens: * x.postulant@referrentur.org
 
-

Lorem: -http://www.eclipse.org/legal/

- - diff --git a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlToPlain_expectedResult.txt b/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlToPlain_expectedResult.txt deleted file mode 100644 index 981802c2f74..00000000000 --- a/org.eclipse.scout.rt.platform.test/src/test/resources/org/eclipse/scout/rt/platform/util/htmlToPlain_expectedResult.txt +++ /dev/null @@ -1,50 +0,0 @@ -Ut wisi enim ad minim veniam | -Typi | Habent | -Nam: * | Tempor | -Claritas: * | Voluptaria/vide | -Facilisi: * | Te mel modo meis ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu. - -Sonet aliquam suscipit -Justo putent usu -Ex. Quidam | - | -Luptatum | -Quodsi | Menandri | -Explicari: * | Nonumes | -Rationibus: * | Constituam | -Noster: * | Pri | -Posse / Facer: | Iisque has 26 | -MEA: | 1598 | -Malis: | Perpetua Duo | -Utamur: | Graece | -Cum: * | 0265134987 | -Ridens: * | x.postulant@referrentur.org | - | - -Est ea phaedrum , mea tation voluptatum contentiones no. - -Nec te omittam qualisque, delectus periculis argumentum est no. -Ut wisi enim ad minim veniam | -Typi | Habent | -Nam: * | Tempor | -Claritas: * | Voluptaria/vide | -Facilisi: * | Te mel modo meis ullamcorper. Eos verear dissentiet in, eam dico solum accusamus eu. - -Sonet aliquam suscipit -Justo putent usu -Ex. Quidam | - | -Luptatum | -Quodsi | Menandri | -Explicari: * | Nonumes | -Rationibus: * | Constituam | -Noster: * | Pri | -Posse / Facer: | Iisque has 26 | -MEA: | 1598 | -Malis: | Perpetua Duo | -Utamur: | Graece | -Cum: * | 0265134987 | -Ridens: * | x.postulant@referrentur.org | - | - -Lorem: http://www.eclipse.org/legal/ diff --git a/org.eclipse.scout.rt.platform/src/main/java/org/eclipse/scout/rt/platform/html/HtmlEntities.java b/org.eclipse.scout.rt.platform/src/main/java/org/eclipse/scout/rt/platform/html/HtmlEntities.java new file mode 100644 index 00000000000..50bd71d8d67 --- /dev/null +++ b/org.eclipse.scout.rt.platform/src/main/java/org/eclipse/scout/rt/platform/html/HtmlEntities.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2010, 2025 BSI Business Systems Integration AG + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + */ +package org.eclipse.scout.rt.platform.html; + +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; + +import org.eclipse.scout.rt.platform.ApplicationScoped; +import org.eclipse.scout.rt.platform.exception.ProcessingException; +import org.eclipse.scout.rt.platform.util.IOUtility; +import org.json.JSONObject; + +@ApplicationScoped +public class HtmlEntities { + + /** + * Key = Escaped character name (e.g. {@code "ö"}) + *
+ * Value = Unescaped character (e.g. {@code "ö"}) + */ + private final Map m_namedCharacterReferenceMap = new HashMap<>(); + + public HtmlEntities() { + loadNamedCharacterReferenceMap(); + } + + protected void loadNamedCharacterReferenceMap() { + String content; + try (InputStream is = HtmlEntities.class.getResourceAsStream("entities.json")) { + content = IOUtility.readStringUTF8(is); + } + catch (Exception e) { + throw new ProcessingException("Unable to read entities.json", e); + } + JSONObject json = new JSONObject(content); + for (String key : json.keySet()) { + m_namedCharacterReferenceMap.put(key, json.getJSONObject(key).getString("characters")); + } + } + + protected Map getNamedCharacterReferenceMap() { + return m_namedCharacterReferenceMap; + } + + /** + * Converts every valid character reference + * (named or numeric) in the given string to its corresponding Unicode character and returns the result. Invalid or incomplete + * references are left unchanged. If the given string is {@code null}, {@code null} is returned. + */ + public String unescapeAll(String s) { + if (s == null) { + return null; + } + StringBuilder sb = new StringBuilder(s); + int start = 0; + + while (start < sb.length()) { + start = sb.indexOf("&", start); + if (start == -1) { + break; + } + int end = sb.indexOf(";", start); + if (end == -1) { + break; + } + + String encoded = sb.substring(start, end + 1); + String decoded = decodeNamedCharacterReference(encoded); // Named character reference + if (decoded == null) { + decoded = decodeNumericCharacterReference(encoded); // Numeric character reference + } + if (decoded == null) { + start = end + 1; + } + else { + sb.replace(start, end + 1, decoded); + start = start + decoded.length(); + } + } + + return sb.toString(); + } + + /** + * Converts a single named character reference of the form "&name;" to its corresponding Unicode character. + * If no mapping exists for the given string, {@code null} is returned. + */ + public String decodeNamedCharacterReference(String ref) { + return m_namedCharacterReferenceMap.get(ref); + } + + /** + * Converts a single numeric character reference of + * the form "&#decimalValue;" or "&#xhexValue;" to its corresponding Unicode character. + * If the given string does not match the format or the value is not a valid code point, {@code null} is returned. + */ + public String decodeNumericCharacterReference(String ref) { + try { + if (ref.startsWith("&#x") || ref.startsWith("&#X")) { + String hex = ref.substring(3, ref.length() - 1); + return Character.toString(Integer.parseInt(hex, 16)); + } + if (ref.startsWith("&#")) { + String decimal = ref.substring(2, ref.length() - 1); + return Character.toString(Integer.parseInt(decimal)); + } + } + catch (Exception e) { + // invalid character reference -> ignore (don't replace entity) + } + return null; + } +} diff --git a/org.eclipse.scout.rt.platform/src/main/java/org/eclipse/scout/rt/platform/html/HtmlHelper.java b/org.eclipse.scout.rt.platform/src/main/java/org/eclipse/scout/rt/platform/html/HtmlHelper.java index 41fab5c21e9..227c8170fdb 100644 --- a/org.eclipse.scout.rt.platform/src/main/java/org/eclipse/scout/rt/platform/html/HtmlHelper.java +++ b/org.eclipse.scout.rt.platform/src/main/java/org/eclipse/scout/rt/platform/html/HtmlHelper.java @@ -9,10 +9,10 @@ */ package org.eclipse.scout.rt.platform.html; -import java.util.regex.Matcher; import java.util.regex.Pattern; import org.eclipse.scout.rt.platform.ApplicationScoped; +import org.eclipse.scout.rt.platform.BEANS; import org.eclipse.scout.rt.platform.util.StringUtility; /** @@ -30,8 +30,6 @@ public class HtmlHelper { private static final Pattern HTML_COMMENT = Pattern.compile("