Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion eclipse-scout-core/src/encoder/PlainTextEncoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ export interface PlainTextEncoderOptions {
compact?: boolean;

/**
* Calls string.trim(). White space at the beginning and the end of the text gets removed. Default is false.
* Calls string.trim(). Empty lines at the beginning and the end of the text get removed. Default is false.
* Leading and trailing spaces of each line are always removed, regardless of this option (non-breaking spaces are preserved).
*/
trim?: boolean;

Expand Down
29 changes: 29 additions & 0 deletions eclipse-scout-core/test/encoder/PlainTextEncoderSpec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ describe('PlainTextEncoder', () => {
let encoder = new PlainTextEncoder();

it('converts HTML to plain text', () => {
expect(encoder.encode(null)).toBe(null);
expect(encoder.encode('')).toBe('');

let htmlText = '<b>hello</b>';
Expand Down Expand Up @@ -113,6 +114,15 @@ describe('PlainTextEncoder', () => {

it('removes leading and trailing newlines if configured', () => {
  // The same text in three flavours: newlines only, spaces around the text,
  // and spaces on every line. All flavours are expected to yield the same
  // results, i.e. the spaces at the line boundaries never survive.
  const variants = [
    '\n\nHello!\n\n',
    '\n\n Hello! \n\n',
    ' \n \n Hello! \n \n '
  ];
  for (const html of variants) {
    // trim disabled: the surrounding empty lines are expected to stay
    expect(encoder.encode(html, {trim: false})).toBe('\n\nHello!\n\n');
    // trim enabled: the surrounding empty lines are expected to be dropped
    expect(encoder.encode(html, {trim: true})).toBe('Hello!');
  }
});

Expand Down Expand Up @@ -259,4 +269,23 @@ describe('PlainTextEncoder', () => {
htmlText = '<\tabc attr=\'someText\'>';
expect(encoder.removeAttributeValues(htmlText)).toBe('<\tabc attr=\'someText\'>');
});

it('trims lines, but preserves other white-space', () => {
expect(encoder.encode('hello')).toBe('hello');
expect(encoder.encode('one\ntwo')).toBe('one\ntwo');
expect(encoder.encode('one\r\ntwo')).toBe('one\ntwo');
expect(encoder.encode('one\rtwo')).toBe('one\ntwo');
expect(encoder.encode('one two')).toBe('one two');
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these special spaces? If so, maybe add a comment; otherwise I would expect duplicated spaces to be removed.

expect(encoder.encode('one&nbsp;&nbsp; two')).toBe('one\u00a0\u00a0 two');
expect(encoder.encode('one &#9; two')).toBe('one \t two');
expect(encoder.encode(' one \n two \n three ')).toBe('one\ntwo\nthree');
expect(encoder.encode('a\n&nbsp;&nbsp;b\n&nbsp;&nbsp;&nbsp;&nbsp;c')).toBe('a\n\u00A0\u00A0b\n\u00A0\u00A0\u00A0\u00A0c');
});

it('decodes special characters', () => {
  // Each entry: [HTML input, expected plain text].
  // The first two entries pin that an escaped ampersand is decoded once only:
  // the text produced by decoding is not decoded a second time.
  const samples: [string, string][] = [
    ['&amp;amp;', '&amp;'],
    ['&amp;&amp;amp;amp;', '&&amp;amp;'],
    ['Hell&ouml;!', 'Hellö!'],
    ['a&lt;br&gt;b', 'a<br>b']
  ];
  samples.forEach(([html, plainText]) => {
    expect(encoder.encode(html)).toBe(plainText);
  });
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Copyright (c) 2010, 2025 BSI Business Systems Integration AG
*
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.eclipse.scout.rt.platform.html;

import static org.junit.Assert.*;

import org.eclipse.scout.rt.platform.BEANS;
import org.eclipse.scout.rt.testing.platform.runner.PlatformTestRunner;
import org.junit.Test;
import org.junit.runner.RunWith;

@SuppressWarnings({"ConcatenationWithEmptyString", "SpellCheckingInspection", "TextBlockMigration"})
@RunWith(PlatformTestRunner.class)
public class HtmlEntitiesTest {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add some of these tests to the TS spec? (The plain text encoder should perform the same transformations.)


@Test
public void testUnescapeAll_Empty() {
  // Null, empty and single-space inputs are expected to be returned unchanged.
  HtmlEntities unescaper = BEANS.get(HtmlEntities.class);
  assertNull(unescaper.unescapeAll(null));
  for (String unchanged : new String[]{"", " "}) {
    assertEquals(unchanged, unescaper.unescapeAll(unchanged));
  }
}

@Test
public void testUnescapeAll_Named() {
  HtmlEntities unescaper = BEANS.get(HtmlEntities.class);
  // Each row: {expected result, input containing named character references}.
  String[][] rows = {
      {"ß", "&szlig;"},
      {"Ü", "&Uuml;"},
      {"&", "&amp;"},
      {"A&&Z", "A&amp;&amp;Z"},
      {"&auml;", "&amp;auml;"}, // decoded once only, the result is not decoded again
      {"auml;", "auml;"}, // no leading '&', expected unchanged
      {"drag&drop", "drag&drop"} // '&' without a terminating ';', expected unchanged
  };
  for (String[] row : rows) {
    assertEquals(row[0], unescaper.unescapeAll(row[1]));
  }
}

@Test
public void testUnescapeAll_NumericDecimal() {
  HtmlEntities unescaper = BEANS.get(HtmlEntities.class);
  // Each row: {expected result, input containing decimal character references}.
  String[][] rows = {
      {"'", "&#39;"},
      {"€", "&#8364;"},
      {"A€€Z", "A&#8364;&#8364;Z"}
  };
  for (String[] row : rows) {
    assertEquals(row[0], unescaper.unescapeAll(row[1]));
  }
}

@Test
public void testUnescapeAll_NumericHex() {
  HtmlEntities unescaper = BEANS.get(HtmlEntities.class);
  // Each row: {expected result, input containing hexadecimal character references}.
  // Both the 'x'/'X' marker and the hex digits are exercised in lower and upper case.
  String[][] rows = {
      {"B", "&#x42;"},
      {"B", "&#X42;"},
      {"?", "&#x3F;"},
      {"?", "&#X3f;"},
      {"🦕", "&#x1F995;"}, // code point outside the basic multilingual plane
      {"A🦕🦕Z", "A&#x1F995;&#x1F995;Z"}
  };
  for (String[] row : rows) {
    assertEquals(row[0], unescaper.unescapeAll(row[1]));
  }
}

@Test
public void testUnescapeAll_ComplexStrings() {
  HtmlEntities unescaper = BEANS.get(HtmlEntities.class);

  // Character references embedded in or mixed with regular text
  assertEquals("text'text", unescaper.unescapeAll("text&#39;text"));
  assertEquals("'€", unescaper.unescapeAll("&#39;&#8364;"));
  assertEquals("/ˈʊmlaʊt/", unescaper.unescapeAll("/&#x02C8;&#x028A;mla&#x028A;t/"));
  assertEquals("Kühlflüssigkeitsüberlaufbehälter", unescaper.unescapeAll("K&uuml;hlfl&uuml;ssigkeits&uuml;berlaufbeh&auml;lter"));
  assertEquals(" [Viele Kühe machen Mühe!] ", unescaper.unescapeAll(" &#X5B;&#86;ie&#x6c;e Kühe machen M&uuml;he&#x21;&#93; "));

  // Emoji sequences composed of several code points (including zero-width joiners)
  assertEquals("🤸🏾‍♀️", unescaper.unescapeAll("&#x1F938;&#x1F3FE;&zwj;&#x2640;&#xFE0F;"));
  assertEquals("🏴‍☠️", unescaper.unescapeAll("&#x1F3F4;&#x200D;&#x2620;&#xFE0F;"));

  // Multi-line text: the expected value spells the emojis out as UTF-16 surrogate pairs
  String expectedEmojiText = ""
      + "Face with Tears of Joy Emoji: \uD83D\uDE02\n"
      + "Party Popper Emoji: \uD83C\uDF89\n"
      + "Man Technologist: Medium-light Skin Tone: \uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBB";
  String encodedEmojiText = ""
      + "Face with Tears of Joy Emoji: &#128514;\n"
      + "Party Popper Emoji: &#127881;\n"
      + "Man Technologist: Medium-light Skin Tone: &#128104;&#127996;&zwj;&#128187;";
  assertEquals(expectedEmojiText, unescaper.unescapeAll(encodedEmojiText));

  // Markup is expected to be left alone, only the character references are replaced
  assertEquals("<body><div data-value=\"¡Hola!\">Zürich</div><hr></body>", unescaper.unescapeAll("<body><div data-value=\"&#161;Hola!\">Z&uuml;rich</div><hr></body>"));
  assertEquals("Qu’est-ce que c’est?", unescaper.unescapeAll("Qu&#8217;est-ce que c&#8217;est?"));
}

@Test
public void testUnescapeAll_Invalid() {
  HtmlEntities unescaper = BEANS.get(HtmlEntities.class);
  // Each row: {expected result, input}. Malformed or unknown references are expected to stay as they are.
  String[][] rows = {
      {"ä &doesNotExist; é", "&auml; &doesNotExist; &eacute;"}, // invalid entity name
      {"a &nbsp b", "a &nbsp b"}, // not terminated by ';'
      {"drag&drop", "drag&drop"}, // not an entity name
      {"drag & drop", "drag & drop"}, // not an entity name
      {"& auml;", "& auml;"}, // space after '&'
      {"&auml ;", "&auml ;"}, // space before ';'
      {"&auml,", "&auml,"}, // not terminated by ';'
      {"&#1234567890;", "&#1234567890;"}, // not a valid code point
      {"&39;", "&39;"}, // missing '#'
      {"&#a0;", "&#a0;"} // missing 'x'
  };
  for (String[] row : rows) {
    assertEquals(row[0], unescaper.unescapeAll(row[1]));
  }
}

@Test
public void testDecodeNamedCharacterReference() {
  HtmlEntities decoder = BEANS.get(HtmlEntities.class);

  // A single, well-formed named reference is decoded
  assertEquals("ä", decoder.decodeNamedCharacterReference("&auml;"));
  assertEquals("'", decoder.decodeNamedCharacterReference("&apos;"));

  // Everything else is expected to yield null: null/empty input, an unknown name,
  // more than one reference, missing delimiters and a numeric reference
  String[] rejected = {null, "", "&doesNotExist;", "&auml;&uuml;", "auml", "&#39;"};
  for (String candidate : rejected) {
    assertNull(decoder.decodeNamedCharacterReference(candidate));
  }
}

@Test
public void testDecodeNumericCharacterReference() {
  HtmlEntities decoder = BEANS.get(HtmlEntities.class);

  // A single, well-formed numeric reference (decimal or hexadecimal) is decoded.
  // Each row: {expected result, input}.
  String[][] accepted = {
      {"ä", "&#228;"},
      {"ä", "&#xE4;"},
      {"'", "&#39;"},
      {"'", "&#x27;"}
  };
  for (String[] row : accepted) {
    assertEquals(row[0], decoder.decodeNumericCharacterReference(row[1]));
  }

  // Everything else is expected to yield null
  String[] rejected = {
      null,
      "",
      "&doesNotExist;", // named, not numeric
      "&#228;&#39;", // more than one reference
      "#39;", // missing '&'
      "&#E4;", // hex digits without 'x'
      "&#hello;", // not a number
      "&#1234567890;" // not a valid code point
  };
  for (String candidate : rejected) {
    assertNull(decoder.decodeNumericCharacterReference(candidate));
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
/**
* @since 5.2
*/
@SuppressWarnings({"SpellCheckingInspection", "ConcatenationWithEmptyString", "TextBlockMigration", "UnnecessaryUnicodeEscape"})
@RunWith(PlatformTestRunner.class)
public class HtmlHelperTest {

Expand All @@ -40,12 +41,13 @@ public void testToPlainText() {
assertEquals("one two", helper.toPlainText("one\r\ntwo"));
assertEquals("onetwo", helper.toPlainText("one\rtwo"));
assertEquals("hell<", helper.toPlainText("hell&lt;"));
assertEquals("one two", helper.toPlainText("one&nbsp;&nbsp; two"));
assertEquals("hell&ouml;", helper.toPlainText("hell&ouml;")); // [?] not all entities are replaced
assertEquals("hellö", helper.toPlainText("hell&ouml;"));
assertEquals("one\ttwo", helper.toPlainText("one&#9;two"));
assertEquals("one \t two", helper.toPlainText("one &#9; two"));
assertEquals("one\ttwo", helper.toPlainText("one" + StringUtility.HTML_ENCODED_TAB + "two"));
assertEquals("one two", helper.toPlainText("one&#160;&#xa0;&#Xa0;&#xA0;two")); // HTML5 spec allows for mixed case hex values.
assertEquals("one two", helper.toPlainText("one two"));
assertEquals("one\u00A0\u00A0 two", helper.toPlainText("one&nbsp;&nbsp; two"));
assertEquals("one\u00A0\u00A0\u00A0\u00A0two", helper.toPlainText("one&#160;&#xa0;&#Xa0;&#xA0;two")); // HTML5 spec allows for mixed case hex values.
assertEquals("one\t\ttwo", helper.toPlainText("one&#x9;&#X9;two")); // HTML5 spec allows for mixed case hex values.
assertEquals("Unterraschungsfeier", helper.toPlainText("<div class=\"rte-line\">Unter<u>rasch</u>u<span class=\"rte-highlight\" style=\"background-color: rgb(255, 219, 157)\">ngs</span>feier<br></div>")); // Formating tags within a single word.
assertEquals("Header 1\nHeader 2", helper.toPlainText("<h1>Header 1</h1><h1>Header 2</h1>")); // Headers
Expand All @@ -71,15 +73,27 @@ public void testToPlainText() {
assertEquals("one & two", helper.toPlainText("<html><head>one & two</html>"));
assertEquals("one & two", helper.toPlainText("<html><head>one &amp; two</html>"));
assertEquals("one & two\nthree", helper.toPlainText("<html><head>one &amp; two</head><body>three</html>")); // [?] invalid <body>, has no end tag
assertEquals("&amp;", helper.toPlainText("&amp;amp;"));
assertEquals("&auml;", helper.toPlainText("&amp;auml;"));
assertEquals("&#x42;", helper.toPlainText("&amp;#x42;"));
assertEquals("three", helper.toPlainText("<html><head>one &amp; two</head><body>three</body></html>"));
assertEquals("Unterraschungsfeier",
helper.toPlainText("<html><body><div class=\"rte-line\">Unter<u>rasch</u>u<span class=\"rte-highlight\" style=\"background-color: rgb(255, 219, 157)\">ngs</span>feier<br></div></body></html>"));
assertEquals("Z1\nZ2", helper.toPlainText("<p>Z1</p><span></span><p>Z2</p>"));
assertEquals("Guten Tag\n\n\u00A0\n\nZeile 2", helper.toPlainText("<html><body><div><div><p>Guten Tag<o:p></o:p></p></div><div><p><o:p>&nbsp;</o:p></span></p></div><div><p><span>Zeile 2<o:p></o:p></span></p></div></div></body></html>"));
assertEquals("’", helper.toPlainText("&#8217;"));
assertEquals("+", helper.toPlainText("&#43;"));
assertEquals("+", helper.toPlainText("&#x2B;"));
assertEquals("+", helper.toPlainText("&#X2B;"));
assertEquals("&#X2Bs;", helper.toPlainText("&#X2Bs;"));

// Line breaks
assertEquals("a\nb", helper.toPlainText("a<br>b"));
assertEquals("a\nb", helper.toPlainText("a <br/> b"));
assertEquals("a\nb", helper.toPlainText("a <br/> b"));
assertEquals("a \nb", helper.toPlainText("a&nbsp;<br/> b"));
assertEquals("a\u00A0\nb", helper.toPlainText("a&nbsp;<br/> b"));
assertEquals("a \u00A0\nb", helper.toPlainText("a &nbsp; <br/> b "));
assertEquals("a\n\u00A0\u00A0b\n\u00A0\u00A0\u00A0\u00A0c", helper.toPlainText("a<br>&nbsp;&nbsp;b<br>&nbsp;&nbsp;&nbsp;&nbsp;c"));
assertEquals("line", helper.toPlainText("<br/>line")); // [?]
assertEquals("line1\nx\nline2", helper.toPlainText("<p>line1<br>\nx</p><p>line2</p>"));
assertEquals("line1 x\nline2", helper.toPlainText("<div>line1\nx</div><div>line2</div>")); // [?]
Expand Down
Loading