Skip to content

Commit

Permalink
danfickle#549 Fix soft hyphen characters are visible
Browse files Browse the repository at this point in the history
  • Loading branch information
schrader committed Oct 9, 2020
1 parent 83ae4e9 commit e8db381
Show file tree
Hide file tree
Showing 10 changed files with 256 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import com.openhtmltopdf.render.FSFont;
import com.openhtmltopdf.render.FSFontMetrics;
import com.openhtmltopdf.render.JustificationInfo;
import com.openhtmltopdf.util.OpenUtil;

import static com.openhtmltopdf.util.OpenUtil.areAllCharactersPrintable;
import static com.openhtmltopdf.util.OpenUtil.isCodePointPrintable;

public interface TextRenderer {

Expand All @@ -43,11 +43,7 @@ static String getEffectivePrintableString(String input) {
}

StringBuilder effective = new StringBuilder(input.length());
for (int i = 0; i < input.length(); i++) {
if (isCodePointPrintable(input.codePointAt(i))) {
effective.append(input.charAt(i));
}
}
input.codePoints().filter(OpenUtil::isCodePointPrintable).forEach(effective::appendCodePoint);

return effective.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ public static boolean isCodePointPrintable(int codePoint) {
*/
public static boolean areAllCharactersPrintable(String str) {
Objects.requireNonNull(str, "str");
return IntStream.range(0, str.length()).allMatch(idx -> isCodePointPrintable(str.codePointAt(idx)));
return str.codePoints().allMatch(OpenUtil::isCodePointPrintable);
}

public static Integer parseIntegerOrNull(String possibleInteger) {
try {
return Integer.parseInt(possibleInteger);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package com.openhtmltopdf.util;

import org.junit.Test;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.*;

/**
 * Unit tests for {@link OpenUtil}.
 *
 * @author schrader
 */
public class OpenUtilTest {

    /**
     * Checks that a string made of ASCII letters, digits, spaces and one
     * surrogate pair is reported as consisting of printable characters only.
     */
    @Test
    public void areAllCharactersPrintable() {
        // The trailing \uD844\uDCC1 is a high/low surrogate pair, i.e. a single
        // code point encoded as two Java chars.
        final String input = "abc 123 \uD844\uDCC1";

        assertTrue(OpenUtil.areAllCharactersPrintable(input));
    }

}
62 changes: 62 additions & 0 deletions openhtmltopdf-examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
</license>
</licenses>

<properties>
<jmh.version>1.25.2</jmh.version>
<uberjar.name>benchmarks</uberjar.name>
</properties>

<dependencies>
<dependency>
<groupId>com.openhtmltopdf</groupId>
Expand Down Expand Up @@ -115,6 +120,18 @@
<version>1.13.1</version>
</dependency>

<!-- JMH Benchmarks -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>${jmh.version}</version>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>${jmh.version}</version>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down Expand Up @@ -154,6 +171,51 @@
</systemProperties>
</configuration>
</plugin>
<!-- workaround for: 'java.lang.IllegalStateException: endPosTable already set'.
see: https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8067747
Thrown while compiling JMH classes
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
</plugin>
<!-- Create an all-in JAR for running JMH benchmarks -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.2</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<finalName>${uberjar.name}</finalName>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.openjdk.jmh.Main</mainClass>
</transformer>
</transformers>
<filters>
<filter>
<!--
Shading signed JARs will fail without this.
http://stackoverflow.com/questions/999489/invalid-signature-file-when-attempting-to-run-a-jar
-->
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<resources>
<resource>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package com.openhtmltopdf.benchmark;

import com.openhtmltopdf.util.Diagnostic;
import com.openhtmltopdf.util.XRLogger;

import java.util.logging.Level;

/**
 * {@link XRLogger} implementation that discards every log call and reports
 * every diagnostic as disabled.
 *
 * @author schrader
 */
class NoopLogger implements XRLogger {

    /** Ignores the message. */
    @Override
    public void log(String where, Level level, String msg) {
        // intentionally empty
    }

    /** Ignores the message and the accompanying throwable. */
    @Override
    public void log(String where, Level level, String msg, Throwable th) {
        // intentionally empty
    }

    /** Ignores the requested level; this logger keeps no state. */
    @Override
    public void setLevel(String logger, Level level) {
        // intentionally empty
    }

    /**
     * @return always {@code false}, regardless of the diagnostic passed in
     */
    @Override
    public boolean isLogLevelEnabled(Diagnostic diagnostic) {
        return false;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package com.openhtmltopdf.benchmark;

import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
import com.openhtmltopdf.util.XRLog;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.util.Charsets;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark that renders two small HTML documents to PDF: one with plain
 * text and one whose text contains soft hyphens. Results are reported as
 * average time per invocation in milliseconds.
 *
 * @author schrader
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
public class RenderTextBenchmark {

    /** Classpath location of the plain-text HTML fixture. */
    private static final String PLAIN_RESOURCE = "/benchmark/render-text-plain.html";

    /** Classpath location of the HTML fixture containing soft hyphens. */
    private static final String SOFT_HYPHENS_RESOURCE = "/benchmark/render-text-soft-hyphens.html";

    /** HTML sources keyed by classpath resource path; filled once in {@link #setUp()}. */
    private final Map<String, String> contents = new HashMap<>();

    /**
     * Runs this benchmark class through the JMH runner with a single fork.
     *
     * @param args ignored
     * @throws Exception if the JMH runner fails
     */
    public static void main(String[] args) throws Exception {
        Options opt = new OptionsBuilder()
                .include(RenderTextBenchmark.class.getSimpleName())
                .forks(1)
                .build();

        new Runner(opt).run();
    }

    /**
     * Installs a no-op logger and loads both HTML fixtures into memory so that
     * resource reading is not part of the measured code.
     */
    @Setup
    public void setUp() {
        XRLog.setLoggerImpl(new NoopLogger());

        Arrays.asList(PLAIN_RESOURCE, SOFT_HYPHENS_RESOURCE)
                .forEach(path -> contents.put(path, readContent(path)));
    }

    /** Renders the plain-text fixture. */
    @Benchmark
    public void renderText_Plain() throws Exception {
        runRenderer(contents.get(PLAIN_RESOURCE));
    }

    /** Renders the soft-hyphen fixture. */
    @Benchmark
    public void renderText_SoftHyphens() throws Exception {
        runRenderer(contents.get(SOFT_HYPHENS_RESOURCE));
    }

    /**
     * Renders the given HTML into an in-memory stream that is discarded afterwards.
     *
     * @param html the HTML document to render
     * @throws IOException if rendering fails
     */
    private void runRenderer(String html) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();

        PdfRendererBuilder builder = new PdfRendererBuilder();
        builder.withHtmlContent(html, null);
        builder.toStream(out);
        builder.useFastMode();
        builder.testMode(true);

        builder.run();
    }

    /**
     * Reads a classpath resource fully and decodes it as UTF-8.
     *
     * @param path absolute classpath path of the resource
     * @return the resource content
     * @throws IllegalStateException if the resource does not exist on the classpath
     * @throws UncheckedIOException if reading the resource fails
     */
    private String readContent(String path) {
        try (InputStream htmlIs = RenderTextBenchmark.class.getResourceAsStream(path)) {
            // getResourceAsStream signals a missing resource with null rather than an exception;
            // fail here with the offending path instead of deep inside the stream copy.
            if (htmlIs == null) {
                throw new IllegalStateException("Benchmark resource not found on classpath: " + path);
            }
            byte[] htmlBytes = IOUtils.toByteArray(htmlIs);
            return new String(htmlBytes, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new UncheckedIOException("Unable to read benchmark resource: " + path, e);
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html PUBLIC
"-//OPENHTMLTOPDF//DOC XHTML Character Entities Only 1.0//EN" "">
<html>
<head>
<style>
</style>
</head>
<body>
<p>Li Europan lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular.</p>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html PUBLIC
"-//OPENHTMLTOPDF//DOC XHTML Character Entities Only 1.0//EN" "">
<html>
<head>
<style>
</style>
</head>
<body>
<p>Li Eu­ro­pan lin­gues es mem­bres del sam fa­mi­lie. Lor se­pa­rat exi­sten­tie es un myth. Por sci­en­tie, mu­si­ca, spo­rt etc.</p>
</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.openhtmltopdf.extend.OutputDevice;
import com.openhtmltopdf.extend.OutputDeviceGraphicsDrawer;
import com.openhtmltopdf.extend.StructureType;
import com.openhtmltopdf.extend.TextRenderer;
import com.openhtmltopdf.layout.SharedContext;
import com.openhtmltopdf.outputdevice.helper.FontResolverHelper;
import com.openhtmltopdf.pdfboxout.PdfBoxFontResolver.FontDescription;
Expand Down Expand Up @@ -395,22 +396,22 @@ private AffineTransform normalizeMatrix(AffineTransform current) {

public void drawString(String s, float x, float y, JustificationInfo info) {
PDFont firstFont = _font.getFontDescription().get(0).getFont();


String effectiveString = TextRenderer.getEffectivePrintableString(s);

// First check if the string contains printable characters only and
// will print with the current font entirely.
try {
if (areAllCharactersPrintable(s)) {
firstFont.encode(s);
// We got here, so all is good.
drawStringFast(s, x, y, info, _font.getFontDescription().get(0), _font.getSize2D());
return;
}
firstFont.getStringWidth(effectiveString);
// We got here, so all is good.
drawStringFast(effectiveString, x, y, info, _font.getFontDescription().get(0), _font.getSize2D());
return;
}
catch (Exception e) {
// Fallthrough, we'll have to process the string into font runs.
}

List<FontRun> fontRuns = PdfBoxTextRenderer.divideIntoFontRuns(_font, s, _reorderer);
List<FontRun> fontRuns = PdfBoxTextRenderer.divideIntoFontRuns(_font, effectiveString, _reorderer);

float xOffset = 0f;
for (FontRun run : fontRuns) {
Expand Down
Loading

0 comments on commit e8db381

Please sign in to comment.