diff --git a/NAPS2.App.Tests/Appium/ImportAndSaveTests.cs b/NAPS2.App.Tests/Appium/ImportAndSaveTests.cs
index d7707c2c0..3b7d914d2 100644
--- a/NAPS2.App.Tests/Appium/ImportAndSaveTests.cs
+++ b/NAPS2.App.Tests/Appium/ImportAndSaveTests.cs
@@ -52,7 +52,7 @@ public class ImportAndSaveTests : AppiumTests
         PdfAsserts.AssertContainsTextOnce("Page one.", path);
         PdfAsserts.AssertContainsTextOnce("Page two.", path);
         PdfAsserts.AssertContainsTextOnce("ADVERTISEMENT.", path);
-        PdfAsserts.AssertContainsTextOnce("Patch Code separator sheet geometry", path);
+        PdfAsserts.AssertContainsTextOnce("Sized for printing unscaled", path);
         AppTestHelper.AssertNoErrorLog(FolderPath);
     }
 
diff --git a/NAPS2.Sdk.Tests/ContextualTests.cs b/NAPS2.Sdk.Tests/ContextualTests.cs
index 3e876cbf4..c22d63b09 100644
--- a/NAPS2.Sdk.Tests/ContextualTests.cs
+++ b/NAPS2.Sdk.Tests/ContextualTests.cs
@@ -89,10 +89,13 @@ public class ContextualTests : IDisposable
                     var ocrImage = ImageContext.Load(path);
                     await Task.Delay(delay);
 
-                    OcrResult CreateOcrResult(string text) => new((0, 0, 100, 100),
-                        ImmutableList.Create(
+                    OcrResult CreateOcrResult(string text)
+                    {
+                        var list = ImmutableList.Create(
                             new OcrResultElement(text, ocrParams.LanguageCode!, false,
-                                (10, 10, 10, 10))));
+                                (10, 10, 10, 10), 0, 10, ImmutableList<OcrResultElement>.Empty));
+                        return new((0, 0, 100, 100), list, list);
+                    }
 
                     if (ocrTextByImage != null)
                     {
diff --git a/NAPS2.Sdk.Tests/Ocr/OcrRequestQueueTests.cs b/NAPS2.Sdk.Tests/Ocr/OcrRequestQueueTests.cs
index a11918fa9..2ee436300 100644
--- a/NAPS2.Sdk.Tests/Ocr/OcrRequestQueueTests.cs
+++ b/NAPS2.Sdk.Tests/Ocr/OcrRequestQueueTests.cs
@@ -375,8 +375,8 @@ public class OcrRequestQueueTests : ContextualTests
 
     private static OcrResult CreateOcrResult()
     {
-        var uniqueElement = new OcrResultElement(Guid.NewGuid().ToString(), "eng", false, (0, 0, 1, 1));
-        return new OcrResult((0, 0, 1, 1), ImmutableList<OcrResultElement>.Empty.Add(uniqueElement));
+        var elements = ImmutableList.Create(new OcrResultElement(Guid.NewGuid().ToString(), "eng", false, (0, 0, 1, 1), 0, 10, ImmutableList<OcrResultElement>.Empty));
+        return new OcrResult((0, 0, 1, 1), elements, elements); // one GUID-named element serves as both words and lines
     }
 
     private static OcrParams CreateOcrParams()
diff --git a/NAPS2.Sdk.Tests/Ocr/TesseractOcrEngineTests.cs b/NAPS2.Sdk.Tests/Ocr/TesseractOcrEngineTests.cs
index c7ad33b2c..4f86ae0dc 100644
--- a/NAPS2.Sdk.Tests/Ocr/TesseractOcrEngineTests.cs
+++ b/NAPS2.Sdk.Tests/Ocr/TesseractOcrEngineTests.cs
@@ -26,17 +26,17 @@ public class TesseractOcrEngineTests : ContextualTests
         var ocrParams = new OcrParams("eng", OcrMode.Fast, 0);
         var result = await _engine.ProcessImage(ScanningContext, _testImagePath, ocrParams, CancellationToken.None);
         Assert.NotNull(result);
-        Assert.NotEmpty(result.Elements);
-        foreach (var element in result.Elements)
+        Assert.NotEmpty(result.Words);
+        foreach (var element in result.Words)
         {
             Assert.Equal("eng", element.LanguageCode);
             Assert.False(element.RightToLeft);
         }
-        Assert.Equal("ADVERTISEMENT.", result.Elements[0].Text);
-        Assert.InRange(result.Elements[0].Bounds.x, 139, 149);
-        Assert.InRange(result.Elements[0].Bounds.y, 26, 36);
-        Assert.InRange(result.Elements[0].Bounds.w, 237, 247);
-        Assert.InRange(result.Elements[0].Bounds.h, 17, 27);
+        Assert.Equal("ADVERTISEMENT.", result.Words[0].Text);
+        Assert.InRange(result.Words[0].Bounds.x, 139, 149);
+        Assert.InRange(result.Words[0].Bounds.y, 26, 36);
+        Assert.InRange(result.Words[0].Bounds.w, 237, 247);
+        Assert.InRange(result.Words[0].Bounds.h, 17, 27);
     }
 
     [Fact]
@@ -44,13 +44,13 @@ public class TesseractOcrEngineTests : ContextualTests
     {
         var result = await _engine.ProcessImage(ScanningContext, _testImagePathHebrew, new OcrParams("heb", OcrMode.Fast, 0), CancellationToken.None);
         Assert.NotNull(result);
-        Assert.NotEmpty(result.Elements);
-        foreach (var element in result.Elements)
+        Assert.NotEmpty(result.Words);
+        foreach (var element in result.Words)
         {
             Assert.Equal("heb", element.LanguageCode);
             Assert.True(element.RightToLeft);
         }
-        Assert.Equal("הקדמת", result.Elements[0].Text);
+        Assert.Equal("הקדמת", result.Words[0].Text);
     }
 
     [Fact(Skip = "flaky")]
@@ -97,6 +97,6 @@ public class TesseractOcrEngineTests : ContextualTests
         var mode = OcrMode.Best;
         var result = await _engine.ProcessImage(ScanningContext, _testImagePath, new OcrParams("eng", mode, 0), CancellationToken.None);
         Assert.NotNull(result);
-        Assert.Equal("ADVERTISEMENT.", result.Elements[0].Text);
+        Assert.Equal("ADVERTISEMENT.", result.Words[0].Text);
     }
 }
\ No newline at end of file
diff --git a/NAPS2.Sdk/Ocr/OcrResult.cs b/NAPS2.Sdk/Ocr/OcrResult.cs
index add3eae2c..0eda6dee2 100644
--- a/NAPS2.Sdk/Ocr/OcrResult.cs
+++ b/NAPS2.Sdk/Ocr/OcrResult.cs
@@ -5,15 +5,14 @@ namespace NAPS2.Ocr;
 /// <summary>
 /// The result of an OCR request. Contains a set of elements that represent text segments. 
 /// </summary>
-public class OcrResult
+public class OcrResult(
+    (int x, int y, int w, int h) pageBounds,
+    ImmutableList<OcrResultElement> words,
+    ImmutableList<OcrResultElement> lines)
 {
-    public OcrResult((int x, int y, int w, int h) pageBounds, ImmutableList<OcrResultElement> elements)
-    {
-        PageBounds = pageBounds;
-        Elements = elements;
-    }
+    public (int x, int y, int w, int h) PageBounds { get; } = pageBounds; // extent of the OCR'd page, in the same coordinates as element bounds
 
-    public (int x, int y, int w, int h) PageBounds { get; }
+    public ImmutableList<OcrResultElement> Words { get; } = words; // all recognized words as a flat list
 
-    public ImmutableList<OcrResultElement> Elements { get; }
+    public ImmutableList<OcrResultElement> Lines { get; } = lines; // one element per text line; its Children are that line's words
 }
\ No newline at end of file
diff --git a/NAPS2.Sdk/Ocr/OcrResultElement.cs b/NAPS2.Sdk/Ocr/OcrResultElement.cs
index ddbedee3a..ad312367f 100644
--- a/NAPS2.Sdk/Ocr/OcrResultElement.cs
+++ b/NAPS2.Sdk/Ocr/OcrResultElement.cs
@@ -1,6 +1,15 @@
-﻿namespace NAPS2.Ocr;
+﻿using System.Collections.Immutable;
+
+namespace NAPS2.Ocr;
 
 /// <summary>
 /// A element in the result of an OCR request that represents a text segment.
 /// </summary>
-public record OcrResultElement(string Text, string LanguageCode, bool RightToLeft, (int x, int y, int w, int h) Bounds);
\ No newline at end of file
+public record OcrResultElement(
+    string Text,
+    string LanguageCode,
+    bool RightToLeft,
+    (int x, int y, int w, int h) Bounds,
+    int Baseline, // absolute y coordinate of the text baseline, in the same coordinates as Bounds
+    int FontSize, // font size as reported by the OCR engine; the Tesseract engine uses 0 when none is reported
+    ImmutableList<OcrResultElement> Children); // for a line element, the words on that line; empty for word elements
\ No newline at end of file
diff --git a/NAPS2.Sdk/Ocr/TesseractOcrEngine.cs b/NAPS2.Sdk/Ocr/TesseractOcrEngine.cs
index 17de9e249..5fa1cb0df 100644
--- a/NAPS2.Sdk/Ocr/TesseractOcrEngine.cs
+++ b/NAPS2.Sdk/Ocr/TesseractOcrEngine.cs
@@ -4,6 +4,7 @@ using System.Xml;
 using Microsoft.Extensions.Logging;
 using NAPS2.Scan;
 using NAPS2.Unmanaged;
+using Bounds = (int x, int y, int w, int h);
 
 namespace NAPS2.Ocr;
 
@@ -74,10 +75,11 @@ public class TesseractOcrEngine : IOcrEngine
             {
                 PreProcessImage(scanningContext, imagePath);
             }
+            var configVals = "-c tessedit_create_hocr=1 -c hocr_font_info=1";
             var startInfo = new ProcessStartInfo
             {
                 FileName = _tesseractPath,
-                Arguments = $"\"{imagePath}\" \"{tempHocrFilePath}\" -l {ocrParams.LanguageCode} hocr",
+                Arguments = $"\"{imagePath}\" \"{tempHocrFilePath}\" -l {ocrParams.LanguageCode} {configVals}",
                 UseShellExecute = false,
                 CreateNoWindow = true,
                 RedirectStandardOutput = true,
@@ -92,8 +94,6 @@ public class TesseractOcrEngine : IOcrEngine
                     languageDataPath = Path.Combine(languageDataPath, subfolder);
                 }
                 startInfo.EnvironmentVariables["TESSDATA_PREFIX"] = languageDataPath;
-                var tessdata = new DirectoryInfo(languageDataPath);
-                EnsureHocrConfigExists(tessdata);
             }
             var tesseractProcess = Process.Start(startInfo);
             if (tesseractProcess == null)
@@ -150,22 +150,7 @@ public class TesseractOcrEngine : IOcrEngine
                 }
 #endif
             XDocument hocrDocument = XDocument.Load(tempHocrFilePathWithExt);
-            var pageBounds = hocrDocument.Descendants()
-                .Where(x => x.Attributes("class").Any(y => y.Value == "ocr_page"))
-                .Select(x => GetBounds(x.Attribute("title")))
-                .First();
-            var elements = hocrDocument.Descendants()
-                .Where(x => x.Attributes("class").Any(y => y.Value == "ocrx_word"))
-                .Where(x => !string.IsNullOrWhiteSpace(x.Value))
-                .Select(x =>
-                {
-                    var text = x.Value;
-                    var lang = GetNearestAncestorAttribute(x, "lang") ?? "";
-                    var rtl = GetNearestAncestorAttribute(x, "dir") == "rtl";
-                    var bounds = GetBounds(x.Attribute("title"));
-                    return new OcrResultElement(text, lang, rtl, bounds);
-                }).ToImmutableList();
-            return new OcrResult(pageBounds, elements);
+            return CreateOcrResult(hocrDocument);
         }
         catch (XmlException e)
         {
@@ -211,57 +196,136 @@ public class TesseractOcrEngine : IOcrEngine
         }
     }
 
+    // Converts an hOCR document into an OcrResult: a flat list of words plus one element per text line.
+    private OcrResult CreateOcrResult(XDocument hocrDocument)
+    {
+        var pageBounds = hocrDocument.Descendants()
+            .Where(element => GetClass(element) == "ocr_page")
+            .Select(GetBounds).First();
+        var words = new List<OcrResultElement>();
+        var lines = new List<OcrResultElement>();
+        // Tesseract labels a text line by its block type, so caption lines ("ocr_caption") must be included too.
+        foreach (var lineElement in hocrDocument.Descendants()
+                     .Where(element => GetClass(element) is "ocr_line" or "ocr_header" or "ocr_textfloat" or "ocr_caption"))
+        {
+            var lineBounds = GetBounds(lineElement);
+            bool isRotated = GetTextAngle(lineElement) is >= 45 or <= -45;
+            var baselineParams = GetBaselineParams(lineElement);
+            var lineWords = lineElement.Descendants()
+                .Where(element => GetClass(element) == "ocrx_word")
+                .Where(element => !string.IsNullOrWhiteSpace(element.Value))
+                .Select(wordElement =>
+                {
+                    var wordBounds = GetBounds(wordElement);
+                    return new OcrResultElement(
+                        wordElement.Value,
+                        GetNearestAncestorAttribute(wordElement, "lang") ?? "",
+                        GetNearestAncestorAttribute(wordElement, "dir") == "rtl",
+                        wordBounds,
+                        // TODO: Maybe we can properly handle rotated text?
+                        isRotated
+                            ? wordBounds.y + wordBounds.h
+                            : CalculateBaseline(baselineParams, lineBounds, wordBounds),
+                        GetFontSize(wordElement),
+                        ImmutableList<OcrResultElement>.Empty);
+                }).ToImmutableList();
+            if (lineWords.Count == 0) continue;
+            words.AddRange(lineWords);
+            lines.Add(lineWords[0] with // the line takes language, direction and font size from its first word
+            {
+                Text = string.Join(" ", lineWords.Select(x => x.Text)),
+                Bounds = lineBounds,
+                Baseline = CalculateBaseline(baselineParams, lineBounds, lineBounds),
+                Children = lineWords
+            });
+        }
+        return new OcrResult(pageBounds, words.ToImmutableList(), lines.ToImmutableList());
+    }
+
     private static string? GetNearestAncestorAttribute(XElement x, string attributeName)
     {
         var ancestor = x.AncestorsAndSelf().FirstOrDefault(x => x.Attribute(attributeName) != null);
         return ancestor?.Attribute(attributeName)?.Value;
     }
 
-    private void EnsureHocrConfigExists(DirectoryInfo tessdata)
+    private string? GetClass(XElement? element)
     {
-        try
-        {
-            var configDir = new DirectoryInfo(Path.Combine(tessdata.FullName, "configs"));
-            if (!configDir.Exists)
-            {
-                configDir.Create();
-            }
-            var hocrConfigFile = new FileInfo(Path.Combine(configDir.FullName, "hocr"));
-            if (!hocrConfigFile.Exists)
-            {
-                using var writer = hocrConfigFile.CreateText();
-                writer.Write("tessedit_create_hocr 1");
-            }
-        }
-        catch (Exception)
-        {
-            // Possibly contention over creating the file. As long as it's created assume everything is okay.
-            if (!File.Exists(Path.Combine(tessdata.FullName, "configs", "hocr")))
-            {
-                throw;
-            }
-        }
+        return element?.Attribute("class")?.Value; // null when the element is null or has no class attribute
     }
 
-    private (int x, int y, int w, int h) GetBounds(XAttribute? titleAttr)
+    private bool ParseData(XElement? element, string dataKey, int dataCount, out string[] parts)
     {
-        var bounds = (0, 0, 0, 0);
+        parts = Array.Empty<string>();
+        var titleAttr = element?.Attribute("title"); // the title attribute holds ';'-separated "key value value ..." entries
         if (titleAttr != null)
         {
             foreach (var param in titleAttr.Value.Split(';'))
             {
-                string[] parts = param.Trim().Split(' ');
-                if (parts.Length == 5 && parts[0] == "bbox")
+                parts = param.Trim().Split(' ');
+                if (parts[0] == dataKey && parts.Length == dataCount + 1) // key matches and exactly dataCount values follow it
                 {
-                    int x1 = int.Parse(parts[1]), y1 = int.Parse(parts[2]);
-                    int x2 = int.Parse(parts[3]), y2 = int.Parse(parts[4]);
-                    bounds = (x1, y1, x2 - x1, y2 - y1);
+                    return true;
                 }
             }
         }
+        return false; // no match: parts is then the last entry examined (or empty), so callers must check the result
+    }
+
+    private Bounds GetBounds(XElement? element)
+    {
+        // Converts the hOCR "bbox" values (x1 y1 x2 y2) to (x, y, width, height); all zeros when there is no bbox.
+        Bounds bounds = default;
+        if (ParseData(element, "bbox", 4, out string[] bbox))
+        {
+            int originX = int.Parse(bbox[1]), originY = int.Parse(bbox[2]);
+            bounds = (originX, originY, int.Parse(bbox[3]) - originX, int.Parse(bbox[4]) - originY);
+        }
         return bounds;
     }
 
+    private int GetFontSize(XElement? element)
+    {
+        // Reads the single integer value of the hOCR "x_fsize" property; 0 when the property is absent.
+        if (!ParseData(element, "x_fsize", 1, out string[] sizeParts))
+        {
+            return 0;
+        }
+        return int.Parse(sizeParts[1]);
+    }
+
+    private (float m, float b) GetBaselineParams(XElement? element)
+    {
+        // Reads hOCR "baseline <m> <b>". hOCR always uses '.' decimals, so parse culture-invariantly; (0, 0) if absent.
+        float m = 0, b = 0;
+        if (ParseData(element, "baseline", 2, out string[] parts))
+        {
+            m = float.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture);
+            b = float.Parse(parts[2], System.Globalization.CultureInfo.InvariantCulture);
+        }
+        return (m, b);
+    }
+
+    private float GetTextAngle(XElement? element)
+    {
+        // Reads hOCR "textangle <angle>"; parsed culture-invariantly since hOCR uses '.' decimals. 0 when absent.
+        if (!ParseData(element, "textangle", 1, out string[] parts))
+        {
+            return 0;
+        }
+        return float.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture);
+    }
+
+    private int CalculateBaseline((float m, float b) baselineParams, Bounds lineBounds, Bounds elementBounds)
+    {
+        // The line baseline is linear (y = m*x + b), with x measured from the line's left edge and y from the
+        // bottom of the line box. Evaluate it at the element's horizontal midpoint and make it absolute.
+        var (slope, intercept) = baselineParams;
+        float centerX = elementBounds.x + elementBounds.w / 2f;
+        float offsetFromLineLeft = centerX - lineBounds.x;
+        int relativeToLineBottom = (int) Math.Round(intercept + slope * offsetFromLineLeft);
+        return relativeToLineBottom + lineBounds.y + lineBounds.h;
+    }
+
     // TODO: Consider adding back CanProcess, or otherwise using this code to get the languages from a system engine
 //     private void CheckIfInstalled()
 //     {
diff --git a/NAPS2.Sdk/Pdf/PdfExporter.cs b/NAPS2.Sdk/Pdf/PdfExporter.cs
index ebb837274..f73923fa9 100644
--- a/NAPS2.Sdk/Pdf/PdfExporter.cs
+++ b/NAPS2.Sdk/Pdf/PdfExporter.cs
@@ -7,12 +7,12 @@ using NAPS2.Ocr;
 using NAPS2.Pdf.Pdfium;
 using NAPS2.Scan;
 using PdfSharpCore.Drawing;
-using PdfSharpCore.Drawing.Layout;
 using PdfSharpCore.Pdf;
 using PdfSharpCore.Pdf.IO;
 using PdfSharpCore.Pdf.Security;
 using PdfDocument = PdfSharpCore.Pdf.PdfDocument;
 using PdfPage = PdfSharpCore.Pdf.PdfPage;
+using Alphabet = NAPS2.Pdf.PdfFontPicker.Alphabet;
 
 namespace NAPS2.Pdf;
 
@@ -398,20 +398,19 @@ public class PdfExporter
     private static void DrawOcrTextOnPage(PdfPage page, OcrResult ocrResult)
     {
 #if DEBUG && DEBUGOCR
-            using XGraphics gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Append);
+        using XGraphics gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Append);
 #else
         using XGraphics gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Prepend);
 #endif
-        var tf = new XTextFormatter(gfx);
-        foreach (var element in ocrResult.Elements)
+        foreach (var info in GetOcrTextToDraw(page, ocrResult, gfx)) // one entry per OCR word to draw
         {
-            var info = GetTextDrawInfo(page, gfx, ocrResult, element);
-            if (info == null) continue;
+            var font = new XFont(info.FontFamily, info.FontSize, XFontStyle.Regular,
+                new XPdfFontOptions(PdfFontEncoding.Unicode));
 #if DEBUG && DEBUGOCR
-            gfx.DrawRectangle(new XPen(XColor.FromArgb(255, 0, 0)), info.Bounds);
-            tf.DrawString(info.Text, info.Font, XBrushes.Blue, info.Bounds);
+            gfx.DrawRectangle(new XPen(XColor.FromArgb(255, 0, 0)), info.X, info.Y - info.FontSize, gfx.MeasureString(info.Text, font).Width, info.FontSize); // approximate box above the baseline; TextDrawInfo has no Bounds
+            gfx.DrawString(info.Text, font, XBrushes.Blue, info.X, info.Y, XStringFormats.BaseLineLeft);
 #else
-            tf.DrawString(info.Text, info.Font, XBrushes.Transparent, info.Bounds);
+            gfx.DrawString(info.Text, font, XBrushes.Transparent, info.X, info.Y, XStringFormats.BaseLineLeft);
 #endif
         }
     }
@@ -420,13 +419,9 @@ public class PdfExporter
         Pdfium.PdfPage pdfiumPage, PdfiumFontSubsets fontSubsets, OcrResult ocrResult)
     {
         using XGraphics gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Prepend);
-        foreach (var element in ocrResult.Elements)
+        foreach (var info in GetOcrTextToDraw(page, ocrResult, gfx))
         {
-            var info = GetTextDrawInfo(page, gfx, ocrResult, element);
-            if (info == null) continue;
-
-            var fontName = PdfFontPicker.GetBestFont(element.LanguageCode);
-            var textObj = pdfiumDocument.NewText(fontSubsets[fontName], info.FontSize);
+            var textObj = pdfiumDocument.NewText(fontSubsets[info.FontFamily], info.FontSize);
 #if DEBUG && DEBUGOCR
             textObj.FillColor = (0, 0, 255, 255);
 #else
@@ -435,34 +430,78 @@ public class PdfExporter
             textObj.SetText(info.Text);
             // This ends up being slightly different alignment then the PdfSharp-based text. Maybe at some point we can
             // try to make them identical, although it's not perfect to begin with.
-            textObj.Matrix = new PdfMatrix(1, 0, 0, 1, info.X, (float) page.Height - (info.Y + info.TextHeight));
+            textObj.Matrix = new PdfMatrix(1, 0, 0, 1, info.X, (float) page.Height - info.Y);
             pdfiumPage.InsertObject(textObj);
         }
         pdfiumPage.GenerateContent();
     }
 
-    private static TextDrawInfo? GetTextDrawInfo(PdfPage page, XGraphics gfx, OcrResult ocrResult,
-        OcrResultElement element)
+    private static IEnumerable<TextDrawInfo> GetOcrTextToDraw(PdfPage page, OcrResult ocrResult, XGraphics gfx)
     {
-        if (string.IsNullOrEmpty(element.Text)) return null;
+        double hAdjust = page.Width / ocrResult.PageBounds.w; // horizontal scale from OCR coordinates to page units
+        double vAdjust = page.Height / ocrResult.PageBounds.h; // vertical scale from OCR coordinates to page units
+        foreach (var line in ocrResult.Lines)
+        {
+            var lineFontFamily = PdfFontPicker.GetBestFont(line.LanguageCode);
+            var lineFontSize = line.FontSize; // starting point; may be increased below and clamped per word
+            // Chinese/Japanese/Korean languages don't need font size alignment as words are generally just 1 char
+            if (!IsCjk(line.LanguageCode))
+            {
+                // Only measure words with at least 3 characters to avoid noise
+                var eligibleWords = line.Children.Where(word => word.Text.Length >= 3).ToList();
+                if (eligibleWords.Count > 1)
+                {
+                    // In case Tesseract underestimated the font size, keep increasing it as long as all words are still
+                    // within their bounds.
+                    while (true)
+                    {
+                        var font = new XFont(lineFontFamily, lineFontSize + 1, XFontStyle.Regular);
+                        if (eligibleWords.All(word => gfx.MeasureString(word.Text, font).Width < word.Bounds.w * hAdjust))
+                        {
+                            lineFontSize++;
+                        }
+                        else
+                        {
+                            break;
+                        }
+                    }
+                }
+            }
+            for (int i = 0; i < line.Children.Count; i++)
+            {
+                var word = line.Children[i];
+                if (string.IsNullOrEmpty(word.Text)) continue;
 
-        var adjustedBounds = AdjustBounds(element.Bounds, (float) page.Width / ocrResult.PageBounds.w,
-            (float) page.Height / ocrResult.PageBounds.h);
-        var adjustedFontSize = CalculateFontSize(element, adjustedBounds, gfx);
-        // Special case to avoid accidentally recognizing big lines as dashes/underscores
-        if (adjustedFontSize > 100 && (element.Text == "-" || element.Text == "_")) return null;
-        var font = new XFont(PdfFontPicker.GetBestFont(element.LanguageCode), adjustedFontSize, XFontStyle.Regular,
-            new XPdfFontOptions(PdfFontEncoding.Unicode));
-        var adjustedTextSize = gfx.MeasureString(element.Text, font);
-        var verticalOffset = (adjustedBounds.Height - adjustedTextSize.Height) / 2;
-        var horizontalOffset = (adjustedBounds.Width - adjustedTextSize.Width) / 2;
-        adjustedBounds.Offset((float) horizontalOffset, (float) verticalOffset);
+                var rightBound = i + 1 < line.Children.Count ? line.Children[i + 1].Bounds.x : -1; // -1 = no following word
+                var adjustedRightBound = rightBound * hAdjust;
+                var adjustedX = word.Bounds.x * hAdjust;
+                var adjustedY = word.Baseline * vAdjust; // the word is positioned by its baseline, not by its box
 
-        return new TextDrawInfo(
-            element.RightToLeft ? ReverseText(element.Text) : element.Text,
-            font,
-            adjustedBounds,
-            adjustedTextSize);
+                // We make sure there's enough distance between this word and the next to fit a space (" "), so that
+                // when you Ctrl+A and Ctrl+C in a PDF file, the words don't blend together
+                var wordFontSize = ClampFontSizeByRightBound(word, lineFontSize, adjustedX, adjustedRightBound, gfx);
+
+                // Special case to avoid accidentally recognizing big lines as dashes/underscores
+                if (wordFontSize > 100 && (word.Text == "-" || word.Text == "_")) continue;
+
+                yield return new TextDrawInfo(
+                    word.RightToLeft ? ReverseText(word.Text) : word.Text,
+                    lineFontFamily,
+                    wordFontSize,
+                    (int) Math.Round(adjustedX),
+                    (int) Math.Round(adjustedY));
+            }
+        }
+    }
+
+    private static bool IsCjk(string langCode)
+    {
+        // True when the language code maps to a Chinese (simplified or traditional), Japanese or Korean alphabet.
+        return PdfFontPicker.MapLanguageCodeToAlphabet(langCode) switch
+        {
+            Alphabet.ChineseSimplified or Alphabet.ChineseTraditional or Alphabet.Japanese or Alphabet.Korean => true,
+            _ => false
+        };
+    }
 
     private static string ReverseText(string text)
@@ -527,18 +566,34 @@ public class PdfExporter
         return (realWidth, realHeight);
     }
 
-    private static XRect AdjustBounds((int x, int y, int w, int h) bounds, float hAdjust, float vAdjust) =>
-        new XRect(bounds.x * hAdjust, bounds.y * vAdjust, bounds.w * hAdjust, bounds.h * vAdjust);
-
-    private static int CalculateFontSize(OcrResultElement element, XRect adjustedBounds, XGraphics gfx)
+    private static int ClampFontSizeByRightBound(OcrResultElement element, int initialFontSize, double x,
+        double rightBound,
+        XGraphics gfx)
     {
-        int fontSizeGuess = Math.Max(1, (int) (adjustedBounds.Height));
+        // Returns initialFontSize, reduced as needed (but not below 2) so that the word drawn at x, followed by
+        // one space, ends before rightBound. x and rightBound must be in the same units as widths measured by gfx.
+        var fontSize = initialFontSize;
+        // CJK text has no word separators, so there is no need to ensure space between words.
+        if (IsCjk(element.LanguageCode)) return fontSize;
+        // Clamping only makes sense against a bound to the right of x. A negative bound means there is no
+        // following word; a bound at or left of x means the following word starts left of this one (as can
+        // happen on right-to-left lines). In that case the loop below could never succeed and would shrink
+        // the word to the minimum size, so the size is left untouched instead.
+        if (rightBound < 0 || rightBound <= x) return fontSize;
+
         var fontFamily = PdfFontPicker.GetBestFont(element.LanguageCode);
-        var measuredBoundsForGuess =
-            gfx.MeasureString(element.Text, new XFont(fontFamily, fontSizeGuess, XFontStyle.Regular));
-        double adjustmentFactor = adjustedBounds.Width / measuredBoundsForGuess.Width;
-        int adjustedFontSize = Math.Max(1, (int) Math.Floor(fontSizeGuess * adjustmentFactor));
-        return adjustedFontSize;
+        while (fontSize > 2)
+        {
+            // One font instance per candidate size serves both measurements
+            var font = new XFont(fontFamily, fontSize, XFontStyle.Regular);
+            var spaceWidth = gfx.MeasureString(" ", font).Width;
+            if (gfx.MeasureString(element.Text, font).Width + x <= rightBound - spaceWidth)
+            {
+                break;
+            }
+            fontSize--;
+        }
+        return fontSize;
     }
 
     private static bool IsPdfStorage(IImageStorage storage) => storage switch
@@ -548,16 +603,7 @@ public class PdfExporter
         _ => false
     };
 
-    private record TextDrawInfo(string Text, XFont Font, XRect Bounds, XSize TextSize)
-    {
-        public int FontSize => (int) Font.Size;
-        public float X => (float) Bounds.X;
-        public float Y => (float) Bounds.Y;
-        public float Width => (float) Bounds.Width;
-        public float Height => (float) Bounds.Height;
-        public float TextWidth => (float) TextSize.Width;
-        public float TextHeight => (float) TextSize.Height;
-    }
+    private record TextDrawInfo(string Text, string FontFamily, int FontSize, int X, int Y);
 
     private class PageExportState
     {
diff --git a/NAPS2.Sdk/Pdf/PdfiumFontSubsets.cs b/NAPS2.Sdk/Pdf/PdfiumFontSubsets.cs
index 8c6d6cb35..178d88231 100644
--- a/NAPS2.Sdk/Pdf/PdfiumFontSubsets.cs
+++ b/NAPS2.Sdk/Pdf/PdfiumFontSubsets.cs
@@ -14,7 +14,7 @@ internal class PdfiumFontSubsets : IDisposable
     public PdfiumFontSubsets(PdfDocument pdfiumDocument, IEnumerable<OcrResult?> ocrResults)
     {
         var fontSubsetBuilders = new Dictionary<string, FontSubsetBuilder>();
-        foreach (var element in ocrResults.WhereNotNull().SelectMany(result => result.Elements))
+        foreach (var element in ocrResults.WhereNotNull().SelectMany(result => result.Words))
         {
             // Map the OCR language to a font that supports its glyphs
             var fontName = PdfFontPicker.GetBestFont(element.LanguageCode);