Switch from PdfSharp to a PdfSharpCore fork, using custom rendering logic

As well as helping move towards cross-platform support, this results in massive performance gains (which can probably be improved even further). However, there is some extra complexity that we can hopefully pare down later.
This commit is contained in:
Ben Olden-Cooligan 2022-08-03 19:44:51 -07:00
parent c0f50af463
commit 59223dfc7a
28 changed files with 355 additions and 182 deletions

View File

@ -104,7 +104,10 @@ public class GdiImage : IMemoryImage
public IMemoryImage Clone()
{
return new GdiImage((Bitmap) Bitmap.Clone());
var newImage = new GdiImage((Bitmap) Bitmap.Clone());
// TODO: We want to make original file format more consistent when copying around and transforming images
newImage._originalFileFormat = _originalFileFormat;
return newImage;
}
public IMemoryImage SafeClone()
@ -124,6 +127,7 @@ public class GdiImage : IMemoryImage
g.DrawImage(Bitmap, 0, 0, Width, Height);
var newImage = new GdiImage(newBitmap);
newImage.OriginalFileFormat = OriginalFileFormat;
newImage.SetResolution(HorizontalResolution, VerticalResolution);
return newImage;
}

View File

@ -133,8 +133,9 @@ public abstract class ImageContext
public abstract IMemoryImage Create(int width, int height, ImagePixelFormat pixelFormat);
public string SaveSmallestFormat(string pathWithoutExtension, IMemoryImage image, BitDepth bitDepth,
bool lossless, int quality, out ImageFileFormat imageFileFormat, bool encodeOnce = false)
bool lossless, int quality, out ImageFileFormat imageFileFormat)
{
// TODO: Should we save directly to the file?
var memoryStream = SaveSmallestFormatToMemoryStream(image, bitDepth, lossless, quality, out imageFileFormat);
var ext = imageFileFormat == ImageFileFormat.Png ? ".png" : ".jpg";
var path = pathWithoutExtension + ext;
@ -144,46 +145,25 @@ public abstract class ImageContext
}
public MemoryStream SaveSmallestFormatToMemoryStream(IMemoryImage image, BitDepth bitDepth, bool lossless,
int quality, out ImageFileFormat imageFileFormat, bool encodeOnce = false)
int quality, out ImageFileFormat imageFileFormat)
{
// Store the image in as little space as possible
if (image.PixelFormat == ImagePixelFormat.BW1)
var exportFormat = GetExportFormat(image, bitDepth, lossless);
if (exportFormat.FileFormat == ImageFileFormat.Png)
{
// Already encoded as 1-bit
imageFileFormat = ImageFileFormat.Png;
if (exportFormat.PixelFormat == ImagePixelFormat.BW1 && image.PixelFormat != ImagePixelFormat.BW1)
{
using var bwImage = PerformTransform(image.Clone(), new BlackWhiteTransform());
return bwImage.SaveToMemoryStream(ImageFileFormat.Png);
}
return image.SaveToMemoryStream(ImageFileFormat.Png);
}
if (bitDepth == BitDepth.BlackAndWhite)
if (exportFormat.FileFormat == ImageFileFormat.Jpeg)
{
// Convert to a 1-bit bitmap before saving to help compression
// This is lossless and takes up minimal storage (best of both worlds), so highQuality is irrelevant
using var bwImage = PerformTransform(image.Clone(), new BlackWhiteTransform());
imageFileFormat = ImageFileFormat.Png;
return bwImage.SaveToMemoryStream(ImageFileFormat.Png);
// Note that if a black and white image comes from native WIA, bitDepth is unknown,
// so the image will be png-encoded below instead of using a 1-bit bitmap
}
if (lossless || image.OriginalFileFormat == ImageFileFormat.Png)
{
// Store as PNG
// Lossless, but some images (color/grayscale) take up lots of storage
imageFileFormat = ImageFileFormat.Png;
return image.SaveToMemoryStream(ImageFileFormat.Png);
}
if (image.OriginalFileFormat == ImageFileFormat.Jpeg)
{
// Store as JPEG
// Since the image was originally in JPEG format, PNG is unlikely to have size benefits
imageFileFormat = ImageFileFormat.Jpeg;
return image.SaveToMemoryStream(ImageFileFormat.Jpeg);
return image.SaveToMemoryStream(ImageFileFormat.Jpeg, quality);
}
if (encodeOnce)
{
// If the caller doesn't want to do an extra encode for the chance of a smaller image, just go with jpeg
imageFileFormat = ImageFileFormat.Jpeg;
return image.SaveToMemoryStream(ImageFileFormat.Jpeg);
}
// Store as PNG/JPEG depending on which is smaller
// Save as PNG/JPEG depending on which is smaller
var pngEncoded = image.SaveToMemoryStream(ImageFileFormat.Png);
var jpegEncoded = image.SaveToMemoryStream(ImageFileFormat.Jpeg, quality);
if (pngEncoded.Length <= jpegEncoded.Length)
@ -196,4 +176,37 @@ public abstract class ImageContext
imageFileFormat = ImageFileFormat.Jpeg;
return jpegEncoded;
}
/// <summary>
/// Picks the file format and pixel format to export an image with, aiming to store it in as little space as
/// possible.
/// </summary>
/// <param name="image">The image to export; its pixel format and original file format are inspected.</param>
/// <param name="bitDepth">The bit depth associated with the image.</param>
/// <param name="lossless">Whether the image must be stored losslessly.</param>
/// <returns>
/// The selected export format. A file format of <see cref="ImageFileFormat.Unspecified"/> means there is no
/// inherent preference between JPEG and PNG and the caller can decide.
/// </returns>
public ImageExportFormat GetExportFormat(IMemoryImage image, BitDepth bitDepth, bool lossless)
{
    // 1-bit PNG covers two cases: the image is already encoded as 1-bit, or black and white was requested, in
    // which case converting to a 1-bit bitmap before saving helps compression. That is both lossless and
    // minimal in storage, so the lossless flag doesn't need to be consulted here.
    // Note that if a black and white image comes from native WIA, bitDepth is unknown, so the image will be
    // png-encoded by the rules below instead of using a 1-bit bitmap.
    if (image.PixelFormat == ImagePixelFormat.BW1 || bitDepth == BitDepth.BlackAndWhite)
    {
        return new ImageExportFormat(ImageFileFormat.Png, ImagePixelFormat.BW1);
    }

    // TODO: Also for ARGB32? Or is OriginalFileFormat enough if we populate that more consistently?
    bool storeAsPng = lossless || image.OriginalFileFormat == ImageFileFormat.Png;
    if (storeAsPng)
    {
        // PNG is lossless, but some images (color/grayscale) take up lots of storage.
        // The image's current pixel format is kept as-is.
        return new ImageExportFormat(ImageFileFormat.Png, image.PixelFormat);
    }

    // An image that was originally a JPEG is unlikely to have size benefits from PNG, so it stays JPEG.
    // Anything else has no inherent preference for Jpeg or Png, which is reported as Unspecified.
    var preferredFileFormat = image.OriginalFileFormat == ImageFileFormat.Jpeg
        ? ImageFileFormat.Jpeg
        : ImageFileFormat.Unspecified;
    return new ImageExportFormat(preferredFileFormat, ImagePixelFormat.RGB24);
}
}

View File

@ -0,0 +1,3 @@
namespace NAPS2.Images.Storage;
/// <summary>
/// Pairs the file format and the pixel format that an image should be exported with.
/// </summary>
/// <param name="FileFormat">The file format to encode the image as.</param>
/// <param name="PixelFormat">The pixel format the image should have when it is exported.</param>
public record ImageExportFormat(ImageFileFormat FileFormat, ImagePixelFormat PixelFormat);

View File

@ -11,9 +11,8 @@
<Import Project="..\NAPS2.Setup\LibUsers.targets" />
<ItemGroup>
<Reference Include="PdfSharp, Version=1.50.4589.0, Culture=neutral, PublicKeyToken=f94615aa0424f9eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\NAPS2.Setup\lib\PdfSharp.dll</HintPath>
<Reference Include="PdfSharpCore">
<HintPath>..\NAPS2.Setup\lib\PdfSharpCore.dll</HintPath>
</Reference>
<ProjectReference Include="..\NAPS2.Sdk.Tests\NAPS2.Sdk.Tests.csproj" />

View File

@ -9,12 +9,12 @@ namespace NAPS2.Sdk.Tests.Asserts;
public static class ImageAsserts
{
// JPEG artifacts seem to consistently create a RMSE of about 2.5.
// TODO: Use PNG or some other way to do a precise comparison.
public const double GENERAL_RMSE_THRESHOLD = 3.5;
public const double NULL_RMSE_THRESHOLD = 0.6;
private const double RESOLUTION_THRESHOLD = 0.1;
// TODO: See if we can narrow this down
private const double RESOLUTION_THRESHOLD = 0.05;
private const double DIMENSIONS_THRESHOLD = 0.05;

View File

@ -2,8 +2,8 @@
using NAPS2.Images.Gdi;
using NAPS2.ImportExport.Pdf;
using NAPS2.ImportExport.Pdf.Pdfium;
using PdfSharp.Pdf.IO;
using PdfSharp.Pdf.Security;
using PdfSharpCore.Pdf.IO;
using PdfSharpCore.Pdf.Security;
using Xunit;
namespace NAPS2.Sdk.Tests.Asserts;
@ -103,7 +103,8 @@ public static class PdfAsserts
using var page = doc.GetPage(pageIndex);
using var obj = PdfiumImageExtractor.GetSingleImageObject(page);
Assert.NotNull(obj);
Assert.True(obj.HasFilters(filters));
Assert.True(obj.HasImageFilters(filters),
$"Expected filters: {string.Join(",", filters)}, actual: {string.Join(",", obj.GetImageFilters())}");
}
}
}

View File

@ -1,7 +1,6 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
@ -230,6 +229,16 @@ namespace NAPS2.Sdk.Tests {
}
}
/// <summary>
/// Looks up a localized resource of type System.Drawing.Bitmap.
/// </summary>
internal static System.Drawing.Bitmap color_image_mask {
get {
object obj = ResourceManager.GetObject("color_image_mask", resourceCulture);
return ((System.Drawing.Bitmap)(obj));
}
}
/// <summary>
/// Looks up a localized resource of type System.Drawing.Bitmap.
/// </summary>

View File

@ -211,6 +211,9 @@
<data name="color_image_alpha" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>Resources\color_image_alpha.png;System.Drawing.Bitmap, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a</value>
</data>
<data name="color_image_mask" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>Resources\color_image_mask.png;System.Drawing.Bitmap, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a</value>
</data>
<data name="color_image_png" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>Resources\color_image.png;System.Drawing.Bitmap, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a</value>
</data>

View File

@ -23,11 +23,12 @@ public class PdfATests : ContextualTests
Parallel.ForEach(testCases, testCase =>
{
using var image = CreateScannedImage();
pdfExporter.Export(testCase.fileName, new[] { image }, new PdfExportParams
var path = Path.Combine(FolderPath, testCase.fileName);
pdfExporter.Export(path, new[] { image }, new PdfExportParams
{
Compat = testCase.pdfCompat
}).Wait();
PdfAsserts.AssertCompliant(testCase.profile, testCase.fileName);
PdfAsserts.AssertCompliant(testCase.profile, path);
});
}
}

View File

@ -25,7 +25,7 @@ public class PdfBenchmarkTests : ContextualTests
{
var filePath = Path.Combine(FolderPath, "test");
using var image = ScanningContext.CreateProcessedImage(new GdiImage(ImageResources.color_image_huge),
BitDepth.Color, true, -1, Enumerable.Empty<Transform>());
BitDepth.Color, false, -1, Enumerable.Empty<Transform>());
var pdfExporter = new PdfExporter(ScanningContext);
await pdfExporter.Export(filePath + ".pdf", new[] { image }, new PdfExportParams());
@ -61,7 +61,7 @@ public class PdfBenchmarkTests : ContextualTests
{
var filePath = Path.Combine(FolderPath, "test");
using var image = ScanningContext.CreateProcessedImage(new GdiImage(ImageResources.color_image_huge),
BitDepth.Color, true, -1, Enumerable.Empty<Transform>());
BitDepth.Color, false, -1, Enumerable.Empty<Transform>());
var pdfExporter = new PdfiumPdfExporter(ScanningContext);
await pdfExporter.Export(filePath + ".pdf", new[] { image }, new PdfExportParams());

View File

@ -45,6 +45,40 @@ public class PdfExporterTests : ContextualTests
PdfAsserts.AssertImageFilter(filePath, 0, "FlateDecode");
}
/// <summary>
/// Exports a single image built from the color_image_alpha resource to a PDF and checks the image filter
/// reported for page index 0.
/// </summary>
[Theory]
[ClassData(typeof(StorageAwareTestData))]
public async Task ExportAlphaImage(StorageConfig storageConfig)
{
    storageConfig.Apply(this);

    var pdfPath = Path.Combine(FolderPath, "test.pdf");
    var sourceImage = new GdiImage(ImageResources.color_image_alpha);
    using var processedImage = ScanningContext.CreateProcessedImage(sourceImage, BitDepth.Color, false, -1);
    var pages = new[] { processedImage };

    await _exporter.Export(pdfPath, pages, new PdfExportParams());

    // TODO: This assert is broken as pdfium rendering doesn't work for images with masks yet
    // PdfAsserts.AssertImages(pdfPath, ImageResources.color_image_alpha);
    PdfAsserts.AssertImageFilter(pdfPath, 0, "FlateDecode");
}
/// <summary>
/// Exports a single image built from the color_image_mask resource to a PDF and checks the image filter
/// reported for page index 0.
/// </summary>
[Theory]
[ClassData(typeof(StorageAwareTestData))]
public async Task ExportMaskedImage(StorageConfig storageConfig)
{
    storageConfig.Apply(this);

    var pdfPath = Path.Combine(FolderPath, "test.pdf");
    var sourceImage = new GdiImage(ImageResources.color_image_mask);
    using var processedImage = ScanningContext.CreateProcessedImage(sourceImage, BitDepth.Color, false, -1);
    var pages = new[] { processedImage };

    await _exporter.Export(pdfPath, pages, new PdfExportParams());

    // TODO: This assert is broken as pdfium rendering doesn't work for images with masks yet
    // PdfAsserts.AssertImages(pdfPath, ImageResources.color_image_alpha);
    // NOTE(review): the disabled assert compares against color_image_alpha although this test exports
    // color_image_mask - confirm which expected image is intended before re-enabling it.
    PdfAsserts.AssertImageFilter(pdfPath, 0, "FlateDecode");
}
[Theory]
[ClassData(typeof(StorageAwareTestData))]
public async Task ExportBlackAndWhiteImage(StorageConfig storageConfig)

View File

@ -11,9 +11,8 @@
<Import Project="..\NAPS2.Setup\SdkUsers.targets" />
<ItemGroup>
<Reference Include="PdfSharp, Version=1.50.4589.0, Culture=neutral, PublicKeyToken=f94615aa0424f9eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\NAPS2.Setup\lib\PdfSharp.dll</HintPath>
<Reference Include="PdfSharpCore">
<HintPath>..\NAPS2.Setup\lib\PdfSharpCore.dll</HintPath>
</Reference>
<ProjectReference Include="..\NAPS2.Sdk\NAPS2.Sdk.csproj" />

Binary file not shown.

Before

Width:  |  Height:  |  Size: 403 KiB

After

Width:  |  Height:  |  Size: 410 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 403 KiB

View File

@ -1,6 +1,6 @@
using System.Text;
using PdfSharp.Pdf;
using PdfSharp.Pdf.Advanced;
using PdfSharpCore.Pdf;
using PdfSharpCore.Pdf.Advanced;
namespace NAPS2.ImportExport.Pdf;
@ -45,7 +45,7 @@ public static class PdfAHelper
xmlns:pdfaid=""http://www.aiim.org/pdfa/ns/id/""
dc:format=""application/pdf""
pdf:Keywords=""{info.Keywords}""
pdf:Producer=""{PdfSharp.ProductVersionInfo.Producer}""
pdf:Producer=""{PdfSharpCore.ProductVersionInfo.Producer}""
xmp:CreateDate=""{info.CreationDate:yyyy'-'MM'-'dd'T'HH':'mm':'ssK}""
xmp:ModifyDate=""{info.ModificationDate:yyyy'-'MM'-'dd'T'HH':'mm':'ssK}""
xmp:CreatorTool=""{info.Creator}""

View File

@ -1,32 +1,21 @@
using System.Globalization;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using NAPS2.ImportExport.Pdf.Pdfium;
using NAPS2.Ocr;
using NAPS2.Scan;
using PdfSharp.Drawing;
using PdfSharp.Drawing.Layout;
using PdfSharp.Fonts;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
using PdfSharp.Pdf.Security;
using PdfDocument = PdfSharp.Pdf.PdfDocument;
using PdfPage = PdfSharp.Pdf.PdfPage;
using PdfSharpCore.Drawing;
using PdfSharpCore.Drawing.Layout;
using PdfSharpCore.Pdf;
using PdfSharpCore.Pdf.IO;
using PdfSharpCore.Pdf.Security;
using PdfDocument = PdfSharpCore.Pdf.PdfDocument;
using PdfPage = PdfSharpCore.Pdf.PdfPage;
namespace NAPS2.ImportExport.Pdf;
public class PdfExporter : IPdfExporter
{
static PdfExporter()
{
if (PlatformCompat.System.UseUnixFontResolver)
{
GlobalFontSettings.FontResolver = new UnixFontResolver();
}
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
}
private readonly ScanningContext _scanningContext;
public PdfExporter(ScanningContext scanningContext)
@ -60,64 +49,76 @@ public class PdfExporter : IPdfExporter
var imagePages = new List<PageExportState>();
var pdfPages = new List<PageExportState>();
int pageIndex = 0;
foreach (var image in images)
{
var pageState = new PageExportState(
image, pageIndex++, document, ocrEngine, ocrParams, cancelToken, exportParams.Compat);
// TODO: To improve our ability to passthrough, we could consider using Pdfium to apply the transform to
// the underlying PDF file. For example, doing color shifting on individual text + image objects, or
// applying matrix changes.
// TODO: We also can consider doing this even for scanned image transforms - e.g. for deskew, maybe
// rather than rasterize that, rely on the pdf to do the skew transform, which should render better at
// different scaling.
if (IsPdfStorage(image.Storage) && image.TransformState == TransformState.Empty)
{
pdfPages.Add(pageState);
}
else
{
imagePages.Add(pageState);
}
}
// TODO: Parallelize later
// TODO: Cancellation and progress reporting
var imagePagesPipeline = ocrEngine != null
? Pipeline.For(imagePages)
try
{
int pageIndex = 0;
foreach (var image in images)
{
var pageState = new PageExportState(
image, pageIndex++, document, ocrEngine, ocrParams, cancelToken, exportParams.Compat);
// TODO: To improve our ability to passthrough, we could consider using Pdfium to apply the transform to
// the underlying PDF file. For example, doing color shifting on individual text + image objects, or
// applying matrix changes.
// TODO: We also can consider doing this even for scanned image transforms - e.g. for deskew, maybe
// rather than rasterize that, rely on the pdf to do the skew transform, which should render better at
// different scaling.
if (IsPdfStorage(image.Storage) && image.TransformState == TransformState.Empty)
{
pdfPages.Add(pageState);
}
else
{
imagePages.Add(pageState);
}
}
// TODO: Parallelize later
// TODO: Cancellation and progress reporting
var imagePagesPipeline = ocrEngine != null
? Pipeline.For(imagePages)
.Step(RenderStep)
.Step(InitOcrStep)
.Step(WaitForOcrStep)
.Step(WriteToPdfSharpStep)
.Run()
: Pipeline.For(imagePages)
.Step(RenderStep)
.Step(WriteToPdfSharpStep)
.Run();
var pdfPagesPrePipeline = ocrEngine != null
? Pipeline.For(pdfPages).Step(CheckIfOcrNeededStep).Run()
: Task.FromResult(pdfPages);
await pdfPagesPrePipeline;
var pdfPagesOcrPipeline = Pipeline.For(pdfPages.Where(x => x.NeedsOcr))
.Step(RenderStep)
.Step(InitOcrStep)
.Step(WaitForOcrStep)
.Step(WriteToPdfSharpStep)
.Run()
: Pipeline.For(imagePages)
.Step(RenderStep)
.Step(WriteToPdfSharpStep)
.Run();
var pdfPagesPrePipeline = ocrEngine != null
? Pipeline.For(pdfPages).Step(CheckIfOcrNeededStep).Run()
: Task.FromResult(pdfPages);
await imagePagesPipeline;
await pdfPagesOcrPipeline;
await pdfPagesPrePipeline;
// TODO: Doing in memory as that's presumably faster than IO, but of course that's quite a bit of memory use potentially...
var stream = FinalizeAndSaveDocument(document, exportParams, out var placeholderPage);
var pdfPagesOcrPipeline = Pipeline.For(pdfPages.Where(x => x.NeedsOcr))
.Step(RenderStep)
.Step(InitOcrStep)
.Step(WaitForOcrStep)
.Step(WriteToPdfSharpStep)
.Run();
await imagePagesPipeline;
await pdfPagesOcrPipeline;
// TODO: Doing in memory as that's presumably faster than IO, but of course that's quite a bit of memory use potentially...
var stream = FinalizeAndSaveDocument(document, exportParams, out var placeholderPage);
var passthroughPages = pdfPages.Where(x => !x.NeedsOcr).ToList();
// TODO: We probably should just use PdfSharp to import the pages, as long as it supports it - just need some tests to cover the case when pdfsharp fails to load a pdf
// Although it makes me wonder if there are any cases where PdfSharp can mess up an imported file without an error...
MergePassthroughPages(stream, path, passthroughPages, exportParams, placeholderPage);
var passthroughPages = pdfPages.Where(x => !x.NeedsOcr).ToList();
// TODO: We probably should just use PdfSharp to import the pages, as long as it supports it - just need some tests to cover the case when pdfsharp fails to load a pdf
// Although it makes me wonder if there are any cases where PdfSharp can mess up an imported file without an error...
MergePassthroughPages(stream, path, passthroughPages, exportParams, placeholderPage);
}
finally
{
// TODO: Easier way to handle this?
foreach (var state in imagePages.Concat(pdfPages))
{
state.RenderedImage?.Dispose();
}
}
return true;
});
@ -230,11 +231,10 @@ public class PdfExporter : IPdfExporter
private PageExportState RenderStep(PageExportState state)
{
using var renderedImage = _scanningContext.ImageContext.Render(state.Image);
var renderedImage = _scanningContext.ImageContext.Render(state.Image);
var metadata = state.Image.Metadata;
state.RenderedStream = _scanningContext.ImageContext.SaveSmallestFormatToMemoryStream(
renderedImage, metadata.BitDepth, metadata.Lossless, -1, out var fileFormat, true);
state.FileFormat = fileFormat;
state.RenderedImage = renderedImage;
state.FileFormat = ImageFileFormat.Jpeg;
return state;
}
@ -243,10 +243,22 @@ public class PdfExporter : IPdfExporter
// TODO: Try and avoid locking somehow
lock (state.Document)
{
using var img = XImage.FromStream(state.RenderedStream);
// TODO: We need to serialize page adding somehow
PdfPage page = state.Document.AddPage();
DrawImageOnPage(page, img, state.Compat);
// TODO: Is there any way we can clean this up?
var exportFormat = _scanningContext.ImageContext.GetExportFormat(
state.RenderedImage!, state.Image.Metadata.BitDepth, state.Image.Metadata.Lossless);
if (exportFormat.FileFormat == ImageFileFormat.Unspecified)
{
exportFormat = exportFormat with { FileFormat = ImageFileFormat.Jpeg };
}
if (exportFormat.PixelFormat == ImagePixelFormat.BW1 &&
state.RenderedImage!.PixelFormat != ImagePixelFormat.BW1)
{
state.RenderedImage =
_scanningContext.ImageContext.PerformTransform(state.RenderedImage, new BlackWhiteTransform());
}
DrawImageOnPage(page, state.RenderedImage!, exportFormat, state.Compat);
// TODO: Maybe split this up to a different step
if (state.OcrTask?.Result != null)
{
@ -314,9 +326,7 @@ public class PdfExporter : IPdfExporter
// Save the image to a file for use in OCR.
// We don't need to delete this file as long as we pass it to OcrRequestQueue.Enqueue, which takes
// ownership and guarantees its eventual deletion.
using var fileStream = new FileStream(ocrTempFilePath, FileMode.CreateNew);
state.RenderedStream!.Seek(0, SeekOrigin.Begin);
state.RenderedStream.CopyTo(fileStream);
state.RenderedImage!.Save(ocrTempFilePath);
}
// Start OCR
@ -406,20 +416,21 @@ public class PdfExporter : IPdfExporter
return string.Concat(elements);
}
private static void DrawImageOnPage(PdfPage page, XImage img, PdfCompat compat)
private void DrawImageOnPage(PdfPage page, IMemoryImage image, ImageExportFormat exportFormat, PdfCompat compat)
{
using var xImage = XImage.FromImageSource(new ImageSource(image, exportFormat));
if (compat != PdfCompat.Default)
{
img.Interpolate = false;
xImage.Interpolate = false;
}
var (realWidth, realHeight) = GetRealSize(img);
var (realWidth, realHeight) = GetRealSize(image);
page.Width = realWidth;
page.Height = realHeight;
using XGraphics gfx = XGraphics.FromPdfPage(page);
gfx.DrawImage(img, 0, 0, realWidth, realHeight);
gfx.DrawImage(xImage, 0, 0, realWidth, realHeight);
}
private static (int width, int height) GetRealSize(XImage img)
private static (int width, int height) GetRealSize(IMemoryImage img)
{
double hAdjust = 72 / img.HorizontalResolution;
double vAdjust = 72 / img.VerticalResolution;
@ -427,8 +438,8 @@ public class PdfExporter : IPdfExporter
{
hAdjust = vAdjust = 0.75;
}
double realWidth = img.PixelWidth * hAdjust;
double realHeight = img.PixelHeight * vAdjust;
double realWidth = img.Width * hAdjust;
double realHeight = img.Height * vAdjust;
return ((int) realWidth, (int) realHeight);
}
@ -476,41 +487,104 @@ public class PdfExporter : IPdfExporter
public PdfCompat Compat { get; }
public bool NeedsOcr { get; set; }
public MemoryStream? RenderedStream { get; set; }
public IMemoryImage? RenderedImage { get; set; }
public ImageFileFormat FileFormat { get; set; }
public Task<OcrResult?>? OcrTask { get; set; }
public PdfDocument? PageDocument { get; set; }
}
private class UnixFontResolver : IFontResolver
private class ImageSource : IImageSource
{
private byte[]? _fontData;
private readonly IMemoryImage _image;
private readonly ImageExportFormat _exportFormat;
public FontResolverInfo ResolveTypeface(string familyName, bool isBold, bool isItalic)
public ImageSource(IMemoryImage image, ImageExportFormat exportFormat)
{
return new FontResolverInfo(familyName, isBold, isItalic);
_image = image;
_exportFormat = exportFormat;
}
public byte[] GetFont(string faceName)
public void SaveAsJpeg(MemoryStream ms)
{
if (_fontData == null)
_image.Save(ms, ImageFileFormat.Jpeg);
}
public unsafe void SaveAsPdfBitmap(MemoryStream ms)
{
var bytesPerPixel = _image.PixelFormat switch
{
var proc = Process.Start(new ProcessStartInfo
ImagePixelFormat.ARGB32 => 4,
ImagePixelFormat.RGB24 => 3,
_ => throw new InvalidOperationException("Expected 24 or 32 bit bitmap")
};
int height = _image.Height;
int width = _image.Width;
ms.SetLength(height * width * bytesPerPixel);
var buffer = ms.GetBuffer();
using var data = _image.Lock(LockMode.ReadOnly, out var scan0, out var stride);
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
FileName = "fc-list",
RedirectStandardOutput = true,
UseShellExecute = false
});
if (proc == null)
{
throw new InvalidOperationException("Could not get font data from fc-list");
var pixelData = (byte*) (scan0 + y * stride + x * bytesPerPixel);
int bufferIndex = ((height - y - 1) * width + x) * bytesPerPixel;
buffer[bufferIndex] = *pixelData;
buffer[bufferIndex + 1] = *(pixelData + 1);
buffer[bufferIndex + 2] = *(pixelData + 2);
if (bytesPerPixel == 4)
{
buffer[bufferIndex + 3] = *(pixelData + 3);
}
}
var fonts = proc.StandardOutput.ReadToEnd().Split('\n').Select(x => x.Split(':')[0]);
// TODO: Maybe add more fonts here?
var freeserif = fonts.First(f => f.EndsWith("FreeSerif.ttf", StringComparison.OrdinalIgnoreCase));
_fontData = File.ReadAllBytes(freeserif);
}
return _fontData;
}
public unsafe void SaveAsPdfIndexedBitmap(MemoryStream ms)
{
if (_image.PixelFormat != ImagePixelFormat.BW1)
throw new InvalidOperationException("Expected 1 bit bitmap");
int height = _image.Height;
int width = _image.Width;
int bytesPerRow = (width - 1) / 8 + 1;
ms.SetLength(height * bytesPerRow);
var buffer = ms.GetBuffer();
using var data = _image.Lock(LockMode.ReadOnly, out var scan0, out var stride);
for (int y = 0; y < height; y++)
{
for (int x = 0; x < bytesPerRow; x++)
{
var pixelData = (byte*) (scan0 + y * stride + x);
buffer[(height - y - 1) * bytesPerRow + x] = *pixelData;
}
}
}
public int Width => _image.Width;
public int Height => _image.Height;
public string? Name => null;
public XImageFormat ImageFormat
{
get
{
if (_exportFormat.FileFormat == ImageFileFormat.Jpeg)
{
return XImageFormat.Jpeg;
}
if (_exportFormat.PixelFormat == ImagePixelFormat.BW1)
{
return XImageFormat.Indexed;
}
if (_exportFormat.PixelFormat == ImagePixelFormat.ARGB32)
{
return XImageFormat.Argb32;
}
if (_exportFormat.PixelFormat == ImagePixelFormat.RGB24)
{
return XImageFormat.Rgb24;
}
throw new Exception("Unsupported pixel format");
}
}
}
}

View File

@ -17,6 +17,8 @@ public class PdfPage : NativePdfiumObject
public float Height => Native.FPDF_GetPageHeightF(Handle);
public bool HasTransparency => Native.FPDFPage_HasTransparency(Handle);
public PdfText GetText()
{
return new PdfText(Native.FPDFText_LoadPage(Handle));

View File

@ -5,17 +5,21 @@ namespace NAPS2.ImportExport.Pdf.Pdfium;
public class PdfPageObject : NativePdfiumObject
{
private readonly PdfDocument _document;
private readonly PdfPage? _page;
private readonly bool _owned;
internal PdfPageObject(IntPtr handle, PdfDocument document, PdfPage? page, bool owned) : base(handle)
{
_document = document;
_page = page;
Document = document;
Page = page;
_owned = owned;
}
public PdfDocument Document { get; }
public PdfPage? Page { get; }
public bool HasTransparency => Native.FPDFPageObj_HasTransparency(Handle);
public void SetBitmap(PdfBitmap bitmap)
{
if (!Native.FPDFImageObj_SetBitmap(IntPtr.Zero, 0, Handle, bitmap.Handle))
@ -104,7 +108,7 @@ public class PdfPageObject : NativePdfiumObject
public PdfBitmap GetRenderedBitmap()
{
return new PdfBitmap(
Native.FPDFImageObj_GetRenderedBitmap(_document.Handle, _page?.Handle ?? IntPtr.Zero, Handle));
Native.FPDFImageObj_GetRenderedBitmap(Document.Handle, Page?.Handle ?? IntPtr.Zero, Handle));
}
public byte[] GetImageDataRaw()
@ -128,7 +132,7 @@ public class PdfPageObject : NativePdfiumObject
get
{
var metadata = new PdfImageMetadata();
Native.FPDFImageObj_GetImageMetadata(Handle, _page?.Handle ?? IntPtr.Zero, ref metadata);
Native.FPDFImageObj_GetImageMetadata(Handle, Page?.Handle ?? IntPtr.Zero, ref metadata);
return metadata;
}
}
@ -151,7 +155,7 @@ public class PdfPageObject : NativePdfiumObject
}
}
public bool HasFilters(params string[] filters)
public bool HasImageFilters(params string[] filters)
{
if (filters.Length != ImageFilterCount)
{
@ -166,4 +170,15 @@ public class PdfPageObject : NativePdfiumObject
}
return true;
}
// TODO: Maybe clean up all these filter methods
/// <summary>
/// Collects the filter at every index of this image object into an array, in index order.
/// </summary>
/// <returns>
/// An array of length <see cref="ImageFilterCount"/> whose element at each index is the value of
/// <c>GetImageFilter</c> for that index.
/// </returns>
public string[] GetImageFilters()
{
    var names = new string[ImageFilterCount];
    int index = 0;
    while (index < names.Length)
    {
        names[index] = GetImageFilter(index);
        index++;
    }
    return names;
}
}

View File

@ -133,6 +133,8 @@ public class PdfiumNativeLibrary : Unmanaged.NativeLibrary
public delegate bool FPDFPageObj_SetMatrix_delegate(IntPtr page_object, ref PdfMatrix matrix);
public delegate bool FPDFPageObj_HasTransparency_delegate(IntPtr page_object);
public delegate bool FPDFImageObj_LoadJpegFile_delegate(IntPtr pages, int count, IntPtr image_object,
ref FPDF_FileAccess file_access);
@ -145,6 +147,8 @@ public class PdfiumNativeLibrary : Unmanaged.NativeLibrary
public delegate bool FPDFPage_RemoveObject_delegate(IntPtr page, IntPtr page_obj);
public delegate bool FPDFPage_HasTransparency_delegate(IntPtr page);
public delegate IntPtr FPDFImageObj_GetBitmap_delegate(IntPtr image_object);
public delegate IntPtr FPDFImageObj_GetImageDataRaw_delegate(IntPtr image_object, byte[]? buffer, IntPtr buflen);
@ -214,6 +218,7 @@ public class PdfiumNativeLibrary : Unmanaged.NativeLibrary
public FPDFPage_New_delegate FPDFPage_New => Load<FPDFPage_New_delegate>();
public FPDFPage_GenerateContent_delegate FPDFPage_GenerateContent => Load<FPDFPage_GenerateContent_delegate>();
public FPDFPageObj_SetMatrix_delegate FPDFPageObj_SetMatrix => Load<FPDFPageObj_SetMatrix_delegate>();
public FPDFPageObj_HasTransparency_delegate FPDFPageObj_HasTransparency => Load<FPDFPageObj_HasTransparency_delegate>();
public FPDFImageObj_LoadJpegFile_delegate FPDFImageObj_LoadJpegFile => Load<FPDFImageObj_LoadJpegFile_delegate>();
public FPDFImageObj_LoadJpegFileInline_delegate FPDFImageObj_LoadJpegFileInline =>
@ -222,6 +227,7 @@ public class PdfiumNativeLibrary : Unmanaged.NativeLibrary
public FPDFPage_CountObjects_delegate FPDFPage_CountObjects => Load<FPDFPage_CountObjects_delegate>();
public FPDFPage_GetObject_delegate FPDFPage_GetObject => Load<FPDFPage_GetObject_delegate>();
public FPDFPage_RemoveObject_delegate FPDFPage_RemoveObject => Load<FPDFPage_RemoveObject_delegate>();
public FPDFPage_HasTransparency_delegate FPDFPage_HasTransparency => Load<FPDFPage_HasTransparency_delegate>();
public FPDFImageObj_GetBitmap_delegate FPDFImageObj_GetBitmap => Load<FPDFImageObj_GetBitmap_delegate>();
public FPDFImageObj_GetImageDataRaw_delegate FPDFImageObj_GetImageDataRaw => Load<FPDFImageObj_GetImageDataRaw_delegate>();
public FPDFImageObj_GetImageDataDecoded_delegate FPDFImageObj_GetImageDataDecoded => Load<FPDFImageObj_GetImageDataDecoded_delegate>();

View File

@ -11,6 +11,7 @@ public class PdfiumBitmapFactory
_imageContext = imageContext;
}
// TODO: Not sure what to do with these two methods, probably just move them back to the calling classes
public unsafe IMemoryImage CopyPdfBitmapToNewImage(PdfBitmap pdfBitmap, PdfImageMetadata imageMetadata)
{
var dstImage = _imageContext.Create(pdfBitmap.Width, pdfBitmap.Height, pdfBitmap.Format);
@ -42,7 +43,7 @@ public class PdfiumBitmapFactory
int heightInPx = (int) Math.Round(heightInInches * dpi);
var bitmap = _imageContext.Create(widthInPx, heightInPx, ImagePixelFormat.RGB24);
bitmap.SetResolution(dpi, dpi);
bitmap.SetResolution((int) Math.Round(dpi), (int) Math.Round(dpi));
using var bitmapData = bitmap.Lock(LockMode.ReadWrite, out var scan0, out var stride);
using var pdfiumBitmap = PdfBitmap.CreateFromPointerBgr(widthInPx, heightInPx, scan0, stride);
pdfiumBitmap.FillRect(0, 0, widthInPx, heightInPx, PdfBitmap.WHITE);
@ -50,6 +51,7 @@ public class PdfiumBitmapFactory
return bitmap;
}
// TODO: Move the below methods into a non-pdfium specific class and include the export counterparts.
public unsafe IMemoryImage LoadRawRgb(byte[] buffer, PdfImageMetadata metadata)
{
var image = _imageContext.Create(metadata.Width, metadata.Height, ImagePixelFormat.RGB24);
@ -74,7 +76,7 @@ public class PdfiumBitmapFactory
public unsafe IMemoryImage LoadRawBlackAndWhite(byte[] buffer, PdfImageMetadata metadata)
{
var image = _imageContext.Create(metadata.Width, metadata.Height, ImagePixelFormat.RGB24);
var image = _imageContext.Create(metadata.Width, metadata.Height, ImagePixelFormat.BW1);
image.OriginalFileFormat = ImageFileFormat.Png;
using var data = image.Lock(LockMode.WriteOnly, out var scan0, out var stride);
@ -123,14 +125,14 @@ public class PdfiumBitmapFactory
{
Write(stream, buffer.Length + 0x10);
}
Write(stream, TiffBeforeData);
Write(stream, buffer);
if (buffer.Length % 2 == 1)
{
Write(stream, new byte[] { 0x00 });
}
Write(stream, TiffBeforeWidth);
Write(stream, metadata.Width);
Write(stream, TiffBeforeHeight);
@ -141,7 +143,7 @@ public class PdfiumBitmapFactory
Write(stream, buffer.Length);
Write(stream, TiffTrailer);
stream.Seek(0, SeekOrigin.Begin);
// TODO: If we need a TIFF hint for loading, it should go here.
return _imageContext.Load(stream);
}

View File

@ -13,7 +13,7 @@ public class PdfiumImageExtractor
var image = GetImageFromObject(imageContext, imageObj, metadata);
if (image != null)
{
image.SetResolution(metadata.HorizontalDpi, metadata.VerticalDpi);
image.SetResolution((int) Math.Round(metadata.HorizontalDpi), (int) Math.Round(metadata.VerticalDpi));
return image;
}
}
@ -26,12 +26,25 @@ public class PdfiumImageExtractor
PdfImageMetadata metadata)
{
var bitmapFactory = new PdfiumBitmapFactory(imageContext);
// TODO: This condition is never actually true for some reason. We need to use this code path if there is either a
// monochrome mask or a softmask. Might need a pdfium fix.
if (imageObj.HasTransparency)
{
// If the image has transparency, that implies the bitmap has a mask, so we need to use GetRenderedBitmap
// to apply the mask and get the correct image.
using var pdfBitmap = imageObj.GetRenderedBitmap();
if (pdfBitmap.Format is ImagePixelFormat.RGB24 or ImagePixelFormat.ARGB32)
{
return bitmapFactory.CopyPdfBitmapToNewImage(pdfBitmap, metadata);
}
return null;
}
// First try to read the raw image data; this is the most efficient path if we can handle it
if (imageObj.HasFilters("DCTDecode"))
if (imageObj.HasImageFilters("DCTDecode"))
{
return imageContext.Load(new MemoryStream(imageObj.GetImageDataRaw()));
}
if (imageObj.HasFilters("FlateDecode"))
if (imageObj.HasImageFilters("FlateDecode"))
{
if (metadata.BitsPerPixel == 24 && metadata.Colorspace == Colorspace.DeviceRgb)
{
@ -43,7 +56,7 @@ public class PdfiumImageExtractor
return bitmapFactory.LoadRawBlackAndWhite(imageObj.GetImageDataDecoded(), metadata);
}
}
if (imageObj.HasFilters("CCITTFaxDecode"))
if (imageObj.HasImageFilters("CCITTFaxDecode"))
{
return bitmapFactory.LoadRawCcitt(imageObj.GetImageDataDecoded(), metadata);
}
@ -52,6 +65,7 @@ public class PdfiumImageExtractor
// TODO: Maybe add support for black & white here too, with tests
// TODO: Also this won't have test coverage if everything is covered by the "raw" tests, maybe either find a
// test case or just have a switch to test this specifically
// TODO: Is 32-bit even possible here, given that alpha is implemented with masks?
if (metadata.BitsPerPixel == 24 || metadata.BitsPerPixel == 32)
{
using var pdfBitmap = imageObj.GetBitmap();

View File

@ -31,9 +31,11 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="SharpZipLib" Version="1.3.3" />
<PackageReference Include="SixLabors.Fonts" Version="1.0.0-beta18" />
<PackageReference Include="ZXing.Net.Bindings.Windows.Compatibility" Version="0.16.7" />
<Reference Include="PdfSharp">
<HintPath>..\NAPS2.Setup\lib\PdfSharp.dll</HintPath>
<Reference Include="PdfSharpCore">
<HintPath>..\NAPS2.Setup\lib\PdfSharpCore.dll</HintPath>
</Reference>
<PackageReference Include="Grpc.Tools" Version="2.26.0" PrivateAssets="all" />

View File

@ -10,8 +10,6 @@ public interface ISystemCompat
bool CanUseWin32 { get; }
bool UseUnixFontResolver { get; }
bool IsWia20Supported { get; }
bool UseSystemTesseract { get; }

View File

@ -17,8 +17,6 @@ public class LinuxSystemCompat : ISystemCompat
public bool CanUseWin32 => false;
public bool UseUnixFontResolver => true;
public bool UseSystemTesseract => true;
public string? TesseractExecutablePath => null;

View File

@ -17,8 +17,6 @@ public class MacSystemCompat : ISystemCompat
public bool CanUseWin32 => false;
public bool UseUnixFontResolver => true;
public bool UseSystemTesseract => true;
public string? TesseractExecutablePath => null;

View File

@ -15,8 +15,6 @@ public abstract class WindowsSystemCompat : ISystemCompat
public bool CanUseWin32 => true;
public bool UseUnixFontResolver => false;
public bool UseSystemTesseract => false;
public abstract string? TesseractExecutablePath { get; }

Binary file not shown.

Binary file not shown.