diff --git a/NAPS2.Sdk.Tests/Asserts/PdfAsserts.cs b/NAPS2.Sdk.Tests/Asserts/PdfAsserts.cs index 2053982b0..17c5d67ce 100644 --- a/NAPS2.Sdk.Tests/Asserts/PdfAsserts.cs +++ b/NAPS2.Sdk.Tests/Asserts/PdfAsserts.cs @@ -30,6 +30,10 @@ public static class PdfAsserts public static async Task AssertCompliant(string profile, string filePath) { + if (string.IsNullOrEmpty(profile)) + { + return; + } Assert.True(File.Exists(filePath)); var report = await LazyPdfAValidator.Value.ValidateWithDetailedReportAsync(filePath); Assert.True(report.Jobs.Job.ValidationReport.IsCompliant); @@ -128,4 +132,13 @@ public static class PdfAsserts $"Expected filters: {string.Join(",", filters)}, actual: {string.Join(",", obj.GetImageFilters())}"); } } + + public static void AssertVersion(int version, string filePath) + { + lock (PdfiumNativeLibrary.Instance) + { + using var doc = PdfDocument.Load(filePath); + Assert.Equal(version, doc.Version); + } + } } \ No newline at end of file diff --git a/NAPS2.Sdk.Tests/Pdf/PdfATests.cs b/NAPS2.Sdk.Tests/Pdf/PdfATests.cs index 356094cc9..2accbae13 100644 --- a/NAPS2.Sdk.Tests/Pdf/PdfATests.cs +++ b/NAPS2.Sdk.Tests/Pdf/PdfATests.cs @@ -1,35 +1,76 @@ +using NAPS2.Ocr; using NAPS2.Pdf; using NAPS2.Sdk.Tests.Asserts; +using Xunit; namespace NAPS2.Sdk.Tests.Pdf; -// TODO: Validate with OCR output -// TODO: Maaaybe validate with external import? We certainly can't guarantee it, but maybe some cases can be verified for best effort public class PdfATests : ContextualTests { - // Sadly the pdfa verifier library only supports windows/mac - [PlatformFact(exclude: PlatformFlags.Mac)] - public async Task Validate() - { - var pdfExporter = new PdfExporter(ScanningContext); - var testCases = new (PdfCompat pdfCompat, string profile, string fileName)[] - { - (PdfCompat.PdfA1B, "PDF/A-1B", "pdfa1b_test.pdf"), - (PdfCompat.PdfA2B, "PDF/A-2B", "pdfa2b_test.pdf"), - (PdfCompat.PdfA3B, "PDF/A-3B", "pdfa3b_test.pdf"), - (PdfCompat.PdfA3U, "PDF/A-3U", "pdfa3u_test.pdf") - }; + private readonly PdfExporter _pdfExporter; + private readonly string _path; + private readonly string _importPath; - var tasks = testCases.Select(testCase => - { - using var image = CreateScannedImage(); - var path = Path.Combine(FolderPath, testCase.fileName); - pdfExporter.Export(path, new[] { image }, new PdfExportParams - { - Compat = testCase.pdfCompat - }).Wait(); - return PdfAsserts.AssertCompliant(testCase.profile, path); - }).ToArray(); - await Task.WhenAll(tasks); + public PdfATests() + { + _pdfExporter = new PdfExporter(ScanningContext); + _path = Path.Combine(FolderPath, "test.pdf"); + _importPath = CopyResourceToFile(PdfResources.word_patcht_pdf, "word.pdf"); } + + // Sadly the pdfa verifier library only supports windows/linux + [PlatformTheory(exclude: PlatformFlags.Mac)] + [MemberData(nameof(TestCases))] + public async Task Validate(PdfCompat pdfCompat, string profile, int version) + { + await _pdfExporter.Export(_path, new[] { CreateScannedImage() }, new PdfExportParams + { + Compat = pdfCompat + }); + + PdfAsserts.AssertVersion(version, _path); + await PdfAsserts.AssertCompliant(profile, _path); + } + + [PlatformTheory(exclude: PlatformFlags.Mac)] + [MemberData(nameof(TestCases))] + public async Task ValidateWithOcr(PdfCompat pdfCompat, string profile, int version) + { + SetUpFakeOcr(ifNoMatch: "hello world"); + + await _pdfExporter.Export(_path, new[] { CreateScannedImage() }, new PdfExportParams + { + Compat = pdfCompat + }, new OcrParams("eng")); + + PdfAsserts.AssertVersion(version, _path); + await PdfAsserts.AssertCompliant(profile, _path); + } + + [PlatformTheory(exclude: PlatformFlags.Mac)] + [MemberData(nameof(TestCases))] + public async Task ValidateWithPdfium(PdfCompat pdfCompat, string profile, int version) + { + var images = await new PdfImporter(ScanningContext).Import(_importPath).ToListAsync(); + + await _pdfExporter.Export(_path, images, new PdfExportParams + { + Compat = pdfCompat + }); + + PdfAsserts.AssertVersion(version, _path); + await PdfAsserts.AssertCompliant(profile, _path); + } + + // Note that we don't have a Pdfium OCR test as we fail compliance due to the way Pdfium embeds fonts, which isn't + // practical to fix. + + public static IEnumerable TestCases = + [ + [PdfCompat.Default, "", 14], + [PdfCompat.PdfA1B, "PDF/A-1B", 14], + [PdfCompat.PdfA2B, "PDF/A-2B", 17], + [PdfCompat.PdfA3B, "PDF/A-3B", 17], + [PdfCompat.PdfA3U, "PDF/A-3U", 17] + ]; } \ No newline at end of file diff --git a/NAPS2.Sdk.Tests/PlatformTheoryAttribute.cs b/NAPS2.Sdk.Tests/PlatformTheoryAttribute.cs new file mode 100644 index 000000000..14269af06 --- /dev/null +++ b/NAPS2.Sdk.Tests/PlatformTheoryAttribute.cs @@ -0,0 +1,19 @@ +using System.Runtime.InteropServices; +using Xunit; + +namespace NAPS2.Sdk.Tests; + +public sealed class PlatformTheoryAttribute : TheoryAttribute +{ + public PlatformTheoryAttribute(PlatformFlags include = PlatformFlags.None, PlatformFlags exclude = PlatformFlags.None) + { + if (include != PlatformFlags.None && (CurrentPlatformFlags.Get() & include) != include) + { + Skip = $"Only runs on platform(s): {include}"; + } + if (exclude != PlatformFlags.None && (CurrentPlatformFlags.Get() & exclude) != PlatformFlags.None) + { + Skip = $"Doesn't run on platform(s): {exclude}"; + } + } +} \ No newline at end of file diff --git a/NAPS2.Sdk/Pdf/PdfAHelper.cs b/NAPS2.Sdk/Pdf/PdfAHelper.cs index d0dda5445..520fde64a 100644 --- a/NAPS2.Sdk/Pdf/PdfAHelper.cs +++ b/NAPS2.Sdk/Pdf/PdfAHelper.cs @@ -6,12 +6,12 @@ namespace NAPS2.Pdf; internal static class PdfAHelper { - public static void CreateXmpMetadata(PdfDocument document, PdfCompat compat) + public static void CreateXmpMetadata(PdfDocument document, PdfCompat compat, string producer) { var metadataDict = new PdfDictionary(document); metadataDict.Elements["/Type"] = new PdfName("/Metadata"); metadataDict.Elements["/Subtype"] = new PdfName("/XML"); - metadataDict.CreateStream(CreateRawXmpMetadata(document.Info, GetConformance(compat))); + metadataDict.CreateStream(CreateRawXmpMetadata(document.Info, GetConformance(compat), producer)); document.Internals.AddObject(metadataDict); document.Internals.Catalog.Elements["/Metadata"] = metadataDict.Reference; } @@ -33,7 +33,8 @@ internal static class PdfAHelper } } - private static byte[] CreateRawXmpMetadata(PdfDocumentInformation info, (string, string) conformance) + private static byte[] CreateRawXmpMetadata(PdfDocumentInformation info, (string, string) conformance, + string producer) { string xml = $@" @@ -45,7 +46,7 @@ internal static class PdfAHelper xmlns:pdfaid=""http://www.aiim.org/pdfa/ns/id/"" dc:format=""application/pdf"" pdf:Keywords=""{info.Keywords}"" - pdf:Producer=""{PdfSharpCore.ProductVersionInfo.Producer}"" + pdf:Producer=""{producer}"" xmp:CreateDate=""{info.CreationDate:yyyy'-'MM'-'dd'T'HH':'mm':'ssK}"" xmp:ModifyDate=""{info.ModificationDate:yyyy'-'MM'-'dd'T'HH':'mm':'ssK}"" xmp:CreatorTool=""{info.Creator}"" diff --git a/NAPS2.Sdk/Pdf/PdfExporter.cs b/NAPS2.Sdk/Pdf/PdfExporter.cs index cebb1fe1b..65ae2b992 100644 --- a/NAPS2.Sdk/Pdf/PdfExporter.cs +++ b/NAPS2.Sdk/Pdf/PdfExporter.cs @@ -23,6 +23,7 @@ public class PdfExporter { private const int PDF_VERSION_14 = 14; private const int PDF_VERSION_17 = 17; + private const string PDFIUM_PRODUCER = "PDFium"; private readonly ScanningContext _scanningContext; private readonly ILogger _logger; @@ -133,8 +134,9 @@ public class PdfExporter await pdfPagesOcrPipeline; if (progress.IsCancellationRequested) return false; + var producer = pdfPages.Any() ? PDFIUM_PRODUCER : PdfSharpCore.ProductVersionInfo.Producer; // TODO: Doing in memory as that's presumably faster than IO, but of course that's quite a bit of memory use potentially... - var stream = FinalizeAndSaveDocument(document, exportParams); + var stream = FinalizeAndSaveDocument(document, exportParams, producer); if (progress.IsCancellationRequested) return false; return MergePassthroughPages(stream, output, pdfPages, exportParams, progress); @@ -296,7 +298,8 @@ public class PdfExporter return state; } - private static MemoryStream FinalizeAndSaveDocument(PdfDocument document, PdfExportParams exportParams) + private static MemoryStream FinalizeAndSaveDocument(PdfDocument document, PdfExportParams exportParams, + string producer) { var compat = exportParams.Compat; var now = DateTime.Now; @@ -312,7 +315,7 @@ public class PdfExporter { PdfAHelper.SetColorProfile(document); PdfAHelper.SetCidMap(document); - PdfAHelper.CreateXmpMetadata(document, compat); + PdfAHelper.CreateXmpMetadata(document, compat, producer); } document.Version = compat switch @@ -465,7 +468,8 @@ public class PdfExporter while (true) { var font = new XFont(lineFontFamily, lineFontSize + 1, XFontStyle.Regular); - if (eligibleWords.All(word => gfx.MeasureString(word.Text, font).Width < word.Bounds.w * hAdjust)) + if (eligibleWords.All( + word => gfx.MeasureString(word.Text, font).Width < word.Bounds.w * hAdjust)) { lineFontSize++; } diff --git a/NAPS2.Sdk/Pdf/Pdfium/PdfDocument.cs b/NAPS2.Sdk/Pdf/Pdfium/PdfDocument.cs index 30d01dbce..3ffbf7366 100644 --- a/NAPS2.Sdk/Pdf/Pdfium/PdfDocument.cs +++ b/NAPS2.Sdk/Pdf/Pdfium/PdfDocument.cs @@ -43,6 +43,8 @@ internal class PdfDocument : NativePdfiumObject public int PageCount => Native.FPDF_GetPageCount(Handle); + public int? Version => Native.FPDF_GetFileVersion(Handle, out int version) ? version : null; + public PdfPage GetPage(int pageIndex) { return new PdfPage(Native.FPDF_LoadPage(Handle, pageIndex), this, pageIndex); diff --git a/NAPS2.Sdk/Pdf/Pdfium/PdfiumNativeLibrary.cs b/NAPS2.Sdk/Pdf/Pdfium/PdfiumNativeLibrary.cs index 94dcbc336..b89a7ce7f 100644 --- a/NAPS2.Sdk/Pdf/Pdfium/PdfiumNativeLibrary.cs +++ b/NAPS2.Sdk/Pdf/Pdfium/PdfiumNativeLibrary.cs @@ -73,6 +73,8 @@ internal class PdfiumNativeLibrary : Unmanaged.NativeLibrary public delegate bool FPDF_SaveAsCopy_delegate(IntPtr document, ref FPDF_FileWrite fileWrite, int flags); + public delegate bool FPDF_GetFileVersion_delegate(IntPtr document, out int fileVersion); + public delegate IntPtr FPDF_GetMetaText_delegate(IntPtr document, [MarshalAs(UnmanagedType.LPStr)] string tag, byte[]? buffer, IntPtr buflen); @@ -214,6 +216,7 @@ internal class PdfiumNativeLibrary : Unmanaged.NativeLibrary public FPDF_LoadMemDocument_delegate FPDF_LoadMemDocument => Load(); public FPDF_CloseDocument_delegate FPDF_CloseDocument => Load(); public FPDF_SaveAsCopy_delegate FPDF_SaveAsCopy => Load(); + public FPDF_GetFileVersion_delegate FPDF_GetFileVersion => Load(); public FPDF_GetMetaText_delegate FPDF_GetMetaText => Load(); public FPDF_GetPageCount_delegate FPDF_GetPageCount => Load(); public FPDF_LoadPage_delegate FPDF_LoadPage => Load();