Implement in-memory pdf storage

This commit is contained in:
Ben Olden-Cooligan 2022-07-21 21:44:57 -07:00
parent 3f816c1a13
commit 71f8d9956f
13 changed files with 178 additions and 57 deletions

View File

@ -77,7 +77,6 @@ public class GdiImageContext : ImageContext
{
switch (storage)
{
// TODO: PDF memory storage?
case ImageFileStorage fileStorage:
if (MaybeRenderPdf(fileStorage, out var renderedPdf))
{
@ -90,8 +89,12 @@ public class GdiImageContext : ImageContext
// that seems like a reasonable tradeoff to avoid a whole class of hard-to-diagnose errors.
var stream = new MemoryStream(File.ReadAllBytes(fileStorage.FullPath));
return new GdiImage(new Bitmap(stream));
case MemoryStreamImageStorage memoryStreamStorage:
return new GdiImage(new Bitmap(memoryStreamStorage.Stream));
case ImageMemoryStorage memoryStorage:
if (MaybeRenderPdf(memoryStorage, out var renderedMemoryPdf))
{
return renderedMemoryPdf!;
}
return new GdiImage(new Bitmap(memoryStorage.Stream));
case GdiImage image:
return image.Clone();
}

View File

@ -3,4 +3,6 @@ namespace NAPS2.Images;
/// <summary>
/// Renders PDF documents to in-memory images, either from a file on disk or from a byte buffer.
/// </summary>
public interface IPdfRenderer
{
/// <summary>
/// Renders the PDF document stored in the file at <paramref name="path"/>.
/// </summary>
/// <param name="imageContext">The image context handed to the renderer.</param>
/// <param name="path">Path of the PDF file to render.</param>
/// <param name="defaultDpi">
/// Resolution used when rendering. NOTE(review): the Pdfium implementation only forwards this when it
/// rasterizes a whole page rather than copying an embedded bitmap - confirm for other implementations.
/// </param>
/// <returns>The rendered images.</returns>
IEnumerable<IMemoryImage> Render(ImageContext imageContext, string path, float defaultDpi);
/// <summary>
/// Renders a PDF document held in memory.
/// </summary>
/// <param name="imageContext">The image context handed to the renderer.</param>
/// <param name="buffer">Array containing the PDF data.</param>
/// <param name="length">
/// Number of bytes of PDF data; passed separately from <paramref name="buffer"/>, so it may be
/// smaller than the array length.
/// </param>
/// <param name="defaultDpi">Resolution used when rendering (see the path-based overload).</param>
/// <returns>The rendered images.</returns>
IEnumerable<IMemoryImage> Render(ImageContext imageContext, byte[] buffer, int length, float defaultDpi);
}

View File

@ -9,4 +9,15 @@ public static class ImageExtensions
stream.Seek(0, SeekOrigin.Begin);
return stream;
}
/// <summary>
/// Maps an image file format to the extension-style type hint string used for in-memory storage.
/// </summary>
/// <param name="imageFormat">The format to translate.</param>
/// <returns>
/// ".bmp", ".jpg" or ".png" for the corresponding formats; an empty string for any other value.
/// </returns>
public static string AsTypeHint(this ImageFileFormat imageFormat)
{
    switch (imageFormat)
    {
        case ImageFileFormat.Bmp:
            return ".bmp";
        case ImageFileFormat.Jpeg:
            return ".jpg";
        case ImageFileFormat.Png:
            return ".png";
        default:
            // Anything else has no known hint.
            return "";
    }
}
}

View File

@ -29,6 +29,23 @@ public abstract class ImageContext
return false;
}
/// <summary>
/// Renders the given in-memory storage to an image when its type hint identifies it as a PDF.
/// </summary>
/// <param name="memoryStorage">The in-memory storage to inspect.</param>
/// <param name="renderedPdf">
/// Receives the single rendered page when the storage is a PDF; otherwise null.
/// </param>
/// <returns>True if the storage was a PDF and has been rendered; false otherwise.</returns>
/// <exception cref="InvalidOperationException">
/// The storage is a PDF but this context has no <see cref="IPdfRenderer"/>, or the renderer did not
/// produce exactly one image.
/// </exception>
protected bool MaybeRenderPdf(ImageMemoryStorage memoryStorage, out IMemoryImage? renderedPdf)
{
    if (memoryStorage.TypeHint != ".pdf")
    {
        renderedPdf = null;
        return false;
    }
    if (_pdfRenderer == null)
    {
        throw new InvalidOperationException(
            "Unable to render pdf page as the ImageContext wasn't created with an IPdfRenderer.");
    }

    var stream = memoryStorage.Stream;
    byte[] buffer;
    int length;
    // MemoryStream.GetBuffer() throws UnauthorizedAccessException when the stream wraps a caller-supplied
    // array (e.g. new MemoryStream(bytes)), and returns the whole array even when the stream starts at a
    // non-zero origin. TryGetBuffer reports both cases, so we only share the backing array when it is
    // exposed and starts at offset 0, and otherwise fall back to a copy.
    if (stream.TryGetBuffer(out var segment) && segment.Offset == 0)
    {
        buffer = segment.Array!;
        length = segment.Count;
    }
    else
    {
        buffer = stream.ToArray();
        length = buffer.Length;
    }

    renderedPdf = _pdfRenderer.Render(this, buffer, length, 300).Single();
    return true;
}
// TODO: Describe ownership transfer
/// <summary>
/// Performs the specified transformation on the specified image using a compatible transformer.

View File

@ -5,17 +5,19 @@
/// image storage should use IMemoryImage, but storing a raw stream is a useful intermediate representation for some
/// serialization use cases where we don't know yet if the image will be stored in-memory or on disk.
/// </summary>
public class MemoryStreamImageStorage : IImageStorage
public class ImageMemoryStorage : IImageStorage
{
public MemoryStreamImageStorage(MemoryStream stream)
public ImageMemoryStorage(MemoryStream stream, string typeHint)
{
Stream = stream;
TypeHint = typeHint;
}
public MemoryStream Stream { get; }
public string TypeHint { get; }
public void Dispose()
{
Stream.Dispose();
}
}

View File

@ -6,8 +6,7 @@ public class StorageAwareTestData : IEnumerable<object[]>
{
public IEnumerator<object[]> GetEnumerator()
{
// TODO: Uncomment once working
// yield return new object[] { new StorageConfig.Memory() };
yield return new object[] { new StorageConfig.Memory() };
yield return new object[] { new StorageConfig.File() };
}

View File

@ -187,15 +187,24 @@ public class PdfSharpImporter : IPdfImporter
private async Task<ProcessedImage> ExportRawPdfPage(PdfPage page, ImportParams importParams)
{
// TODO: Handle no file storage (i.e. in-memory pdf storage)
string pdfPath = _scanningContext.FileStorageManager.NextFilePath() + ".pdf";
IImageStorage storage;
var document = new PdfDocument();
document.Pages.Add(page);
document.Save(pdfPath);
if (_scanningContext.FileStorageManager != null)
{
string pdfPath = _scanningContext.FileStorageManager.NextFilePath() + ".pdf";
document.Save(pdfPath);
storage = new ImageFileStorage(pdfPath);
}
else
{
var stream = new MemoryStream();
document.Save(stream);
storage = new ImageMemoryStorage(stream, ".pdf");
}
// TODO: Are we 100% sure we want ProcessedImage to support PDFs? Need to implement that.
var image = new ProcessedImage(
new ImageFileStorage(pdfPath),
storage,
new ImageMetadata(BitDepth.Color, false),
new PostProcessingData(),
TransformState.Empty);

View File

@ -9,11 +9,16 @@ public class PdfDocument : NativePdfiumObject
return new PdfDocument(Native.FPDF_LoadDocument(path, password));
}
/// <summary>
/// Loads a PDF document from a block of memory via <c>FPDF_LoadMemDocument</c>.
/// </summary>
/// <param name="buffer">Address of the first byte of the PDF data.</param>
/// <param name="length">Number of bytes of PDF data at <paramref name="buffer"/>.</param>
/// <param name="password">Optional document password; null when omitted.</param>
/// <returns>A <see cref="PdfDocument"/> wrapping the native handle returned by Pdfium.</returns>
/// <remarks>
/// NOTE(review): the renderer that calls this keeps the buffer pinned for as long as the document is
/// in use; check Pdfium's documentation for the exact buffer lifetime requirement.
/// </remarks>
public static PdfDocument Load(IntPtr buffer, int length, string? password = null)
{
    var nativeHandle = Native.FPDF_LoadMemDocument(buffer, length, password);
    return new PdfDocument(nativeHandle);
}
/// <summary>
/// Creates a new document via <c>FPDF_CreateNewDocument</c>.
/// </summary>
/// <returns>A <see cref="PdfDocument"/> wrapping the newly created native handle.</returns>
public static PdfDocument CreateNew()
{
    var nativeHandle = Native.FPDF_CreateNewDocument();
    return new PdfDocument(nativeHandle);
}
/// <summary>
/// Wraps an existing native document handle by forwarding it to the base class constructor.
/// Instances are obtained through the static factory methods.
/// </summary>
/// <param name="handle">The native document handle.</param>
private PdfDocument(IntPtr handle)
    : base(handle)
{
}
@ -43,6 +48,7 @@ public class PdfDocument : NativePdfiumObject
public void Save(string path)
{
using var stream = new FileStream(path, FileMode.Create);
int WriteBlock(IntPtr self, IntPtr data, ulong size)
{
var buffer = new byte[size];
@ -50,7 +56,7 @@ public class PdfDocument : NativePdfiumObject
stream.Write(buffer, 0, (int) size);
return 1;
}
PdfiumNativeLibrary.FPDF_FileWrite fileWrite = new()
{
WriteBlock = WriteBlock

View File

@ -1,4 +1,5 @@
using NAPS2.ImportExport.Pdf.Pdfium;
using System.Runtime.InteropServices;
using NAPS2.ImportExport.Pdf.Pdfium;
namespace NAPS2.ImportExport.Pdf;
@ -6,36 +7,63 @@ public class PdfiumPdfRenderer : IPdfRenderer
{
public IEnumerable<IMemoryImage> Render(ImageContext imageContext, string path, float defaultDpi)
{
var nativeLib = PdfiumNativeLibrary.LazyInstance.Value;
// Pdfium is not thread-safe
lock (nativeLib)
lock (PdfiumNativeLibrary.LazyInstance.Value)
{
using var doc = PdfDocument.Load(path);
var pageCount = doc.PageCount;
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
foreach (var memoryImage in RenderDocument(imageContext, defaultDpi, doc))
{
using var page = doc.GetPage(pageIndex);
using var imageObj = GetSingleImageObject(page);
if (imageObj != null)
{
// TODO: This could be wrong if the image object has a mask, but GetRenderedBitmap does a re-encode which we don't really want
// Ideally we would be do this conditionally based on the presence of a mask, if pdfium could provide us that info
using var pdfBitmap = imageObj.GetBitmap();
if (pdfBitmap.Format is ImagePixelFormat.RGB24 or ImagePixelFormat.ARGB32)
{
yield return CopyPdfBitmapToNewImage(imageContext, pdfBitmap, page);
continue;
}
}
yield return RenderPageToNewImage(imageContext, page, defaultDpi);
yield return memoryImage;
}
}
}
/// <summary>
/// Renders a PDF document held in a managed byte array.
/// </summary>
/// <param name="imageContext">The image context used to create the output images.</param>
/// <param name="buffer">Array containing the PDF data.</param>
/// <param name="length">Number of bytes of PDF data in <paramref name="buffer"/>.</param>
/// <param name="defaultDpi">Resolution forwarded to the page rendering.</param>
/// <returns>A lazily evaluated sequence of the rendered images.</returns>
/// <remarks>
/// This is an iterator: the lock and the pinned handle are acquired when enumeration starts and are
/// released when enumeration finishes or the enumerator is disposed.
/// </remarks>
public IEnumerable<IMemoryImage> Render(ImageContext imageContext, byte[] buffer, int length, float defaultDpi)
{
    // Pdfium is not thread-safe
    lock (PdfiumNativeLibrary.LazyInstance.Value)
    {
        // Pin the array so a stable address can be handed to native code.
        var pinnedBuffer = GCHandle.Alloc(buffer, GCHandleType.Pinned);
        try
        {
            var bufferAddress = pinnedBuffer.AddrOfPinnedObject();
            using var pdf = PdfDocument.Load(bufferAddress, length);
            foreach (var pageImage in RenderDocument(imageContext, defaultDpi, pdf))
            {
                yield return pageImage;
            }
        }
        finally
        {
            // Runs after the document has been disposed, so the buffer outlives the native document.
            pinnedBuffer.Free();
        }
    }
}
/// <summary>
/// Produces one image for each page of an already loaded document.
/// </summary>
/// <param name="imageContext">The image context used to create the output images.</param>
/// <param name="defaultDpi">Resolution used when a page has to be rendered as a whole.</param>
/// <param name="doc">The loaded document; it is not disposed here.</param>
/// <returns>A lazily evaluated sequence with one image per page, in page order.</returns>
private IEnumerable<IMemoryImage> RenderDocument(ImageContext imageContext, float defaultDpi, PdfDocument doc)
{
    var totalPages = doc.PageCount;
    for (int i = 0; i < totalPages; i++)
    {
        using var currentPage = doc.GetPage(i);
        using var soleImage = GetSingleImageObject(currentPage);
        if (soleImage != null)
        {
            // TODO: This could be wrong if the image object has a mask, but GetRenderedBitmap does a re-encode which we don't really want
            // Ideally we would do this conditionally based on the presence of a mask, if pdfium could provide us that info
            using var embeddedBitmap = soleImage.GetBitmap();
            bool usableAsIs = embeddedBitmap.Format is ImagePixelFormat.RGB24 or ImagePixelFormat.ARGB32;
            if (usableAsIs)
            {
                // Copy the embedded bitmap directly instead of rasterizing the page.
                yield return CopyPdfBitmapToNewImage(imageContext, embeddedBitmap, currentPage);
                continue;
            }
        }
        // No single usable image object on this page: render the whole page.
        yield return RenderPageToNewImage(imageContext, currentPage, defaultDpi);
    }
}
private PdfPageObject? GetSingleImageObject(PdfPage page)
{
using var pageText = page.GetText();

View File

@ -21,4 +21,9 @@ public class PdfiumWorkerCoordinator : IPdfRenderer
});
return new[] { image };
}
/// <summary>
/// Buffer-based rendering is not implemented by this coordinator yet.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IEnumerable<IMemoryImage> Render(ImageContext imageContext, byte[] buffer, int length, float defaultDpi) =>
    throw new NotImplementedException();
}

View File

@ -69,35 +69,68 @@ public class ScanningContext : IDisposable
}
/// <summary>
/// Converts the given storage to the representation used by this context: file-backed when a
/// <c>FileStorageManager</c> is set, in-memory otherwise.
/// </summary>
/// <param name="storage">The storage to convert.</param>
/// <param name="bitDepth">Forwarded to the file storage conversion.</param>
/// <param name="lossless">Forwarded to the file storage conversion.</param>
/// <param name="quality">Forwarded to the file storage conversion.</param>
/// <returns>The converted storage.</returns>
private IImageStorage ConvertStorageIfNeeded(IImageStorage storage, BitDepth bitDepth, bool lossless, int quality)
{
    // TODO: We should revisit existing tests and make sure we have coverage for both filestorage and non
    bool hasFileBackend = FileStorageManager != null;
    return hasFileBackend
        ? ConvertToFileStorage(storage, bitDepth, lossless, quality)
        : ConvertToMemoryStorage(storage);
}
private IImageStorage ConvertToMemoryStorage(IImageStorage storage)
{
switch (storage)
{
case IMemoryImage image:
if (FileStorageManager == null)
// TODO: Clone may not be enough, as the original bitmap could have a lock on the filesystem that should be released.
return image.Clone();
case ImageFileStorage fileStorage:
return ImageContext.Load(fileStorage.FullPath);
case ImageMemoryStorage memoryStorage:
if (memoryStorage.TypeHint == ".pdf")
{
// TODO: Clone may not be enough, as the original bitmap could have a lock on the filesystem that should be released.
return image.Clone();
return memoryStorage;
}
return ImageContext.Load(memoryStorage.Stream);
default:
// The only case that should hit this is a test with a mock
return storage;
}
}
private IImageStorage ConvertToFileStorage(IImageStorage storage, BitDepth bitDepth, bool lossless, int quality)
{
switch (storage)
{
case IMemoryImage image:
return WriteImageToBackingFile(image, bitDepth, lossless, quality);
case ImageFileStorage fileStorage:
if (FileStorageManager != null)
return fileStorage;
case ImageMemoryStorage memoryStorage:
if (memoryStorage.TypeHint == ".pdf")
{
return fileStorage;
}
// TODO: We should revisit existing tests and make sure we have coverage for both filestorage and non
return ImageContext.Load(fileStorage.FullPath);
case MemoryStreamImageStorage memoryStreamStorage:
var loadedImage = ImageContext.Load(memoryStreamStorage.Stream);
if (FileStorageManager == null)
{
return loadedImage;
return WriteDataToBackingFile(memoryStorage.Stream, ".pdf");
}
// TODO: Can we just write this to a file directly? Is there any case where SaveSmallestFormat is really needed?
var loadedImage = ImageContext.Load(memoryStorage.Stream);
return WriteImageToBackingFile(loadedImage, bitDepth, lossless, quality);
default:
// The only case that should hit this is a test with a mock
return storage;
}
// TODO: It probably makes sense to abstract this based on the type of backend (filestorage/not)
}
/// <summary>
/// Writes the full contents of a memory stream to a new file obtained from the
/// <c>FileStorageManager</c> and returns file storage pointing at it.
/// </summary>
/// <param name="stream">The data to write.</param>
/// <param name="ext">Extension (including the dot) appended to the generated file path.</param>
/// <returns>An <see cref="ImageFileStorage"/> for the newly written file.</returns>
/// <exception cref="InvalidOperationException">No <c>FileStorageManager</c> is set.</exception>
private ImageFileStorage WriteDataToBackingFile(MemoryStream stream, string ext)
{
    var manager = FileStorageManager ?? throw new InvalidOperationException();
    var path = manager.NextFilePath() + ext;
    // CreateNew makes the write fail rather than overwrite if the path already exists.
    using var output = new FileStream(path, FileMode.CreateNew, FileAccess.Write);
    stream.WriteTo(output);
    return new ImageFileStorage(path, false);
}
private IImageStorage WriteImageToBackingFile(IMemoryImage image, BitDepth bitDepth, bool lossless, int quality)

View File

@ -11,6 +11,7 @@ message SerializedImage {
SerializedImageMetadata metadata = 4;
bytes thumbnail = 5;
string renderedFilePath = 6;
string typeHint = 7;
}
message SerializedImageMetadata {

View File

@ -3,7 +3,7 @@ using NAPS2.Scan;
namespace NAPS2.Serialization;
// TODO: Add tests for this class
// TODO: Add tests for this class. Focus on use case tests (i.e. serialize + deserialize) rather than a bunch of tests to verify the generated proto.
public static class SerializedImageHelper
{
public static SerializedImage Serialize(ProcessedImage image, SerializeOptions options)
@ -48,20 +48,23 @@ public static class SerializedImageHelper
{
using var stream = File.OpenRead(fileStorage.FullPath);
result.FileContent = ByteString.FromStream(stream);
result.TypeHint = Path.GetExtension(fileStorage.FullPath).ToLowerInvariant();
}
else
{
result.FilePath = fileStorage.FullPath;
}
break;
case MemoryStreamImageStorage memoryStreamStorage:
result.FileContent = ByteString.FromStream(memoryStreamStorage.Stream);
case ImageMemoryStorage memoryStorage:
result.FileContent = ByteString.FromStream(memoryStorage.Stream);
result.TypeHint = memoryStorage.TypeHint;
break;
case IMemoryImage imageStorage:
var fileFormat = imageStorage.OriginalFileFormat == ImageFileFormat.Unspecified
? ImageFileFormat.Jpeg
: imageStorage.OriginalFileFormat;
result.FileContent = ByteString.FromStream(imageStorage.SaveToMemoryStream(fileFormat));
result.TypeHint = fileFormat.AsTypeHint();
break;
}
return result;
@ -79,6 +82,8 @@ public static class SerializedImageHelper
}
else if (options.ShareFileStorage)
{
// TODO: Think about what exactly the contract is for the serializer and image lifetime.
// For example, what happens when we copy an image, delete it, then try to paste?
storage = new ImageFileStorage(serializedImage.FilePath, true);
}
else
@ -92,8 +97,8 @@ public static class SerializedImageHelper
}
else
{
var memoryStream = new MemoryStream(serializedImage.FileContent.ToByteArray());
storage = new MemoryStreamImageStorage(memoryStream);
var stream = new MemoryStream(serializedImage.FileContent.ToByteArray());
storage = new ImageMemoryStorage(stream, serializedImage.TypeHint);
}
var processedImage = scanningContext.CreateProcessedImage(