Add progress to parallel OCR

This commit is contained in:
Ben Olden-Cooligan 2016-06-13 19:39:57 -04:00
parent 4479ff8785
commit 79915adf9a
2 changed files with 77 additions and 28 deletions

View File

@ -23,6 +23,7 @@ using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Threading;
using NAPS2.Ocr;
using NAPS2.Scan.Images;
using NAPS2.Util;
@ -87,13 +88,12 @@ namespace NAPS2.ImportExport.Pdf
}
}
if (useOcr)
bool result = useOcr
? BuildDocumentWithOcr(progressCallback, document, images, ocrLanguageCode)
: BuildDocumentWithoutOcr(progressCallback, document, images);
if (!result)
{
BuildDocumentWithOcr(document, images, ocrLanguageCode);
}
else
{
BuildDocumentWithoutOcr(document, images);
return false;
}
PathHelper.EnsureParentDirExists(path);
@ -101,51 +101,69 @@ namespace NAPS2.ImportExport.Pdf
return true;
}
/// <summary>
/// Appends one PDF page per image, drawing each image so that it fills its page,
/// without adding any OCR text layer.
/// </summary>
/// <param name="progressCallback">
/// Called once per image, before that image is processed, with the number of
/// images already completed. Returning false stops the build.
/// </param>
/// <param name="document">The PDF document that receives the new pages.</param>
/// <param name="images">The images to add, one page each, in enumeration order.</param>
/// <returns>
/// True when every image was added; false when <paramref name="progressCallback"/>
/// asked to stop.
/// </returns>
private bool BuildDocumentWithoutOcr(Func<int, bool> progressCallback, PdfDocument document, IEnumerable<ScannedImage> images)
{
    int progress = 0;
    foreach (var image in images)
    {
        // Ask whether to continue before the image is decoded, so that a stop
        // request does not pay for loading a bitmap that is thrown away.
        if (!progressCallback(progress))
        {
            return false;
        }

        using (Stream stream = image.GetImageStream())
        using (var img = new Bitmap(stream))
        {
            // The page takes the image's size as computed by GetRealSize, and the
            // image is drawn from the origin at that same size.
            Size realSize = GetRealSize(img);
            PdfPage page = document.AddPage();
            page.Width = realSize.Width;
            page.Height = realSize.Height;
            using (XGraphics gfx = XGraphics.FromPdfPage(page))
            {
                gfx.DrawImage(img, 0, 0, realSize.Width, realSize.Height);
            }
        }

        progress++;
    }
    return true;
}
private void BuildDocumentWithOcr(PdfDocument document, IEnumerable<ScannedImage> images, string ocrLanguageCode)
private bool BuildDocumentWithOcr(Func<int, bool> progressCallback, PdfDocument document, IEnumerable<ScannedImage> images, string ocrLanguageCode)
{
int progress = 0;
Pipeline.For(images).Step(image =>
{
if (!progressCallback(progress))
{
return null;
}
using (Stream stream = image.GetImageStream())
using (var img = new Bitmap(stream))
{
float hAdjust = 72 / img.HorizontalResolution;
float vAdjust = 72 / img.VerticalResolution;
double realWidth = img.Width * hAdjust;
double realHeight = img.Height * vAdjust;
if (!progressCallback(progress))
{
return null;
}
Size realSize = GetRealSize(img);
PdfPage page;
lock (document)
{
page = document.AddPage();
page.Width = (int)realWidth;
page.Height = (int)realHeight;
page.Width = realSize.Width;
page.Height = realSize.Height;
using (XGraphics gfx = XGraphics.FromPdfPage(page))
{
gfx.DrawImage(img, 0, 0, (int)realWidth, (int)realHeight);
gfx.DrawImage(img, 0, 0, realSize.Width, realSize.Height);
}
}
if (!progressCallback(progress))
{
return null;
}
string tempImageFilePath = Path.Combine(Paths.Temp, Path.GetRandomFileName());
img.Save(tempImageFilePath);
@ -156,6 +174,11 @@ namespace NAPS2.ImportExport.Pdf
OcrResult ocrResult;
try
{
if (!progressCallback(progress))
{
return null;
}
ocrResult = ocrEngine.ProcessImage(tempImageFilePath, ocrLanguageCode);
}
finally
@ -170,6 +193,10 @@ namespace NAPS2.ImportExport.Pdf
{
return;
}
if (!progressCallback(progress))
{
return;
}
lock (document)
{
using (XGraphics gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Prepend))
@ -185,7 +212,18 @@ namespace NAPS2.ImportExport.Pdf
}
}
}
Interlocked.Increment(ref progress);
});
return progressCallback(progress);
}
/// <summary>
/// Converts an image's pixel dimensions into a size scaled by 72 units per
/// unit of the image's resolution, truncated to whole numbers.
/// </summary>
/// <param name="img">The bitmap whose pixel size and resolution are read.</param>
/// <returns>The scaled width and height, each truncated toward zero.</returns>
private static Size GetRealSize(Bitmap img)
{
    // NOTE(review): 72 is presumably the number of PDF points per inch, with the
    // bitmap resolution expressed in pixels per inch - confirm against PdfSharp.
    const float unitsPerInch = 72;

    double scaledWidth = img.Width * (unitsPerInch / img.HorizontalResolution);
    double scaledHeight = img.Height * (unitsPerInch / img.VerticalResolution);

    // The casts truncate; no rounding is applied.
    return new Size((int)scaledWidth, (int)scaledHeight);
}
private static RectangleF AdjustBounds(Rectangle b, float hAdjust, float vAdjust)

View File

@ -22,7 +22,7 @@ namespace NAPS2.Util
return new PipelineSource<T>(input);
}
public abstract class PipelineBase<T> : IPipelineSyntax<T>
private abstract class PipelineBase<T> : IPipelineSyntax<T>
{
public IPipelineSyntax<T2> Step<T2>(Func<T, T2> pipelineStepFunc)
{
@ -55,7 +55,7 @@ namespace NAPS2.Util
public abstract IEnumerable<T> GetOutput(List<Task> taskList);
}
public class PipelineSource<T> : PipelineBase<T>
private class PipelineSource<T> : PipelineBase<T>
{
private readonly IEnumerable<T> value;
@ -70,7 +70,7 @@ namespace NAPS2.Util
}
}
public class PipelineStep<T1, T2> : PipelineBase<T2>
private class PipelineStep<T1, T2> : PipelineBase<T2>
{
private readonly PipelineBase<T1> previous;
private readonly Func<T1, T2> func;
@ -91,7 +91,11 @@ namespace NAPS2.Util
{
foreach (var item in input)
{
collection.Add(func(item));
var result = func(item);
if (!ReferenceEquals(result, null))
{
collection.Add(result);
}
}
}
finally
@ -103,7 +107,7 @@ namespace NAPS2.Util
}
}
public class PipelineParallelStep<T1, T2> : PipelineBase<T2>
private class PipelineParallelStep<T1, T2> : PipelineBase<T2>
{
private readonly PipelineBase<T1> previous;
private readonly Func<T1, T2> func;
@ -122,7 +126,14 @@ namespace NAPS2.Util
{
try
{
Parallel.ForEach(input, item => collection.Add(func(item)));
Parallel.ForEach(input, item =>
{
var result = func(item);
if (!ReferenceEquals(result, null))
{
collection.Add(result);
}
});
}
finally
{