From aa8aa7e2ea35ce655297e8322dc82bf77a31d04b Mon Sep 17 00:00:00 2001 From: happysalada Date: Fri, 25 Aug 2023 04:05:50 +0800 Subject: [PATCH] python310Packages.paddleocr: init at 2.7.0.1 --- .../python-modules/paddleocr/default.nix | 111 ++++++++++++++++++ .../paddleocr/remove-import-imaug.patch | 20 ++++ pkgs/top-level/python-packages.nix | 2 + 3 files changed, 133 insertions(+) create mode 100644 pkgs/development/python-modules/paddleocr/default.nix create mode 100644 pkgs/development/python-modules/paddleocr/remove-import-imaug.patch diff --git a/pkgs/development/python-modules/paddleocr/default.nix b/pkgs/development/python-modules/paddleocr/default.nix new file mode 100644 index 000000000000..9bd81077abfd --- /dev/null +++ b/pkgs/development/python-modules/paddleocr/default.nix @@ -0,0 +1,111 @@ +{ lib +, buildPythonPackage +, pythonRelaxDepsHook +, fetchFromGitHub +, attrdict +, beautifulsoup4 +, cython +, fire +, fonttools +, lmdb +, lxml +, numpy +, opencv4 +, openpyxl +, pdf2docx +, pillow +, premailer +, pyclipper +, pymupdf +, python-docx +, rapidfuzz +, scikit-image +, shapely +, tqdm +, paddlepaddle +, lanms-neo +, polygon3 +}: + +let + version = "2.7.0.1"; +in +buildPythonPackage { + pname = "paddleocr"; + inherit version; + format = "setuptools"; + + src = fetchFromGitHub { + owner = "PaddlePaddle"; + repo = "PaddleOCR"; + rev = "254786752a2659e184822b4b2de5637a05236590"; + hash = "sha256-M/Fpk9swX9Gds7o5poM9Iv6LOhKoZNbe0Wv9JNMPOU0="; + }; + + patches = [ + # The `ppocr.data.imaug` re-exports the `IaaAugment` and `CopyPaste` + # classes. These classes depend on the `imgaug` package which is + # unmaintained and has been removed from nixpkgs. + # + # The image OCR feature of PaddleOCR doesn't use these classes though, so + # it works even after stripping the `IaaAugment` and `CopyPaste` + # exports. It probably breaks some of the OCR model creation tooling that + # PaddleOCR provides, however. 
+ ./remove-import-imaug.patch + ]; + + nativeBuildInputs = [ pythonRelaxDepsHook ]; + # trying to relax only pymupdf makes the whole build fail + pythonRelaxDeps = true; + pythonRemoveDeps = [ + "imgaug" + "visualdl" + "opencv-python" + "opencv-contrib-python" + ]; + + propagatedBuildInputs = [ + attrdict + beautifulsoup4 + cython + fire + fonttools + lmdb + lxml + numpy + opencv4 + openpyxl + pdf2docx + pillow + premailer + pyclipper + pymupdf + python-docx + rapidfuzz + scikit-image + shapely + tqdm + paddlepaddle + lanms-neo + polygon3 + ]; + + # TODO: The tests depend, among possibly other things, on `cudatoolkit`. + # But Cudatoolkit fails to install. + # preCheck = "export HOME=$TMPDIR"; + # nativeCheckInputs = with pkgs; [ which cudatoolkit ]; + doCheck = false; + + meta = with lib; { + homepage = "https://github.com/PaddlePaddle/PaddleOCR"; + license = licenses.asl20; + description = "Multilingual OCR toolkits based on PaddlePaddle"; + longDescription = '' + PaddleOCR aims to create multilingual, awesome, leading, and practical OCR + tools that help users train better models and apply them into practice. 
+ ''; + changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}"; + maintainers = with maintainers; [ happysalada ]; + platforms = [ "x86_64-linux" "x86_64-darwin" "aarch64-darwin" ]; + }; +} diff --git a/pkgs/development/python-modules/paddleocr/remove-import-imaug.patch b/pkgs/development/python-modules/paddleocr/remove-import-imaug.patch new file mode 100644 index 000000000000..f24156fb6a12 --- /dev/null +++ b/pkgs/development/python-modules/paddleocr/remove-import-imaug.patch @@ -0,0 +1,20 @@ +diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py +index 121582b4..a6987c75 100644 +--- a/ppocr/data/imaug/__init__.py ++++ b/ppocr/data/imaug/__init__.py +@@ -16,7 +16,6 @@ from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + +-from .iaa_augment import IaaAugment + from .make_border_map import MakeBorderMap + from .make_shrink_map import MakeShrinkMap + from .random_crop_data import EastRandomCropData, RandomCropImgMask +@@ -30,7 +29,6 @@ from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, + RFLRecResizeImg, SVTRRecAug + from .ssl_img_aug import SSLRotateResize + from .randaugment import RandAugment +-from .copy_paste import CopyPaste + from .ColorJitter import ColorJitter + from .operators import * + from .label_ops import * diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index ef716ba704ee..f096bc9503d8 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -1215,6 +1215,8 @@ self: super: with self; { paddle2onnx = callPackage ../development/python-modules/paddle2onnx { }; + paddleocr = callPackage ../development/python-modules/paddleocr { }; + paddlepaddle = callPackage ../development/python-modules/paddlepaddle { }; pulumi = callPackage ../development/python-modules/pulumi { inherit (pkgs) pulumi; };