From 5c80cdeb1a0f6c8e32e7990025ec21d00cbdbbe2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Tue, 30 Jun 2020 07:56:38 +0200
Subject: [PATCH] python3Packages.pytorch: fix AArch64 build

aarch64-linux builds fail because of the use of opcodes in QNNPACK
that the GNU assembler does not support. This change cherry-picks an
upstream patch that fixes this. See:

https://github.com/pytorch/pytorch/issues/33124
https://github.com/pytorch/pytorch/pull/40584
---
 .../python-modules/pytorch/default.nix            | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pkgs/development/python-modules/pytorch/default.nix b/pkgs/development/python-modules/pytorch/default.nix
index 057d8dc74f01..a746d4c7dfb3 100644
--- a/pkgs/development/python-modules/pytorch/default.nix
+++ b/pkgs/development/python-modules/pytorch/default.nix
@@ -121,6 +121,21 @@ in buildPythonPackage rec {
     sha256 = "1xjbn4hi96m7xslv3p2jc6qcsng0fx3w1m6isqfah81piljf8wng";
   };
 
+  patches = lib.optionals stdenv.isAarch64 [
+    # GNU aarch64 assembler does not support 4s on neon mov:
+    # https://github.com/pytorch/pytorch/issues/33124
+    #
+    # Fix from:
+    # https://github.com/pytorch/pytorch/pull/40584
+    #
+    # This patch can be removed with the next major version (1.6.0).
+    (fetchpatch {
+      name = "qnnpack-neon-fix.patch";
+      url = "https://github.com/pytorch/pytorch/commit/7676682584d0caf9243bce74ea0a88711ec4a807.diff";
+      sha256 = "13spncaqlpsp8qk2850yly7xqwmhhfwznhmzkk8jgpslkbx75vgq";
+    })
+  ];
+
   preConfigure = lib.optionalString cudaSupport ''
     export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
     export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++