Merge pull request #165226 from illustris/dask-yarn

This commit is contained in:
Sandro 2022-04-28 14:22:00 +02:00 committed by GitHub
commit b49cd9aeea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 95 additions and 72 deletions

View File

@ -0,0 +1,47 @@
# Nix expression packaging the dask-yarn Python module from its GitHub source.
{ lib
, buildPythonPackage
, fetchFromGitHub
, pytestCheckHook
, pythonOlder # NOTE(review): accepted but not referenced anywhere below
, distributed
, dask
, grpcio
, skein
}:

buildPythonPackage rec {
  pname = "dask-yarn";
  version = "0.9";

  # The repository name matches `pname`; the fetched revision is `version`.
  src = fetchFromGitHub {
    owner = "dask";
    repo = pname;
    rev = version;
    hash = "sha256-/BTsxQSiVQrihrCa9DE7pueyg3aPAdjd/Dt4dpUwdtM=";
  };

  # Runtime Python dependencies.
  propagatedBuildInputs = [ distributed dask grpcio skein ];

  # Check phase: confirm the module imports, then run the pytest hook.
  pythonImportsCheck = [ "dask_yarn" ];

  checkInputs = [ pytestCheckHook ];

  # Point HOME at the build's temporary directory before the checks run.
  preCheck = ''
    export HOME=$TMPDIR
  '';

  meta = {
    description = "Deploy dask on YARN clusters";
    homepage = "https://yarn.dask.org/";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.illustris ];
    # The text starts on the opening line, so no common indentation is
    # stripped; continuation lines stay flush-left to keep the string as-is.
    longDescription = ''Dask-Yarn deploys Dask on YARN clusters,
such as are found in traditional Hadoop installations.
Dask-Yarn provides an easy interface to quickly start,
stop, and scale Dask clusters natively from Python.
'';
  };
}

View File

@ -2,26 +2,29 @@
, callPackage
, fetchPypi
, isPy27
, jre
, lib
, pythonPackages
, cryptography
, grpcio
, pyyaml
, grpcio-tools
, hadoop
, pytestCheckHook
, python
}:
let
buildPythonPackage rec {
pname = "skein";
version = "0.8.1";
src = fetchPypi {
inherit pname version;
sha256 = "04208b4be9df2dc68ac5b3e3ae51fd9b589add95ea1b67222a8de754d17b1efa";
};
skeinJar = callPackage ./skeinjar.nix { inherit src version; };
in
buildPythonPackage rec {
inherit pname version src;
disabled = isPy27;
# Update this hash if bumping versions
jarHash = "sha256-UGiEoTZ17IhLG72FZ18Zb+Ej4T8z9rMIMDUxzSZGZyY=";
skeinJar = callPackage ./skeinjar.nix { inherit pname version jarHash; };
propagatedBuildInputs = with pythonPackages; [ cryptography grpcio grpcio-tools jupyter pytest pyyaml requests jre ];
propagatedBuildInputs = [ cryptography grpcio pyyaml ];
buildInputs = [ grpcio-tools ];
preBuild = ''
# Ensure skein.jar exists skips the maven build in setup.py
@ -29,12 +32,30 @@ buildPythonPackage rec {
ln -s ${skeinJar} skein/java/skein.jar
'';
postPatch = ''
substituteInPlace skein/core.py --replace "'yarn'" "'${hadoop}/bin/yarn'" \
--replace "else 'java'" "else '${hadoop.jdk}/bin/java'"
'';
pythonImportsCheck = [ "skein" ];
checkInputs = [ pytestCheckHook ];
# These tests require connecting to a YARN cluster. They could be done through NixOS tests later.
disabledTests = [
"test_ui"
"test_tornado"
"test_kv"
"test_core"
"test_cli"
];
meta = with lib; {
homepage = "https://jcristharif.com/skein";
description = "A tool and library for easily deploying applications on Apache YARN";
license = licenses.bsd3;
maintainers = with maintainers; [ alexbiehl ];
broken = true; # maven repo src isn't stable
maintainers = with maintainers; [ alexbiehl illustris ];
# https://github.com/NixOS/nixpkgs/issues/48663#issuecomment-1083031627
# replace with https://github.com/NixOS/nixpkgs/pull/140325 once it is merged
broken = lib.traceIf isPy27 "${pname} not supported on ${python.executable}" isPy27;
};
}

View File

@ -1,21 +1,20 @@
{ callPackage, stdenv, maven, src, version }:
{ fetchPypi, unzip, stdenv, pname, version, jarHash }:
let
skeinRepo = callPackage ./skeinrepo.nix { inherit src version; };
in
stdenv.mkDerivation rec {
pname = "skein.jar";
inherit pname version;
inherit version src;
src = fetchPypi {
inherit pname version;
format = "wheel";
hash = jarHash;
};
nativeBuildInputs = [ maven ];
dontUnpack = true;
buildPhase = ''
mvn --offline -f java/pom.xml package -Dmaven.repo.local="${skeinRepo}" -Dskein.version=${version} -Dversion=${version}
'';
nativeBuildInputs = [ unzip ];
installPhase = ''
# Making sure skein.jar exists skips the maven build in setup.py
mv java/target/skein-*.jar $out
unzip ${src}
mv ./skein/java/skein.jar $out
'';
}

View File

@ -1,44 +0,0 @@
# Derivation that fills a local Maven repository at $out by running `mvn`
# against the `java/pom.xml` found in `src`.
#
# Arguments:
#   autoPatchelfHook, maven - build tools (the hook is only added on Linux)
#   lib, stdenv             - nixpkgs library and standard environment
#   src, version            - supplied by the caller and inherited unchanged
#
# The result is pinned through outputHash*, with one hash for Linux and
# another for every other platform.
{ autoPatchelfHook, lib, maven, stdenv, src, version }:
stdenv.mkDerivation rec {
pname = "skein-maven-repo";
inherit version src;
nativeBuildInputs = [ maven ] ++ lib.optional stdenv.isLinux autoPatchelfHook;
# Steps performed by the script below:
#   1. For each architecture in `archs` (two Linux variants, or osx-x86_64
#      otherwise), fetch the protoc 3.0.0 and protoc-gen-grpc-java 1.16.0
#      executables into $out via `dependency:get`.
#   2. On Linux only, run autoPatchelf over $out.
#   3. Run `mvn package` with $out as the local repository.
#   4. Delete every `_remote.repositories` and `resolver-status.properties`
#      file under $out (presumably so the output hash stays stable - TODO confirm).
installPhase = ''
mkdir -p $out
archs="${
if stdenv.isLinux
then "linux-x86_32 linux-x86_64"
else "osx-x86_64"
}"
for arch in $archs
do
mvn -Dmaven.repo.local=$out dependency:get -Dartifact=com.google.protobuf:protoc:3.0.0:exe:$arch
mvn -Dmaven.repo.local=$out dependency:get -Dartifact=io.grpc:protoc-gen-grpc-java:1.16.0:exe:$arch
done
if ${ lib.boolToString stdenv.isLinux }
then
autoPatchelf $out
fi
# We have to use maven package here as dependency:go-offline doesn't
# fetch every required jar.
mvn -f java/pom.xml -Dmaven.repo.local=$out package
rm $(find $out -name _remote.repositories)
rm $(find $out -name resolver-status.properties)
'';
# Expected hash of the whole output tree; it differs per platform because
# different executables are fetched.
outputHashMode = "recursive";
outputHashAlgo = "sha256";
outputHash = if stdenv.isLinux
then "12f0q3444qw6y4f6qsa9540a0fz4cgi844zzi8z1phqn3k4dnl6v"
else "0bjbwiv17cary1isxca0m2hsvgs1i5fh18z247h1hky73lnhbrz8";
# On Linux, additionally set dontAutoPatchelf; installPhase already calls
# autoPatchelf explicitly (presumably this suppresses the hook's automatic
# run - verify against the hook's documentation).
} // lib.optionalAttrs stdenv.isLinux { dontAutoPatchelf = true; }

View File

@ -2049,6 +2049,8 @@ in {
dask-xgboost = callPackage ../development/python-modules/dask-xgboost { };
dask-yarn = callPackage ../development/python-modules/dask-yarn { };
databases = callPackage ../development/python-modules/databases { };
databricks-cli = callPackage ../development/python-modules/databricks-cli { };
@ -9404,9 +9406,7 @@ in {
six = callPackage ../development/python-modules/six { };
skein = callPackage ../development/python-modules/skein {
jre = pkgs.jre8; # TODO: remove override https://github.com/NixOS/nixpkgs/pull/89731
};
skein = callPackage ../development/python-modules/skein { };
skidl = callPackage ../development/python-modules/skidl { };