diff --git a/nixos/modules/services/cluster/spark/default.nix b/nixos/modules/services/cluster/spark/default.nix
index bf39c5537332..985ff24d62c8 100644
--- a/nixos/modules/services/cluster/spark/default.nix
+++ b/nixos/modules/services/cluster/spark/default.nix
@@ -69,8 +69,8 @@ with lib;
confDir = mkOption {
type = types.path;
description = lib.mdDoc "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
- default = "${cfg.package}/lib/${cfg.package.untarDir}/conf";
- defaultText = literalExpression ''"''${package}/lib/''${package.untarDir}/conf"'';
+ default = "${cfg.package}/conf";
+ defaultText = literalExpression ''"''${package}/conf"'';
};
logDir = mkOption {
type = types.path;
@@ -113,9 +113,9 @@ with lib;
Type = "forking";
User = "spark";
Group = "spark";
- WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
- ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh";
- ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh";
+ WorkingDirectory = "${cfg.package}/";
+ ExecStart = "${cfg.package}/sbin/start-master.sh";
+ ExecStop = "${cfg.package}/sbin/stop-master.sh";
TimeoutSec = 300;
StartLimitBurst=10;
Restart = "always";
@@ -136,9 +136,9 @@ with lib;
serviceConfig = {
Type = "forking";
User = "spark";
- WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
- ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}";
- ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh";
+ WorkingDirectory = "${cfg.package}/";
+ ExecStart = "${cfg.package}/sbin/start-worker.sh spark://${cfg.worker.master}";
+ ExecStop = "${cfg.package}/sbin/stop-worker.sh";
TimeoutSec = 300;
StartLimitBurst=10;
Restart = "always";
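The module hunks above drop the lib/${untarDir} indirection because the package (see the pkgs/ hunks below) now installs Spark at the root of its store path, so conf/, bin/ and sbin/ sit directly under cfg.package. A minimal sketch of a configuration exercising these options, assuming the option names as defined by this services.spark module (the "master:7077" address is illustrative and matches the module's conventional default):

    {
      services.spark = {
        master.enable = true;
        worker = {
          enable = true;
          master = "master:7077";
        };
      };
    }
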
diff --git a/nixos/tests/spark/default.nix b/nixos/tests/spark/default.nix
index 63d6a5d44db8..eed7db35bf4f 100644
--- a/nixos/tests/spark/default.nix
+++ b/nixos/tests/spark/default.nix
@@ -41,6 +41,7 @@ let
worker.wait_for_unit("spark-worker.service")
worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
assert "
Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
+ worker.succeed("spark-submit --version | systemd-cat")
worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
'';
});
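The added spark-submit line only logs the launcher version to the journal via systemd-cat before the real job submission runs, which makes JVM/Python version mismatches easier to spot in failed test logs. Assuming a nixpkgs checkout, the VM test should be buildable through the usual attribute, e.g.:

    nix-build -A nixosTests.spark
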
diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index a95db8d005eb..0d5d2c1e4eb9 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -3,10 +3,9 @@
, fetchzip
, makeWrapper
, jdk8
-, python3Packages
-, extraPythonPackages ? [ ]
+, python3
+, python310
, coreutils
-, hadoopSupport ? true
, hadoop
, RSupport ? true
, R
@@ -14,55 +13,43 @@
}:
let
- spark = { pname, version, hash, extraMeta ? {} }:
+ spark = { pname, version, hash, extraMeta ? {}, pysparkPython ? python3 }:
stdenv.mkDerivation (finalAttrs: {
- inherit pname version hash;
- jdk = if hadoopSupport then hadoop.jdk else jdk8;
+ inherit pname version hash hadoop R pysparkPython;
+ inherit (finalAttrs.hadoop) jdk;
src = fetchzip {
url = with finalAttrs; "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
inherit (finalAttrs) hash;
};
nativeBuildInputs = [ makeWrapper ];
- buildInputs = [ finalAttrs.jdk python3Packages.python ]
- ++ extraPythonPackages
+ buildInputs = [ finalAttrs.jdk finalAttrs.pysparkPython ]
++ lib.optional RSupport R;
- untarDir = with finalAttrs; "${pname}-${version}";
installPhase = with finalAttrs; ''
- mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
- mv * $out/lib/${untarDir}
-
- cp $out/lib/${untarDir}/conf/log4j.properties{.template,} || \
- cp $out/lib/${untarDir}/conf/log4j2.properties{.template,}
-
- cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
- export JAVA_HOME="${jdk}"
- export SPARK_HOME="$out/lib/${untarDir}"
- '' + lib.optionalString hadoopSupport ''
- export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
- '' + ''
- export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
- export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
- ${lib.optionalString RSupport ''
- export SPARKR_R_SHELL="${R}/bin/R"
- export PATH="\$PATH:${R}/bin"''}
- EOF
-
- for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
- makeWrapper "$n" "$out/bin/$(basename $n)"
- substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
+ mkdir -p "$out/opt"
+ mv * $out/
+ for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
+ wrapProgram "$n" --set JAVA_HOME "${jdk}" \
+ --run "[ -z SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
+ ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${R}/bin/R"''} \
+ --prefix PATH : "${
+ lib.makeBinPath (
+ [ pysparkPython ] ++
+ (lib.optionals RSupport [ R ])
+ )}"
done
- for n in $(find $out/lib/${untarDir}/sbin -type f); do
- # Spark deprecated scripts with "slave" in the name.
- # This line adds forward compatibility with the nixos spark module for
- # older versions of spark that don't have the new "worker" scripts.
- ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true
- done
- ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
+ ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
+ ${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}
'';
- passthru.tests = nixosTests.spark.default.passthru.override {
- sparkPackage = finalAttrs.finalPackage;
+ passthru = {
+ tests = nixosTests.spark.default.passthru.override {
+ sparkPackage = finalAttrs.finalPackage;
+ };
+ # Add python packages to PYSPARK_PYTHON
+ withPythonPackages = f: finalAttrs.finalPackage.overrideAttrs (old: {
+ pysparkPython = old.pysparkPython.withPackages f;
+ });
};
meta = {
@@ -90,11 +77,13 @@ in
pname = "spark";
version = "3.3.3";
hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
+ pysparkPython = python310;
};
spark_3_2 = spark rec {
pname = "spark";
version = "3.2.4";
hash = "sha256-xL4W+dTWbvmmncq3/8iXmhp24rp5SftvoRfkTyxCI8E=";
+ pysparkPython = python310;
extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
};
}
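
A usage sketch for the new withPythonPackages passthru; the numpy/pandas picks are illustrative only. Per the patch's own comment, the intent is that pysparkPython is swapped for a python.withPackages environment, which the wrapProgram loop above then places on the wrappers' PATH:

    with import <nixpkgs> { };
    spark.withPythonPackages (ps: with ps; [ numpy pandas ])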