diff --git a/nixos/modules/services/cluster/spark/default.nix b/nixos/modules/services/cluster/spark/default.nix
index bf39c5537332..985ff24d62c8 100644
--- a/nixos/modules/services/cluster/spark/default.nix
+++ b/nixos/modules/services/cluster/spark/default.nix
@@ -69,8 +69,8 @@ with lib;
     confDir = mkOption {
       type = types.path;
       description = lib.mdDoc "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
-      default = "${cfg.package}/lib/${cfg.package.untarDir}/conf";
-      defaultText = literalExpression ''"''${package}/lib/''${package.untarDir}/conf"'';
+      default = "${cfg.package}/conf";
+      defaultText = literalExpression ''"''${package}/conf"'';
     };
     logDir = mkOption {
       type = types.path;
@@ -113,9 +113,9 @@ with lib;
         Type = "forking";
         User = "spark";
         Group = "spark";
-        WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
-        ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh";
-        ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh";
+        WorkingDirectory = "${cfg.package}/";
+        ExecStart = "${cfg.package}/sbin/start-master.sh";
+        ExecStop = "${cfg.package}/sbin/stop-master.sh";
         TimeoutSec = 300;
         StartLimitBurst=10;
         Restart = "always";
@@ -136,9 +136,9 @@ with lib;
       serviceConfig = {
         Type = "forking";
         User = "spark";
-        WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
-        ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}";
-        ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh";
+        WorkingDirectory = "${cfg.package}/";
+        ExecStart = "${cfg.package}/sbin/start-worker.sh spark://${cfg.worker.master}";
+        ExecStop = "${cfg.package}/sbin/stop-worker.sh";
         TimeoutSec = 300;
         StartLimitBurst=10;
         Restart = "always";
diff --git a/nixos/tests/spark/default.nix b/nixos/tests/spark/default.nix
index 63d6a5d44db8..eed7db35bf4f 100644
--- a/nixos/tests/spark/default.nix
+++ b/nixos/tests/spark/default.nix
@@ -41,6 +41,7 @@ let
       worker.wait_for_unit("spark-worker.service")
       worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
       assert "Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
+      worker.succeed("spark-submit --version | systemd-cat")
       worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
     '';
   });
diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index a95db8d005eb..0d5d2c1e4eb9 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -3,10 +3,9 @@
 , fetchzip
 , makeWrapper
 , jdk8
-, python3Packages
-, extraPythonPackages ? [ ]
+, python3
+, python310
 , coreutils
-, hadoopSupport ? true
 , hadoop
 , RSupport ? true
 , R
@@ -14,55 +13,43 @@
 }:
 let
-  spark = { pname, version, hash, extraMeta ? {} }:
+  spark = { pname, version, hash, extraMeta ? {}, pysparkPython ? python3 }:
     stdenv.mkDerivation (finalAttrs: {
-      inherit pname version hash;
-      jdk = if hadoopSupport then hadoop.jdk else jdk8;
+      inherit pname version hash hadoop R pysparkPython;
+      inherit (finalAttrs.hadoop) jdk;
       src = fetchzip {
         url = with finalAttrs; "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
         inherit (finalAttrs) hash;
       };
       nativeBuildInputs = [ makeWrapper ];
-      buildInputs = [ finalAttrs.jdk python3Packages.python ]
-        ++ extraPythonPackages
+      buildInputs = [ finalAttrs.jdk finalAttrs.pysparkPython ]
         ++ lib.optional RSupport R;
-      untarDir = with finalAttrs; "${pname}-${version}";
       installPhase = with finalAttrs; ''
-        mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
-        mv * $out/lib/${untarDir}
-
-        cp $out/lib/${untarDir}/conf/log4j.properties{.template,} || \
-        cp $out/lib/${untarDir}/conf/log4j2.properties{.template,}
-
-        cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
-        export JAVA_HOME="${jdk}"
-        export SPARK_HOME="$out/lib/${untarDir}"
-      '' + lib.optionalString hadoopSupport ''
-        export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
-      '' + ''
-        export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
-        export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
-        ${lib.optionalString RSupport ''
-          export SPARKR_R_SHELL="${R}/bin/R"
-          export PATH="\$PATH:${R}/bin"''}
-        EOF
-
-        for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
-          makeWrapper "$n" "$out/bin/$(basename $n)"
-          substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
+        mkdir -p "$out/opt"
+        mv * $out/
+        for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
+          wrapProgram "$n" --set JAVA_HOME "${jdk}" \
+            --run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
+            ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${R}/bin/R"''} \
+            --prefix PATH : "${
+              lib.makeBinPath (
+                [ pysparkPython ] ++
+                (lib.optionals RSupport [ R ])
+              )}"
         done
-        for n in $(find $out/lib/${untarDir}/sbin -type f); do
-          # Spark deprecated scripts with "slave" in the name.
-          # This line adds forward compatibility with the nixos spark module for
-          # older versions of spark that don't have the new "worker" scripts.
-          ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true
-        done
-        ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
+        ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
+        ${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}
       '';
-      passthru.tests = nixosTests.spark.default.passthru.override {
-        sparkPackage = finalAttrs.finalPackage;
+      passthru = {
+        tests = nixosTests.spark.default.passthru.override {
+          sparkPackage = finalAttrs.finalPackage;
+        };
+        # Add python packages to PYSPARK_PYTHON
+        withPythonPackages = f: finalAttrs.finalPackage.overrideAttrs (old: {
+          pysparkPython = old.pysparkPython.withPackages f;
+        });
       };
       meta = {
@@ -90,11 +77,13 @@ in
     pname = "spark";
     version = "3.3.3";
     hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
+    pysparkPython = python310;
   };
   spark_3_2 = spark rec {
     pname = "spark";
     version = "3.2.4";
     hash = "sha256-xL4W+dTWbvmmncq3/8iXmhp24rp5SftvoRfkTyxCI8E=";
+    pysparkPython = python310;
     extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
   };
 }
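
Usage note (not part of the patch): the `withPythonPackages` passthru added above can be exercised roughly as in the sketch below. The `pkgs.spark` attribute path and the `numpy`/`pandas` example packages are illustrative assumptions, not something this diff introduces.

    # Sketch: make extra Python packages visible to PYSPARK_PYTHON by using the
    # withPythonPackages helper from this patch (package names are examples only).
    { pkgs, ... }:
    {
      environment.systemPackages = [
        (pkgs.spark.withPythonPackages (ps: with ps; [ numpy pandas ]))
      ];
    }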