mirror of
https://github.com/ilyakooo0/nixpkgs.git
synced 2025-01-04 01:42:53 +03:00
nixos/slurm: convert test from perl to python
This commit is contained in:
parent
472e165b56
commit
9b28dbd36a
@ -1,4 +1,4 @@
|
||||
import ./make-test.nix ({ lib, ... }:
|
||||
import ./make-test-python.nix ({ lib, ... }:
|
||||
let
|
||||
mungekey = "mungeverryweakkeybuteasytointegratoinatest";
|
||||
|
||||
@ -85,63 +85,57 @@ in {
|
||||
|
||||
testScript =
|
||||
''
|
||||
startAll;
|
||||
start_all()
|
||||
|
||||
# Set up authentification across the cluster
|
||||
foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
|
||||
{
|
||||
$node->waitForUnit("default.target");
|
||||
for node in [submit, control, dbd, node1, node2, node3]:
|
||||
|
||||
$node->succeed("mkdir /etc/munge");
|
||||
$node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
|
||||
$node->succeed("chmod 0400 /etc/munge/munge.key");
|
||||
$node->succeed("chown munge:munge /etc/munge/munge.key");
|
||||
$node->succeed("systemctl restart munged");
|
||||
node.wait_for_unit("default.target")
|
||||
|
||||
node.succeed("mkdir /etc/munge")
|
||||
node.succeed(
|
||||
"echo '${mungekey}' > /etc/munge/munge.key"
|
||||
)
|
||||
node.succeed("chmod 0400 /etc/munge/munge.key")
|
||||
node.succeed("chown munge:munge /etc/munge/munge.key")
|
||||
node.succeed("systemctl restart munged")
|
||||
|
||||
node.wait_for_unit("munged")
|
||||
|
||||
$node->waitForUnit("munged");
|
||||
};
|
||||
|
||||
# Restart the services since they have probably failed due to the munge init
|
||||
# failure
|
||||
subtest "can_start_slurmdbd", sub {
|
||||
$dbd->succeed("systemctl restart slurmdbd");
|
||||
$dbd->waitForUnit("slurmdbd.service");
|
||||
$dbd->waitForOpenPort(6819);
|
||||
};
|
||||
with subtest("can_start_slurmdbd"):
|
||||
dbd.succeed("systemctl restart slurmdbd")
|
||||
dbd.wait_for_unit("slurmdbd.service")
|
||||
dbd.wait_for_open_port(6819)
|
||||
|
||||
# there needs to be an entry for the current
|
||||
# cluster in the database before slurmctld is restarted
|
||||
subtest "add_account", sub {
|
||||
$control->succeed("sacctmgr -i add cluster default");
|
||||
with subtest("add_account"):
|
||||
control.succeed("sacctmgr -i add cluster default")
|
||||
# check for cluster entry
|
||||
$control->succeed("sacctmgr list cluster | awk '{ print \$1 }' | grep default");
|
||||
};
|
||||
control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
|
||||
|
||||
subtest "can_start_slurmctld", sub {
|
||||
$control->succeed("systemctl restart slurmctld");
|
||||
$control->waitForUnit("slurmctld.service");
|
||||
};
|
||||
with subtest("can_start_slurmctld"):
|
||||
control.succeed("systemctl restart slurmctld")
|
||||
control.waitForUnit("slurmctld.service")
|
||||
|
||||
subtest "can_start_slurmd", sub {
|
||||
foreach my $node (($node1,$node2,$node3))
|
||||
{
|
||||
$node->succeed("systemctl restart slurmd.service");
|
||||
$node->waitForUnit("slurmd");
|
||||
}
|
||||
};
|
||||
with subtest("can_start_slurmd"):
|
||||
for node in [node1, node2, node3]:
|
||||
node.succeed("systemctl restart slurmd.service")
|
||||
node.wait_for_unit("slurmd")
|
||||
|
||||
# Test that the cluster works and can distribute jobs;
|
||||
|
||||
subtest "run_distributed_command", sub {
|
||||
with subtest("run_distributed_command"):
|
||||
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
|
||||
# The output must contain the 3 different names
|
||||
$submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
|
||||
};
|
||||
submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
|
||||
|
||||
subtest "check_slurm_dbd", sub {
|
||||
with subtest("check_slurm_dbd"):
|
||||
# find the srun job from above in the database
|
||||
sleep 5;
|
||||
$control->succeed("sacct | grep hostname");
|
||||
};
|
||||
control.succeed("sleep 5")
|
||||
control.succeed("sacct | grep hostname")
|
||||
'';
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user