Skip to content

Commit

Permalink
Support Slurm 23.11 (#54)
Browse files Browse the repository at this point in the history
* Support Slurm 23.11
* Fix user shell for Debian
* Manage cli_filter.lua like other configless managed configs
* Add dirs parameter to slurm::job_container defined type
* Add shared parameter to slurm::job_container
* Make cgroup_signal_child_processes optional
* Add reconfig_ignore_errors parameter
* Support newer systemd module
* scrun.lua is now part of the configless deployment
  • Loading branch information
treydock authored Mar 25, 2024
1 parent 8092040 commit e9f9645
Show file tree
Hide file tree
Showing 25 changed files with 173 additions and 117 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@ Manage SLURM.

### Supported Versions of SLURM

This module is designed to work with SLURM 21.08.x, 22.05.x and 23.02.x.
This module is designed to work with SLURM 22.05.x, 23.02.x and 23.11.x.

| SLURM Version | SLURM Puppet module versions |
| ----------------- | -----------------------------|
| 20.02.x | 0.x |
| 20.11.x | 1.x |
| 21.08.x & 22.05.x | 2.x |
| 23.02.x | 3.x |
| 23.11.x | 4.x |

## Usage

Expand Down
1 change: 1 addition & 0 deletions data/os/Debian.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ slurm::source_dependencies:
- libyaml-dev
slurm::env_dir: /etc/default
slurm::slurmrestd_user_group: nogroup
slurm::slurm_user_shell: /usr/sbin/nologin
1 change: 1 addition & 0 deletions data/os/Ubuntu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
slurm::slurm_user_shell: /sbin/nologin
2 changes: 2 additions & 0 deletions hiera.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ defaults: # Used for any hierarchy level that omits these keys.
hierarchy:
- name: 'os name major release'
path: "os/%{facts.os.name}/%{facts.os.release.major}.yaml"
- name: 'os name'
path: "os/%{facts.os.name}.yaml"
- name: 'os family major release'
path: "os/%{facts.os.family}/%{facts.os.release.major}.yaml"
- name: 'os family'
Expand Down
50 changes: 24 additions & 26 deletions manifests/common/config.pp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@
$slurm::nodesets.each |$name, $_nodeset| {
slurm::nodeset { $name: * => $_nodeset }
}
}

if $slurm::manage_slurm_conf and ! $slurm::configless {
concat { 'slurm-topology.conf':
ensure => 'present',
path => $slurm::topology_conf_path,
Expand Down Expand Up @@ -139,35 +137,35 @@
source => $slurm::oci_conf_source,
notify => $slurm::service_notify,
}
}

if ($slurm::client or $slurm::slurmctld) and ($slurm::cli_filter_lua_source or $slurm::cli_filter_lua_content) {
file { "${slurm::conf_dir}/cli_filter.lua":
ensure => 'file',
owner => 'root',
group => 'root',
mode => '0644',
source => $slurm::cli_filter_lua_source,
content => $slurm::cli_filter_lua_content,
}
if $slurm::cli_filter_lua_source or $slurm::cli_filter_lua_content {
file { "${slurm::conf_dir}/cli_filter.lua":
ensure => 'file',
owner => 'root',
group => 'root',
mode => '0644',
source => $slurm::cli_filter_lua_source,
content => $slurm::cli_filter_lua_content,
}

if $slurm::slurmctld and $slurm::enable_configless {
File["${slurm::conf_dir}/cli_filter.lua"] ~> Exec['scontrol reconfig']
if $slurm::slurmctld and $slurm::enable_configless {
File["${slurm::conf_dir}/cli_filter.lua"] ~> Exec['scontrol reconfig']
}
}
}

if ($slurm::client or $slurm::slurmctld) and ($slurm::scrun_lua_source or $slurm::scrun_lua_content) {
file { "${slurm::conf_dir}/scrun.lua":
ensure => 'file',
owner => 'root',
group => 'root',
mode => '0644',
source => $slurm::scrun_lua_source,
content => $slurm::scrun_lua_content,
}
if $slurm::scrun_lua_source or $slurm::scrun_lua_content {
file { "${slurm::conf_dir}/scrun.lua":
ensure => 'file',
owner => 'root',
group => 'root',
mode => '0644',
source => $slurm::scrun_lua_source,
content => $slurm::scrun_lua_content,
}

if $slurm::slurmctld and $slurm::enable_configless {
File["${slurm::conf_dir}/scrun.lua"] ~> Exec['scontrol reconfig']
if $slurm::slurmctld and $slurm::enable_configless {
File["${slurm::conf_dir}/scrun.lua"] ~> Exec['scontrol reconfig']
}
}
}

Expand Down
10 changes: 6 additions & 4 deletions manifests/init.pp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
# @param reload_services
# @param restart_services
# @param slurmctld_conn_validator_timeout
# @param reconfig_ignore_errors
# @param manage_slurm_user
# @param slurm_user_group
# @param slurm_group_gid
Expand Down Expand Up @@ -145,7 +146,6 @@
# @param slurmrestd_restart_on_failure
# @param cgroup_conf_template
# @param cgroup_conf_source
# @param cgroup_automount
# @param cgroup_mountpoint
# @param cgroup_plugin
# @param cgroup_allowed_ram_space
Expand All @@ -158,6 +158,7 @@
# @param cgroup_max_swap_percent
# @param cgroup_memory_swappiness
# @param cgroup_min_ram_space
# @param cgroup_signal_child_processes
# @param oci_conf_template
# @param oci_conf_source
# @param oci_container_path
Expand Down Expand Up @@ -217,7 +218,7 @@
Boolean $install_pam = true,

# Source install
String $version = '21.08.8',
String $version = '23.11.5',
Array $source_dependencies = [],
Array $configure_flags = [],
Boolean $source_install_manage_alternatives = true,
Expand All @@ -240,6 +241,7 @@
Boolean $reload_services = false,
Boolean $restart_services = true,
Integer $slurmctld_conn_validator_timeout = 60,
Boolean $reconfig_ignore_errors = false,

# User and group management
Boolean $manage_slurm_user = true,
Expand Down Expand Up @@ -385,9 +387,8 @@
# cgroups
String $cgroup_conf_template = 'slurm/cgroup/cgroup.conf.erb',
Optional[String] $cgroup_conf_source = undef,
Boolean $cgroup_automount = true,
Stdlib::Absolutepath $cgroup_mountpoint = '/sys/fs/cgroup',
Optional[String] $cgroup_plugin = undef,
String $cgroup_plugin = 'autodetect',
Integer $cgroup_allowed_ram_space = 100,
Integer $cgroup_allowed_swap_space = 0,
Boolean $cgroup_constrain_cores = false,
Expand All @@ -398,6 +399,7 @@
Integer $cgroup_max_swap_percent = 100,
Optional[Integer[0,100]] $cgroup_memory_swappiness = undef,
Integer $cgroup_min_ram_space = 30,
Optional[Boolean] $cgroup_signal_child_processes = undef,

# OCI
String $oci_conf_template = 'slurm/oci.conf.erb',
Expand Down
39 changes: 32 additions & 7 deletions manifests/job_container.pp
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,37 @@
# job_container.conf BasePath
# @param auto_base_path
# job_container.conf AutoBasePath
# @param dirs
# job_container.conf Dirs
# @param init_script
# job_container.conf InitScript
# @param node_name
# job_container.conf NodeName
# @param shared
# job_container.conf Shared
# @param order
# Order in job_container.conf
#
define slurm::job_container (
Stdlib::Absolutepath $base_path,
Boolean $auto_base_path = false,
Optional[Array[Stdlib::Absolutepath]] $dirs = undef,
Optional[Stdlib::Absolutepath] $init_script = undef,
Optional[String] $node_name = undef,
Optional[Boolean] $shared = undef,
Variant[String[1], Integer] $order = '50',
) {
include slurm

$_base_path = "BasePath=${base_path}"
$_auto_base_path = "AutoBasePath=${auto_base_path}"

if $dirs {
$_dirs = "Dirs=${dirs.join(',')}"
} else {
$_dirs = undef
}

if $init_script {
$_init_script = "InitScript=${init_script}"
} else {
Expand All @@ -35,17 +47,30 @@
$node_param = undef
}

$params = [
$node_param,
$_auto_base_path,
$_base_path,
$_init_script,
].filter |$p| { $p =~ NotUndef }
if $shared !~ Undef {
$shared_param = "Shared=${shared}"
} else {
$shared_param = undef
}

if $node_name {
$params = [
$node_param,
$_auto_base_path,
$_base_path,
$_dirs,
$_init_script,
$shared_param,
].filter |$p| { $p =~ NotUndef }
$content = "${strip(join($params, ' '))}\n"
} else {
$content = join($params, "\n")
$params = [
$_base_path,
$_dirs,
$_init_script,
$shared_param,
].filter |$p| { $p =~ NotUndef }
$content = "${_auto_base_path}\n${params.join(' ')}"
}

concat::fragment { "job_container.conf-${name}":
Expand Down
16 changes: 7 additions & 9 deletions manifests/params.pp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
'CliFilterPlugins' => undef,
'CommunicationParameters' => undef,
'CompleteWait' => '0',
'CoreSpecPlugin' => 'core_spec/none',
'CoreSpecPlugin' => undef,
'CpuFreqDef' => undef,
'CpuFreqGovernors' => 'OnDemand,Performance,UserSpace',
'CredType' => 'cred/munge',
Expand All @@ -46,8 +46,6 @@
'EioTimeout' => '60',
'EnforcePartLimits' => 'NO',
'EpilogMsgTime' => '2000',
'ExtSensorsFreq' => undef,
'ExtSensorsType' => undef,
'FairShareDampeningFactor' => '1',
'FederationParameters' => undef,
'FirstJobId' => '1',
Expand All @@ -63,7 +61,7 @@
'JobAcctGatherType' => 'jobacct_gather/cgroup',
'JobAcctGatherFrequency' => 'task=30,energy=0,network=0,filesystem=0',
'JobAcctGatherParams' => undef,
'JobCompType' => 'jobcomp/none',
'JobCompType' => undef,
'JobContainerType' => undef,
'JobFileAppend' => undef,
'JobRequeue' => '1',
Expand Down Expand Up @@ -107,7 +105,7 @@
'PriorityParameters' => undef,
'PrioritySiteFactorParameters' => undef,
'PrioritySiteFactorPlugin' => 'site_factor/none',
'PriorityType' => 'priority/basic',
'PriorityType' => 'priority/multifactor',
'PriorityUsageResetPeriod' => 'NONE',
'PriorityWeightAge' => '0',
'PriorityWeightAssoc' => '0',
Expand Down Expand Up @@ -138,7 +136,7 @@
'SchedulerTimeSlice' => '30',
'SchedulerType' => 'sched/backfill',
'ScronParameters' => undef,
'SelectType' => 'select/linear',
'SelectType' => 'select/cons_tres',
'SelectTypeParameters' => undef,
'SlurmctldAddr' => undef,
'SlurmctldDebug' => 'info',
Expand All @@ -162,15 +160,15 @@
'SuspendRate' => '60',
'SuspendTime' => '-1',
'SuspendTimeout' => '30',
'SwitchType' => 'switch/none',
'SwitchType' => undef,
'TaskPlugin' => 'task/affinity,task/cgroup',
'TaskPluginParam' => undef,
'TCPTimeout' => '2',
'TmpFS' => '/tmp',
'TopologyParam' => undef,
'TopologyPlugin' => 'topology/none',
'TopologyPlugin' => undef,
'TrackWCKey' => undef,
'TreeWidth' => '50',
'TreeWidth' => '16',
'UnkillableStepProgram' => undef,
'UnkillableStepTimeout' => '60',
'UsePAM' => '0',
Expand Down
7 changes: 7 additions & 0 deletions manifests/slurmctld/service.pp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,14 @@
hasrestart => true,
}

if $slurm::reconfig_ignore_errors {
$reconfig_command = 'scontrol reconfig || exit 0'
} else {
$reconfig_command = 'scontrol reconfig'
}

exec { 'scontrol reconfig':
command => $reconfig_command,
path => '/usr/bin:/bin:/usr/sbin:/sbin',
refreshonly => true,
}
Expand Down
2 changes: 1 addition & 1 deletion metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
},
{
"name": "puppet/systemd",
"version_requirement": ">= 3.1.0 <6.0.0"
"version_requirement": ">= 3.1.0 <7.0.0"
},
{
"name": "puppet/archive",
Expand Down
6 changes: 1 addition & 5 deletions spec/acceptance/01_slurmd_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@
require 'spec_helper_acceptance'

describe 'slurmd' do
if ['docker', 'hyperv'].include?(fact('virtual'))
let(:slurm_user) { 'root' }
else
let(:slurm_user) { 'slurm' }
end
let(:slurm_user) { 'slurm' }

context 'with default parameters' do
nodes = hosts_as('slurmd')
Expand Down
6 changes: 1 addition & 5 deletions spec/acceptance/02_slurmdbd_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@
require 'spec_helper_acceptance'

describe 'slurmdbd' do
if ['docker', 'hyperv'].include?(fact('virtual'))
let(:slurm_user) { 'root' }
else
let(:slurm_user) { 'slurm' }
end
let(:slurm_user) { 'slurm' }

context 'with default parameters' do
nodes = hosts_as('slurmdbd')
Expand Down
6 changes: 1 addition & 5 deletions spec/acceptance/03_slurmctld_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@
require 'spec_helper_acceptance'

describe 'slurmctld' do
if ['docker', 'hyperv'].include?(fact('virtual'))
let(:slurm_user) { 'root' }
else
let(:slurm_user) { 'slurm' }
end
let(:slurm_user) { 'slurm' }

context 'with default parameters' do
nodes = hosts_as('slurmctld')
Expand Down
6 changes: 1 addition & 5 deletions spec/acceptance/05_slurmrestd_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@
require 'spec_helper_acceptance'

describe 'slurmrestd' do
if ['docker', 'hyperv'].include?(fact('virtual'))
let(:slurm_user) { 'root' }
else
let(:slurm_user) { 'slurm' }
end
let(:slurm_user) { 'slurm' }

context 'with default parameters' do
nodes = hosts_as('slurmdbd')
Expand Down
4 changes: 2 additions & 2 deletions spec/defines/slurm_job_container_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
]).each do |_os, os_facts|
let(:facts) { os_facts }
let(:title) { '/dev/shm' }
let(:params) { { auto_base_path: true, base_path: '/dev/shm/slurm' } }
let(:params) { { auto_base_path: true, base_path: '/tmp', dirs: ['/dev/shm'], shared: true } }

it { is_expected.to create_slurm__job_container('/dev/shm') }
it { is_expected.to contain_class('slurm') }

it do
is_expected.to contain_concat__fragment('job_container.conf-/dev/shm').with(
target: 'job_container.conf',
content: "AutoBasePath=true\nBasePath=/dev/shm/slurm",
content: "AutoBasePath=true\nBasePath=/tmp Dirs=/dev/shm Shared=true",
order: '50',
)
end
Expand Down
Loading

0 comments on commit e9f9645

Please sign in to comment.