Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
brdunn
slurm_sim_tools
Commits
8ca8fa12
Commit
8ca8fa12
authored
Jun 11, 2018
by
hoffmaps
Browse files
added files for Docker setup
parent
1bee8d8f
Changes
12
Hide whitespace changes
Inline
Side-by-side
docker_files/slurm_sim/Dockerfile
0 → 100644
View file @
8ca8fa12
# Base image: CentOS 7.
FROM centos:7
LABEL desc="Slurm simulator made ready"

# Helper scripts used to start/stop services (mysqld, sshd, ...) inside the container.
COPY cmd_start /sbin/
COPY cmd_stop /sbin/

# Make the helper scripts readable and executable by everyone but writable
# only by their owner: cmd_start is the image ENTRYPOINT, so unprivileged
# users must not be able to modify it.
RUN chmod 755 /sbin/cmd_start /sbin/cmd_stop
# Install MySQL (MariaDB): update the installed packages first, then install
# the server package and its development package as separate yum calls so
# that a failure of either one aborts the build step.
RUN \
    yum -y update && \
    yum -y install mariadb-server && \
    yum -y install mariadb-devel && \
    echo "Done installing Mariadb"
# Python 3.4 toolchain.
# gcc-c++ is installed first so that the gcc command is available when pip
# builds pandas further down. epel-release is installed before the
# python34-* packages requested right after it.
RUN \
    yum -y install gcc-c++ && \
    yum -y install epel-release && \
    yum -y install python34 python34-libs python34-devel python34-numpy python34-scipy python34-pip && \
    pip3 install pymysql && \
    pip3 install pandas && \
    echo "Python all installed"
# Install R together with Rcpp, the Python development headers, and every
# package whose name matches texlive-*.
RUN \
    yum -y install R R-Rcpp R-Rcpp-devel && \
    yum -y install python-devel && \
    yum -y install texlive-* && \
    echo "R all installed"

# Installing some additional things to do things down the line
#   sudo - to allow slurm to give root commands if needed
#   git  - to get stuff from github
#   wget - to be able to get rstudio server on the machine
RUN \
    yum -y install sudo && \
    yum -y install git && \
    yum -y install wget

# Create the /install_files directory and add the slurm user: home directory
# /home/slurm (created), login shell /bin/bash, additional group wheel.
# NOTE(review): the account password is hard-coded to "slurm" below; confirm
# that this is acceptable wherever the image is deployed.
RUN \
    mkdir /install_files && \
    useradd -d /home/slurm -ms /bin/bash slurm && \
    usermod -aG wheel slurm && \
    echo "slurm:slurm" |chpasswd && \
    echo "Added slurm user"
# Run the next step as slurm so that everything it creates is owned by slurm.
USER slurm

# Create the simulation workspace (including its sim/ subdirectory) under
# /home/slurm and clone the slurm_sim_tools toolkit into the workspace.
RUN \
    mkdir /home/slurm/slurm_sim_ws /home/slurm/slurm_sim_ws/sim && \
    cd /home/slurm/slurm_sim_ws && \
    git clone https://github.com/nsimakov/slurm_sim_tools.git

# Back to root so the following steps have all the access they need.
USER root
# Copy the R script that installs all the R packages into /install_files.
COPY ./package_install.R /install_files

# Run the R script to install the packages.
RUN \
    Rscript /install_files/package_install.R && \
    echo "Installed packages"

# Download the RStudio Server 1.1.453 RPM and install it with yum, then
# install initscripts.
RUN \
    wget https://download2.rstudio.org/rstudio-server-rhel-1.1.453-x86_64.rpm && \
    yum -y install rstudio-server-rhel-1.1.453-x86_64.rpm && \
    yum -y install initscripts

# 8787 is the default port that rstudio server uses, so need to expose it to use it
EXPOSE 8787

# Install the OpenSSH packages, create /var/run/sshd, and generate RSA,
# ECDSA and Ed25519 host keys with empty passphrases (-N '').
RUN \
    yum -y install openssh openssh-server openssh-clients openssl-libs && \
    mkdir /var/run/sshd && \
    ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N '' && \
    ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N '' && \
    ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N ''

# need to expose port 22 to allow for ssh to work properly
EXPOSE 22

# Configure mysqld: give the group read/write access to the data, log and run
# directories, initialise the data directory and hand it to mysql:mysql,
# start the server with cmd_start, create the 'slurm'@'localhost' database
# user with all privileges, then stop the server again with cmd_stop.
RUN \
    chmod g+rw /var/lib/mysql /var/log/mariadb /var/run/mariadb && \
    mysql_install_db && \
    chown -R mysql:mysql /var/lib/mysql && \
    cmd_start mysqld && \
    mysql -e "create user 'slurm'@'localhost' identified by 'slurm';" && \
    mysql -e "GRANT ALL PRIVILEGES ON *.* TO 'slurm'@'localhost' IDENTIFIED BY 'slurm';" && \
    cmd_stop mysqld

# expose for mysql use
EXPOSE 3306
# Build as the slurm user so the source tree, build tree and install prefix
# are owned by slurm.
USER slurm

# Clone the Slurm simulator and build it out-of-tree in bld_opt, installing
# into /home/slurm/slurm_sim_ws/slurm_opt.
# The parallel build is capped at the number of available CPUs: a bare
# "make -j" puts no limit on the number of simultaneous jobs.
RUN \
    cd /home/slurm/slurm_sim_ws && \
    git clone https://github.com/nsimakov/slurm_simulator.git && \
    mkdir bld_opt && \
    cd bld_opt && \
    ../slurm_simulator/configure --prefix=/home/slurm/slurm_sim_ws/slurm_opt --enable-simulator \
        --enable-pam --without-munge --enable-front-end --with-mysql-config=/usr/bin/ --disable-debug \
        CFLAGS="-g -O3 -D NDEBUG=1" && \
    make -j "$(nproc)" install

# back to root for easier permissions stuff
USER root
# Helper scripts used at container start-up: workspace setup, database
# population, job-trace generation, the simulation run and the result check.
COPY \
    ./startup_file.sh \
    ./initial_test.sh \
    ./micro_cluster_setup.py \
    ./micro_ws_config.sh \
    ./populate_slurmdb.sh \
    ./generate_job_trace.sh \
    ./run_sim.sh \
    ./check_results.R \
    /install_files/

# Everyone gets rwx on the install_files directory so any user can run the helpers.
RUN chmod -R a+rwx /install_files

# need to expose this for the slurmdbd to work maybe?
#EXPOSE 29001

# cmd_start is the entry point; by default it is handed the startup file and
# then the initial test file as its arguments.
ENTRYPOINT ["/sbin/cmd_start"]
CMD ["/install_files/startup_file.sh","/install_files/initial_test.sh"]
docker_files/slurm_sim/check_results.R
0 → 100644
View file @
8ca8fa12
#!/usr/bin/env Rscript
# This script gets the results from the simulation and runs some tests on them
# It tests if the requested features were given to the jobs
# Features: cpu type, gpu, big mem
# How implemented - each feature corresponds to a different type of node
library
(
RSlurmSimTools
)
# needs these libraries to run the tests
library
(
dplyr
)
# Checks one job (one row of the joined trace/sacct data frame) against one feature.
#   df.joined   - data frame produced by joining the job trace with the sacct output
#   row_num     - row (job) to inspect
#   trace_col   - column holding the requested feature
#   trace_check - feature value this call is responsible for
#   sacct_col   - column holding what the simulator assigned (the node list)
#   sacct_check - pattern that the assigned value must match for that feature
# Returns TRUE when the feature was not requested for this job, or when it was
# requested and the assigned value matches sacct_check; FALSE otherwise.
check_nodes <- function(df.joined, row_num, trace_col, trace_check, sacct_col, sacct_check) {
  requested <- df.joined[row_num, trace_col]
  assigned <- df.joined[row_num, sacct_col]

  # NA means no feature was requested, so there is nothing to verify
  if (is.na(requested)) {
    return(TRUE)
  }
  # a different feature was requested; some other call checks that one
  if (requested != trace_check) {
    return(TRUE)
  }
  # the feature was requested: the assigned nodes must match the pattern
  grepl(sacct_check, assigned)
}
# reads in the csv file of the job traces (jobs submitted)
job_trace <- read.csv(file = "/home/slurm/slurm_sim_ws/slurm_sim_tools/reg_testing/micro_cluster/test_trace.csv")

# reads in log file of resulting data (what jobs were assigned, where, etc)
sacct_base <- read_sacct_out("/home/slurm/slurm_sim_ws/sim/micro/baseline/results/jobcomp.log")

# creating a joined data frame by job id so that can go through jobs easier
joined <- left_join(job_trace, sacct_base, by = c("sim_job_id" = "local_job_id"))

done_well = TRUE # assumes did correctly

# loops through each row in the joined data frame
# (seq_len yields an empty sequence for a zero-row data frame, whereas
#  1:nrow(joined) would iterate over 1 and 0 and index rows that do not exist)
for (row in seq_len(nrow(joined))) {
  # checks if all features have been met (or weren't present)
  done_well = check_nodes(joined, row, "sim_req_mem", 500000, "NodeList", "b") &&  # big mem
    check_nodes(joined, row, "sim_features", "CPU-M", "NodeList", "m") &&          # M cpu
    check_nodes(joined, row, "sim_features", "CPU-N", "NodeList", "n") &&          # N cpu
    check_nodes(joined, row, "sim_gres", "gpu:1", "NodeList", "g") &&              # 1 gpu
    check_nodes(joined, row, "sim_gres", "gpu:2", "NodeList", "g")                 # 2 gpu

  # if at any point a feature doesn't match, breaks out of the loop
  if (!done_well) {
    # prints out the job id for tracing back what failed
    jobid = joined[row, "sim_job_id"]
    print(paste("Id of incorrectly assigned job:", jobid))
    break
  }
}

# prints overall result
print("Did the simulator do well?.....")
print(done_well)
docker_files/slurm_sim/cmd_start
0 → 100644
View file @
8ca8fa12
#!/bin/bash
# Start-up helper: launches the services and commands named in its arguments.
echo "Reach Entry Point"
# Record the PID of this script.
# NOTE(review): the file name is spelled "enrypoint", not "entrypoint"; it is
# left as is because other tooling may read this exact path - confirm.
echo $$ > /var/run/enrypoint.pid
# Stop at the first failing command; the -set-no-exit-on-fail and
# -set-exit-on-fail arguments handled further down toggle this at run time.
set -e
# Flags raised by the "-loop" and "bash" arguments respectively.
loop=0
run_bash=0
#######################################
# Launch a daemon unless its PID file points at a live process.
# Arguments:
#   $1 - display name used in the log messages
#   $2 - command line to run (split on whitespace, so it may carry arguments)
#   $3 - PID file written by the daemon
# Outputs:
#   "Launching <name>" or "<name> already running" on stdout
# Returns:
#   exit status of the launched command, or 0 when already running
#######################################
start_process() {
  local name=$1
  local cmd=$2
  local pid_file=$3
  local pid=""

  if [ -f "${pid_file}" ]; then
    # an unreadable PID file is treated like a missing one
    pid=$(cat "${pid_file}") || pid=""
  fi

  # /proc/<pid> is a directory, so liveness must be tested with -d;
  # a -f test never matches and would relaunch a daemon that is still running.
  if [ -n "${pid}" ] && [ -d "/proc/${pid}" ]; then
    echo "${name} already running"
  else
    echo "Launching ${name}"
    # shellcheck disable=SC2086 # word splitting is intended: cmd may hold arguments
    ${cmd}
  fi
}
#######################################
# Start a process and confirm that it launched by looking for a
# confirm_sentence in log_file.
# Format:
#   start_process_w_confirm name command pid_file log_file confirm_sentence [timeout]
# Arguments:
#   $1 - display name used in the log messages
#   $2 - command line to run (split on whitespace, so it may carry arguments)
#   $3 - PID file written by the daemon
#   $4 - log file to scan; an existing one is appended to <log_file>.old first
#   $5 - pattern (grep syntax) that signals a successful start
#   $6 - optional number of one-second polls before giving up (default 2)
# Returns:
#   0 once the pattern is found; 1 if the process is already running or the
#   pattern did not show up in time
#######################################
start_process_w_confirm() {
  local name=$1
  local cmd=$2
  local pid_file=$3
  local log_file=$4
  local confirm_sentence=$5
  local timeout_time=${6:-2}
  local pid=""
  local i

  if [ -f "${pid_file}" ]; then
    pid=$(cat "${pid_file}") || pid=""
    # an empty PID file must not count as running ("/proc/" always exists)
    if [ -n "${pid}" ] && [ -d "/proc/${pid}" ]; then
      echo "${name} already running"
      return 1
    fi
  fi

  # keep the previous log around so only fresh output is searched
  if [ -f "${log_file}" ]; then
    cat "${log_file}" >> "${log_file}.old"
    rm "${log_file}"
  fi

  echo "Launching ${name}"
  # shellcheck disable=SC2086 # word splitting is intended: cmd may hold arguments
  ${cmd}

  for ((i = 0; i < timeout_time; i++)); do
    # the log may not exist yet on the first polls, hence the silenced stderr
    if grep -q -- "${confirm_sentence}" "${log_file}" 2>/dev/null; then
      echo "${name} is up"
      return 0
    fi
    sleep 1
  done

  echo "Something wrong with ${name}, can not find key-phrase in log"
  return 1
}
# Handle the arguments one by one, in the order given. Known service names
# start that service, the -loop / bash / -set-* words change how the script
# behaves, and anything else is executed as a command.
for var in "$@"; do
  case "$var" in
    mysqld)
      echo "Launching mysqld"
      # On a termination signal shut mysqld down through cmd_stop, the stop
      # helper kept next to this script in /sbin, and then exit.
      trap "/sbin/cmd_stop mysqld; exit" SIGHUP SIGINT SIGTERM
      mysqld_safe &
      # block until the server answers (gives up after 30 attempts)
      mysqladmin --silent --wait=30 ping
      ;;
    munged)
      start_process munged munged /run/munge/munged.pid
      ;;
    sshd)
      start_process sshd /usr/sbin/sshd /run/sshd.pid
      ;;
    slurmdbd)
      start_process_w_confirm slurmdbd /usr/sbin/slurmdbd /run/slurmdbd.pid \
        /var/log/slurm/slurmdbd.log started
      ;;
    slurmctld)
      start_process_w_confirm slurmctld /usr/sbin/slurmctld /run/slurmctld.pid \
        /var/log/slurm/slurmctld.log started
      ;;
    slurmd)
      start_process_w_confirm slurmd /usr/sbin/slurmd /run/slurmd.pid \
        /var/log/slurm/slurmd.log started
      ;;
    self_contained_slurm_wlm)
      cmd_start munged sshd mysqld slurmdbd slurmctld slurmd
      ;;
    bash)
      echo "Launching bash"
      run_bash=1
      ;;
    -loop)
      loop=1
      ;;
    -set-no-exit-on-fail)
      set +e
      ;;
    -set-exit-on-fail)
      set -e
      ;;
    *)
      echo "Executing ${var}"
      # shellcheck disable=SC2086 # word splitting is intended: var may hold arguments
      ${var}
      ;;
  esac
done

# After all arguments are handled: an interactive shell takes precedence over
# the keep-alive loop.
if ((run_bash == 1)); then
  /bin/bash
elif ((loop == 1)); then
  echo "All requested daemon launched"
  while true; do
    sleep 60
  done
fi
docker_files/slurm_sim/cmd_stop
0 → 100644
View file @
8ca8fa12
#!/bin/bash
#######################################
# Send SIGTERM to a process and wait until it has disappeared.
# Arguments:
#   $1 - PID of the process to stop
# Returns:
#   1 if $1 is empty or not a number, 0 otherwise
#######################################
kill_process() {
  local pid=$1

  # Without this guard an empty PID would make the wait below poll "/proc/",
  # which always exists, and never return.
  case "${pid}" in
    '' | *[!0-9]*)
      echo "kill_process: invalid PID '${pid}'" >&2
      return 1
      ;;
  esac

  if ! kill -SIGTERM "${pid}"; then
    # kill has already reported why; there is nothing to wait for
    return 0
  fi

  # /proc/<pid> is a directory, so -d (not -f) is the test that actually waits
  while [ -d "/proc/${pid}" ]; do
    sleep 1
  done
}
#######################################
# Stop the process recorded in a PID file, if that file exists.
# Arguments:
#   $1 - display name used in the messages
#   $2 - PID file of the process
# Outputs:
#   "Stopping <name>" or "<name> is not running" on stdout
#######################################
stop_process() {
  local name=$1
  local pid_file=$2
  local pid

  if [ -f "${pid_file}" ]; then
    echo "Stopping ${name}"
    # quoted so that PID file paths containing whitespace are read correctly
    pid=$(cat "${pid_file}")
    kill_process "${pid}"
  else
    echo "${name} is not running"
  fi
}
# Stop every service named on the command line, in the order given.
for service in "$@"; do
  case "${service}" in
    mysqld)
      echo "Stopping mysqld"
      mysqladmin shutdown
      ;;
    munged)
      # munged keeps its PID file in its own subdirectory
      stop_process munged /run/munge/munged.pid
      ;;
    sshd | slurmdbd | slurmctld | slurmd)
      # these daemons all use /run/<name>.pid
      stop_process "${service}" "/run/${service}.pid"
      ;;
    self_contained_slurm_wlm)
      cmd_stop slurmd slurmctld slurmdbd mysqld sshd munged
      ;;
    *)
      echo "unknown command ${service}"
      ;;
  esac
done
docker_files/slurm_sim/generate_job_trace.sh
0 → 100644
View file @
8ca8fa12
#!/bin/bash
# This script sets up and runs the generation of the job trace files using Rscript

# goes to the directory where we want to edit things in (working directory);
# stop here if it is missing, because the R script is looked up relative to it
cd /home/slurm/slurm_sim_ws/slurm_sim_tools/reg_testing/micro_cluster/ || exit 1

# begins the R script that generates a bunch of test jobs for the simulator
Rscript 12_prep_jobs_for_testrun.R
docker_files/slurm_sim/initial_test.sh
0 → 100644
View file @
8ca8fa12
#!/bin/bash
# this script sets up the micro cluster simulation and runs it, checking if it works properly
echo "Setting up Micro Cluster simulation...."

# creates the mysql database needed for the simulation; IF NOT EXISTS keeps
# the statement from failing when the script is run again on the same data
mysql -e "CREATE DATABASE IF NOT EXISTS slurm_micro2sim;"
mysql -e "USE slurm_micro2sim;"

# calls the setup file for the micro Cluster simulation (executes as slurm)
su slurm -c /install_files/micro_cluster_setup.py
echo "Done with Micro Cluster Setup"

echo "Starting simulation...."
# runs the simulation as the slurm user so the simulator doesn't get upset
su slurm -c /install_files/run_sim.sh
echo "Simulation Finished."

echo "Starting R check file....."
# this file runs some code that checks if features were given correctly
Rscript /install_files/check_results.R

# goes to the home directory of slurm
cd /home/slurm || exit 1
# switches to slurm user at the end (starts bash)
su slurm
docker_files/slurm_sim/micro_cluster_setup.py
0 → 100644
View file @
8ca8fa12
#!/usr/bin/env python3
#This python script sets up the micro Cluster simulation to run by calling other scripts
# using these to work with calling files to do things
import
os
import
subprocess
from
time
import
sleep
,
time
# need sleep to do the sleep(3) after dbd
def start_finish_process(file_path):
    """Run the program at ``file_path`` and block until it has exited.

    A line naming the finished program is printed afterwards.

    Returns the ``subprocess.Popen`` object of the finished process.
    """
    child = subprocess.Popen(file_path)
    child.wait()  # do not go on before the program is done
    print("Finished process of: " + file_path)
    return child
def startup_slurmdbd(dbd_loc, conf_loc, startup_wait=3):
    """Start slurmdbd with the ``-Dvvv`` flags and give it time to spin up.

    dbd_loc      -- path of the slurmdbd executable
    conf_loc     -- value exported to the daemon as SLURM_CONF
    startup_wait -- seconds to sleep after launching (default 3)

    The daemon inherits the current environment with SLURM_CONF added, so
    variables such as PATH stay available to it; passing only SLURM_CONF
    would replace the whole environment.

    Returns the ``subprocess.Popen`` object of the still-running daemon.
    """
    dbd_env = dict(os.environ, SLURM_CONF=conf_loc)
    proc = subprocess.Popen(args=[dbd_loc, "-Dvvv"], env=dbd_env)
    sleep(startup_wait)  # sleeps to allow for spin up time
    print("Started up the Slurmdbd")
    return proc
def check_processes():
    """Run ``ps -A`` through the shell and wait for it to finish.

    The listing goes straight to this script's stdout; it is only there to
    make it easier to see what is running. Nothing is returned.
    """
    subprocess.Popen(args="ps -A", shell=True).wait()
def kill_processes(proc_list):
    """Kill every process in ``proc_list`` and reap it.

    proc_list -- iterable of ``subprocess.Popen`` objects; ``None`` entries
                 are skipped.

    Each process is waited for after the kill so that no defunct child is
    left behind and its ``returncode`` is filled in.
    """
    for p in proc_list:
        if p is None:
            continue
        p.kill()
        p.wait()
# Script entry point: prepare the micro cluster simulation step by step.
if __name__ == "__main__":
    # every process started during setup is collected here so that all of
    # them can be killed once the database has been populated
    started = []

    # microcluster workspace and slurm configuration setup
    started.append(start_finish_process("/install_files/micro_ws_config.sh"))

    # slurmdbd is started with the baseline slurm.conf and keeps running
    # while the database is being populated
    started.append(
        startup_slurmdbd(
            "/home/slurm/slurm_sim_ws/slurm_opt/sbin/slurmdbd",
            "/home/slurm/slurm_sim_ws/sim/micro/baseline/etc/slurm.conf",
        )
    )
    check_processes()  # prints processes going on

    # populate the slurmdb
    started.append(start_finish_process("/install_files/populate_slurmdb.sh"))
    check_processes()

    # none of the collected processes is needed any more
    kill_processes(started)

    # generate all the test jobs for the test simulation; after this the
    # micro cluster sim is ready to run
    start_finish_process("/install_files/generate_job_trace.sh")
docker_files/slurm_sim/micro_ws_config.sh
0 → 100644
View file @
8ca8fa12
#!/bin/bash
# This script sets up the workspace and slurm configuration for the micro cluster simulation
echo "Starting micro cluster sim ws and slurm configuration...."

# initiating workspace for micro-cluster simulation; stop if the workspace
# root is missing instead of carrying on in the wrong directory
cd /home/slurm/slurm_sim_ws || exit 1
mkdir -p /home/slurm/slurm_sim_ws/sim/micro

# creating slurm configuration properly: cp_slurm_conf_dir.py is given the
# slurm_opt installation, the micro_cluster etc directory and the baseline
# directory of the simulation
/home/slurm/slurm_sim_ws/slurm_sim_tools/src/cp_slurm_conf_dir.py -o -s \
  /home/slurm/slurm_sim_ws/slurm_opt \
  /home/slurm/slurm_sim_ws/slurm_sim_tools/reg_testing/micro_cluster/etc \
  /home/slurm/slurm_sim_ws/sim/micro/baseline

echo "Finished with workspace setup and configuration"
docker_files/slurm_sim/package_install.R
0 → 100644
View file @
8ca8fa12
#!/usr/bin/env Rscript
# this script installs the needed packages for R to use for the slurm simulator

# source-package index of the CRAN cloud mirror, used for every CRAN install below
cran_source <- contrib.url("https://cloud.r-project.org/", "source")

# installs each of the named packages from cran_source, printing a line per package
install_from_cran <- function(pkgs) {
  for (pkg in pkgs) {
    install.packages(pkg, contriburl = cran_source)
    print(paste("installed package", pkg))
  }
}

# installs packages for R from tutorial things
# (the string-handling package is named "stringr")
install_from_cran(c("ggplot2", "gridExtra", "cowplot", "lubridate", "rPython",
                    "stringr", "dplyr", "rstudioapi"))

# slurm sim tools installation (local source package from the cloned toolkit)
install.packages("/home/slurm/slurm_sim_ws/slurm_sim_tools/src/RSlurmSimTools",
                 repos = NULL, type = "source")

# installs packages for RMD
install_from_cran(c("evaluate", "highr", "markdown", "yaml", "htmltools", "caTools",
                    "bitops", "knitr", "jsonlite", "base64enc", "rprojroot", "rmarkdown"))

# had to install data.table package
install.packages("data.table", contriburl = cran_source, dependencies = TRUE)
docker_files/slurm_sim/populate_slurmdb.sh
0 → 100644
View file @
8ca8fa12
#!/bin/bash