Commit 8ca8fa12 authored by hoffmaps's avatar hoffmaps
Browse files

added files for Docker setup

parent 1bee8d8f
FROM centos:7
LABEL desc="Slurm simulator made ready"
# Helper scripts used to start/stop the services inside the container
# (cmd_start is also the image ENTRYPOINT).
COPY cmd_start cmd_stop /sbin/
# Executable by everyone, writable by root only: these scripts run as root at
# container start, so they must not be world-writable.
RUN chmod 0755 /sbin/cmd_start /sbin/cmd_stop
# Install MySQL (MariaDB) server and development headers.
# The yum cache is cleaned in the same layer so it is not baked into the image.
RUN \
    yum -y update && \
    yum -y install mariadb-server && \
    yum -y install mariadb-devel && \
    yum clean all && \
    echo "Done installing Mariadb"
# Python 3 stack. gcc-c++ is installed first because the pandas build needs
# the gcc command; epel-release is installed in its own step because the
# python34 packages are requested only after that repository is enabled.
RUN \
    yum -y install gcc-c++ && \
    yum -y install epel-release && \
    yum -y install python34 python34-libs python34-devel python34-numpy python34-scipy python34-pip && \
    pip3 install --no-cache-dir pymysql && \
    pip3 install --no-cache-dir pandas && \
    yum clean all && \
    echo "Python all installed"
# Install R, Rcpp, the Python development headers and TeX Live.
# The texlive pattern is quoted so that yum, not the shell, expands it.
RUN \
    yum -y install R R-Rcpp R-Rcpp-devel && \
    yum -y install python-devel && \
    yum -y install 'texlive-*' && \
    yum clean all && \
    echo "R all installed"
# Extra tools needed later in the build and at run time:
#   sudo - lets the slurm user issue root commands if needed
#   git  - fetches sources from GitHub
#   wget - downloads the RStudio Server package
# Each package is installed by its own yum call, so a failure of any one of
# them stops the build.
RUN \
    for pkg in sudo git wget; do \
        yum -y install "${pkg}" || exit 1; \
    done
# Create the slurm account (home /home/slurm, bash shell, member of wheel,
# password "slurm") and the /install_files directory that later COPY steps use.
RUN \
    useradd --home-dir /home/slurm --create-home --shell /bin/bash slurm && \
    usermod --append --groups wheel slurm && \
    echo "slurm:slurm" | chpasswd && \
    mkdir /install_files && \
    echo "Added slurm user"
# Run as slurm so that everything created below is owned by slurm.
USER slurm
# Create the simulation workspace (slurm_sim_ws and slurm_sim_ws/sim) and
# clone the slurm_sim_tools toolkit into it.
RUN \
    mkdir /home/slurm/slurm_sim_ws /home/slurm/slurm_sim_ws/sim && \
    git clone https://github.com/nsimakov/slurm_sim_tools.git /home/slurm/slurm_sim_ws/slurm_sim_tools
# Back to root for the system-wide R package installation.
USER root
# Copy the R script that installs the required R packages into /install_files
# and run it.
COPY package_install.R /install_files/
RUN Rscript /install_files/package_install.R && echo "Installed packages"
# Install RStudio Server from the downloaded RPM, plus initscripts.
# The RPM is deleted and the yum cache cleaned in the same layer so neither
# ends up in the final image.
RUN \
    wget https://download2.rstudio.org/rstudio-server-rhel-1.1.453-x86_64.rpm && \
    yum -y install rstudio-server-rhel-1.1.453-x86_64.rpm && \
    rm -f rstudio-server-rhel-1.1.453-x86_64.rpm && \
    yum -y install initscripts && \
    yum clean all
# 8787 is the default RStudio Server port.
EXPOSE 8787
# Install the OpenSSH client/server, create the sshd runtime directory and
# generate one passphrase-less host key per key type.
RUN \
    yum -y install openssh openssh-server openssh-clients openssl-libs && \
    mkdir /var/run/sshd && \
    for keytype in rsa ecdsa ed25519; do \
        ssh-keygen -t "${keytype}" -f "/etc/ssh/ssh_host_${keytype}_key" -N '' || exit 1; \
    done
# Port 22 is declared so ssh access to the container can be published.
EXPOSE 22
# Configure MariaDB at build time and add the 'slurm' database account.
# Order matters:
#   1. open up group access on the data/log/run directories and initialise
#      the data directory (mysql_install_db), then hand it to the mysql user;
#   2. "cmd_start mysqld" launches mysqld_safe in the background and waits
#      until the server answers a ping;
#   3. the two mysql calls create 'slurm'@'localhost' (password 'slurm') and
#      grant it all privileges on every database;
#   4. "cmd_stop mysqld" shuts the server down again (mysqladmin shutdown) so
#      the layer is committed with the server stopped.
RUN \
chmod g+rw /var/lib/mysql /var/log/mariadb /var/run/mariadb && \
mysql_install_db && \
chown -R mysql:mysql /var/lib/mysql && \
cmd_start mysqld && \
mysql -e "create user 'slurm'@'localhost' identified by 'slurm';" && \
mysql -e "GRANT ALL PRIVILEGES ON *.* TO 'slurm'@'localhost' IDENTIFIED BY 'slurm';" && \
cmd_stop mysqld
# Declares the MySQL port (documentation only; it does not publish the port).
EXPOSE 3306
# Build as slurm so the sources, build tree and install prefix are owned by slurm.
USER slurm
# Clone the Slurm simulator and do an out-of-tree optimised build in bld_opt,
# installing into slurm_sim_ws/slurm_opt.
# The number of parallel make jobs is capped at the CPU count: a bare
# "make -j" starts an unlimited number of jobs and can exhaust memory.
RUN \
    cd /home/slurm/slurm_sim_ws && \
    git clone https://github.com/nsimakov/slurm_simulator.git && \
    mkdir bld_opt && \
    cd bld_opt && \
    ../slurm_simulator/configure --prefix=/home/slurm/slurm_sim_ws/slurm_opt --enable-simulator \
        --enable-pam --without-munge --enable-front-end --with-mysql-config=/usr/bin/ --disable-debug \
        CFLAGS="-g -O3 -D NDEBUG=1" && \
    make -j"$(nproc)" install
# Back to root for the remaining file and permission handling.
USER root
# Scripts used at container start-up and by the regression test.
COPY \
    ./startup_file.sh \
    ./initial_test.sh \
    ./micro_cluster_setup.py \
    ./micro_ws_config.sh \
    ./populate_slurmdb.sh \
    ./generate_job_trace.sh \
    ./run_sim.sh \
    ./check_results.R \
    /install_files/
# Everyone gets rwx on /install_files so any user can run the scripts.
RUN chmod -R a+rwx /install_files
# Possibly needed for slurmdbd; left disabled.
#EXPOSE 29001
# cmd_start is the entrypoint. It handles its arguments in order and executes
# any argument that is not one of its keywords as a command, so by default the
# start-up script runs first and the initial test second.
ENTRYPOINT ["/sbin/cmd_start"]
CMD ["/install_files/startup_file.sh", "/install_files/initial_test.sh"]
#!/usr/bin/env Rscript
# This script gets the results from the simulation and runs some tests on them
# It tests if the requested features were given to the jobs
# Features: cpu type, gpu, big mem
# How implemented - each feature corresponds to a different type of node
library(RSlurmSimTools) # needs these libraries to run the tests
library(dplyr)
# Check whether one job received nodes matching a requested feature.
#
# df.joined   : data frame joining the job trace with the sacct output
# row_num     : row (job) to inspect
# trace_col   : column holding the requested value (feature, gres, memory, ...)
# trace_check : requested value this test is about
# sacct_col   : column holding what the simulator assigned (the node list)
# sacct_check : pattern the assigned value must contain for that request
#
# Returns TRUE when the job did not request trace_check (including an NA
# request) or when the assigned value matches sacct_check; FALSE otherwise.
check_nodes <- function(df.joined, row_num, trace_col, trace_check, sacct_col, sacct_check) {
  requested <- df.joined[row_num, trace_col]
  assigned <- df.joined[row_num, sacct_col]

  # NA means nothing was requested, so there is nothing to verify
  if (is.na(requested)) {
    return(TRUE)
  }
  # a different request is not the subject of this check
  if (!(requested == trace_check)) {
    return(TRUE)
  }
  # the request is satisfied only if the assigned value matches the pattern
  if (grepl(sacct_check, assigned)) TRUE else FALSE
}
# Job trace: the jobs that were submitted to the simulator.
job_trace <- read.csv(file="/home/slurm/slurm_sim_ws/slurm_sim_tools/reg_testing/micro_cluster/test_trace.csv")
# Simulator output: which jobs ran and on which nodes.
sacct_base <- read_sacct_out("/home/slurm/slurm_sim_ws/sim/micro/baseline/results/jobcomp.log")
# Join both by job id so each row holds the request and the assignment.
joined <- left_join(job_trace, sacct_base, by = c("sim_job_id" = "local_job_id"))

done_well <- TRUE # assume success until a mismatch is found
# seq_len() gives an empty sequence for an empty data frame, whereas
# 1:nrow(joined) would iterate over the non-existent rows 1 and 0.
for (row in seq_len(nrow(joined))) {
  # every requested feature must have been honoured (or not requested at all)
  done_well <- check_nodes(joined, row, "sim_req_mem", 500000, "NodeList", "b") && # big mem
    check_nodes(joined, row, "sim_features", "CPU-M", "NodeList", "m") &&          # M cpu
    check_nodes(joined, row, "sim_features", "CPU-N", "NodeList", "n") &&          # N cpu
    check_nodes(joined, row, "sim_gres", "gpu:1", "NodeList", "g") &&              # 1 gpu
    check_nodes(joined, row, "sim_gres", "gpu:2", "NodeList", "g")                 # 2 gpu
  # stop at the first mismatch and report the job so it can be traced
  if (!done_well) {
    jobid <- joined[row, "sim_job_id"]
    print(paste("Id of incorrectly assigned job:", jobid))
    break
  }
}
# overall result
print("Did the simulator do well?.....")
print(done_well)
#!/bin/bash
# Container entry point: announce start-up, record this shell's pid and set
# the defaults for the flags that the argument loop may change.
echo "Reach Entry Point"
# NOTE(review): the file name is spelled "enrypoint"; kept as-is because other
# tooling may read this exact path - confirm before renaming.
printf '%s\n' "$$" > /var/run/enrypoint.pid
# abort on the first failing command (can be toggled by arguments later)
set -e
run_bash=0
loop=0
# Launch a daemon unless its pid file points at a live process.
# Format:
#     start_process name command pid_file
# Returns the status of the launched command, or 0 if it is already running.
start_process(){
    local name=$1
    local command=$2
    local pid_file=$3
    local pid=""

    if [ -f "${pid_file}" ]; then
        pid=$(cat "${pid_file}") || pid=""
    fi
    # /proc/<pid> is a directory, so liveness has to be tested with -d;
    # an empty pid file is treated as "not running".
    if [ -n "${pid}" ] && [ -d "/proc/${pid}" ]; then
        echo "${name} already running"
        return 0
    fi
    echo "Launching ${name}"
    # intentionally unquoted: the command may carry its own arguments
    ${command}
}
# Start process and confirm it launches by looking for a
# confirm_sentence in log_file.
# Format:
#     start_process_w_confirm name command pid_file log_file confirm_sentence
# Returns 0 once confirm_sentence is found in log_file; returns 1 if the
# process is already running or the sentence does not show up in time.
start_process_w_confirm(){
    local name=$1
    local command=$2
    local pid_file=$3
    local log_file=$4
    local confirm_sentence=$5
    local timeout_time=2   # number of one-second polls of the log file
    local i

    if [ -f "${pid_file}" ]; then
        if [ -d "/proc/$(cat "${pid_file}")" ]; then
            echo "${name} already running"
            return 1
        fi
    fi

    # keep the previous log around, but start this launch with a fresh file
    if [ -f "${log_file}" ]; then
        cat "${log_file}" >> "${log_file}.old"
        rm "${log_file}"
    fi

    echo "Launching ${name}"
    # intentionally unquoted: the command may carry its own arguments
    ${command}

    for ((i=0; i < timeout_time; i++)); do
        # quoted so a multi-word sentence stays a single pattern; stderr is
        # dropped because the log file may not exist yet
        if grep -q -- "${confirm_sentence}" "${log_file}" 2>/dev/null; then
            echo "${name} is up"
            return 0
        fi
        sleep 1
    done

    echo "Something wrong with ${name}, can not find key-phrase in log"
    return 1
}
# Handle the arguments in order. Known service names start that service,
# option-like arguments toggle flags, and anything else is executed as a
# command.
for var in "$@"; do
    case "$var" in
        mysqld)
            echo "Launching mysqld"
            # NOTE(review): /sbin/shutdown.sh is not among the files visible
            # here (only cmd_start/cmd_stop are) - confirm that it exists.
            trap "/sbin/shutdown.sh mysqld; exit" SIGHUP SIGINT SIGTERM
            mysqld_safe &
            # wait until the server answers
            mysqladmin --silent --wait=30 ping
            ;;
        munged)
            start_process munged munged /run/munge/munged.pid
            ;;
        sshd)
            start_process sshd /usr/sbin/sshd /run/sshd.pid
            ;;
        slurmdbd|slurmctld|slurmd)
            # the three Slurm daemons share one layout for binary, pid file
            # and log file, and all log "started" once they are up
            start_process_w_confirm "${var}" "/usr/sbin/${var}" "/run/${var}.pid" \
                "/var/log/slurm/${var}.log" started
            ;;
        self_contained_slurm_wlm)
            cmd_start munged sshd mysqld slurmdbd slurmctld slurmd
            ;;
        bash)
            echo "Launching bash"
            run_bash=1
            ;;
        -loop)
            loop=1
            ;;
        -set-no-exit-on-fail)
            set +e
            ;;
        -set-exit-on-fail)
            set -e
            ;;
        *)
            echo "Executing ${var}"
            ${var}
            ;;
    esac
done

# After all arguments: open an interactive shell if requested, otherwise
# optionally keep the container alive forever.
if [ "$run_bash" -eq 1 ]; then
    /bin/bash
elif [ "$loop" -eq 1 ]; then
    echo "All requested daemon launched"
    while :; do
        sleep 60
    done
fi
#!/bin/bash
# Send SIGTERM to pid $1 and block until that process is gone.
kill_process(){
    # an empty pid would make the wait loop test /proc/ itself, which always exists
    if [ -z "$1" ]; then
        echo "kill_process: no pid given" >&2
        return 1
    fi
    kill -SIGTERM "$1"
    # /proc/<pid> is a directory, so its existence must be tested with -d
    while [ -d "/proc/$1" ]; do sleep 1; done
}
# Stop the daemon whose pid is stored in a pid file.
# Format:
#     stop_process name pid_file
# Reports "<name> is not running" when the pid file does not exist.
stop_process(){
    name=$1
    pid_file=$2
    if [ ! -f "${pid_file}" ]; then
        echo "${name} is not running"
        return
    fi
    echo "Stopping ${name}"
    kill_process $(cat ${pid_file})
}
# Stop every service named on the command line, in the order given.
for var in "$@"; do
    case "$var" in
        mysqld)
            echo "Stopping mysqld"
            mysqladmin shutdown
            ;;
        munged)
            stop_process munged /run/munge/munged.pid
            ;;
        sshd|slurmdbd|slurmctld|slurmd)
            # these daemons all keep their pid file at /run/<name>.pid
            stop_process "${var}" "/run/${var}.pid"
            ;;
        self_contained_slurm_wlm)
            cmd_stop slurmd slurmctld slurmdbd mysqld sshd munged
            ;;
        *)
            echo "unknown command ${var}"
            ;;
    esac
done
#!/bin/bash
# Generates the job trace files for the test run using Rscript.

# The R script is referenced by a relative name, so it must be started from
# this directory; stop instead of running it from the wrong place.
cd /home/slurm/slurm_sim_ws/slurm_sim_tools/reg_testing/micro_cluster/ || exit 1
# generates the set of test jobs for the simulator
Rscript 12_prep_jobs_for_testrun.R
#!/bin/bash
# Sets up the micro cluster simulation, runs it and checks the results.
echo "Setting up Micro Cluster simulation...."
# Create the MySQL database needed for the simulation. IF NOT EXISTS keeps a
# rerun (e.g. a restarted container) from failing on the existing database.
# The former separate 'mysql -e "USE ..."' call was dropped: every mysql -e
# invocation is its own session, so it had no effect on later commands.
mysql -e "CREATE DATABASE IF NOT EXISTS slurm_micro2sim;"
# run the micro cluster setup as the slurm user
su slurm -c /install_files/micro_cluster_setup.py
echo "Done with Micro Cluster Setup"
echo "Starting simulation...."
# run the simulation as the slurm user as well
su slurm -c /install_files/run_sim.sh
echo "Simulation Finished."
echo "Starting R check file....."
# verify that the requested features were assigned correctly
Rscript /install_files/check_results.R
# finish in slurm's home directory with a shell running as slurm
cd /home/slurm
su slurm
#!/usr/bin/env python3
#This python script sets up the micro Cluster simulation to run by calling other scripts
# using these to work with calling files to do things
import os
import subprocess
from time import sleep,time # need sleep to do the sleep(3) after dbd
def start_finish_process(file_path):
    """Run the program at file_path and block until it has finished.

    Prints a completion message and returns the finished Popen object.
    The exit status is not checked here.
    """
    finished = subprocess.Popen(args=file_path)
    finished.wait()
    print("Finished process of: " + file_path)
    return finished
def startup_slurmdbd(dbd_loc, conf_loc):
    """Start slurmdbd with "-Dvvv" and give it three seconds to spin up.

    dbd_loc is the slurmdbd executable; conf_loc is passed to it through the
    SLURM_CONF environment variable. Returns the Popen object of the daemon,
    which is still running when this function returns.
    """
    # NOTE(review): this environment contains ONLY SLURM_CONF; the caller's
    # PATH, HOME etc. are not inherited - confirm that this is intended.
    daemon_env = {"SLURM_CONF": conf_loc}
    daemon_cmd = [dbd_loc, "-Dvvv"]
    daemon = subprocess.Popen(args=daemon_cmd, env=daemon_env)
    sleep(3)  # allow for spin-up time
    print("Started up the Slurmdbd")
    return daemon
def check_processes():
    """Print the current process list ("ps -A") as a progress aid."""
    subprocess.Popen(args="ps -A", shell=True).wait()
def kill_processes(proc_list):
    """Kill every process in proc_list that is still running.

    proc_list holds Popen objects; None entries are skipped. Processes that
    have already finished are left alone, so no signal is sent to a pid that
    is gone. Each killed process is waited for, so it is reaped and its
    returncode is set.
    """
    for proc in proc_list:
        if proc is None:
            continue
        if proc.poll() is None:  # still running
            proc.kill()
            proc.wait()
# Script entry point: configure the workspace, start slurmdbd, populate its
# database, stop what was started and finally generate the test job trace.
if __name__ == "__main__":
    launched = []  # every process started during the setup

    # micro cluster workspace and slurm configuration setup
    launched.append(start_finish_process("/install_files/micro_ws_config.sh"))

    # start slurmdbd in "foreground" mode with the micro cluster slurm.conf
    dbd_binary = "/home/slurm/slurm_sim_ws/slurm_opt/sbin/slurmdbd"
    dbd_conf = "/home/slurm/slurm_sim_ws/sim/micro/baseline/etc/slurm.conf"
    launched.append(startup_slurmdbd(dbd_binary, dbd_conf))
    check_processes()  # show what is running

    # populate the slurm database while slurmdbd is up
    launched.append(start_finish_process("/install_files/populate_slurmdb.sh"))
    check_processes()

    # the started processes are no longer needed
    kill_processes(launched)

    # generate all the test jobs for the test simulation; after this the
    # micro cluster simulation is ready to run
    start_finish_process("/install_files/generate_job_trace.sh")
#!/bin/bash
# Prepares the workspace and the slurm configuration for the micro cluster
# simulation.
ws_dir=/home/slurm/slurm_sim_ws

echo "Starting micro cluster sim ws and slurm configuration...."
# workspace for the micro cluster simulation
cd "${ws_dir}"
mkdir -p "${ws_dir}/sim/micro"
# copy the reference slurm configuration into the baseline simulation directory
"${ws_dir}/slurm_sim_tools/src/cp_slurm_conf_dir.py" -o -s "${ws_dir}/slurm_opt" "${ws_dir}/slurm_sim_tools/reg_testing/micro_cluster/etc" "${ws_dir}/sim/micro/baseline"
echo "Finished with workspace setup and configuration"
#!/usr/bin/env Rscript
# Installs the R packages needed to work with the slurm simulator.

# source-package index of the CRAN cloud mirror
cran_source <- contrib.url("https://cloud.r-project.org/", "source")

# Install each package in pkgs from the CRAN source index, one at a time,
# printing a progress line after each.
install_from_cran <- function(pkgs) {
  for (pkg in pkgs) {
    install.packages(pkg, contriburl = cran_source)
    print(paste("installed package", pkg))
  }
}

# packages used by the tutorials ("stringr" was previously misspelled "stringer")
install_from_cran(c("ggplot2", "gridExtra", "cowplot", "lubridate", "rPython",
                    "stringr", "dplyr", "rstudioapi"))

# slurm sim tools, installed from the local checkout
install.packages("/home/slurm/slurm_sim_ws/slurm_sim_tools/src/RSlurmSimTools", repos = NULL, type = "source")

# packages for R Markdown
install_from_cran(c("evaluate", "highr", "markdown", "yaml", "htmltools", "caTools",
                    "bitops", "knitr", "jsonlite", "base64enc", "rprojroot", "rmarkdown"))

# data.table, together with its dependencies
install.packages("data.table", contriburl = cran_source, dependencies = TRUE)
#!/bin/bash
# This script populates the slurmdb with "users" that submit jobs