Commit dee35f3e authored by doc

Added regression testing materials

parent b2f983c0
# load RSlurmSimTools
library(RSlurmSimTools)
# change working directory to this script directory
top_dir <- NULL
top_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
print(top_dir)
setwd(top_dir)
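# NOTE: rstudioapi::getActiveDocumentContext() only works inside an RStudio
# session; when running this script with plain R or Rscript, set top_dir to
# the script location manually (or use getwd()).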
#write job trace file using data.frame directly
trace <- data.frame(
sim_job_id = c(1001L,1002L,1003L),
sim_username = c("user1","user2","user3"),
sim_tasks = c(1L,2L,3L),
sim_cpus_per_task = c(1L,1L,1L),
sim_tasks_per_node = c(12L,12L,12L),
sim_submit_ts = rep(as.integer(as.POSIXct("2016-10-01 00:01:00")), 3L), # submit times (epoch seconds)
sim_duration = c(60L,30L,40L),
sim_wclimit = c(300L,100L,200L),
sim_qosname = c("normal","normal","normal"),
sim_partition = c("normal","normal","normal"),
sim_account = c("account1","account1","account1"),
sim_dependency = c("","",""),
sim_req_mem = as.integer(c(NA,NA,NA)),
sim_req_mem_per_cpu = c(0L,0L,0L),
sim_features = c("","",""),
sim_gres = c("","",""),
sim_shared = c(0L,0L,0L),
sim_cancelled_ts = c(0L,0L,0L)
)
write_trace(file.path(top_dir,"test0.trace"),trace)
#dependency check
trace <- list(
sim_job(
job_id=1001,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L
),sim_job(
job_id=1002,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L,
dependency="afterok:1001"
),sim_job(
job_id=1003,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L
),sim_job(
job_id=1004,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L,
dependency="afterok:1002:1003"
),sim_job(
job_id=1005,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=24L,
tasks_per_node=12L,
dependency="afterok:1004"
)
)
#convert list of lists to data.frame
trace <- do.call(rbind, lapply(trace,data.frame))
write_trace(file.path(top_dir,"dependency_test.trace"),trace)
trace$sim_dependency<-""
write_trace(file.path(top_dir,"dependency_test_nodep.trace"),trace)
#features
trace <- list(
sim_job(
job_id=1001,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L,
features="IB&CPU-M"
),sim_job(
job_id=1002,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L,
features="IB&CPU-M"
),sim_job(
job_id=1003,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L,
features="IB&CPU-M"
),sim_job(
job_id=1004,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L,
features="CPU-M"
),sim_job(
job_id=1005,
submit="2016-10-01 00:11:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L
)
)
#convert list of lists to data.frame
trace <- do.call(rbind, lapply(trace,data.frame))
write_trace(file.path(top_dir,"features_test.trace"),trace)
#gres
trace <- list(
sim_job(
job_id=1001,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L
),sim_job(
job_id=1002,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L
),sim_job(
job_id=1003,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L
),sim_job(
job_id=1004,
submit="2016-10-01 00:01:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L,
gres = "gpu:2"
),sim_job(
job_id=1005,
submit="2016-10-01 00:11:00",
wclimit=300L,
duration=600L,
tasks=12L,
tasks_per_node=12L
)
)
#convert list of lists to data.frame
trace <- do.call(rbind, lapply(trace,data.frame))
write_trace(file.path(top_dir,"gres_test.trace"),trace)
# load RSlurmSimTools
library(RSlurmSimTools)
# change working directory to script location
#top_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
top_dir <- getwd() # use the current working directory
setwd(top_dir)
#library(ggplot2)
#library(scales)
#library(lubridate)
#library(stringr)
# This script generates a job trace with a large number of jobs.
# First, several job types are specified; jobs are then drawn at random from
# this job-type bank with randomized walltime limits and execution times.
#Big Memory Jobs Template
job_types__bigmem <- list(
sim_job( #big_mem_job
tasks = 1L,
tasks_per_node = 1L,
req_mem = 500000,
req_mem_per_cpu = 0L,
freq = 1/3
),
sim_job(#big_mem_job
tasks = 6L,
tasks_per_node = 6L,
req_mem = 500000,
req_mem_per_cpu = 0L,
freq = 1/3
),
sim_job(#big_mem_job
tasks = 12L,
tasks_per_node = 12L,
req_mem = 500000,
req_mem_per_cpu = 0L,
freq = 1/3
)
)
#GPU Jobs Template
job_types__gpu <- list(
sim_job( #gpu
tasks = 1L,
tasks_per_node = 1L,
gres = "gpu:1",
freq = 1/3
),
sim_job( #gpu
tasks = 2L,
tasks_per_node = 2L,
gres = "gpu:2",
freq = 1/3
),
sim_job( #gpu
tasks = 12L,
tasks_per_node = 12L,
gres = "gpu:2",
freq = 1/3
)
)
# General compute jobs with variable node count
job_types__gen_comp <- list(
sim_job( #arbitrary serial 1/2 for named resources 8/7 nodes/records * weights
tasks = 1L,
tasks_per_node = 1L,
freq = 0.2
),
sim_job( #arbitrary 6 cores
tasks = 6L,
tasks_per_node = 6L,
freq = 0.2
),
sim_job( #arbitrary single node
tasks = 12L,
tasks_per_node = 12L,
freq = 0.2
),
sim_job( #arbitrary 2 node
tasks = 24L,
tasks_per_node = 12L,
freq = 0.1
),
sim_job( #arbitrary 3 node
tasks = 36L,
tasks_per_node = 12L,
freq = 0.1
),
sim_job( #arbitrary 4 node
tasks = 48L,
tasks_per_node = 12L,
freq = 0.1
),
sim_job( #arbitrary 8 node
tasks = 96L,
tasks_per_node = 12L,
freq = 0.1
)
)
job_types__gen_comp <- lapply(job_types__gen_comp
, function(x){
x$freq <- 4*x$freq
x
})
# General compute jobs with variable node count and request for CPU-N
job_types__cpu_n <- list(
sim_job( #arbitrary serial 1/2 for named resources 8/6 nodes/records * weights
tasks = 1L,
tasks_per_node = 1L,
features = "CPU-N",
freq = 0.3
),
sim_job( #arbitrary 6 cores
tasks = 6L,
tasks_per_node = 6L,
features = "CPU-N",
freq = 0.2
),
sim_job( #arbitrary single node
tasks = 12L,
tasks_per_node = 12L,
features = "CPU-N",
freq = 0.2
),
sim_job( #arbitrary 2 node
tasks = 24L,
tasks_per_node = 12L,
features = "CPU-N",
freq = 0.1
),
sim_job( #arbitrary 3 node
tasks = 36L,
tasks_per_node = 12L,
features = "CPU-N",
freq = 0.1
),
sim_job( #arbitrary 4 node
tasks = 48L,
tasks_per_node = 12L,
features = "CPU-N",
freq = 0.1
)
)
job_types__cpu_n <- lapply(job_types__cpu_n, function(x){
x$freq <- 2*x$freq
x
})
# General compute jobs with variable node count and request for CPU-M
job_types__cpu_m <- list(
sim_job( #arbitrary serial 1/2 for named resources 8/6 nodes/records * weights
tasks = 1L,
tasks_per_node = 1L,
features = "CPU-M",
freq = 0.3
),
sim_job( #arbitrary 6 cores
tasks = 6L,
tasks_per_node = 6L,
features = "CPU-M",
freq = 0.2
),
sim_job( #arbitrary single node
tasks = 12L,
tasks_per_node = 12L,
features = "CPU-M",
freq = 0.2
),
sim_job( #arbitrary 2 node
tasks = 24L,
tasks_per_node = 12L,
features = "CPU-M",
freq = 0.1
),
sim_job( #arbitrary 3 node
tasks = 36L,
tasks_per_node = 12L,
features = "CPU-M",
freq = 0.1
),
sim_job( #arbitrary 4 node
tasks = 48L,
tasks_per_node = 12L,
features = "CPU-M",
freq = 0.1
)
)
job_types__cpu_m <- lapply(job_types__cpu_m
, function(x){
x$freq <- 2*x$freq
x
})
# Build the job bank from the job types specified above
job_types <- c(job_types__bigmem,job_types__gpu
,job_types__gen_comp
,job_types__cpu_n,job_types__cpu_m
)
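# With the weights above (group totals 1 : 1 : 4 : 2 : 2 after the multipliers),
# the job mix is roughly 10% big-memory, 10% GPU, 40% general compute,
# 20% CPU-N and 20% CPU-M jobs.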
# Normalize freq to prob for job distribution
prob <- sapply(job_types, function(x){x$freq})
prob <- prob / sum(prob)
# Set Number of Jobs
N <- 500
# Set the random seed
set.seed(20170318)
# Generate job trace
r<-sample(job_types, size = N, replace = TRUE, prob = prob)
# Convert to data.frame
trace <- do.call(rbind, lapply(r,data.frame))
# Set proper users and accounts
users <- list(
list("user1","account1"),
list("user2","account1"),
list("user3","account1"),
list("user4","account2"),
list("user5","account2")
)
ua <- sample(users, size = N, replace = TRUE)
trace$sim_username <- sapply(ua,function(x){x[[1]]})
trace$sim_account <- sapply(ua,function(x){x[[2]]})
# Set walltime limits (in minutes) and duration (in seconds)
true_min <- 5L
true_max <- 30L
wclimit <- as.integer(runif(N,min=true_min-2L,max=true_max+2L))
wclimit[wclimit<true_min] <- true_min
wclimit[wclimit>true_max] <- true_max
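# Actual runtime is modeled as a random fraction of the walltime limit;
# drawing from [-0.2, 1.2] and clipping to [0, 1] produces some jobs that
# finish almost immediately and some that run up to their full limit.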
duration_factor <- runif(N,min=-0.2,max=1.2)
duration_factor[duration_factor<0.0] <- 0.0
duration_factor[duration_factor>1.0] <- 1.0
duration <- as.integer(round(duration_factor*wclimit*60.0))
trace$sim_wclimit <- wclimit
trace$sim_duration <- duration
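# Rough load estimate: total core-hours in the trace divided by 120
# (presumably the core count of the simulated cluster), i.e. the hours
# needed to run the whole trace at full utilization.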
sum(trace$sim_duration*trace$sim_tasks)/3600.0/120.0
# Set submit time
t0 <- as.POSIXct("2017-01-01 00:00:00")
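# submit times are spread uniformly over a 7-hour window starting at t0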
submit <- as.integer(runif(N,min=0,max=7*3600))+t0
trace$sim_submit <- submit
trace$sim_submit_ts <- as.integer(submit)
# Sort by submit time
trace<-trace[order(trace$sim_submit_ts),]
# Generate job ids
trace$sim_job_id <- 1:N + 1000L
#write job trace for Slurm Simulator
write_trace(file.path(top_dir,"test.trace"),trace)
#write job trace as csv for future reference
write.csv(trace,"test_trace.csv")
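# Optional quick sanity check: base-R summaries of the generated workload,
# useful when tuning the job-type frequencies and walltime settings above.
print(table(trace$sim_tasks))            # job-size distribution (tasks per job)
print(summary(trace$sim_wclimit))        # requested walltime limits, minutes
print(summary(trace$sim_duration/60.0))  # actual runtimes, minutes
print(range(trace$sim_submit))           # submit-time window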
---
title: "Slurm Simulator: Installation"
output:
pdf_document:
latex_engine: xelatex
monofont: "DejaVu Sans Mono"
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
This installation guide was tested on a fresh installation of CentOS 7 (CentOS-7-x86_64-DVD-1611.iso, KDE Plasma Workspaces with Development Tools).
Since there are many absolute paths in slurm.conf, it can be helpful to create a separate user named *slurm* and use it to run the Slurm Simulator.
The idea of the simulator is to run it with various configurations, compare them, and choose the one that best suits a particular situation. Because of this, there will be multiple configurations with multiple outcomes, so it is convenient to keep the Slurm binaries separate from the configuration and logs of each simulated run. The following directory structure is recommended and used here (the respective directories will be created at the appropriate steps during the tutorial):
```{bash, eval=FALSE}
/home/slurm - Slurm user home directory
└── slurm_sim_ws - Slurm simulator workspace
    ├── bld_opt - Slurm simulator build directory
    ├── sim - Directory where simulations will be performed
    │   └── <system name> - Directory where simulation of a particular system will be performed
    │       └── <conf name> - Directory where simulation of a particular configuration will be performed
    │           ├── etc - Directory with configuration
    │           ├── log - Directory with logs
    │           └── var - Directory with various Slurm output
    ├── slurm_opt - Slurm simulator binary installation directory
    ├── slurm_sim_tools - Slurm simulator toolkit
    └── slurm_simulator - Slurm simulator source code
```
# Installing Dependencies
## Slurm Simulator Dependencies
### Install MySQL (MariaDB in this case)
Install mariadb server and devel packages:
```{bash, eval=FALSE}
sudo yum install mariadb-server
sudo yum install mariadb-devel
```
Enable and start mariadb server:
```{bash, eval=FALSE}
sudo systemctl enable mariadb
sudo systemctl start mariadb
```
Optionally, run mysql_secure_installation for a more secure installation. If the SQL server is not accessible from the outside, it is OK to skip this step:
```{bash, eval=FALSE}
sudo mysql_secure_installation
```
Add a slurm user to the SQL server by running the following in mysql:
```{sql, eval=FALSE}
create user 'slurm'@'localhost' identified by 'slurm';
grant all privileges on *.* to 'slurm'@'localhost' with grant option;
```
## Slurm Simulator Toolkit Dependencies
### Python
Install python3 with pymysql and pandas packages:
```{bash, eval=FALSE}
sudo yum -y install epel-release
sudo yum -y install python34 python34-libs python34-devel python34-numpy python34-scipy python34-pip
sudo pip3 install pymysql
sudo pip3 install pandas
```
### R
Install R:
```{bash, eval=FALSE}
sudo yum -y install R R-Rcpp R-Rcpp-devel
sudo yum -y install python-devel
sudo yum install texlive-*
```
Install R-Studio:
```{bash, eval=FALSE}
wget https://download1.rstudio.org/rstudio-1.0.136-x86_64.rpm
sudo yum -y install rstudio-1.0.136-x86_64.rpm
```
In RStudio or plain R, install the required packages:
```{r, eval=FALSE}
install.packages("ggplot2")
install.packages("gridExtra")
install.packages("cowplot")
install.packages("lubridate")
install.packages("rPython")
install.packages("stringr")
install.packages("rstudioapi")
# install R Slurm Simulator Toolkit
install.packages("/home/slurm/slurm_sim_ws/slurm_sim_tools/src/RSlurmSimTools", repos = NULL, type="source")
```
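To verify that the toolkit was installed correctly, load it in R (a minimal check; the path above is assumed to match your checkout location):
```{r, eval=FALSE}
library(RSlurmSimTools)
# list the exported functions, e.g. sim_job and write_trace
ls("package:RSlurmSimTools")
```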
# Preparing Slurm Simulator Workspace
Create a workspace for Slurm simulation activities:
```{bash, eval=FALSE}
cd
mkdir slurm_sim_ws
cd slurm_sim_ws
#create directory for simulations
mkdir sim
```
# Installing Slurm Simulator
Obtain Slurm Simulator source code with git:
```{bash, eval=FALSE}
git clone https://github.com/nsimakov/slurm_simulator.git
cd slurm_simulator
```
Ensure that the slurm-17.02_Sim branch is checked out:
```{bash, eval=FALSE}
git branch
```
```
Output:
* slurm-17.02_Sim
```
If it is not, check out the proper branch:
```{bash, eval=FALSE}
git fetch
git checkout slurm-17.02_Sim
```
Prepare the build directory:
```{bash,eval=FALSE}
cd ..
mkdir bld_opt
cd bld_opt
```
Run configure:
```{bash,eval=FALSE}
../slurm_simulator/configure --prefix=/home/slurm/slurm_sim_ws/slurm_opt --enable-simulator \
--enable-pam --without-munge --enable-front-end --with-mysql-config=/usr/bin/ --disable-debug \
CFLAGS="-g -O3 -D NDEBUG=1"
```