In data pipeline systems and configuration management systems, it is very common to need to execute a batch of jobs that have dependencies on each other.
Write a program, pipeline_runner, that executes a list of shell scripts. The definitions of those scripts and their dependencies are described in a JSON file. The program takes exactly one argument: the path of the JSON file that defines the jobs.
#---------------------------- user check ---------------------start
# Abort unless the script is being run by the account named in $runuser.
# NOTE(review): runuser and this_file are not set anywhere in the visible
# part of the file; they are assumed to be defined before this point.
if [[ "$(whoami)" != "$runuser" ]]; then
  # Diagnostic goes to stderr so it is not mixed into captured output.
  echo "Please re-run ${this_file} as $runuser." >&2
  exit 1
fi
#---------------------------- user check ---------------------end
#---------------------------- function ---------------------start
#######################################
# Block until a single key is read from standard input.
# Arguments: none
# Outputs:   the prompt "Press any key to continue..." (bash only shows
#            the -p prompt when stdin is a terminal)
# Returns:   exit status of `read` (non-zero on end-of-file)
#######################################
pause() {
  # -r: treat a backslash as an ordinary key; without it a backslash is
  #     consumed as an escape character and a second key press is needed.
  # -n1: return after exactly one character, no Enter required.
  read -r -n1 -p "Press any key to continue..."
}
#######################################
# Stop the whole program early when the combined output already exists.
# Globals:   log_combined_output (read) - path of the combined output file
# Arguments: none
# Outputs:   a notice on stdout when the file is present
# Returns:   returns normally when the file is missing; otherwise calls
#            `exit 0`, terminating the calling shell
#######################################
log_combined_check_first() {
  if [[ -f "$log_combined_output" ]]; then
    echo "${log_combined_output} has been generated, the programe will exit"
    exit 0
  fi
}
#######################################
# Run the log0 "compressed" job unless its output file already exists.
# Globals:   log0_compressed_output   (read) - path tested with -f
#            log0_compressed_commands (read) - command string to evaluate
# Arguments: none
# Returns:   0 when the job is skipped, otherwise the status of the
#            evaluated command string
#######################################
log0_compressed_check() {
  if [[ ! -f "$log0_compressed_output" ]]; then
    # Quoted so the command string reaches eval unchanged (no word
    # splitting or globbing before evaluation).
    # NOTE(review): eval executes arbitrary text; the command string
    # presumably comes from the jobs JSON - confirm that file is trusted.
    eval "${log0_compressed_commands}"
  fi
}
#######################################
# Run the log0 job unless its output file already exists.
# Globals:   log0_output   (read) - path tested with -f
#            log0_commands (read) - command string to evaluate
# Arguments: none
# Returns:   0 when the job is skipped, otherwise the status of the
#            evaluated command string
#######################################
log0_check() {
  if [[ ! -f "$log0_output" ]]; then
    # Quoted so the command string reaches eval unchanged (no word
    # splitting or globbing before evaluation).
    # NOTE(review): eval executes arbitrary text; the command string
    # presumably comes from the jobs JSON - confirm that file is trusted.
    eval "${log0_commands}"
  fi
}
#######################################
# Run the log1 "compressed" job unless its output file already exists.
# Globals:   log1_compressed_output   (read) - path tested with -f
#            log1_compressed_commands (read) - command string to evaluate
# Arguments: none
# Returns:   0 when the job is skipped, otherwise the status of the
#            evaluated command string
#######################################
log1_compressed_check() {
  if [[ ! -f "$log1_compressed_output" ]]; then
    # Quoted so the command string reaches eval unchanged (no word
    # splitting or globbing before evaluation).
    # NOTE(review): eval executes arbitrary text; the command string
    # presumably comes from the jobs JSON - confirm that file is trusted.
    eval "${log1_compressed_commands}"
  fi
}
#######################################
# Run the log1 job unless its output file already exists.
# Globals:   log1_output   (read) - path tested with -f
#            log1_commands (read) - command string to evaluate
# Arguments: none
# Returns:   0 when the job is skipped, otherwise the status of the
#            evaluated command string
#######################################
log1_check() {
  if [[ ! -f "$log1_output" ]]; then
    # Quoted so the command string reaches eval unchanged (no word
    # splitting or globbing before evaluation).
    # NOTE(review): eval executes arbitrary text; the command string
    # presumably comes from the jobs JSON - confirm that file is trusted.
    eval "${log1_commands}"
  fi
}
#######################################
# Run the combining job unless the combined output file already exists,
# then announce the result.
# Globals:   log_combined_output   (read) - path tested with -f
#            log_combined_commands (read) - command string to evaluate
# Arguments: none
# Outputs:   a notice on stdout after the command string has been run
# Returns:   0 when the job is skipped, otherwise the status of `echo`
#######################################
log_combined_check() {
  if [[ ! -f "$log_combined_output" ]]; then
    # Quoted so the command string reaches eval unchanged (no word
    # splitting or globbing before evaluation).
    # NOTE(review): eval executes arbitrary text; the command string
    # presumably comes from the jobs JSON - confirm that file is trusted.
    eval "${log_combined_commands}"
    # Printed regardless of the eval status, as in the original.
    echo "${log_combined_output} has been generated, the programe will exit"
  fi
}
#---------------------------- function ---------------------end
#---------------------------- main ---------------------start
# Show the usage notes, then wait for a key press before doing any work.
# NOTE(review): the pasted source had this message collapsed onto one
# line; the line breaks below are restored from its [0]..[3] item
# markers - confirm against the original layout.
printf '%s\n' \
  "" \
  "Please read first:" \
  "[0]Check jobs.json and jq by yourself first" \
  "[1]A job will only be executed if all its input files exist." \
  "[2]A job can have multiple input files (or none) but only produce one output file." \
  "[3]Users could run the program multiple times, but if a job's output file already exists, the program would skip the job." \
  ""
pause

#check if file exist and do the job
# Exits the program right away when the combined output is already there.
log_combined_check_first
# One call per line: written as "a b", the second name would be passed
# to the first function as an argument and never be run.
log0_compressed_check
log0_check
log1_compressed_check
log1_check
log_combined_check
#---------------------------- main ---------------------end