From 2122bf9ea64c6b305f2d9b5c01f72d04ac243ac0 Mon Sep 17 00:00:00 2001
From: Georgi Tushev
Date: Sun, 4 Nov 2018 22:37:57 +0100
Subject: [PATCH] add cellranger batch

---
Review notes (this area is not part of the commit message):
- printSBatchScript now emits "if [ -d $id ]; then". The earlier text had no
  space before "]", so the closing bracket was glued to the sample id, the
  "[" test in the generated script had no terminating "]" argument, and the
  "mv" of the result folder into the output directory was never reached.
- Line breaks of this patch were restored from a whitespace-collapsed copy;
  indentation and blank context lines are best-effort and should be checked
  against the target file (hunk line counts match the @@ headers).

 batchCellrangerCounter.pl | 88 +++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 18 deletions(-)

diff --git a/batchCellrangerCounter.pl b/batchCellrangerCounter.pl
index ab1d507..de56718 100644
--- a/batchCellrangerCounter.pl
+++ b/batchCellrangerCounter.pl
@@ -8,7 +8,8 @@
 
 sub version($);
 sub createBatchJob($);
-sub runCellranger($$$$);
+sub printSBatchHeader($$$$$$);
+sub printSBatchScript($$$$$);
 
 MAIN:
 {
@@ -19,14 +20,26 @@
     my $path_fastq;
     my $path_genome_index;
     my $path_output;
-    my $opts_cellranger;
+    my $opts;
+    my $sbatch_nodes = 1;
+    my $sbatch_partition = "cuttlefish";
+    my $sbatch_time = "100:00:00";
+    my $sbatch_ntasks = 1;
+    my $sbatch_cpus = 32;
+    my $sbatch_name = "braintest";
 
     # set-up paramters
     Getopt::Long::GetOptions(
         "f|fastq=s" => \$path_fastq,
         "o|output-dir=s" => \$path_output,
         "g|genome=s" => \$path_genome_index,
-        "p|opts=s" => \$opts_cellranger,
+        "x|opts=s" => \$opts,
+        "n|nodes=i" => \$sbatch_nodes,
+        "p|partition=s" => \$sbatch_partition,
+        "t|time=s" => \$sbatch_time,
+        "T|ntasks=i" => \$sbatch_ntasks,
+        "c|cpus=i" => \$sbatch_cpus,
+        "l|name=s" => \$sbatch_name,
         "h|help" => \$help,
         "v|version" => \$version
     ) or usage("Error::invalid command line options");
@@ -41,18 +54,32 @@
     # create batch job
     my $batch_job = createBatchJob($path_fastq);
 
-    # run cellranger count
-    runCellranger($batch_job, $path_output, $path_genome_index, $opts_cellranger);
+    # print SBatch header
+    printSBatchHeader($sbatch_nodes,
+                      $sbatch_partition,
+                      $sbatch_time,
+                      $sbatch_ntasks,
+                      $sbatch_cpus,
+                      $sbatch_name);
+
+    # change working directory
+    print "cd $path_output\n\n";
+
+    # create sbatch script
+    printSBatchScript($batch_job, $path_output, $path_genome_index, $sbatch_cpus, $opts);
 
 }
 
 
-sub runCellranger($$$$)
+sub printSBatchScript($$$$$)
 {
     my $batch_job = $_[0];
     my $path_output = $_[1];
     my $path_genome_index = $_[2];
-    my $opts_cellranger = $_[3];
+    my $sbatch_cpus = $_[3];
+    my $opts = $_[4];
+
+    $path_output =~ s/\/$//g;
 
     foreach my $id (sort keys %{$batch_job})
     {
@@ -61,22 +88,48 @@ ($$$$)
         " --fastqs=" . $batch_job->{$id} .
         " --transcriptome=" . $path_genome_index .
         " --sample=" . $id;
-        $command .= " " . $opts_cellranger if (defined($opts_cellranger));
+        $command .= " " . $opts if (defined($opts));
+
         # execute cellranger command
+        print "echo $id\n";
         print $command,"\n";
-        system($command);
-
+
         # move result to output folder
-        if (-d $id)
-        {
-            $path_output =~ s/\/$//g;
-            $command = "mv " . $id . " " . $path_output . '/' . $id;
-            print $command,"\n";
-            system($command);
-        }
+        $command = "if [ -d $id ]; then\n" .
+                   "\tmv $id $path_output/$id\n" .
+                   "fi\n";
+        print $command,"\n";
     }
 }
 
+
+sub printSBatchHeader($$$$$$)
+{
+    my $nodes = $_[0];
+    my $partition = $_[1];
+    my $time = $_[2];
+    my $ntasks = $_[3];
+    my $cpus = $_[4];
+    my $name = $_[5];
+
+    # print header
+    print "#!/bin/bash\n";
+    print "\n";
+    print "#SBATCH --nodes=$nodes\n";
+    print "#SBATCH --partition=$partition\n";
+    print "#SBATCH --time=$time\n";
+    print "#SBATCH --ntasks=$ntasks\n";
+    print "#SBATCH --cpus-per-task=$cpus\n";
+    print "#SBATCH --job-name=$name\n";
+    print "#SBATCH --error=error_\%j.out\n";
+    print "#SBATCH --output=output_\%j.out\n";
+    print "\n";
+    print "echo \$SLURM_SUBMIT_DIR\n";
+    print "echo \"Running on \`hostname\`\"\n";
+    print "\n";
+}
+
+
 sub createBatchJob($)
 {
     my $path_fastq = $_[0];
@@ -87,7 +140,6 @@ ($)
     opendir(my $dh, $path_fastq);
     while (my $path_tag = readdir $dh)
     {
-
         my $path_full = $path_fastq . '/' . $path_tag;
         next unless -d $path_full;
         next if ($path_tag =~ m/^\./);