use warnings;
use strict;
use Getopt::Long();
sub usage($);
sub version($);
sub createBatchJob($);
sub printSBatchHeader($$$$$$);
sub printSBatchScript($$$$$);
# ---------------------------------------------------------------------------
# Main script: parse the command line, check the required paths were given,
# then write a complete SLURM batch script to STDOUT (header, change of
# working directory, and one cellranger command block per sample).
# ---------------------------------------------------------------------------

# define inputs
my $version_tag = "version 1.0, October 2018";
my $version;
my $help;
my $path_fastq;
my $path_genome_index;
my $path_output;
my $opts;

# SBATCH defaults; each one can be overridden from the command line
my $sbatch_nodes = 1;
my $sbatch_partition = "cuttlefish";
my $sbatch_time = "100:00:00";
my $sbatch_ntasks = 1;
my $sbatch_cpus = 32;
my $sbatch_name = "braintest";

# set-up parameters
# Getopt::Long matches option names case-insensitively by default, which
# folds "T|ntasks" onto "t|time": -t would then be handled as the integer
# ntasks option and reject a value such as 100:00:00. Matching case exactly
# keeps the two single-letter options distinct.
Getopt::Long::Configure("no_ignore_case");
Getopt::Long::GetOptions(
    "f|fastq=s"      => \$path_fastq,
    "o|output-dir=s" => \$path_output,
    "g|genome=s"     => \$path_genome_index,
    "x|opts=s"       => \$opts,
    "n|nodes=i"      => \$sbatch_nodes,
    "p|partition=s"  => \$sbatch_partition,
    "t|time=s"       => \$sbatch_time,
    "T|ntasks=i"     => \$sbatch_ntasks,
    "c|cpus=i"       => \$sbatch_cpus,
    "l|name=s"       => \$sbatch_name,
    "h|help"         => \$help,
    "v|version"      => \$version
) or usage("Error::invalid command line options");

# parse inputs
version($version_tag) if($version);
usage($version_tag) if($help);
usage("Error::path to FastQ files is required") unless defined($path_fastq);
usage("Error::path to output location is required") unless defined($path_output);
usage("Error::path to genome index is required") unless defined($path_genome_index);

# create batch job
my $batch_job = createBatchJob($path_fastq);

# print SBatch header
printSBatchHeader($sbatch_nodes, $sbatch_partition, $sbatch_time, $sbatch_ntasks, $sbatch_cpus, $sbatch_name);

# change working directory
print "cd $path_output\n\n";

# create sbatch script
printSBatchScript($batch_job, $path_output, $path_genome_index, $sbatch_cpus, $opts);

# printSBatchScript($batch_job, $path_output, $path_genome_index, $sbatch_cpus, $opts)
#
# Prints the body of the batch script to STDOUT. For every sample id in
# $batch_job (processed in sorted order) it emits:
#   1. an "echo <id>" line,
#   2. a "cellranger count" command using the id as both --id and --sample,
#   3. a shell block that moves the result directory <id> into $path_output
#      if that directory exists.
#
# Parameters:
#   $batch_job         - hash reference mapping sample id => FastQ directory
#   $path_output       - output directory; one trailing slash is removed
#   $path_genome_index - value passed to --transcriptome
#   $sbatch_cpus       - accepted for interface compatibility; not used in
#                        the generated commands
#   $opts              - optional extra cellranger arguments, appended
#                        verbatim when defined
#
# NOTE(review): ids and paths are written into the shell text unquoted, so
# values containing whitespace or shell metacharacters will not survive.
sub printSBatchScript($$$$$) {
    my ($batch_job, $path_output, $path_genome_index, $sbatch_cpus, $opts) = @_;

    # avoid a double slash when the target path is assembled below
    $path_output =~ s{/$}{};

    foreach my $id (sort keys %{$batch_job}) {
        my $command = "cellranger count" .
                      " --id=" . $id .
                      " --fastqs=" . $batch_job->{$id} .
                      " --transcriptome=" . $path_genome_index .
                      " --sample=" . $id;
        $command .= " " . $opts if (defined($opts));

        # execute cellranger command
        print "echo $id\n";
        print $command, "\n";

        # move result to output folder
        # "[" needs a space before the closing "]", and the conditional
        # has to be terminated with "fi" to be valid shell.
        $command = "if [ -d $id ]; then\n" .
                   "\tmv $id $path_output/$id\n" .
                   "fi\n";
        print $command, "\n";
    }

    return;
}
# printSBatchHeader($nodes, $partition, $time, $ntasks, $cpus, $name)
#
# Prints the top of the batch script to STDOUT: the bash shebang, one
# "#SBATCH" directive per argument, fixed --error/--output file patterns
# (error_%j.out / output_%j.out), and two echo lines reporting
# $SLURM_SUBMIT_DIR and the host name.
#
# Parameters (each is written verbatim into its directive):
#   $nodes     - value for --nodes
#   $partition - value for --partition
#   $time      - value for --time
#   $ntasks    - value for --ntasks
#   $cpus      - value for --cpus-per-task
#   $name      - value for --job-name
sub printSBatchHeader($$$$$$) {
    my ($nodes, $partition, $time, $ntasks, $cpus, $name) = @_;

    # print header
    print "#!/bin/bash\n";
    print "\n";
    print "#SBATCH --nodes=$nodes\n";
    print "#SBATCH --partition=$partition\n";
    print "#SBATCH --time=$time\n";
    print "#SBATCH --ntasks=$ntasks\n";
    print "#SBATCH --cpus-per-task=$cpus\n";
    print "#SBATCH --job-name=$name\n";
    print "#SBATCH --error=error_\%j.out\n";
    print "#SBATCH --output=output_\%j.out\n";
    print "\n";

    # the dollar sign and backticks are escaped so they reach the generated
    # script untouched and are evaluated when the job runs
    print "echo \$SLURM_SUBMIT_DIR\n";
    print "echo \"Running on \`hostname\`\"\n";
    print "\n";

    return;
}
# createBatchJob($path_fastq)
#
# Scans $path_fastq and returns a hash reference mapping the name of each
# immediate sub-directory to its full path ("$path_fastq/<name>"). Plain
# files and entries whose name starts with a dot (including "." and "..")
# are skipped. One trailing slash on $path_fastq is removed first.
#
# Dies if the directory cannot be opened, so that a mistyped path does not
# silently produce a batch script with no jobs in it.
sub createBatchJob($) {
    my $path_fastq = $_[0];
    my %tags = ();

    $path_fastq =~ s{/$}{};

    opendir(my $dh, $path_fastq)
        or die "Error::cannot open FastQ directory '$path_fastq': $!\n";

    # explicit defined() so an entry named "0" cannot end the loop early
    while (defined(my $path_tag = readdir($dh))) {
        my $path_full = $path_fastq . '/' . $path_tag;
        next unless -d $path_full;
        next if ($path_tag =~ m/^\./);
        $tags{$path_tag} = $path_full;
    }

    closedir($dh);

    return \%tags;
}
# usage($message)
#
# Prints $message (with a newline appended if it lacks one) followed by
# the option summary to STDERR, then terminates by die-ing with a bare
# newline, so no "at ... line N" suffix is added. Callers rely on this
# sub not returning.
#
# Parameters:
#   $message - text shown above the option summary; may be undef or empty,
#              in which case only the summary is printed
sub usage($) {
    my $message = $_[0];

    # normalise so the print below never sees undef
    $message = '' unless defined($message);
    $message .= "\n" if (length($message) && $message !~ /\n$/);

    # show the script name without its directory part
    my $command = $0;
    $command =~ s#^.*/##;

    print STDERR (
        $message,
        "usage: $command\n" .
        "-f|--fastq\n" .
        "\t path to FastQ files (required)\n" .
        "-o|--output-dir\n" .
        "\t path to output directory (required)\n" .
        "-g|--genome\n" .
        "\t path to genome index (required)\n" .
        "-x|--opts\n" .
        "\t additional Cellranger Count parameters\n" .
        "-n|--nodes\n" .
        "\t SBATCH number of nodes\n" .
        "-p|--partition\n" .
        "\t SBATCH partition\n" .
        "-t|--time\n" .
        "\t SBATCH time limit\n" .
        "-T|--ntasks\n" .
        "\t SBATCH number of tasks\n" .
        "-c|--cpus\n" .
        "\t SBATCH cpus per task\n" .
        "-l|--name\n" .
        "\t SBATCH job name\n" .
        "-h|--help\n" .
        "\t print help message\n" .
        "-v|--version\n" .
        "\t print current version\n"
    );

    die("\n");
}
sub version($)
my $message = $_[0];
print STDERR (
"Scientific Computing Facility\n",
"Max-Planck Institute For Brain Research\n",
"bug reports to:\n",