Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long();
use POSIX qw(strftime);
sub usage($);
sub version($);
sub createBatchJob($$);
sub preparePath($$);
sub printSBatchHeader($$$$$$);
sub printSBatchScript($$$$$);
MAIN:
{
# define inputs
my $version_tag = "version 1.0, October 2018";
my $version;
my $help;
my $path_fastq;
my $path_genome_index;
my $file_genome_annotation;
my $path_output;
my $opts;
my $sbatch_nodes = 1;
my $sbatch_partition = "cuttlefish";
my $sbatch_time = "100:00:00";
my $sbatch_ntasks = 1;
my $sbatch_cpus = 16;
my $sbatch_name = "braintest";
# set-up paramters
Getopt::Long::GetOptions(
"f|fastq=s" => \$path_fastq,
"o|output-dir=s" => \$path_output,
"g|genome=s" => \$path_genome_index,
"a|annotation=s" => \$file_genome_annotation,
"x|opts=s" => \$opts,
"n|nodes=i" => \$sbatch_nodes,
"p|partition=s" => \$sbatch_partition,
"t|time=s" => \$sbatch_time,
"T|ntasks=i" => \$sbatch_ntasks,
"c|cpus=i" => \$sbatch_cpus,
"l|name=s" => \$sbatch_name,
"h|help" => \$help,
"v|version" => \$version
) or usage("Error::invalid command line options");
# parse inputs
version($version_tag) if($version);
usage($version_tag) if($help);
usage("Error::path to FastQ files is required") unless defined($path_fastq);
usage("Error::path to output location is required") unless defined($path_output);
usage("Error::path to genome index is required") unless defined($path_genome_index);
usage("Error::genome annotation file is required") unless defined($file_genome_annotation);
# create batch job
my $batch_job = createBatchJob($path_fastq, $path_output);
# print SBatch header
printSBatchHeader($sbatch_nodes,
$sbatch_partition,
$sbatch_time,
$sbatch_ntasks,
$sbatch_cpus,
$sbatch_name);
# change working directory
print "cd $path_output\n\n";
# create sbatch script
printSBatchScript($batch_job, $path_genome_index, $file_genome_annotation, $sbatch_cpus, $opts);
}
sub printSBatchScript($$$$$)
{
my $batch_job = $_[0];
my $path_genome_index = $_[1];
my $file_genome_annotation = $_[2];
my $cpus = $_[3];
my $opts = $_[4];
my $command = "STAR" .
" --runMode alignReads" .
" --genomeDir $path_genome_index" .
" --readFilesCommand zcat" .
" --outStd Log" .
" --outSAMtype BAM SortedByCoordinate" .
" --outSAMstrandField intronMotif" .
" --outFilterIntronMotifs RemoveNoncanonical" .
" --alignSoftClipAtReferenceEnds No" .
" --outFilterScoreMinOverLread 0.25" .
" --outFilterMatchNminOverLread 0.25";
foreach my $tag (sort keys %{$batch_job->{"file"}{"fastq"}})
{
my $file_fastq = $batch_job->{"file"}{"fastq"}{$tag};
my $file_bam = $batch_job->{"path"}{"bam"} . "/" . $batch_job->{"file"}{"bam"}{$tag};
my $file_log = $batch_job->{"path"}{"log"} . "/" . $batch_job->{"file"}{"log"}{$tag};
print "echo $tag\n";
print $command, " --readFilesIn ", $file_fastq, "\n";
print "mv Aligned.sortedByCoord.out.bam ", $file_bam,"\n";
print "mv Log.final.out ",$file_log,"\n";
print "samtools index ", $file_bam,"\n";
print "rm -f SJ.out.tab\n";
print "rm -f Log.*out\n";
print "\n";
}
# count command
my $file_count = $batch_job->{"path"}{"count"} . "/" . $batch_job->{"file"}{"count"};
$command = "featureCounts -T $cpus -t exon -g gene_id -Q 255 -a " . $file_genome_annotation . " -o " . $file_count . " \$(ls " . $batch_job->{"path"}{"bam"} . "/*.bam)";
print "echo featureCounts\n";
print $command,"\n";
}
sub printSBatchHeader($$$$$$)
{
my $nodes = $_[0];
my $partition = $_[1];
my $time = $_[2];
my $ntasks = $_[3];
my $cpus = $_[4];
my $name = $_[5];
# print header
print "#!/bin/bash\n";
print "\n";
print "#SBATCH --nodes=$nodes\n";
print "#SBATCH --partition=$partition\n";
print "#SBATCH --time=$time\n";
print "#SBATCH --ntasks=$ntasks\n";
print "#SBATCH --cpus-per-task=$cpus\n";
print "#SBATCH --job-name=$name\n";
print "#SBATCH --error=error_\%j.out\n";
print "#SBATCH --output=output_\%j.out\n";
print "\n";
print "echo \$SLURM_SUBMIT_DIR\n";
print "echo \"Running on \`hostname\`\"\n";
print "\n";
}
sub createBatchJob($$)
{
my $path_fastq = $_[0];
my $path_output = $_[1];
my %job = ();
# clean trailing separator
$path_fastq =~ s/\/$//g;
$path_output =~ s/\/$//g;
# generate bam and log output paths
$job{"path"}{"fastq"} = $path_fastq;
$job{"path"}{"bam"} = preparePath($path_output, "bam");
$job{"path"}{"log"} = preparePath($path_output, "log");
$job{"path"}{"count"} = preparePath($path_output, "count");
my @files_fastq = glob($path_fastq . "/*.fa*.gz");
if (!@files_fastq)
{
print STDERR "Error::could not retrieve FASTQ files.\n";
die("\n");
}
foreach my $file_fastq (@files_fastq)
{
my $file_name = ($file_fastq =~ m/.*fastq\/(.*)\.fa.*\.gz$/) ? $1 : "unknown";
if ($file_name eq "unknown")
{
print STDERR "Error::could not retrieve FASTQ file name $file_fastq.\n";
die("\n");
}
$job{"file"}{"fastq"}{$file_name} = $file_fastq;
$job{"file"}{"bam"}{$file_name} = $file_name . ".bam";
$job{"file"}{"log"}{$file_name} = $file_name . "_starlog.txt";
}
my $datestring = strftime('%Y%m%d',localtime);
$job{"file"}{"count"} = "counts_" . $datestring . ".txt";
return \%job;
}
sub preparePath($$)
{
my $path = $_[0];
my $tag = $_[1];
$path =~ s/\/$//g;
my $path_new = $path . '/' . $tag;
if (-d $path_new)
{
print STDERR "Warning: $path_new already exists.\n";
}
else
{
mkdir $path_new;
}
return $path_new;
}
sub usage($)
{
my $message = $_[0];
if (defined $message && length($message))
{
$message .= "\n" unless($message =~/\n$/);
}
my $command = $0;
$command =~ s#^.*/##;
print STDERR (
$message,
"usage: $command\n" .
"-f|--fastq\n" .
"\t path to FastQ files (required)\n" .
"-o|--output-dir\n" .
"\t path to output directory (required)\n" .
"-g|--genome\n" .
"\t path to genome index (required)\n" .
"-p|--opts\n" .
"\t additional Cellranger Count parameters\n" .
"-h|--help\n" .
"\t print help message\n" .
"-v|--version\n" .
"\t print current version\n"
);
die("\n");
}
sub version($)
{
my $message = $_[0];
print STDERR (
$message,"\n",
"Scientific Computing Facility\n",
"Max-Planck Institute For Brain Research\n",
"bug reports to:\n",
"\tsciclist\@brain.mpg.de\n"
);
die("\n");
}