Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
################################################################################
## Download competition data for the
## ENCODE-DREAM in vivo Transcription Factor Binding Site Prediction Challenge
################################################################################
import shutil
import synapseclient
from synapseclient import Project, Folder, File
import sys
import os
# Resolve the destination directory first, before any network activity, so a
# missing setting fails immediately with a readable message.
# The directory is taken from the first command-line argument when one is
# given; otherwise from the DREAM_DATA environment variable.
# The trailing "/" is kept so the value can be used directly as a path prefix.
if len(sys.argv) > 1:
    datadir = sys.argv[1] + "/"
elif "DREAM_DATA" in os.environ:
    datadir = os.environ["DREAM_DATA"] + "/"
else:
    sys.exit("No data directory given: pass it as the first argument "
             "or set the DREAM_DATA environment variable.")

syn = synapseclient.Synapse()
# If you haven't set up a .synapseConfig file, you'll have to supply credentials
syn.login()
# Single parenthesised string: prints identically under Python 2 and Python 3.
print("Make sure you've accepted the terms of use before running this script!")
# -------------------------------------------------------------------------------
# All Challenge Data available for download:
# (* indicates data from the 'Essential Data Collection' - see https://www.synapse.org/#!Synapse:syn6131484/wiki/402033 )
# -------------------------------------------------------------------------------
# * ChIPseq fold_change_signal = syn6181334
# * ChIPseq labels = syn6181335
# * ChIPseq peaks conservative = syn6181337
# * ChIPseq peaks relaxed = syn6181338
# DNASE bams = syn6176232
# * DNASE fold_coverage_wiggles = syn6176233
# * DNASE peaks conservative = syn6176235
# * DNASE peaks relaxed = syn6176236
# * RNAseq = syn6176231
# * annotations = syn6184307
# -------------------------------------------------------------------------------
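# As written, this script downloads only the folders listed in `folders` below,
# which is not exactly the Essential Data Collection (e.g. the DNASE bams are
# included, while the ChIPseq/DNASE peak folders are not).
# MODIFY THE DICT BELOW TO INCLUDE THE SYNAPSE IDS OF THE DATA TYPES YOU WANT TO
# DOWNLOAD; each ID maps to the sub-directory its files are copied into.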
# Synapse folder ID -> sub-directory (relative to the data directory) that the
# folder's files are copied into.
folders = {
    # ChIP-seq
    'syn7413983': 'ChIPseq/labels',
    'syn6181334': 'ChIPseq/fold_change_signal',
    # DNase
    'syn6176233': 'essential_training_data/DNASE/fold_coverage_wiggles',
    'syn6176232': 'essential_training_data/DNASE/bams',
    # RNA-seq
    'syn6176231': 'RNAseq/',
    # Annotations
    'syn6184307': 'annotations/',
    # ChIP-seq data for the within-celltype phase
    'syn8077511': 'ChIPseq/within_cell/',
    # ChIP-seq data (post-challenge release)
    'syn8442975': 'ChIPseq/heldout_celltypes/',
    'syn8441154': 'ChIPseq/heldout_chr/',
}
# Download every file of every configured Synapse folder into its own
# sub-directory of `datadir`, then log out.
# The try/finally guarantees the session is closed even if a download fails.
try:
    for folder in folders:
        # Where this folder's files end up on disk.
        dest_dir = os.path.join(datadir, folders[folder])

        # Get folder
        folder_ = syn.get(folder)
        print('Downloading contents of %s folder (%s)\n' % (folder_.name, folder_.id))

        # Query for child entities
        # NOTE(review): syn.query is the legacy entity-query call; newer
        # synapseclient releases appear to replace it with syn.getChildren --
        # confirm against the installed client version.
        query_results = syn.query('select id,name from file where parentId=="%s"' % folder)

        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        # Download all data files
        for entity in query_results['results']:
            # This one file is deliberately skipped; the reason is not
            # recorded in this script.
            if entity['file.id'] == 'syn7444261':
                continue
            # Two spaces after the colon reproduce the original
            # `print a, b` output exactly.
            print('\tDownloading file:  %s' % entity['file.name'])
            data_file = syn.get(entity['file.id'])
            shutil.copy2(data_file.path, dest_dir)
            # Best-effort removal of the originally downloaded file so only
            # the copy under dest_dir remains; a failure here is harmless.
            try:
                os.remove(data_file.path)
            except OSError:
                pass
        print('Downloading ' + folder + ' complete!')
finally:
    syn.logout()