Skip to content

Commit

Permalink
preprocess_dataset: limit number of shapes predicted for one sequence to 10
Browse files
Browse the repository at this point in the history
  • Loading branch information
heller committed Jul 31, 2017
1 parent 1b04e1e commit 8853b33
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
4 changes: 2 additions & 2 deletions bin/preprocess_dataset
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def main(args):
print>>sys.stderr, 'INPUT:', fastaPositiveFileName
print>>sys.stderr, 'OUTPUT:', shapePositiveFileName

calculate_rna_shapes_from_file(shapePositiveFileName, fastaPositiveFileName)
calculate_rna_shapes_from_file(shapePositiveFileName, fastaPositiveFileName, 10)

print>>sys.stderr, 'STEP 6 finished'

Expand All @@ -317,7 +317,7 @@ def main(args):
print>>sys.stderr, 'INPUT:', fastaPositiveFileName
print>>sys.stderr, 'OUTPUT:', shapePositiveFileName

calculate_rna_shapes_from_file(shapeNegativeFileName, fastaNegativeFileName)
calculate_rna_shapes_from_file(shapeNegativeFileName, fastaNegativeFileName, 10)

print>>sys.stderr, 'STEP 6 finished'

Expand Down
11 changes: 7 additions & 4 deletions sshmm/structure_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def translateIntoContexts(dotBracketString):
contextString = contextString1.replace('T', 'E')
return contextString

def calculate_rna_shapes_from_file(output, fastaFileName):
def calculate_rna_shapes_from_file(output, fastaFileName, shapesPerSequence):
contextsFile = open(output, 'w')

proc = Popen(['RNAshapes', '-r', '-o', '1', '-f', fastaFileName], stdout=PIPE, bufsize=-1)
Expand All @@ -26,12 +26,15 @@ def calculate_rna_shapes_from_file(output, fastaFileName):
firstField = fields[0]
if firstField.startswith('>'):
print>>contextsFile, firstField
num_shapes = 0
else:
match1 = dotBracketPattern.search(firstField)
if match1 and match1.end() == len(firstField):
contexts = translateIntoContexts(firstField)
prob = float(fields[2][1:-1])
print>>contextsFile, contexts, prob
num_shapes += 1
if num_shapes <= shapesPerSequence:
contexts = translateIntoContexts(firstField)
prob = float(fields[2][1:-1])
print>>contextsFile, contexts, prob


def calculate_rna_shapes_from_sequence(nucleotide_sequence):
Expand Down

0 comments on commit 8853b33

Please sign in to comment.