Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#include <iostream>
#include <vector>
#include <fstream>
#include <string>
#include <omp.h>
using namespace std;
int calc_distance(const string &s1, const string &s2, const int match, const int mismatch, const int gap) {
int n1 = s1.size(), n2 = s2.size();
int n = n2+1;
vector<int> mat(n, 0);
for (int i = 0; i < n; ++i) mat[i] = gap * i;
int t1{}, t2{};
for (int i = 1; i <= n1; ++i) {
t1 = gap*i;
for (int j = 1; j <= n2; ++j) {
int match_case = mat[j-1] + ((s1[i-1] == s2[j-1]) ? match : mismatch);
t2 = max(match_case, max(t1, mat[j])+gap);
mat[j-1] = t1;
t1 = t2;
}
}
return t2;
}
int main(int argc, char* argv[]) {
// Interpret program input
string input_file_name = argv[1];
int match = stoi(argv[2]);
int mismatch = stoi(argv[3]);
int gap_open = stoi(argv[4]);
string output_file_name = argv[5];
int threads = stoi(argv[6]);
// read fasta file
ifstream input_fasta;
input_fasta.open(input_file_name);
if (!input_fasta) {
cout << "The specified file could not be found!" << endl;
exit(1);
}
string line{};
vector<string> sequences{};
string seq{};
while (getline(input_fasta, line)) {
if (line.back() == '\n') {
line.pop_back();
}
if (line[0] == '>') {
if (!seq.empty()) {
sequences.push_back(seq);
seq.clear();
}
continue;
}
else {
seq += line;
}
}
input_fasta.close();
if (!seq.empty()) {
sequences.push_back(seq);
}
const int n = sequences.size();
// create output matrix
vector<vector<double>> distances(n);
for (int i = 0; i < n; ++i) {
distances.push_back(vector<double>(n, 0));
}
// compute needleman-wunsch distances in parallel
omp_set_num_threads(threads);
//int calculations = 0;
#pragma omp parallel for shared(distances)// schedule(dynamic)// reduction(sum)
for (int i = 0; i < n; ++i) {
#ifdef DEBUG
double start = omp_get_wtime();
#endif
vector<double> row(n, 0);
for (int j = i; j < n; ++j) {
#ifdef DEBUG
double inner_start = omp_get_wtime();
#endif
int score = calc_distance(sequences[i], sequences[j], match, mismatch, gap_open);
#ifdef DEBUG
cout << "took " << (omp_get_wtime()-inner_start) << " seconds to finish i,j=" << i << ", " << j << endl;
#endif
//row[j] = double(score) / max(sequences[i].size(), sequences[j].size());
row[j] = double(score) / ((sequences[i].size()+sequences[j].size())/2.0);
//#pragma omp atomic
//calculations++;
//distances[j][i] = distances[i][j] = score;
}
#ifdef DEBUG
cout << "thread #" << omp_get_thread_num() << " took " << (omp_get_wtime()-start) << " seconds to finish i=" << i << endl;
#endif
//#pragma omp critical
distances[i] = row;
}
//cout << (n*(n+1)/2) << " vs " << calculations << endl;
for (int i = 0; i < n; ++i) {
for (int j = 0; j < i; ++j) {
distances[i][j] = distances[j][i];
}
}
// save distances to text file
ofstream matrix_out_file(output_file_name);
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
matrix_out_file << distances[i][j] << "\t";
#ifdef DEBUG
cout << distances[i][j] << "\t";
#endif
}
matrix_out_file << endl;
#ifdef DEBUG
cout << endl;
#endif
}
matrix_out_file.close();
}