Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
tcolumn/main.cpp
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
316 lines (253 sloc)
5.68 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  main.cpp
//  tcolumn
//
//  Created by Peter Arndt on 18/03/2016.
//  Copyright © 2016 Peter Arndt. All rights reserved.
//
#include <iostream> | |
#include <ctype.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <string> | |
#include <vector> | |
#include <fstream> | |
#include <sstream> | |
#include <algorithm> | |
using namespace std; | |
/// Layout bookkeeping for one table column: its display name, the widest
/// field seen so far, and the length of the column's field in every data row.
struct Col_info{
    size_t n;                       ///< Column number; only used to build the default name.
    std::string id;                 ///< Column name, "col<n>" when no name was supplied.
    size_t truncation;              ///< Max visible characters per cell; 0 disables truncation.
    size_t R;                       ///< Number of data rows (initial size of field_len).
    std::vector<size_t> field_len;  ///< Field length per row; stays 0 for rows never add()ed.
    size_t max_len;                 ///< Longest of the name and all lengths passed to add().

    /// @param n           column number used for the fallback name "col<n>"
    /// @param aid         column name; an empty string selects the fallback name
    /// @param truncation  maximum visible width of a cell, 0 for unlimited
    /// @param R           number of data rows the column has to track
    Col_info(size_t n, std::string aid, size_t truncation, size_t R)
        // Initializers are listed in declaration order, which is the order
        // in which they actually run (max_len reads the already-built id).
        : n(n),
          id(aid.empty() ? "col" + std::to_string(n) : aid),
          truncation(truncation),
          R(R),
          field_len(R),
          max_len(id.length()){
    }

    /// Records the length of this column's field in @p row and widens the
    /// column if needed. Throws std::out_of_range when @p row is not a valid
    /// index into field_len.
    void add(size_t row, size_t len){
        field_len.at(row)=len;
        if (len>max_len)
            max_len=len;
    }

    /// Returns @p s laid out as one cell of this column.
    ///
    /// Every cell of a column has the same width: max_len+2 when the column
    /// is not truncated, truncation+2 otherwise (content plus two blanks of
    /// padding). Text longer than the truncation limit is cut and its tail
    /// replaced by "..".
    std::string format(std::string s) const{
        const bool fits_untruncated = (truncation == 0) || (max_len <= truncation);
        if (fits_untruncated){
            s.resize(max_len+2, ' ');
            return s;
        }
        if (s.length() <= truncation){
            s.resize(truncation+2, ' ');
            return s;
        }
        // Limits below 2 cannot hold the full ".." marker; shrink the marker
        // instead of letting truncation-2 wrap around as an unsigned value.
        const size_t dots = (truncation < 2) ? truncation : 2;
        s.resize(truncation-dots);
        s.append(dots, '.');
        // Two blanks of padding, the same as the non-truncated cells, so that
        // truncated and padded cells line up.
        s.append(2, ' ');
        return s;
    }

    /// Returns the column name laid out as a cell of this column.
    std::string format_id() const{
        return format(id);
    }
};
struct Table{ | |
size_t R,C; | |
vector<string> data; | |
vector<Col_info> cinfo; | |
char sep; | |
bool skip_comment,skip_empty; | |
bool header; | |
string header_line; | |
bool col_header; | |
size_t col_header_repeat; | |
bool col_num; | |
bool color; | |
size_t truncation; | |
Table(){ | |
R=0; | |
C=0; | |
skip_comment=true; | |
skip_empty=true; | |
header=true; | |
header_line=""; | |
col_header=true; | |
col_header_repeat=40; | |
col_num=true; | |
sep='\t'; | |
color = true; | |
truncation = 40; | |
} | |
void read(istream &is){ | |
string line; | |
size_t num_tab=0,num_comma=0; | |
while (getline(is, line)){ | |
if (skip_comment && (line.length()>0) && (line[0] == '#')) | |
continue; | |
if (skip_empty && ( (line.length() == 0) || (line.find_first_not_of(" \t") == string::npos))) | |
continue; | |
num_tab += count(header_line.begin(), header_line.end(), '\t'); | |
num_comma += count(header_line.begin(), header_line.end(), ','); | |
if (header && header_line.length()==0){ | |
header_line = line; | |
}else{ | |
data.push_back(line); | |
} | |
} | |
R=data.size(); | |
// - sep | |
if (num_tab<R) | |
sep=','; | |
// - process header | |
size_t col=0; | |
if (header_line.length()>0){ | |
stringstream ss(header_line); | |
string id; | |
while (getline(ss, id, sep)){ | |
cinfo.push_back(Col_info(col++, id, truncation, R)); | |
} | |
} | |
// - process data | |
for (size_t row=0; row<data.size(); row++){ | |
int col=0; | |
size_t pos,sep_pos=-1; | |
for (pos=0; pos< data[row].length(); pos++){ | |
if (data[row][pos]==sep){ | |
if (col>=cinfo.size()) | |
cinfo.push_back(Col_info(col+1, "", truncation, R)); | |
cinfo[col].add(row, pos-sep_pos-1); | |
sep_pos=pos; | |
col++; | |
} | |
} | |
if (col>=cinfo.size()) | |
cinfo.push_back(Col_info(col+1, "", truncation, R)); | |
cinfo[col].add(row, pos-sep_pos-1); | |
} | |
C=cinfo.size(); | |
} | |
void write(){ | |
string header_format=""; | |
for(size_t col=0; col<C; col++){ | |
header_format += cinfo[col].format_id(); | |
} | |
string header_num=""; | |
for(size_t col=0; col<C; col++){ | |
header_num += cinfo[col].format(to_string(col+1)); | |
} | |
if (color){ | |
header_format="\033[1;31m"+header_format+"\033[0m"; | |
header_num ="\033[1;31m"+header_num +"\033[0m"; | |
} | |
header_format +="\n"; | |
header_num +="\n"; | |
if(col_num) | |
cout << header_num; | |
if (col_header) | |
cout << header_format; | |
for (size_t row=0; row<data.size(); row++){ | |
size_t col_len,col_start=0; | |
for(size_t col=0; col<C; col++){ | |
col_len=cinfo[col].field_len[row]; | |
if (col_start<data[row].length()) | |
cout << cinfo[col].format(data[row].substr(col_start,col_len)); | |
col_start+=col_len+1; | |
} | |
cout << endl; | |
if (col_header && (col_header_repeat>0) && ((row+1) % col_header_repeat == 0)) | |
cout << header_format; | |
} | |
} | |
void dump(){ | |
cout << R<<" "<<C<<endl; | |
for(size_t col=0; col<C; col++){ | |
cout << col<<":"<<cinfo[col].max_len<< " "; | |
} | |
cout <<endl; | |
if (header) | |
cout << "header:" << header_line <<endl; | |
for (vector<string>::iterator vi = data.begin();vi != data.end();vi++) | |
cout << *vi <<endl; | |
} | |
}; | |
/// Writes the usage text of the program to standard output.
void help()
{
    // The message is kept as one entry per output line and streamed in order.
    static const char *const usage_lines[] = {
        "\n",
        " tcolumn [options] <filename> \n",
        "\n",
        "Writes a tsv file nicely formated.\n",
        "If filename is omitted stdin is read.\n",
        "\n",
        "Possible options are:\n",
        "\n",
        " -d the first line is not a header\n",
        "\n",
        " -e supress output of column names header\n",
        " -f supress output of column number header\n",
        "\n",
        " -c no color\n",
        " -n num repeat the header row every num lines\n",
        " (default: 40, no repetition: 0)\n",
        " -t num truncate long columns after num chars\n",
        " (default: 40, no truncation: 0)\n",
        "\n",
        " -h this help message\n",
        "\n",
        "Peter Arndt 2016. All rights reserved.\n",
    };
    for (const char *usage_line : usage_lines)
        std::cout << usage_line;
}
int main (int argc, char *argv[]) | |
{ | |
Table T; | |
opterr = 0; | |
int c; | |
while ((c = getopt (argc, argv, "defhcn:t:")) != -1) | |
switch (c) | |
{ | |
case 'h': | |
help(); | |
return 0; | |
case 'c': | |
T.color=false; | |
break; | |
case 'd': | |
T.header=false; | |
break; | |
case 'e': | |
T.col_header=false; | |
break; | |
case 'f': | |
T.col_num=false; | |
break; | |
case 'n': | |
T.col_header_repeat = stoi(optarg); | |
break; | |
case 't': | |
T.truncation = stoi(optarg); | |
break; | |
case '?': | |
if ((optopt == 'n') || (optopt == 't')) | |
fprintf (stderr, "Option -%c requires an argument.\n", optopt); | |
else if (isprint (optopt)) | |
fprintf (stderr, "Unknown option `-%c'.\n", optopt); | |
else | |
fprintf (stderr, | |
"Unknown option character `\\x%x'.\n", | |
optopt); | |
return 1; | |
default: | |
abort (); | |
} | |
// ------ | |
if (optind < argc){ | |
ifstream fin(argv[optind]); | |
T.read(fin); | |
}else | |
T.read(cin); | |
// ------ | |
T.write(); | |
return 0; | |
} |