Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
//
// main.cpp
// tcolumn
//
// Created by Peter Arndt on 18/03/2016.
// Copyright © 2016 Peter Arndt. All rights reserved.
//
#include <iostream>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string>
#include <vector>
#include <fstream>
#include <sstream>
#include <algorithm>
using namespace std;
// Layout information for one output column: its header label, the length of
// the field every data row holds in this column, and the widest entry seen.
struct Col_info{
    size_t n;                        // column number used for the fallback label "col<n>"
    std::string id;                  // header label
    size_t truncation;               // cells longer than this are cut; 0 disables truncation
    size_t R;                        // number of data rows (initial size of field_len)
    std::vector<size_t> field_len;   // field_len[row] = length of this column's field in that row
    size_t max_len;                  // widest entry recorded so far, header label included

    // Creates the column description. An empty `aid` is replaced by "col<n>".
    // Members are initialised in declaration order.
    Col_info(size_t n, std::string aid, size_t truncation, size_t R)
    : n(n), id(aid), truncation(truncation), R(R), field_len(R, 0), max_len(0){
        if (id.empty())
            id = "col" + std::to_string(n);
        max_len = id.length();
    }

    // Records that the field of `row` is `len` characters long and widens the
    // column if necessary. vector::at throws std::out_of_range for a row that
    // is outside field_len.
    void add(size_t row, size_t len){
        field_len.at(row) = len;
        if (len > max_len)
            max_len = len;
    }

    // Returns `s` laid out as one cell of this column, including a gutter of
    // two blanks, so that every cell of the column has the same width:
    //  - column fits (or truncation is off): width is max_len + 2;
    //  - column is truncated: width is truncation + 2, and a value longer than
    //    `truncation` is cut and terminated by ".." so it still occupies
    //    exactly `truncation` visible characters.
    std::string format(std::string s){
        if ((truncation == 0) || (max_len <= truncation)){
            s.resize(max_len + 2, ' ');
            return s;
        }
        if (s.length() > truncation){
            // For truncation < 2 the marker is shortened instead of computing
            // truncation - 2, which would wrap around for an unsigned value.
            const size_t marker_len = (truncation < 2) ? truncation : 2;
            s.resize(truncation - marker_len);
            s.append(marker_len, '.');
        }
        // Same total width for cut and uncut cells keeps later columns aligned.
        s.resize(truncation + 2, ' ');
        return s;
    }

    // Returns the header label laid out as a cell of this column.
    std::string format_id(){
        return format(id);
    }
};
// A delimiter separated table that is read completely into memory and then
// printed with aligned, optionally truncated columns.
struct Table{
    size_t R,C;                      // number of data rows / columns (set by read)
    std::vector<std::string> data;   // raw data lines, header line excluded
    std::vector<Col_info> cinfo;     // one layout record per column
    char sep;                        // field separator; tab unless read() switches to ','
    bool skip_comment,skip_empty;    // drop lines starting with '#' / blank lines
    bool header;                     // treat the first kept line as the header
    std::string header_line;         // raw header line, empty if none was read
    bool col_header;                 // print the row of column names
    size_t col_header_repeat;        // repeat the name row every N data rows; 0 = never
    bool col_num;                    // print the row of column numbers
    bool color;                      // wrap header rows in ANSI colour escapes
    size_t truncation;               // passed to every Col_info; 0 = no truncation

    Table()
    : R(0), C(0), sep('\t'),
      skip_comment(true), skip_empty(true),
      header(true), header_line(""),
      col_header(true), col_header_repeat(40),
      col_num(true), color(true), truncation(40){
    }

    // Reads all lines from `is`, picks the separator, and computes the layout
    // of every column. Lines are stored unsplit; only field lengths are kept.
    void read(std::istream &is){
        std::string line;
        size_t num_tab = 0;
        while (std::getline(is, line)){
            if (skip_comment && !line.empty() && (line[0] == '#'))
                continue;
            // An empty line has no non-blank character either, so one test
            // covers both the empty and the whitespace-only case.
            if (skip_empty && (line.find_first_not_of(" \t") == std::string::npos))
                continue;
            // Count the tabs of the line just read; the separator guess below
            // must also work when no header line is ever stored.
            num_tab += static_cast<size_t>(std::count(line.begin(), line.end(), '\t'));
            if (header && header_line.empty())
                header_line = line;
            else
                data.push_back(line);
        }
        R = data.size();

        // - sep: fewer tabs than data rows means the input is not tab separated
        if (num_tab < R)
            sep = ',';

        // - process header: columns are numbered from 1, exactly like the
        //   columns that are only discovered in the data below
        if (!header_line.empty()){
            std::stringstream ss(header_line);
            std::string id;
            while (std::getline(ss, id, sep))
                cinfo.push_back(Col_info(cinfo.size() + 1, id, truncation, R));
        }

        // - process data: record the length of every field of every row
        for (size_t row = 0; row < data.size(); ++row){
            const std::string &text = data[row];
            size_t col = 0;
            size_t field_start = 0;
            // pos == text.length() acts as a virtual separator that closes
            // the last field of the row.
            for (size_t pos = 0; pos <= text.length(); ++pos){
                if ((pos < text.length()) && (text[pos] != sep))
                    continue;
                if (col >= cinfo.size())
                    cinfo.push_back(Col_info(col + 1, "", truncation, R));
                cinfo[col].add(row, pos - field_start);
                field_start = pos + 1;
                ++col;
            }
        }
        C = cinfo.size();
    }

    // Prints the table to stdout: optional column-number row, optional
    // column-name row, then every data row; the name row is repeated every
    // col_header_repeat rows.
    void write(){
        std::string header_format;
        std::string header_num;
        for (size_t col = 0; col < C; ++col){
            header_format += cinfo[col].format_id();
            header_num += cinfo[col].format(std::to_string(col + 1));
        }
        if (color){
            header_format = "\033[1;31m" + header_format + "\033[0m";
            header_num = "\033[1;31m" + header_num + "\033[0m";
        }
        header_format += "\n";
        header_num += "\n";

        if (col_num)
            std::cout << header_num;
        if (col_header)
            std::cout << header_format;

        for (size_t row = 0; row < data.size(); ++row){
            const std::string &text = data[row];
            size_t col_start = 0;
            for (size_t col = 0; col < C; ++col){
                const size_t col_len = cinfo[col].field_len[row];
                // Rows with fewer fields than the table simply end early.
                if (col_start < text.length())
                    std::cout << cinfo[col].format(text.substr(col_start, col_len));
                col_start += col_len + 1;   // skip the field and its separator
            }
            std::cout << '\n';   // no per-row flush; the stream is flushed at exit
            if (col_header && (col_header_repeat > 0) && ((row + 1) % col_header_repeat == 0))
                std::cout << header_format;
        }
    }

    // Debug aid: prints the dimensions, the width of every column, the header
    // line (if the table has one) and the raw data lines.
    void dump(){
        std::cout << R << " " << C << std::endl;
        for (size_t col = 0; col < C; ++col)
            std::cout << col << ":" << cinfo[col].max_len << " ";
        std::cout << std::endl;
        if (header)
            std::cout << "header:" << header_line << std::endl;
        for (const std::string &row : data)
            std::cout << row << std::endl;
    }
};
// Prints the usage text to stdout, one table entry per output line.
void help()
{
    static const char *const usage_lines[] = {
        "",
        " tcolumn [options] <filename> ",
        "",
        "Writes a tsv file nicely formated.",
        "If filename is omitted stdin is read.",
        "",
        "Possible options are:",
        "",
        " -d the first line is not a header",
        "",
        " -e supress output of column names header",
        " -f supress output of column number header",
        "",
        " -c no color",
        " -n num repeat the header row every num lines",
        " (default: 40, no repetition: 0)",
        " -t num truncate long columns after num chars",
        " (default: 40, no truncation: 0)",
        "",
        " -h this help message",
        "",
        "Peter Arndt 2016. All rights reserved.",
    };
    for (const char *entry : usage_lines)
        std::cout << entry << "\n";
}
// Parses `text` as a non-negative decimal number for a numeric option.
// Returns false (leaving `value` untouched) when the text is not a number,
// is negative, does not fit into an int, or has trailing characters.
static bool parse_count(const char *text, size_t &value)
{
    try {
        const std::string arg(text);
        size_t used = 0;
        const int parsed = std::stoi(arg, &used);
        if ((parsed < 0) || (used != arg.length()))
            return false;
        value = static_cast<size_t>(parsed);
        return true;
    } catch (const std::exception &) {
        // std::stoi reports unparsable or out-of-range input by throwing.
        return false;
    }
}

// Entry point: parses the command line, reads the table from the given file
// (or from stdin when no file is named) and prints it.
// Returns 0 on success and 1 on a usage error or an unreadable input file.
int main (int argc, char *argv[])
{
    Table T;
    opterr = 0;   // getopt stays silent; the '?' case prints the diagnostics
    int c;
    while ((c = getopt (argc, argv, "defhcn:t:")) != -1) {
        switch (c)
        {
        case 'h':
            help();
            return 0;
        case 'c':
            T.color=false;
            break;
        case 'd':
            T.header=false;
            break;
        case 'e':
            T.col_header=false;
            break;
        case 'f':
            T.col_num=false;
            break;
        case 'n':
            if (!parse_count(optarg, T.col_header_repeat)) {
                fprintf (stderr, "Option -%c requires a non-negative number, got `%s'.\n", c, optarg);
                return 1;
            }
            break;
        case 't':
            if (!parse_count(optarg, T.truncation)) {
                fprintf (stderr, "Option -%c requires a non-negative number, got `%s'.\n", c, optarg);
                return 1;
            }
            break;
        case '?':
            if ((optopt == 'n') || (optopt == 't'))
                fprintf (stderr, "Option -%c requires an argument.\n", optopt);
            else if (isprint (optopt))
                fprintf (stderr, "Unknown option `-%c'.\n", optopt);
            else
                fprintf (stderr,
                         "Unknown option character `\\x%x'.\n",
                         optopt);
            return 1;
        default:
            abort ();
        }
    }
    // ------
    if (optind < argc){
        std::ifstream fin(argv[optind]);
        if (!fin){
            // Without this check an unreadable file looks like empty input.
            fprintf (stderr, "Cannot open file `%s'.\n", argv[optind]);
            return 1;
        }
        T.read(fin);
    }else
        T.read(std::cin);
    // ------
    T.write();
    return 0;
}