# main.py

import os
import re
import logging
import matplotlib.pyplot as plt
import numpy as np

# number of boards whose log lines are evaluated (one subplot row per board)
board_amount = 7
# the smallest number a board has
board_numbering_offset = 1
# how big (in seconds) can the distance between packets be until we count it as a packet loss?
max_packet_arrival_dist = 0.1
# plot colors: arrival time distance curve / counter3-to-UTC offset curve
color_adt, color_c3o = 'b', 'r'


# a relevant log line looks like this:
# 2018-02-12 13:47:53,477 DEBUG_V mr-keyboard.board4 - packet arrival time dist.: 0.0042400360107421875, c3 utc offset: 3727420072.5741544
# NOTE(review): the sample above shows the level "DEBUG_V" while the pattern only accepts "INFO" -
# confirm which level the boards really log with before touching either of them.
_LOG_LINE_PATTERN = re.compile(
	r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2}),'
	r'(?P<milliseconds>\d{3}) INFO mr-keyboard\.board(?P<board_nr>\d+) - packet arrival time dist.: '
	r'(?P<packet_arrival_time_dist>[0-9.]+), c3 utc offset: (?P<c3_utc_offset>[0-9.]+)'
)


def _rotation_index(filename, prefix):
	"""Return the rotation number of ``filename`` within the group ``prefix``.

	The group file itself has index 0; ``<prefix>.3`` has index 3.
	Returns ``None`` if ``filename`` does not start with ``prefix`` or if
	what follows the prefix is not a number.
	"""
	if not filename.startswith(prefix):
		return None

	# only look at what follows the prefix, dots are just separators
	suffix = filename[len(prefix):].replace('.', '')
	if suffix == '':
		return 0
	try:
		return int(suffix)
	except ValueError:
		return None


def _group_log_files(filenames):
	"""Group the given file names by the ``*.log`` file they belong to.

	Every name ending in ``.log`` opens a group. Every file is put into the
	first group for which it has a valid rotation index; files that fit no
	group are ignored.

	:param filenames: plain file names (no directories)
	:return: dict mapping the group name to its file names, sorted by
		descending rotation index, i.e. the ``.log`` file itself comes last
		(presumably oldest data first - confirm against the log rotation setup)
	"""
	start_files = [name for name in filenames if name.endswith('.log')]
	indexed = {name: [] for name in start_files}

	for filename in filenames:
		for prefix in start_files:
			index = _rotation_index(filename, prefix)
			if index is not None:
				indexed[prefix].append((index, filename))
				break
		else:
			logging.debug('ignoring "{}": not part of any log group'.format(filename))

	groups = {}
	for prefix, entries in indexed.items():
		entries.sort(key=lambda entry: entry[0], reverse=True)
		groups[prefix] = [filename for _, filename in entries]
	return groups


def _parse_session(dirpath, session):
	"""Read all files of one log group and collect the samples per board.

	:param dirpath: directory containing the files
	:param session: file names of the group, in the order they should be read
	:return: dict mapping every configured board number to a dict with the
		lists ``arrival_dist_list`` and ``c3_offset_list`` (same length,
		one entry per matching log line)
	"""
	session_data = {
		i + board_numbering_offset: {
			'arrival_dist_list': [],  # list of all packet arrival distances
			'c3_offset_list': [],  # list of all counter3 utc offsets
		}
		for i in range(board_amount)
	}
	unknown_boards = set()  # boards we already warned about

	for filename in session:
		logging.info('parsing {}'.format(filename))

		filepath = os.path.join(dirpath, filename)
		# errors='replace': a stray undecodable byte must not abort the whole evaluation
		with open(filepath, errors='replace') as f:
			for line in f:
				m = _LOG_LINE_PATTERN.match(line)
				if m is None:
					continue

				board_nr = int(m.group('board_nr'))
				data = session_data.get(board_nr)
				if data is None:
					# there is no subplot row for this board, so skip it (but say so once)
					if board_nr not in unknown_boards:
						unknown_boards.add(board_nr)
						logging.warning(
							'ignoring board {}: outside of the configured range {}..{}'.format(
								board_nr, board_numbering_offset,
								board_numbering_offset + board_amount - 1
							)
						)
					continue

				# the pattern also accepts things like "1.2.3", which float() rejects
				try:
					arrival_dist = float(m.group('packet_arrival_time_dist'))
					c3_offset = float(m.group('c3_utc_offset'))
				except ValueError:
					logging.warning('skipping malformed line in {}: {!r}'.format(filename, line))
					continue

				data['arrival_dist_list'].append(arrival_dist)
				data['c3_offset_list'].append(c3_offset)

	return session_data


def _evaluate_board(fig, key, board_nr, data):
	"""Log the statistics of one board and draw its subplot into ``fig``.

	:param fig: figure of the current log group
	:param key: name of the log group (only used for the log message)
	:param board_nr: number of the board, selects the subplot row
	:param data: dict with the lists ``arrival_dist_list`` and ``c3_offset_list``
	"""
	arrival_dist = np.asarray(data['arrival_dist_list'], dtype=float)
	c3_offset = np.asarray(data['c3_offset_list'], dtype=float)
	items = arrival_dist.size
	if items == 0:
		logging.info('no matching lines found for board {}'.format(board_nr))
		return

	# calculate means and mean absolute deviations
	arrival_dist_mean = float(arrival_dist.mean())
	c3_offset_mean = float(c3_offset.mean())
	arrival_dist_mad = float(np.abs(arrival_dist - arrival_dist_mean).mean())
	c3_offset_mad = float(np.abs(c3_offset - c3_offset_mean).mean())

	# every gap above the threshold counts as a packet loss
	packet_losses = int(np.count_nonzero(arrival_dist > max_packet_arrival_dist))

	logging.info(
		'statistics for board {} in group {} ("adt" = arrival time distance, "c3 offset" = offset of counter3 to UTC):\n'
		'relevant entries found: {}\n'
		'packets with more than {}s arrival time distance: {}\n'
		'averages: adt: {}, c3 offset: {}\n'
		'mean absolute deviations: adt {}, c3 offset: {}\n'.format(
			board_nr, key,
			items,
			max_packet_arrival_dist, packet_losses,
			arrival_dist_mean, c3_offset_mean,
			arrival_dist_mad, c3_offset_mad
		)
	)

	# one row per board, two y axes sharing the sample index as x axis
	t = np.arange(1, items + 1)
	ax1 = fig.add_subplot(board_amount, 1, board_nr - board_numbering_offset + 1)
	ax2 = ax1.twinx()

	# set_ylabel (not set_label, which only names the artist for legends) labels the axes
	ax1.set_ylabel('arrival time distance (s)', color=color_adt)
	ax2.set_ylabel('counter3 offset to UTC (s)', color=color_c3o)

	# plot the data
	ax1.plot(t, arrival_dist, color=color_adt)
	ax2.plot(t, c3_offset, color=color_c3o)

	# color the tick labels like the curve they belong to
	ax1.tick_params(axis='y', labelcolor=color_adt)
	ax2.tick_params(axis='y', labelcolor=color_c3o)


def main():
	"""Evaluate all log groups found in the ``input`` folder next to this script.

	Every ``*.log`` file in the folder forms a group together with its
	numbered siblings (``x.log.1``, ``x.log.2``, ...). For every group the
	matching lines are parsed, the per-board statistics are logged and one
	figure with a subplot row per board is shown. ``plt.show()`` is called
	once per group.

	Logs an error and returns if the input folder cannot be listed.
	"""
	logging.basicConfig(
		level=logging.DEBUG,
		format='%(asctime)s %(name)s:%(levelname)s - %(message)s'
	)

	# get the files in the input folder (only the first entry of the walk, i.e. the top level)
	path = os.path.join(os.path.dirname(__file__), 'input')
	top_level = next(os.walk(path), None)
	if top_level is None:
		# os.walk yields nothing for a missing or unreadable directory
		logging.error('input folder "{}" does not exist or cannot be read'.format(path))
		return
	dirpath, _, filenames = top_level
	logging.debug('{} files found'.format(len(filenames)))

	files = _group_log_files(filenames)
	logging.debug('identified the following groups: {!r}'.format(list(files)))

	# read and evaluate the data from the log files
	for key, session in files.items():
		logging.info('parsing group "{}"'.format(key))
		session_data = _parse_session(dirpath, session)
		logging.debug('files parsed, calculating statistics for group {}'.format(key))

		# set the name of the window; the canvas method of the same name
		# was removed in Matplotlib 3.6, the manager method exists in old and new versions
		fig = plt.figure()
		manager = getattr(fig.canvas, 'manager', None)
		if manager is not None:
			manager.set_window_title(key)

		# evaluate the data for every board
		for board_nr, data in session_data.items():
			_evaluate_board(fig, key, board_nr, data)

		# display the plot
		plt.show()

	logging.debug('we\'re done!')


# run the evaluation only when executed as a script, not when imported
if __name__ == '__main__':
	main()