This Python script computes statistics from text files of raw numerical data: the average increase, the relative evolution, and the standard deviation of the values. Each of these statistics is calculated over a period, which is set via the command line.
#! /usr/bin/env python3
import statistics
import sys
# Command-line handling: exactly one argument is expected, the period, and it
# must be a positive integer. Anything else prints the usage text and exits
# with status 1 instead of failing later with a traceback (a non-numeric
# period) or a division by zero (a period of 0).
try:
    if len(sys.argv) != 2:
        raise ValueError("expected exactly one argument")
    period = int(sys.argv[1])
    if period < 1:
        raise ValueError("period must be a positive integer")
except ValueError:
    print("""SYNOPSIS
./%s period
DESCRIPTION
period the number of days defining a period""" % (sys.argv[0]))
    # sys.exit() is the programmatic way to stop; the bare exit() helper is
    # injected by the site module and is meant for interactive sessions.
    sys.exit(1)

# Rolling windows shared by the functions below. ``values`` receives each raw
# reading in main(); ``increases`` receives one non-negative increase per
# reading in calculate_g(). Both are trimmed by check_length().
increases = []
values = []
def check_length(array):
    """Drop the oldest entry of ``array`` once it outgrows the period.

    At most one element (the one at index 0) is removed per call, and only
    when the list holds more than ``period`` items, where ``period`` is the
    module-level setting. The list is modified in place; nothing is returned.
    """
    if len(array) <= period:
        return
    del array[0]
def mean(data):
    """Return the arithmetic mean of ``data``, rounded to two decimals.

    Raises ZeroDivisionError when ``data`` is empty.
    """
    total = sum(data)
    count = len(data)
    return round(total / count, 2)
def calculate_g():
    """Record the latest increase and print the average increase ``g``.

    The difference between the two most recent entries of ``values`` is
    appended to ``increases``; a decrease, or a missing previous reading,
    is recorded as 0. Once ``values`` holds ``period + 1`` readings the
    rounded mean of ``increases[1:]`` is printed as ``g=<x.xx> ``; before
    that ``g=nan `` is printed. No newline is emitted.
    """
    if len(values) < 2:
        # No previous reading to compare against yet.
        delta = 0
    else:
        latest, previous = values[-1], values[-2]
        # Both operands are scaled by 10000 before subtracting and the result
        # is scaled back -- presumably to limit float rounding noise.
        delta = (latest * 10000 - previous * 10000) / 10000
    increases.append(delta if delta > 0 else 0)

    if len(values) >= period + 1:
        # Skip the first slot so exactly the newest increases are averaged.
        print("g=%.2f " % (mean(increases[1:])), end="")
    else:
        print("g=nan ", end="")
def calculate_r():
    """Print the relative evolution ``r`` and return its sign.

    With at least ``period + 1`` readings in ``values``, the percentage
    change from ``values[0]`` to ``values[period]`` is rounded to an integer
    and printed as ``r=<n>% ``; the return value is -1 when that percentage
    is negative and 1 otherwise. With fewer readings ``r=nan% `` is printed
    and the string ``"nan"`` is returned. No newline is emitted.

    Raises ZeroDivisionError when ``values[0]`` is 0.
    """
    if len(values) >= period + 1:
        first = values[0]
        evolution = round((values[period] - first) * 100 / first)
        print("r=%.0f%% " % (evolution), end="")
        return -1 if evolution < 0 else 1
    print("r=nan% ", end="")
    return "nan"
def _ss(data):
"""Return sum of square deviations of sequence data."""
average = mean(data)
ss = sum((x-average)**2 for x in data)
return ss
def stddev():
    """Print the population standard deviation of the last ``period`` values.

    Output is ``s=<x.xx>`` once ``values`` holds at least ``period``
    readings and ``s=nan`` before that. No newline is emitted.

    The computation is delegated to ``statistics.pstdev`` so the deviation is
    measured around the exact mean of the window, not a mean already rounded
    to two decimals.
    """
    if len(values) < period:
        print("s=nan", end="")
        return
    window = values[-period:]
    # "%.2f" already rounds to two decimals, so no separate round() is needed.
    print("s=%.2f" % statistics.pstdev(window), end="")
def main():
    """Read readings from standard input until ``STOP`` and report on each.

    Every line is converted to a float and appended to ``values``; then
    ``g``, ``r`` and ``s`` are printed on one output line, followed by
    `` a switch occurs`` whenever the sign returned by calculate_r() is the
    opposite of the previous one. After ``STOP`` the number of switches is
    printed.

    Raises:
        ValueError: if a line other than ``STOP`` is not a valid number.
        EOFError: if the input ends before a ``STOP`` line is read.
    """
    switch_count = 0
    last_sign = 0
    suffix = ""
    # iter() with a sentinel keeps calling input() until it returns "STOP".
    for raw in iter(input, "STOP"):
        values.append(float(raw))
        calculate_g()
        sign = calculate_r()
        stddev()
        if sign != "nan":
            flipped = sign == last_sign * -1
            suffix = " a switch occurs" if flipped else ""
            if flipped:
                switch_count += 1
            last_sign = sign
        # Terminates the line started by the three calls above.
        print(suffix)
        # Keep both rolling windows at no more than ``period`` entries.
        check_length(values)
        check_length(increases)
    print("STOP\nGlobal tendency switched %i times" % (switch_count))


if __name__ == "__main__":
    main()
And here is some sample data:
27.7
31.0
32.7
34.7
35.9
37.4
38.2
39.5
40.3
42.2
41.3
40.4
39.8
38.7
36.5
35.7
33.4
29.8
27.5
25.2
24.7
23.1
22.8
22.7
23.6
24.3
24.5
26.7
27.0
27.4
29.8
29.4
31.5
29.6
29.8
28.9
28.7
27.2
25.7
26.0
25.2
21.6
20.3
21.1
20.4
19.8
19.1
19.6
21.2
21.0
21.4
24.0
25.5
25.5
26.4
29.4
32.1
31.4
32.3
35.2
38.3
36.6
38.4
39.9
40.5
39.4
39.0
40.5
42.1
38.7
37.5
38.1
36.5
35.4
STOP
I'm pretty sure my code isn't that clean, since I'm fairly new to Python, and I would like to learn good practices.