I recently wrote some code that was dinged on not having well defined tests and poor modularity. Was hoping someone could review my code and give me pointers on how to create more well-defined tests (Which is where I might be the weakest) and secondly any tips on how I can increase modularity would help.
The code receives a csv file with data. Valid data will be in three types of string format and will return an ordered list of json string representation if the string is valid. The code consists of 3 separate files main.py, errors.py, and regexes.py. Finally I'll list my tests at the bottom. Thanks in advance for any and all help.
regexes.py:
import re
def get_zip():
return r'(?P<zip_code>\d\d\d\d\d)'
def get_regex1():
"""Regex for 1st instance
Format: Lastname, Firstname, (703)-742-0996, Blue, 10013"""
reg = r'(?P<lastname>.+), (?P<firstname>.+),'
reg += r'\s\((?P<area_code>\d+)\)\-(?P<central>\d+)\-'
reg += r'(?P<line_number>\d+),'
reg += r'\s(?P<color>.+),\s' + get_zip()
reg += r'$'
pattern = re.compile(reg)
return pattern
def get_regex2():
"""Regex for 2nd instance
Format: Firstname Lastname, Red, 11237, 703 955 0373"""
reg = r'(?P<firstname>.+) (?P<lastname>.+),'
reg += r'\s(?P<color>.+),\s' + get_zip() + ','
reg += r'\s(?P<area_code>\d+) (?P<central>\d+)'
reg += r' (?P<line_number>\d+)'
reg += r'$'
pattern = re.compile(reg)
return pattern
def get_regex3():
"""Regex for 3rd instance
Format: Firstname, Lastname, 10013, 646 111 0101, Green"""
reg = r'(?P<firstname>.+), (?P<lastname>.+),'
reg += r'\s' + get_zip() + ','
reg += r'\s(?P<area_code>\d+) (?P<central>\d+)'
reg += r' (?P<line_number>\d+), (?P<color>.+)'
reg += r'$'
pattern = re.compile(reg)
return pattern
def find_dict(line, regex_list):
"""Check all regex's in list for match"""
line = line.strip()
for pattern in regex_list:
m = pattern.match(line)
if m:
return m.groupdict()
return None
errors.py:
class PhoneNumberException(Exception):
"""Invalid Phone Number exceptions"""
def __init__(self,errors):
Exception.__init__(self,
"Phone number passed in was: {0}".format(errors))
class FormatException(Exception):
"""Invalid Format Exceptions"""
def __init__(self,errors):
Exception.__init__(self,
"""Line was formatted incorrectly,
Passed in line was {0}""".format(errors))
main.py:
import re
import json
import regexes
import errors
from operator import itemgetter
def is_phone_valid(phone):
"""Check if phone has proper number of digits"""
num_string = re.sub('[^\d]', '', phone)
if len(num_string) != 10:
return False
return True
def make_phone(area_code, central, line_number):
"""Make Phone number from parsed phone number"""
return (area_code + '-' +
central + '-' +
line_number)
def remove_keys(in_dict):
"""Remove keys to arrange dictionary in printable format"""
keys_to_remove =['area_code', 'central', 'line_number']
for key in keys_to_remove:
if key in in_dict:
del in_dict[key]
return in_dict
def check_line(line, regex_list):
"""All Regexes in list"""
in_dict = regexes.find_dict(line, regex_list)
if not in_dict:
raise errors.FormatException(line)
phonenumber = make_phone(in_dict['area_code'],
in_dict['central'],
in_dict['line_number'])
in_dict['phonenumber'] = phonenumber
if not is_phone_valid(in_dict['phonenumber']):
raise errors.PhoneNumberException(in_dict['phonenumber'])
return remove_keys(in_dict) # This line may be changed in future
def run_input(filename, regex_list):
"""Main Function to read files and return entries"""
# Collect all regexes
entry_list = []
error_list = []
# Open file and run through items
with open(filename, 'r') as f:
for i, line in enumerate(f):
try:
item = check_line(line, regex_list)
entry_list.append(item)
except (errors.FormatException, errors.PhoneNumberException):
error_list.append(i)
except:
error_list.append(i)
#all lines are processed, now sort and print
json_dict = {}
json_dict['entries'] = sorted(entry_list,
key=itemgetter('lastname', 'firstname'))
json_dict['errors'] = error_list
return json.dumps(json_dict, sort_keys=True, indent=2)
if __name__ == '__main__':
all_regex = [regexes.get_regex1(), regexes.get_regex2(),
regexes.get_regex3()]
json_return = run_input('data.in', all_regex)
print json_return
with open('output.txt', 'w') as write_file:
write_file.write(json_return)
Tests!:
import pytest
import task.regexes as regexes
import task.main as main
import task.errors as errors
@pytest.fixture(scope='module')
def valid_lines():
valid = {}
valid["line1"] = "Chandler, Kerri, (623)-668-9293, Dark Fuschia, 12312"
valid["line2"] = "James T Murphy, green, 83880, 018 154 6474"
valid['line3'] = "Booker T., Washington, 87360, 373 781 7380, yellow"
return valid
@pytest.fixture(scope='module')
def invalid_lines():
invalid = {}
invalid['line1'] = "Booker T., Washington"
invalid['line2'] = "James T Murphy, yellow, 83180, 018x154x6474"
invalid['line3'] = "Chandler, Kerri, (623)-668-9293, pink, 123122"
invalid['line4'] = "James T Murphy, yellow, 83880, 018-154 6474"
return invalid
def test_regex1(valid_lines, invalid_lines):
"""Test method for regex1"""
m = regexes.find_dict(valid_lines['line1'], [regexes.get_regex1()])
assert m['firstname'] == 'Kerri'
assert m['lastname'] == 'Chandler'
assert m['zip_code'] == '12312'
assert m['area_code'] == '623'
assert m['central'] == '668'
assert m['line_number'] == '9293'
assert m['color'] == 'Dark Fuschia'
for line in invalid_lines.values():
m = regexes.find_dict(line, [regexes.get_regex1()])
assert not m
def test_regex2(valid_lines, invalid_lines):
"""Test method for regex2"""
m = regexes.find_dict(valid_lines['line2'], [regexes.get_regex2()])
assert m['firstname'] == 'James T'
assert m['lastname'] == 'Murphy'
assert m['zip_code'] == '83880'
assert m['area_code'] == '018'
assert m['central'] == '154'
assert m['line_number'] == '6474'
assert m['color'] == 'green'
for line in invalid_lines.values():
m = regexes.find_dict(line, [regexes.get_regex2()])
assert not m
def test_regex3(valid_lines, invalid_lines):
"""Test method for regex3"""
m = regexes.find_dict(valid_lines['line3'], [regexes.get_regex3()])
assert m['firstname'] == 'Booker T.'
assert m['lastname'] == 'Washington'
assert m['zip_code'] == '87360'
assert m['area_code'] == '373'
assert m['central'] == '781'
assert m['line_number'] == '7380'
assert m['color'] == 'yellow'
for line in invalid_lines.values():
m = regexes.find_dict(line, [regexes.get_regex3()])
assert not m
def test_phone():
"""Test if Phone Number is valid"""
phone = '123-223-1212'
assert main.is_phone_valid(phone)
phone = '123-123-12312'
assert not main.is_phone_valid(phone)
def test_all_regex(valid_lines, invalid_lines):
"""Test all regexes"""
all_regex = [regexes.get_regex1(), regexes.get_regex2(), regexes.get_regex3()]
for line in valid_lines.values():
m = regexes.find_dict(line, all_regex)
assert m
for line in invalid_lines.values():
m = regexes.find_dict(line, all_regex)
assert not m
def test_check_line(invalid_lines):
"""Test main function for Exceptions"""
all_regex = [regexes.get_regex1(), regexes.get_regex2(), regexes.get_regex3()]
for line in invalid_lines.values():
try:
m = main.check_line(line, all_regex)
assert False # should never happen
except (errors.FormatException) as e:
assert True
line = "Booker T., Washington, 87360, 373 781 738012, yellow"
try:
m = percolate.check_line(line, all_regex)
assert False # should never happen
except (errors.PhoneNumberException) as e:
assert True