import re
import sys
from collections import namedtuple
# Simple data objects are handy as hell. Use more of them.

# A calendar month: English name, 1-based number, and number of days.
# February carries 28 days; leap days are not represented in this table.
Month = namedtuple('Month', 'name number days')

# One supported date layout:
#   format    - human-readable label for the layout
#   separator - string the (comma-stripped) input is split on
#   indexes   - positions of (year, month, day) among the split parts
#   regex     - pattern that recognizes the layout
Culture = namedtuple('Culture', 'format separator indexes regex')

# A parsed calendar date.
Date = namedtuple('Date', 'year month day')

Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# The patterns are raw strings so that \d, \- and \/ reach the regex engine
# untouched instead of triggering Python's invalid-escape-sequence warning.
Cultures = (
    Culture('yyyy-MM-dd', '-', (0, 1, 2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd', '/', (0, 1, 2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy', '/', (2, 0, 1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)
# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.
def main(args):
    """Print the result of ``diffdate`` for two date strings.

    ``args`` must unpack into exactly two items: the start date and the
    end date, in that order.
    """
    first, second = args
    print(diffdate(first, second))
def diffdate(start, end):
    """Return the day number of ``end`` minus the day number of ``start``.

    Each argument is parsed with ``parsedate`` and converted with
    ``totaldays`` (``start`` first, then ``end``).  The result is negative
    when ``end`` falls before ``start``.
    """
    first_day, last_day = (totaldays(parsedate(text)) for text in (start, end))
    return last_day - first_day
def totaldays(date):
    """Convert a date to a running day number.

    ``date`` needs ``year``, ``month`` and ``day`` attributes.  The count is
    arranged so that year 1, month 1, day 1 comes out as 1: every year
    contributes 365 days, plus one day for each leap year (divisible by 4,
    except centuries not divisible by 400) whose leap day has passed.
    """
    leap_year_limit = date.year
    if date.month <= 2:
        # In January/February this year's own leap day has not been reached,
        # so only the years strictly before it are counted.
        leap_year_limit -= 1
    leap_days = (
        leap_year_limit // 4 - leap_year_limit // 100 + leap_year_limit // 400
    )

    # Lengths of the full months that precede date.month, from Months.
    month_days = 0
    for index in range(date.month - 1):
        month_days += Months[index].days

    return (date.year - 1) * 365 + month_days + date.day + leap_days
def parsedate(date):
    """Parse a date string into a ``Date``.

    The string is tried against each entry of ``Cultures`` in order.  The
    first entry whose regex matches decides how the text is split and which
    part is the year, the month and the day.

    Raises:
        ValueError: if no culture's regex matches ``date``.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Commas only decorate the textual layouts; drop them so a plain
        # split on the separator yields the three date parts.
        parts = date.replace(',', '').split(c.separator)
        year_at, month_at, day_at = c.indexes
        return Date(
            int(parts[year_at]),
            parsemonth(parts[month_at]),
            int(parts[day_at]),
        )
    # No layout recognized the input: fail loudly instead of returning None.
    raise ValueError('unrecognized date format: %r' % (date,))
def parsemonth(month):
    """Convert one month token to its month number.

    ``month`` is either a run of digits ('7', '07'), returned as an int, or
    a case-insensitive prefix of a name in ``Months`` ('Mar', 'sept',
    'December'); the first entry whose name starts with the token wins.

    Raises:
        ValueError: if the token is empty or matches no month name.
    """
    if month.isdigit():
        return int(month)
    # An empty token is a prefix of every name and would otherwise be
    # reported as January, so it is rejected along with unknown names.
    if month:
        wanted = month.lower()
        for m in Months:
            if m.name.lower().startswith(wanted):
                return m.number
    raise ValueError('unrecognized month: %r' % (month,))
# Script entry point: pass the command-line arguments, minus the program
# name, to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)
import re
import sys
from collections import namedtuple
# Simple data objects are handy as hell. Use more of them.

# A calendar month: English name, 1-based number, and number of days.
# February carries 28 days; leap days are not represented in this table.
Month = namedtuple('Month', 'name number days')

# One supported date layout:
#   format    - human-readable label for the layout
#   separator - string the (comma-stripped) input is split on
#   indexes   - positions of (year, month, day) among the split parts
#   regex     - pattern that recognizes the layout
Culture = namedtuple('Culture', 'format separator indexes regex')

# A parsed calendar date.
Date = namedtuple('Date', 'year month day')

Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# The patterns are raw strings so that \d, \- and \/ reach the regex engine
# untouched instead of triggering Python's invalid-escape-sequence warning.
Cultures = (
    Culture('yyyy-MM-dd', '-', (0, 1, 2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd', '/', (0, 1, 2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy', '/', (2, 0, 1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)
# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.
def main(args):
    """Print the result of ``diffdate`` for two date strings.

    ``args`` must unpack into exactly two items: the start date and the
    end date, in that order.
    """
    first, second = args
    print(diffdate(first, second))
def diffdate(start, end):
    """Return the day number of ``end`` minus the day number of ``start``.

    Each argument is parsed with ``parsedate`` and converted with
    ``totaldays`` (``start`` first, then ``end``).  The result is negative
    when ``end`` falls before ``start``.
    """
    first_day, last_day = (totaldays(parsedate(text)) for text in (start, end))
    return last_day - first_day
def totaldays(date):
    """Convert a date to a running day number.

    ``date`` needs ``year``, ``month`` and ``day`` attributes.  The count is
    arranged so that year 1, month 1, day 1 comes out as 1: every year
    contributes 365 days, plus one day for each leap year (divisible by 4,
    except centuries not divisible by 400) whose leap day has passed.
    """
    leap_year_limit = date.year
    if date.month <= 2:
        # In January/February this year's own leap day has not been reached,
        # so only the years strictly before it are counted.
        leap_year_limit -= 1
    leap_days = (
        leap_year_limit // 4 - leap_year_limit // 100 + leap_year_limit // 400
    )

    # Lengths of the full months that precede date.month, from Months.
    month_days = 0
    for index in range(date.month - 1):
        month_days += Months[index].days

    return (date.year - 1) * 365 + month_days + date.day + leap_days
def parsedate(date):
    """Parse a date string into a ``Date``.

    The string is tried against each entry of ``Cultures`` in order.  The
    first entry whose regex matches decides how the text is split and which
    part is the year, the month and the day.

    Raises:
        ValueError: if no culture's regex matches ``date``.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Commas only decorate the textual layouts; drop them so a plain
        # split on the separator yields the three date parts.
        parts = date.replace(',', '').split(c.separator)
        year_at, month_at, day_at = c.indexes
        return Date(
            int(parts[year_at]),
            parsemonth(parts[month_at]),
            int(parts[day_at]),
        )
    # No layout recognized the input: fail loudly instead of returning None.
    raise ValueError('unrecognized date format: %r' % (date,))
def parsemonth(month):
    """Convert one month token to its month number.

    ``month`` is either a run of digits ('7', '07'), returned as an int, or
    a case-insensitive prefix of a name in ``Months`` ('Mar', 'sept',
    'December'); the first entry whose name starts with the token wins.

    Raises:
        ValueError: if the token is empty or matches no month name.
    """
    if month.isdigit():
        return int(month)
    # An empty token is a prefix of every name and would otherwise be
    # reported as January, so it is rejected along with unknown names.
    if month:
        wanted = month.lower()
        for m in Months:
            if m.name.lower().startswith(wanted):
                return m.number
    raise ValueError('unrecognized month: %r' % (month,))
# Script entry point: pass the command-line arguments, minus the program
# name, to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)
import re
import sys
from collections import namedtuple
# Simple data objects are handy as hell. Use more of them.

# A calendar month: English name, 1-based number, and number of days.
# February carries 28 days; leap days are not represented in this table.
Month = namedtuple('Month', 'name number days')

# One supported date layout:
#   format    - human-readable label for the layout
#   separator - string the (comma-stripped) input is split on
#   indexes   - positions of (year, month, day) among the split parts
#   regex     - pattern that recognizes the layout
Culture = namedtuple('Culture', 'format separator indexes regex')

# A parsed calendar date.
Date = namedtuple('Date', 'year month day')

Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# The patterns are raw strings so that \d, \- and \/ reach the regex engine
# untouched instead of triggering Python's invalid-escape-sequence warning.
Cultures = (
    Culture('yyyy-MM-dd', '-', (0, 1, 2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd', '/', (0, 1, 2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy', '/', (2, 0, 1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)
# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.
def main(args):
    """Print the result of ``diffdate`` for two date strings.

    ``args`` must unpack into exactly two items: the start date and the
    end date, in that order.
    """
    first, second = args
    print(diffdate(first, second))
def diffdate(start, end):
    """Return the day number of ``end`` minus the day number of ``start``.

    Each argument is parsed with ``parsedate`` and converted with
    ``totaldays`` (``start`` first, then ``end``).  The result is negative
    when ``end`` falls before ``start``.
    """
    first_day, last_day = (totaldays(parsedate(text)) for text in (start, end))
    return last_day - first_day
def totaldays(date):
    """Convert a date to a running day number.

    ``date`` needs ``year``, ``month`` and ``day`` attributes.  The count is
    arranged so that year 1, month 1, day 1 comes out as 1: every year
    contributes 365 days, plus one day for each leap year (divisible by 4,
    except centuries not divisible by 400) whose leap day has passed.
    """
    leap_year_limit = date.year
    if date.month <= 2:
        # In January/February this year's own leap day has not been reached,
        # so only the years strictly before it are counted.
        leap_year_limit -= 1
    leap_days = (
        leap_year_limit // 4 - leap_year_limit // 100 + leap_year_limit // 400
    )

    # Lengths of the full months that precede date.month, from Months.
    month_days = 0
    for index in range(date.month - 1):
        month_days += Months[index].days

    return (date.year - 1) * 365 + month_days + date.day + leap_days
def parsedate(date):
    """Parse a date string into a ``Date``.

    The string is tried against each entry of ``Cultures`` in order.  The
    first entry whose regex matches decides how the text is split and which
    part is the year, the month and the day.

    Raises:
        ValueError: if no culture's regex matches ``date``.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Commas only decorate the textual layouts; drop them so a plain
        # split on the separator yields the three date parts.
        parts = date.replace(',', '').split(c.separator)
        year_at, month_at, day_at = c.indexes
        return Date(
            int(parts[year_at]),
            parsemonth(parts[month_at]),
            int(parts[day_at]),
        )
    # No layout recognized the input: fail loudly instead of returning None.
    raise ValueError('unrecognized date format: %r' % (date,))
def parsemonth(month):
    """Convert one month token to its month number.

    ``month`` is either a run of digits ('7', '07'), returned as an int, or
    a case-insensitive prefix of a name in ``Months`` ('Mar', 'sept',
    'December'); the first entry whose name starts with the token wins.

    Raises:
        ValueError: if the token is empty or matches no month name.
    """
    if month.isdigit():
        return int(month)
    # An empty token is a prefix of every name and would otherwise be
    # reported as January, so it is rejected along with unknown names.
    if month:
        wanted = month.lower()
        for m in Months:
            if m.name.lower().startswith(wanted):
                return m.number
    raise ValueError('unrecognized month: %r' % (month,))
# Script entry point: pass the command-line arguments, minus the program
# name, to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)
Another case in point: don't drive your algorithm with data that requires
parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic
code with the unpacking logic. Just store the indexes directly as a tuple of
integers. The core problem in software writing is reducing and managing
complexity, and the greatest complexity is always algorithmic, and it's usually
lumpy, in the sense of being concentrated in specific parts of a program.
Investing more in creating "smarter" or "richer" data will help you keep the
algorithmic parts of the code easier to understand and maintain.
Once the simplifications from those changes are made, the splitter() function
seems unnecessary. Instead, I would suggest a different helper function just to
parse the month. And that logic doesn't require regex: you can use simpler
things like isdigit() and startswith(). Also, a Month should know its
number. Again, use richer data to simplify the algorithm. After 30 years of writing
code, that's my top piece of advice.
If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in
Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Yet again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use
datetime?
Another case in point: don't drive your algorithm with data that requires
parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic
code with the unpacking logic. Just store the indexes directly as a tuple of
integers. The core problem in software writing is reducing and managing
complexity, and the greatest complexity is always algorithmic, and it's usually
lumpy, in the sense of being concentrated in specific parts of a program.
Investing more in creating "smarter" or "richer" data will help you keep the
algorithmic parts of the code easier to understand and maintain.
Once the simplifications from those changes are made, the splitter() function
seems unnecessary. Instead, I would suggest a different helper function just to
parse the month. And that logic doesn't require regex: you can use simpler
things like isdigit() and startswith(). Also, a Month should know its
number. Again, use richer data to simplify the algorithm. After 30 years of writing
code, that's my top piece of advice.
If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in
Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use
datetime?
Another case in point: don't drive your algorithm with data that requires
parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic
code with the unpacking logic. Just store the indexes directly as a tuple of
integers. The core problem in software writing is reducing and managing
complexity, and the greatest complexity is always algorithmic.
Investing more in creating "smarter" or "richer" data will help you keep the
algorithmic parts of the code easier to understand and maintain.
Once the simplifications from those changes are made, the splitter() function
seems unnecessary. Instead, I would suggest a different helper function just to
parse the month. And that logic doesn't require regex: you can use simpler
things like isdigit() and startswith(). Also, a Month should know its
number. Again, use richer data to simplify the algorithm.
If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in
Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Yet again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use
datetime?
Modern Python has so many conveniences for creating simple data objects (namedtuple, dataclass, and the excellent attrs library), that it's usually a mistake to structure your internal data using ordinary dicts, lists, or tuples. These simple data objects not only clean up the code from a readability perspective (as shown in multiple places below) but also encourage you to focus your attention where it should be: defining the meaningful data entities to facilitate and support the algorithmic needs of the program. Organize the data the right way, and the algorithms will usually flow naturally.
Case in point: you don't need an 8-way switch; you need richer data. The
splitter() arguments belong in the realm of data (Culture instances in the
code below), not algorithm.
Another case in point: don't drive your algorithm with data that requires
parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic
code with the unpacking logic. Just store the indexes directly as a tuple of
integers. The core problem in software writing is reducing and managing
complexity, and the greatest complexity is always algorithmic, and it's usually
lumpy, in the sense of being concentrated in specific parts of a program.
Investing more in creating "smarter" or "richer" data will help you keep the
algorithmic parts of the code easier to understand and maintain.
Python has a built-in sum() function.
When possible (which means almost always), iterate directly
over a collection (i.e., a list or tuple) rather than over the indexes.
If you need the indexes too, use enumerate().
Abandon your style of trying to cram extra logic on each line:
if foo > 1233: x = 12; y = 34
I went through that phase for a brief time early in my Python journey. It doesn't scale well and it's harder to maintain over the long term on a project. Again, complexity is the biggest foe in software engineering: one way to reduce complexity slightly (and increase readability) is to favor shorter lines of code over longer ones. Within reason, taller/skinnier texts are easier on readers than shorter/wider texts (the publishing industry has understood this for decades), so if you are cramming more code into single lines, you are economizing on the wrong thing (i.e., saving lines while spending readability).
Similarly, abandon your aversion to importing parts of the Python standard
library. It's fine to avoid those things in the name of learning. But that's
the only good reason I know of. So get command-line arguments from sys.argv
guilt-free. It's the way to go!
Once the simplifications from those changes are made, the splitter() function
seems unnecessary. Instead, I would suggest a different helper function just to
parse the month. And that logic doesn't require regex: you can use simpler
things like isdigit() and startswith(). Also, a Month should know its
number. Again, use richer data to simplify the algorithm. After 30 years of writing
code, that's my top piece of advice.
Other improvements for you to consider:
If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in
Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use
datetime?
import re
import sys
from collections import namedtuple
# Simple data objects are handy as hell. Use more of them.

# A calendar month: English name, 1-based number, and number of days.
# February carries 28 days; leap days are not represented in this table.
Month = namedtuple('Month', 'name number days')

# One supported date layout:
#   format    - human-readable label for the layout
#   separator - string the (comma-stripped) input is split on
#   indexes   - positions of (year, month, day) among the split parts
#   regex     - pattern that recognizes the layout
Culture = namedtuple('Culture', 'format separator indexes regex')

# A parsed calendar date.
Date = namedtuple('Date', 'year month day')

Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# The patterns are raw strings so that \d, \- and \/ reach the regex engine
# untouched instead of triggering Python's invalid-escape-sequence warning.
Cultures = (
    Culture('yyyy-MM-dd', '-', (0, 1, 2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd', '/', (0, 1, 2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy', '/', (2, 0, 1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2, 0, 1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2, 1, 0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0, 1, 2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)
# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.
def main(args):
    """Print the result of ``diffdate`` for two date strings.

    ``args`` must unpack into exactly two items: the start date and the
    end date, in that order.
    """
    first, second = args
    print(diffdate(first, second))
def diffdate(start, end):
    """Return the day number of ``end`` minus the day number of ``start``.

    Each argument is parsed with ``parsedate`` and converted with
    ``totaldays`` (``start`` first, then ``end``).  The result is negative
    when ``end`` falls before ``start``.
    """
    first_day, last_day = (totaldays(parsedate(text)) for text in (start, end))
    return last_day - first_day
def totaldays(date):
    """Convert a date to a running day number.

    ``date`` needs ``year``, ``month`` and ``day`` attributes.  The count is
    arranged so that year 1, month 1, day 1 comes out as 1: every year
    contributes 365 days, plus one day for each leap year (divisible by 4,
    except centuries not divisible by 400) whose leap day has passed.
    """
    leap_year_limit = date.year
    if date.month <= 2:
        # In January/February this year's own leap day has not been reached,
        # so only the years strictly before it are counted.
        leap_year_limit -= 1
    leap_days = (
        leap_year_limit // 4 - leap_year_limit // 100 + leap_year_limit // 400
    )

    # Lengths of the full months that precede date.month, from Months.
    month_days = 0
    for index in range(date.month - 1):
        month_days += Months[index].days

    return (date.year - 1) * 365 + month_days + date.day + leap_days
def parsedate(date):
    """Parse a date string into a ``Date``.

    The string is tried against each entry of ``Cultures`` in order.  The
    first entry whose regex matches decides how the text is split and which
    part is the year, the month and the day.

    Raises:
        ValueError: if no culture's regex matches ``date``.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Commas only decorate the textual layouts; drop them so a plain
        # split on the separator yields the three date parts.
        parts = date.replace(',', '').split(c.separator)
        year_at, month_at, day_at = c.indexes
        return Date(
            int(parts[year_at]),
            parsemonth(parts[month_at]),
            int(parts[day_at]),
        )
    # No layout recognized the input: fail loudly instead of returning None.
    raise ValueError('unrecognized date format: %r' % (date,))
def parsemonth(month):
    """Convert one month token to its month number.

    ``month`` is either a run of digits ('7', '07'), returned as an int, or
    a case-insensitive prefix of a name in ``Months`` ('Mar', 'sept',
    'December'); the first entry whose name starts with the token wins.

    Raises:
        ValueError: if the token is empty or matches no month name.
    """
    if month.isdigit():
        return int(month)
    # An empty token is a prefix of every name and would otherwise be
    # reported as January, so it is rejected along with unknown names.
    if month:
        wanted = month.lower()
        for m in Months:
            if m.name.lower().startswith(wanted):
                return m.number
    raise ValueError('unrecognized month: %r' % (month,))
# Script entry point: pass the command-line arguments, minus the program
# name, to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)