Revisions to Python code to find difference between two dates

deleted 8 characters in body

Source Link

edited May 22, 2021 at 20:26

13.1k
2
29
40


import re
import sys
from collections import namedtuple

# Simple data objects are handy as hell. Use more of them.

# Lightweight record types used by the parsing and day-counting functions.
Month = namedtuple('Month', 'name number days')
Culture = namedtuple('Culture', 'format separator indexes regex')
Date = namedtuple('Date', 'year month day')

# Calendar months in order; February is listed with 28 days.
Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# Supported input formats. `indexes` holds the positions of (year, month, day)
# within the text once it is split on `separator`. The patterns are raw strings
# so that `\d`, `\-` and `\/` are not treated as (invalid) string escapes.
Cultures = (
    Culture('yyyy-MM-dd',    '-', (0,1,2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd',    '/', (0,1,2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy',    '/', (2,0,1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy',  ' ', (2,0,1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy',  ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2,0,1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd',  ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)

# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.

def main(args):
    """Print the day difference between the two date strings in *args*.

    *args* must unpack into exactly two items: the start date followed by
    the end date.
    """
    first, second = args
    print(diffdate(first, second))

def diffdate(start, end):
    """Return the number of days from *start* to *end*.

    Both arguments are date strings; the result is negative when *end*
    falls before *start*.
    """
    # Convert each string to a day count, start first, then end.
    begin_days, finish_days = (totaldays(parsedate(text)) for text in (start, end))
    return finish_days - begin_days

def totaldays(date):
    """Return a running day count for *date* (January 1 of year 1 is day 1).

    *date* is any object exposing ``year``, ``month`` and ``day`` attributes.
    """
    # For January/February the current year's leap day cannot have occurred
    # yet, so leap years are only counted up to the previous year.
    leap_limit = date.year - 1 if date.month <= 2 else date.year
    leap_days = leap_limit // 4 - leap_limit // 100 + leap_limit // 400

    # Days contributed by the full months preceding date.month.
    elapsed_month_days = 0
    for index in range(date.month - 1):
        elapsed_month_days += Months[index].days

    return (date.year - 1) * 365 + elapsed_month_days + date.day + leap_days

def parsedate(date):
    """Parse a date string into a ``Date(year, month, day)`` tuple.

    The string is tried against each entry of ``Cultures`` in order; the
    first culture whose regex matches decides how the text is split and
    which part is the year, the month and the day.

    Raises:
        ValueError: if no culture matches the string, or if the month
            token cannot be resolved to a month number.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Drop commas so formats like 'Jan 5, 2020' split cleanly.
        parts = date.replace(',', '').split(c.separator)
        year_pos, month_pos, day_pos = c.indexes
        month = parsemonth(parts[month_pos])
        if month is None:
            raise ValueError('unrecognized month in date: {!r}'.format(date))
        return Date(int(parts[year_pos]), month, int(parts[day_pos]))
    raise ValueError('unrecognized date format: {!r}'.format(date))

def parsemonth(month):
    """Convert a month token to its month number.

    *month* is either a string of digits, which is returned as an int
    without range checking, or a case-insensitive prefix of an English
    month name such as 'sep' or 'September'. For names, the first entry of
    ``Months`` whose name starts with the token wins.

    Raises:
        ValueError: if a non-numeric token matches no month name.
    """
    if month.isdigit():
        return int(month)
    prefix = month.lower()
    for m in Months:
        if m.name.lower().startswith(prefix):
            return m.number
    raise ValueError('unrecognized month: {!r}'.format(month))

# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)


import re
import sys
from collections import namedtuple

# Simple data objects are handy as hell. Use more of them.

# Lightweight record types used by the parsing and day-counting functions.
Month = namedtuple('Month', 'name number days')
Culture = namedtuple('Culture', 'format separator indexes regex')
Date = namedtuple('Date', 'year month day')

# Calendar months in order; February is listed with 28 days.
Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# Supported input formats. `indexes` holds the positions of (year, month, day)
# within the text once it is split on `separator`. The patterns are raw strings
# so that `\d`, `\-` and `\/` are not treated as (invalid) string escapes.
Cultures = (
    Culture('yyyy-MM-dd',    '-', (0,1,2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd',    '/', (0,1,2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy',    '/', (2,0,1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy',  ' ', (2,0,1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy',  ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2,0,1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd',  ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)

# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.

def main(args):
    """Print how many days separate the two date strings given in *args*.

    *args* is unpacked into exactly two values: start date, then end date.
    """
    start_text, end_text = args
    day_gap = diffdate(start_text, end_text)
    print(day_gap)

def diffdate(start, end):
    """Return the day count of *end* minus the day count of *start*.

    Both arguments are date strings; a negative result means *end* is the
    earlier date.
    """
    start_days = totaldays(parsedate(start))
    return totaldays(parsedate(end)) - start_days

def totaldays(date):
    """Return a running day count for *date* (January 1 of year 1 is day 1).

    *date* is any object exposing ``year``, ``month`` and ``day`` attributes.
    """
    # For January/February the current year's leap day cannot have occurred
    # yet, so leap years are only counted up to the previous year.
    leap_limit = date.year - 1 if date.month <= 2 else date.year
    leap_days = leap_limit // 4 - leap_limit // 100 + leap_limit // 400

    # Days contributed by the full months preceding date.month.
    elapsed_month_days = 0
    for index in range(date.month - 1):
        elapsed_month_days += Months[index].days

    return (date.year - 1) * 365 + elapsed_month_days + date.day + leap_days

def parsedate(date):
    """Parse a date string into a ``Date(year, month, day)`` tuple.

    The string is tried against each entry of ``Cultures`` in order; the
    first culture whose regex matches decides how the text is split and
    which part is the year, the month and the day.

    Raises:
        ValueError: if no culture matches the string, or if the month
            token cannot be resolved to a month number.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Drop commas so formats like 'Jan 5, 2020' split cleanly.
        parts = date.replace(',', '').split(c.separator)
        year_pos, month_pos, day_pos = c.indexes
        month = parsemonth(parts[month_pos])
        if month is None:
            raise ValueError('unrecognized month in date: {!r}'.format(date))
        return Date(int(parts[year_pos]), month, int(parts[day_pos]))
    raise ValueError('unrecognized date format: {!r}'.format(date))

def parsemonth(month):
    """Convert a month token to its month number.

    *month* is either a string of digits, which is returned as an int
    without range checking, or a case-insensitive prefix of an English
    month name such as 'sep' or 'September'. For names, the first entry of
    ``Months`` whose name starts with the token wins.

    Raises:
        ValueError: if a non-numeric token matches no month name.
    """
    if month.isdigit():
        return int(month)
    prefix = month.lower()
    for m in Months:
        if m.name.lower().startswith(prefix):
            return m.number
    raise ValueError('unrecognized month: {!r}'.format(month))

# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)


import re
import sys
from collections import namedtuple

# Simple data objects are handy as hell. Use more of them.

# Lightweight record types used by the parsing and day-counting functions.
Month = namedtuple('Month', 'name number days')
Culture = namedtuple('Culture', 'format separator indexes regex')
Date = namedtuple('Date', 'year month day')

# Calendar months in order; February is listed with 28 days.
Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# Supported input formats. `indexes` holds the positions of (year, month, day)
# within the text once it is split on `separator`. The patterns are raw strings
# so that `\d`, `\-` and `\/` are not treated as (invalid) string escapes.
Cultures = (
    Culture('yyyy-MM-dd',    '-', (0,1,2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd',    '/', (0,1,2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy',    '/', (2,0,1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy',  ' ', (2,0,1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy',  ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2,0,1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd',  ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)

# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.

def main(args):
    """Print the day difference between the two date strings in *args*.

    *args* must unpack into exactly two items: the start date followed by
    the end date.
    """
    first, second = args
    print(diffdate(first, second))

def diffdate(start, end):
    """Return the number of days from *start* to *end*.

    Both arguments are date strings; the result is negative when *end*
    falls before *start*.
    """
    # Convert each string to a day count, start first, then end.
    begin_days, finish_days = (totaldays(parsedate(text)) for text in (start, end))
    return finish_days - begin_days

def totaldays(date):
    """Return a running day count for *date* (January 1 of year 1 is day 1).

    *date* is any object exposing ``year``, ``month`` and ``day`` attributes.
    """
    # For January/February the current year's leap day cannot have occurred
    # yet, so leap years are only counted up to the previous year.
    leap_limit = date.year - 1 if date.month <= 2 else date.year
    leap_days = leap_limit // 4 - leap_limit // 100 + leap_limit // 400

    # Days contributed by the full months preceding date.month.
    elapsed_month_days = 0
    for index in range(date.month - 1):
        elapsed_month_days += Months[index].days

    return (date.year - 1) * 365 + elapsed_month_days + date.day + leap_days

def parsedate(date):
    """Parse a date string into a ``Date(year, month, day)`` tuple.

    The string is tried against each entry of ``Cultures`` in order; the
    first culture whose regex matches decides how the text is split and
    which part is the year, the month and the day.

    Raises:
        ValueError: if no culture matches the string, or if the month
            token cannot be resolved to a month number.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Drop commas so formats like 'Jan 5, 2020' split cleanly.
        parts = date.replace(',', '').split(c.separator)
        year_pos, month_pos, day_pos = c.indexes
        month = parsemonth(parts[month_pos])
        if month is None:
            raise ValueError('unrecognized month in date: {!r}'.format(date))
        return Date(int(parts[year_pos]), month, int(parts[day_pos]))
    raise ValueError('unrecognized date format: {!r}'.format(date))

def parsemonth(month):
    """Convert a month token to its month number.

    *month* is either a string of digits, which is returned as an int
    without range checking, or a case-insensitive prefix of an English
    month name such as 'sep' or 'September'. For names, the first entry of
    ``Months`` whose name starts with the token wins.

    Raises:
        ValueError: if a non-numeric token matches no month name.
    """
    if month.isdigit():
        return int(month)
    prefix = month.lower()
    for m in Months:
        if m.name.lower().startswith(prefix):
            return m.number
    raise ValueError('unrecognized month: {!r}'.format(month))

# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)

deleted 152 characters in body

Source Link

edited May 22, 2021 at 20:18

FMc

13.1k
2
29
40

Another case in point: don't drive your algorithm with data that requires parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic code with the unpacking logic. Just store the indexes directly as a tuple of integers. The core problem in software writing is reducing and managing complexity, and the greatest complexity is always algorithmic, and it's usually lumpy, in the sense of being concentrated in specific parts of a program. Investing more in creating "smarter" or "richer" data will help you keep the algorithmic parts of the code easier to understand and maintain.

After the simplification from those changes are made, the splitter() function seems unnecessary. Instead, I would suggest a different helper function just to parse the month. And that logic doesn't require regex: you can use simpler things like isdigit() and startswith(). Also, a Month should know its number. Again, use richer data to simplify algorithm. After 30 years of writing code, that's my top piece of advice.

If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Yet again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use datetime?

Another case in point: don't drive your algorithm with data that requires parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic code with the unpacking logic. Just store the indexes directly as a tuple of integers. The core problem in software writing is reducing and managing complexity, and the greatest complexity is always algorithmic, and it's usually lumpy, in the sense of being concentrated in specific parts of a program. Investing more in creating "smarter" or "richer" data will help you keep the algorithmic parts of the code easier to understand and maintain.

After the simplification from those changes are made, the splitter() function seems unnecessary. Instead, I would suggest a different helper function just to parse the month. And that logic doesn't require regex: you can use simpler things like isdigit() and startswith(). Also, a Month should know its number. Again, use richer data to simplify algorithm. After 30 years of writing code, that's my top piece of advice.

If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use datetime?

Another case in point: don't drive your algorithm with data that requires parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic code with the unpacking logic. Just store the indexes directly as a tuple of integers. The core problem in software writing is reducing and managing complexity, and the greatest complexity is always algorithmic. Investing more in creating "smarter" or "richer" data will help you keep the algorithmic parts of the code easier to understand and maintain.

After the simplification from those changes are made, the splitter() function seems unnecessary. Instead, I would suggest a different helper function just to parse the month. And that logic doesn't require regex: you can use simpler things like isdigit() and startswith(). Also, a Month should know its number. Again, use richer data to simplify algorithm.

If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Yet again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use datetime?

Source Link

answered May 22, 2021 at 20:08

FMc

13.1k
2
29
40

Modern Python has so many conveniences for creating simple data objects (namedtuple, dataclass, and the excellent attrs library), that it's usually a mistake to structure your internal data using ordinary dicts, lists, or tuples. These simple data objects not only clean up the code from a readability perspective (as shown in multiple places below) but also encourage you to focus your attention where it should be: defining the meaningful data entities to facilitate and support the algorithmic needs of the program. Organize the data the right way, and the algorithms will usually flow naturally.

Case in point: you don't need an 8-way switch; you need richer data. The splitter() arguments belong in the realm of data (Culture instances in the code below), not algorithm.

Another case in point: don't drive your algorithm with data that requires parsing. For example, if you think you need a tuple of indexes like (2, 0, 1), don't store it in a comma-delimited string and complicate your algorithmic code with the unpacking logic. Just store the indexes directly as a tuple of integers. The core problem in software writing is reducing and managing complexity, and the greatest complexity is always algorithmic, and it's usually lumpy, in the sense of being concentrated in specific parts of a program. Investing more in creating "smarter" or "richer" data will help you keep the algorithmic parts of the code easier to understand and maintain.

Python has a built-in sum() function.

When possible (which means almost always) iterate directly over a collection (ie, list, tuple) rather than over the indexes. If you need the indexes too, use enumerate().

Abandon your style of trying to cram extra logic on each line:

if foo > 1233: x = 12; y = 34

I went through that phase for a brief time early in my Python journey. It doesn't scale well and it's harder to maintain over the long term on a project. Again, complexity is the biggest foe in software engineering: one way to reduce complexity slightly (and increase readability) is to favor shorter lines of code over longer ones. Within reason, taller/skinnier texts are easier on readers than shorter/wider texts (the publishing industry has understood this for decades), so if you are cramming more code into single lines, you are economizing on the wrong thing (ie, saving lines while spending readability).

Similarly, abandon your aversion to importing parts of the Python standard library. It's fine to avoid those things in the name of learning. But that's the only good reason I know of. So get command-line arguments from sys.argv guilt-free. It's the way to go!

After the simplifications from those changes are made, the splitter() function seems unnecessary. Instead, I would suggest a different helper function just to parse the month. And that logic doesn't require regex: you can use simpler things like isdigit() and startswith(). Also, a Month should know its number. Again, use richer data to simplify the algorithm. After 30 years of writing code, that's my top piece of advice.

Other improvements for you to consider:

If you invest more in your regular expressions so that they use named captures, you can drop the tedious details in Culture.separator and Culture.indexes. Instead, you can grab the year, month, and day directly from the re.Match object, by name. Again, smart data, simple code.
When given invalid input, your code just blows up. Give some thought to whether you want to invest in better error handling/reporting.
Dates are notoriously complicated. There are probably weird edge-case bugs in your code. Are you sure you don't want to use datetime?


import re
import sys
from collections import namedtuple

# Simple data objects are handy as hell. Use more of them.

# Lightweight record types used by the parsing and day-counting functions.
Month = namedtuple('Month', 'name number days')
Culture = namedtuple('Culture', 'format separator indexes regex')
Date = namedtuple('Date', 'year month day')

# Calendar months in order; February is listed with 28 days.
Months = (
    Month('January', 1, 31),
    Month('February', 2, 28),
    Month('March', 3, 31),
    Month('April', 4, 30),
    Month('May', 5, 31),
    Month('June', 6, 30),
    Month('July', 7, 31),
    Month('August', 8, 31),
    Month('September', 9, 30),
    Month('October', 10, 31),
    Month('November', 11, 30),
    Month('December', 12, 31),
)

# Supported input formats. `indexes` holds the positions of (year, month, day)
# within the text once it is split on `separator`. The patterns are raw strings
# so that `\d`, `\-` and `\/` are not treated as (invalid) string escapes.
Cultures = (
    Culture('yyyy-MM-dd',    '-', (0,1,2), r'^\d{4}\-0?([2-9]|1[0-2]?)\-(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy/MM/dd',    '/', (0,1,2), r'^\d{4}\/0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('MM/dd/yyyy',    '/', (2,0,1), r'^0?([2-9]|1[0-2]?)\/(0?(3[01]|[12][0-9]|[1-9]))\/\d{4}$'),
    Culture('MMM dd, yyyy',  ' ', (2,0,1), r'^[A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMM, yyyy',  ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3}, \d{4}$'),
    Culture('MMMM dd, yyyy', ' ', (2,0,1), r'^[A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9])), \d{4}$'),
    Culture('dd MMMM, yyyy', ' ', (2,1,0), r'^(0?(3[01]|[12][0-9]|[1-9])) [A-Za-z]{3,9}, \d{4}$'),
    Culture('yyyy, MMM dd',  ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3} (0?(3[01]|[12][0-9]|[1-9]))$'),
    Culture('yyyy, MMMM dd', ' ', (0,1,2), r'^\d{4}, [A-Za-z]{3,9} (0?(3[01]|[12][0-9]|[1-9]))$'),
)

# A purely stylistic point: organize modules the way you want to read them.
# Most humans like to read things top to bottom.

def main(args):
    """Print how many days separate the two date strings given in *args*.

    *args* is unpacked into exactly two values: start date, then end date.
    """
    start_text, end_text = args
    day_gap = diffdate(start_text, end_text)
    print(day_gap)

def diffdate(start, end):
    """Return the day count of *end* minus the day count of *start*.

    Both arguments are date strings; a negative result means *end* is the
    earlier date.
    """
    start_days = totaldays(parsedate(start))
    return totaldays(parsedate(end)) - start_days

def totaldays(date):
    """Return a running day count for *date* (January 1 of year 1 is day 1).

    *date* is any object exposing ``year``, ``month`` and ``day`` attributes.
    """
    # For January/February the current year's leap day cannot have occurred
    # yet, so leap years are only counted up to the previous year.
    leap_limit = date.year - 1 if date.month <= 2 else date.year
    leap_days = leap_limit // 4 - leap_limit // 100 + leap_limit // 400

    # Days contributed by the full months preceding date.month.
    elapsed_month_days = 0
    for index in range(date.month - 1):
        elapsed_month_days += Months[index].days

    return (date.year - 1) * 365 + elapsed_month_days + date.day + leap_days

def parsedate(date):
    """Parse a date string into a ``Date(year, month, day)`` tuple.

    The string is tried against each entry of ``Cultures`` in order; the
    first culture whose regex matches decides how the text is split and
    which part is the year, the month and the day.

    Raises:
        ValueError: if no culture matches the string, or if the month
            token cannot be resolved to a month number.
    """
    for c in Cultures:
        if not re.match(c.regex, date):
            continue
        # Drop commas so formats like 'Jan 5, 2020' split cleanly.
        parts = date.replace(',', '').split(c.separator)
        year_pos, month_pos, day_pos = c.indexes
        month = parsemonth(parts[month_pos])
        if month is None:
            raise ValueError('unrecognized month in date: {!r}'.format(date))
        return Date(int(parts[year_pos]), month, int(parts[day_pos]))
    raise ValueError('unrecognized date format: {!r}'.format(date))

def parsemonth(month):
    """Convert a month token to its month number.

    *month* is either a string of digits, which is returned as an int
    without range checking, or a case-insensitive prefix of an English
    month name such as 'sep' or 'September'. For names, the first entry of
    ``Months`` whose name starts with the token wins.

    Raises:
        ValueError: if a non-numeric token matches no month name.
    """
    if month.isdigit():
        return int(month)
    prefix = month.lower()
    for m in Months:
        if m.name.lower().startswith(prefix):
            return m.number
    raise ValueError('unrecognized month: {!r}'.format(month))

# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)

Stack Exchange Network

Return to Answer