Skip to main content
Dedent over-indentation.
Source Link
Peilonrayz
  • 44.5k
  • 7
  • 80
  • 158
    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        """Ask the user to pick a folder and return the selected path.

        A hidden Tk root window is created so that only the directory dialog
        is shown; the root is destroyed again once the dialog has closed.

        Returns:
            Whatever ``filedialog.askdirectory()`` returns for the selection.
        """
        root = Tk()
        root.withdraw()
        # A single backslash gives a real line break; the dialog selects a
        # folder, not a file.
        print("Initializing Dialogue...\nPlease select a folder.")
        try:
            return filedialog.askdirectory()
        finally:
            root.destroy()


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate:
        """Helper that turns DataFrame columns into datetime columns."""

        def change_to_date(self, a, b):
            """Replace column ``b`` of ``a`` with its ``pd.to_datetime`` result.

            Args:
                a: Object holding the column (a DataFrame in this script).
                b: Label of the column to convert.
            """
            parsed = pd.to_datetime(a[b])
            a[b] = parsed


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        """Create an empty one-row result table for one race.

        The column labels are spelled exactly like the keys ``all_count``
        assigns to, so filling the table updates these columns instead of
        appending extra ones next to placeholders that are never filled.

        Args:
            name: Value stored in the ``Race`` column.

        Returns:
            A DataFrame with index ``[1]``; every column except ``Race`` is
            empty (NaN).
        """
        columns = [
            'Race', 'number of individuals', 'littering youngsters',
            'first litter', 'more than first litter',
            'amount_21', 'lose_21', 'nip_count',
            'age', 'between_litter',
        ]
        temp = pd.DataFrame(columns=columns, index=[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        """Return the rows of ``DT`` whose ``col`` value equals ``number``."""
        matches = DT[col] == number
        return DT[matches]

    def months(DT, col, col2):
        """Return the rows of ``DT`` where columns ``col`` and ``col2`` differ."""
        differs = DT[col] != DT[col2]
        return DT[differs]


    def birth(DFL, DFM):
        """Build the litter table with the loss columns added.

        The ``LIL11`` and ``LIL21`` columns of ``DFL`` (when it has rows) and
        ``DFM`` are stacked, then ``loses`` (``LIL11 - LIL21``) and ``ST21``
        (``loses`` as a percentage of ``LIL11``) are computed.

        Args:
            DFL: DataFrame with ``LIL11`` and ``LIL21`` columns; may be empty.
            DFM: DataFrame with ``LIL11`` and ``LIL21`` columns.

        Returns:
            A new DataFrame with columns ``LIL11``, ``LIL21``, ``loses`` and
            ``ST21``; the inputs are not modified.
        """
        columns = ['LIL11', 'LIL21']
        if len(DFL) > 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
            # rows the same way and keeps the original index labels.
            pigs = pd.concat([DFL[columns], DFM[columns]])
        else:
            # Explicit copy so the new columns never touch the caller's frame.
            pigs = DFM[columns].copy()
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


    def sum_digits(n):
        """Return the element-wise sum of the decimal digits of ``n``.

        Args:
            n: pandas Series (or NumPy array) of integers; left unmodified.

        Returns:
            An object shaped like ``n`` holding the digit sum of each element.
        """
        # abs() yields a fresh object, so the in-place floor division below
        # cannot alter the caller's data, and negative values terminate too.
        remaining = abs(n)
        total = remaining * 0
        # Keep going until *every* element is used up; stopping as soon as one
        # element hits zero would drop the leading digits of longer numbers.
        while remaining.any():
            total += remaining % 10
            remaining //= 10
        return total


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)
    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        Tk().withdraw()
        print("Initializing Dialogue... \\nPlease select a file.")
        tk_filenames = filedialog.askdirectory()
        tempDir = tk_filenames
        return tempDir


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate():

        def change_to_date(self, a, b):
            a[b] = pd.to_datetime(a[b])


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                                   'first litter.','more than first litter',
                                   'amount_21','lose_21','nip_count',
                                   'age','between_litter'],index =[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        newDT = DT[DT[col] == number]
        return newDT

    def months(DT, col, col2):
        newDT = DT[DT[col] != DT[col2]]
        return newDT


    def birth(DFL, DFM):
        """Build the litter table with the loss columns added.

        The ``LIL11`` and ``LIL21`` columns of ``DFL`` (when it has rows) and
        ``DFM`` are stacked, then ``loses`` (``LIL11 - LIL21``) and ``ST21``
        (``loses`` as a percentage of ``LIL11``) are computed.

        Args:
            DFL: DataFrame with ``LIL11`` and ``LIL21`` columns; may be empty.
            DFM: DataFrame with ``LIL11`` and ``LIL21`` columns.

        Returns:
            A new DataFrame with columns ``LIL11``, ``LIL21``, ``loses`` and
            ``ST21``; the inputs are not modified.
        """
        columns = ['LIL11', 'LIL21']
        if len(DFL) > 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
            # rows the same way and keeps the original index labels.
            pigs = pd.concat([DFL[columns], DFM[columns]])
        else:
            # Explicit copy so the new columns never touch the caller's frame.
            pigs = DFM[columns].copy()
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


    def sum_digits(n):
        """Return the element-wise sum of the decimal digits of ``n``.

        Args:
            n: pandas Series (or NumPy array) of integers; left unmodified.

        Returns:
            An object shaped like ``n`` holding the digit sum of each element.
        """
        # abs() yields a fresh object, so the in-place floor division below
        # cannot alter the caller's data, and negative values terminate too.
        remaining = abs(n)
        total = remaining * 0
        # Keep going until *every* element is used up; stopping as soon as one
        # element hits zero would drop the leading digits of longer numbers.
        while remaining.any():
            total += remaining % 10
            remaining //= 10
        return total


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)
import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

listOfFiles = []
tempDF = pd.DataFrame()
tempDF2 = pd.DataFrame()


def get_filenames():
    """Ask the user to pick a folder and return the selected path.

    A hidden Tk root window is created so that only the directory dialog is
    shown; the root is destroyed again once the dialog has closed.

    Returns:
        Whatever ``filedialog.askdirectory()`` returns for the selection.
    """
    root = Tk()
    root.withdraw()
    # A single backslash gives a real line break; the dialog selects a
    # folder, not a file.
    print("Initializing Dialogue...\nPlease select a folder.")
    try:
        return filedialog.askdirectory()
    finally:
        root.destroy()


choosen_dir = get_filenames()

os.chdir(choosen_dir)
for file in glob.glob("*.csv"):
    listOfFiles.append(file)

# Route each CSV into the "L" or the "M" data set by its file-name prefix.
for file in listOfFiles:
    if file.startswith('L'):
        Table = pd.read_csv(file, sep=';')
        DF1 = pd.DataFrame(Table)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to stack the rows of another frame.
        tempDF = pd.concat([tempDF, DF1], sort=False)
    elif file.startswith('M'):
        Table2 = pd.read_csv(file, sep=';')
        DF2 = pd.DataFrame(Table2)
        tempDF2 = pd.concat([tempDF2, DF2], sort=False)
    else:
        # Neither prefix matched, so the file is not loaded.
        print('404')

DFL = tempDF
DFM = tempDF2


class ToDate:
    """Helper that turns DataFrame columns into datetime columns."""

    def change_to_date(self, a, b):
        """Replace column ``b`` of ``a`` with its ``pd.to_datetime`` result.

        Args:
            a: Object holding the column (a DataFrame in this script).
            b: Label of the column to convert.
        """
        parsed = pd.to_datetime(a[b])
        a[b] = parsed


dat = ToDate()
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')
DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

DFM_BL = DFM
ListL = DFL.ID1.tolist()
DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

def createDT(name):
    """Create an empty one-row result table for one race.

    The column labels are spelled exactly like the keys ``all_count`` assigns
    to, so filling the table updates these columns instead of appending extra
    ones next to placeholders that are never filled.

    Args:
        name: Value stored in the ``Race`` column.

    Returns:
        A DataFrame with index ``[1]``; every column except ``Race`` is empty
        (NaN).
    """
    columns = [
        'Race', 'number of individuals', 'littering youngsters',
        'first litter', 'more than first litter',
        'amount_21', 'lose_21', 'nip_count',
        'age', 'between_litter',
    ]
    temp = pd.DataFrame(columns=columns, index=[1])
    temp['Race'] = name
    return temp

def sortedDT(DT, col, number):
    """Return the rows of ``DT`` whose ``col`` value equals ``number``."""
    matches = DT[col] == number
    return DT[matches]

def months(DT, col, col2):
    """Return the rows of ``DT`` where columns ``col`` and ``col2`` differ."""
    differs = DT[col] != DT[col2]
    return DT[differs]


def birth(DFL, DFM):
    """Build the litter table with the loss columns added.

    The ``LIL11`` and ``LIL21`` columns of ``DFL`` (when it has rows) and
    ``DFM`` are stacked, then ``loses`` (``LIL11 - LIL21``) and ``ST21``
    (``loses`` as a percentage of ``LIL11``) are computed.

    Args:
        DFL: DataFrame with ``LIL11`` and ``LIL21`` columns; may be empty.
        DFM: DataFrame with ``LIL11`` and ``LIL21`` columns.

    Returns:
        A new DataFrame with columns ``LIL11``, ``LIL21``, ``loses`` and
        ``ST21``; the inputs are not modified.
    """
    columns = ['LIL11', 'LIL21']
    if len(DFL) > 0:
        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
        # rows the same way and keeps the original index labels.
        pigs = pd.concat([DFL[columns], DFM[columns]])
    else:
        # Explicit copy so the new columns never touch the caller's frame.
        pigs = DFM[columns].copy()
    pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
    pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
    return pigs


def sum_digits(n):
    """Return the element-wise sum of the decimal digits of ``n``.

    Args:
        n: pandas Series (or NumPy array) of integers; left unmodified.

    Returns:
        An object shaped like ``n`` holding the digit sum of each element.
    """
    # abs() yields a fresh object, so the in-place floor division below
    # cannot alter the caller's data, and negative values terminate too.
    remaining = abs(n)
    total = remaining * 0
    # Keep going until *every* element is used up; stopping as soon as one
    # element hits zero would drop the leading digits of longer numbers.
    while remaining.any():
        total += remaining % 10
        remaining //= 10
    return total


def all_count(DFL, DFM, DFM_BL, n, new):
    """Fill the summary table ``new`` from the given frames and return it.

    When ``DFL`` has rows, every field is computed. Otherwise the fields that
    are derived from ``DFL`` or ``n`` ('first litter', 'age', 'nip_count')
    are set to "-" and the counts come from ``DFM`` alone.

    Args:
        DFL: DataFrame; its length and its 'M_WAGUR' column are used.
        DFM: DataFrame; its length and its 'M_WAGUR' column are used, and in
            the empty-``DFL`` case it is de-duplicated on 'LPROS1'.
        DFM_BL: DataFrame de-duplicated on 'ID1' (only used when ``DFL`` has
            rows).
        n: Values handed to ``sum_digits`` (only used when ``DFL`` has rows).
        new: Table that receives the results; it is modified in place.

    Returns:
        The same ``new`` object that was passed in.
    """
    if len(DFL) > 0:
        s = sum_digits(n)
        pigs = birth(DFL, DFM)
        # Count each 'ID1' in DFM_BL only once.
        DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
        new['first litter'] = len(DFL)
        new['more than first litter'] = len(DFL) + len(DFM)
        new['number of individuals'] = len(DFL) + len(DFM_BL)
        # Means over the table returned by birth(), rounded to two decimals.
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        # Mean 'M_WAGUR' values, rounded to whole numbers.
        new['age'] = np.around(DFL['M_WAGUR'].mean())
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = np.around(s.mean(), decimals=2)

        return new

    else:
        pigs = birth(DFL, DFM)
        # Count each 'LPROS1' in DFM only once.
        dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(dfm)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"

        return new

# Build the summary row for the rows whose ID1 equals 10.
W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL, 'ID1', 10).copy()
M_W1R43 = sortedDT(DFM, 'ID1', 10).copy()
BL_W1R43 = sortedDT(DFM_BL, 'ID1', 10).copy()
# The M subset is named M_W1R43 (the original `MW1R43` was undefined).
# NOTE(review): 'LSUT1' is read from the L subset because the result template
# W1R43 has no such column -- confirm this is the intended source frame.
all_count(L_W1R43, M_W1R43, BL_W1R43, L_W1R43['LSUT1'], W1R43)

last = pd.concat([W1R43, ... ])
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)

Code:

import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

listOfFiles = []
tempDF = pd.DataFrame()
tempDF2 = pd.DataFrame()


def get_filenames():
    Tk().withdraw()
    print("Initializing Dialogue... \\nPlease select a file.")
    tk_filenames = filedialog.askdirectory()
    tempDir = tk_filenames
    return tempDir


choosen_dir = get_filenames()

os.chdir(choosen_dir)
for file in glob.glob("*.csv"):
    listOfFiles.append(file)

for file in listOfFiles:
    if file.startswith('L'):
        Table = pd.read_csv(file, sep=';')
        DF1 = pd.DataFrame(Table)
        tempDF = tempDF.append(DF1, sort=False)
    elif file.startswith('M'):
        Table2 = pd.read_csv(file, sep=';')
        DF2 = pd.DataFrame(Table2)
        tempDF2 = tempDF2.append(DF2, sort=False)
    else:
        print('404')

DFL = tempDF
DFM = tempDF2


class ToDate():

    def change_to_date(self, a, b):
        a[b] = pd.to_datetime(a[b])


dat = ToDate()
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')
DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

DFM_BL = DFM
ListL = DFL.ID1.tolist()
DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

def createDT(name):
    """Create an empty one-row result table for one race.

    The column labels are spelled exactly like the keys ``all_count`` assigns
    to, so filling the table updates these columns instead of appending extra
    ones next to placeholders that are never filled.

    Args:
        name: Value stored in the ``Race`` column.

    Returns:
        A DataFrame with index ``[1]``; every column except ``Race`` is empty
        (NaN).
    """
    columns = [
        'Race', 'number of individuals', 'littering youngsters',
        'first litter', 'more than first litter',
        'amount_21', 'lose_21', 'nip_count',
        'age', 'between_litter',
    ]
    temp = pd.DataFrame(columns=columns, index=[1])
    temp['Race'] = name
    return temp

def sortedDT(DT, col, number):
    newDT = DT[DT[col] == number]
    return newDT

def months(DT, col, col2):
    newDT = DT[DT[col] != DT[col2]]
    return newDT


def birth(DFL, DFM):
    """Build the litter table with the loss columns added.

    The ``LIL11`` and ``LIL21`` columns of ``DFL`` (when it has rows) and
    ``DFM`` are stacked, then ``loses`` (``LIL11 - LIL21``) and ``ST21``
    (``loses`` as a percentage of ``LIL11``) are computed.

    Args:
        DFL: DataFrame with ``LIL11`` and ``LIL21`` columns; may be empty.
        DFM: DataFrame with ``LIL11`` and ``LIL21`` columns.

    Returns:
        A new DataFrame with columns ``LIL11``, ``LIL21``, ``loses`` and
        ``ST21``; the inputs are not modified.
    """
    columns = ['LIL11', 'LIL21']
    if len(DFL) > 0:
        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
        # rows the same way and keeps the original index labels.
        pigs = pd.concat([DFL[columns], DFM[columns]])
    else:
        # Explicit copy so the new columns never touch the caller's frame.
        pigs = DFM[columns].copy()
    pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
    pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
    return pigs


def sum_digits(n):
    """Return the element-wise sum of the decimal digits of ``n``.

    Args:
        n: pandas Series (or NumPy array) of integers; left unmodified.

    Returns:
        An object shaped like ``n`` holding the digit sum of each element.
    """
    # abs() yields a fresh object, so the in-place floor division below
    # cannot alter the caller's data, and negative values terminate too.
    remaining = abs(n)
    total = remaining * 0
    # Keep going until *every* element is used up; stopping as soon as one
    # element hits zero would drop the leading digits of longer numbers.
    while remaining.any():
        total += remaining % 10
        remaining //= 10
    return total


def all_count(DFL, DFM, DFM_BL, n, new):
    if len(DFL) > 0:
        s = sum_digits(n)
        pigs = birth(DFL, DFM)
        DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
        new['first litter'] = len(DFL)
        new['more than first litter'] = len(DFL) + len(DFM)
        new['number of individuals'] = len(DFL) + len(DFM_BL)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = np.around(DFL['M_WAGUR'].mean())
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = np.around(s.mean(), decimals=2)

        return new

    else:
        pigs = birth(DFL, DFM)
        dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(dfm)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"

        return new

W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL,'ID1',10).copy()
M_W1R43 = sortedDT(DFM,'ID1',10).copy()
BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

last = pd.concat([W1R43, ... ])
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)
    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        Tk().withdraw()
        print("Initializing Dialogue... \\nPlease select a file.")
        tk_filenames = filedialog.askdirectory()
        tempDir = tk_filenames
        return tempDir


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate():

        def change_to_date(self, a, b):
            a[b] = pd.to_datetime(a[b])


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                                   'first litter.','more than first litter',
                                   'amount_21','lose_21','nip_count',
                                   'age','between_litter'],index =[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        newDT = DT[DT[col] == number]
        return newDT

    def months(DT, col, col2):
        newDT = DT[DT[col] != DT[col2]]
        return newDT


    def birth(DFL, DFM):
        """Build the litter table with the loss columns added.

        The ``LIL11`` and ``LIL21`` columns of ``DFL`` (when it has rows) and
        ``DFM`` are stacked, then ``loses`` (``LIL11 - LIL21``) and ``ST21``
        (``loses`` as a percentage of ``LIL11``) are computed.

        Args:
            DFL: DataFrame with ``LIL11`` and ``LIL21`` columns; may be empty.
            DFM: DataFrame with ``LIL11`` and ``LIL21`` columns.

        Returns:
            A new DataFrame with columns ``LIL11``, ``LIL21``, ``loses`` and
            ``ST21``; the inputs are not modified.
        """
        columns = ['LIL11', 'LIL21']
        if len(DFL) > 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
            # rows the same way and keeps the original index labels.
            pigs = pd.concat([DFL[columns], DFM[columns]])
        else:
            # Explicit copy so the new columns never touch the caller's frame.
            pigs = DFM[columns].copy()
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


    def sum_digits(n):
        """Return the element-wise sum of the decimal digits of ``n``.

        Args:
            n: pandas Series (or NumPy array) of integers; left unmodified.

        Returns:
            An object shaped like ``n`` holding the digit sum of each element.
        """
        # abs() yields a fresh object, so the in-place floor division below
        # cannot alter the caller's data, and negative values terminate too.
        remaining = abs(n)
        total = remaining * 0
        # Keep going until *every* element is used up; stopping as soon as one
        # element hits zero would drop the leading digits of longer numbers.
        while remaining.any():
            total += remaining % 10
            remaining //= 10
        return total


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)

Code:

import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

listOfFiles = []
tempDF = pd.DataFrame()
tempDF2 = pd.DataFrame()


def get_filenames():
    Tk().withdraw()
    print("Initializing Dialogue... \\nPlease select a file.")
    tk_filenames = filedialog.askdirectory()
    tempDir = tk_filenames
    return tempDir


choosen_dir = get_filenames()

os.chdir(choosen_dir)
for file in glob.glob("*.csv"):
    listOfFiles.append(file)

for file in listOfFiles:
    if file.startswith('L'):
        Table = pd.read_csv(file, sep=';')
        DF1 = pd.DataFrame(Table)
        tempDF = tempDF.append(DF1, sort=False)
    elif file.startswith('M'):
        Table2 = pd.read_csv(file, sep=';')
        DF2 = pd.DataFrame(Table2)
        tempDF2 = tempDF2.append(DF2, sort=False)
    else:
        print('404')

DFL = tempDF
DFM = tempDF2


class ToDate():

    def change_to_date(self, a, b):
        a[b] = pd.to_datetime(a[b])


dat = ToDate()
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')
DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

DFM_BL = DFM
ListL = DFL.ID1.tolist()
DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

def createDT(name):
    """Create an empty one-row result table for one race.

    The column labels are spelled exactly like the keys ``all_count`` assigns
    to, so filling the table updates these columns instead of appending extra
    ones next to placeholders that are never filled.

    Args:
        name: Value stored in the ``Race`` column.

    Returns:
        A DataFrame with index ``[1]``; every column except ``Race`` is empty
        (NaN).
    """
    columns = [
        'Race', 'number of individuals', 'littering youngsters',
        'first litter', 'more than first litter',
        'amount_21', 'lose_21', 'nip_count',
        'age', 'between_litter',
    ]
    temp = pd.DataFrame(columns=columns, index=[1])
    temp['Race'] = name
    return temp

def sortedDT(DT, col, number):
    newDT = DT[DT[col] == number]
    return newDT

def months(DT, col, col2):
    newDT = DT[DT[col] != DT[col2]]
    return newDT


def birth(DFL, DFM):
    """Build the litter table with the loss columns added.

    The ``LIL11`` and ``LIL21`` columns of ``DFL`` (when it has rows) and
    ``DFM`` are stacked, then ``loses`` (``LIL11 - LIL21``) and ``ST21``
    (``loses`` as a percentage of ``LIL11``) are computed.

    Args:
        DFL: DataFrame with ``LIL11`` and ``LIL21`` columns; may be empty.
        DFM: DataFrame with ``LIL11`` and ``LIL21`` columns.

    Returns:
        A new DataFrame with columns ``LIL11``, ``LIL21``, ``loses`` and
        ``ST21``; the inputs are not modified.
    """
    columns = ['LIL11', 'LIL21']
    if len(DFL) > 0:
        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
        # rows the same way and keeps the original index labels.
        pigs = pd.concat([DFL[columns], DFM[columns]])
    else:
        # Explicit copy so the new columns never touch the caller's frame.
        pigs = DFM[columns].copy()
    pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
    pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
    return pigs


def sum_digits(n):
    """Return the element-wise sum of the decimal digits of ``n``.

    Args:
        n: pandas Series (or NumPy array) of integers; left unmodified.

    Returns:
        An object shaped like ``n`` holding the digit sum of each element.
    """
    # abs() yields a fresh object, so the in-place floor division below
    # cannot alter the caller's data, and negative values terminate too.
    remaining = abs(n)
    total = remaining * 0
    # Keep going until *every* element is used up; stopping as soon as one
    # element hits zero would drop the leading digits of longer numbers.
    while remaining.any():
        total += remaining % 10
        remaining //= 10
    return total


def all_count(DFL, DFM, DFM_BL, n, new):
    if len(DFL) > 0:
        s = sum_digits(n)
        pigs = birth(DFL, DFM)
        DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
        new['first litter'] = len(DFL)
        new['more than first litter'] = len(DFL) + len(DFM)
        new['number of individuals'] = len(DFL) + len(DFM_BL)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = np.around(DFL['M_WAGUR'].mean())
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = np.around(s.mean(), decimals=2)

        return new

    else:
        pigs = birth(DFL, DFM)
        dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(dfm)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"

        return new

W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL,'ID1',10).copy()
M_W1R43 = sortedDT(DFM,'ID1',10).copy()
BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

last = pd.concat([W1R43, ... ])
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)
    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        Tk().withdraw()
        print("Initializing Dialogue... \\nPlease select a file.")
        tk_filenames = filedialog.askdirectory()
        tempDir = tk_filenames
        return tempDir


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate():

        def change_to_date(self, a, b):
            a[b] = pd.to_datetime(a[b])


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                                   'first litter.','more than first litter',
                                   'amount_21','lose_21','nip_count',
                                   'age','between_litter'],index =[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        newDT = DT[DT[col] == number]
        return newDT

    def months(DT, col, col2):
        newDT = DT[DT[col] != DT[col2]]
        return newDT


    def birth(DFL, DFM):
        """Build the litter table with the loss columns added.

        The ``LIL11`` and ``LIL21`` columns of ``DFL`` (when it has rows) and
        ``DFM`` are stacked, then ``loses`` (``LIL11 - LIL21``) and ``ST21``
        (``loses`` as a percentage of ``LIL11``) are computed.

        Args:
            DFL: DataFrame with ``LIL11`` and ``LIL21`` columns; may be empty.
            DFM: DataFrame with ``LIL11`` and ``LIL21`` columns.

        Returns:
            A new DataFrame with columns ``LIL11``, ``LIL21``, ``loses`` and
            ``ST21``; the inputs are not modified.
        """
        columns = ['LIL11', 'LIL21']
        if len(DFL) > 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
            # rows the same way and keeps the original index labels.
            pigs = pd.concat([DFL[columns], DFM[columns]])
        else:
            # Explicit copy so the new columns never touch the caller's frame.
            pigs = DFM[columns].copy()
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


    def sum_digits(n):
        """Return the element-wise sum of the decimal digits of ``n``.

        Args:
            n: pandas Series (or NumPy array) of integers; left unmodified.

        Returns:
            An object shaped like ``n`` holding the digit sum of each element.
        """
        # abs() yields a fresh object, so the in-place floor division below
        # cannot alter the caller's data, and negative values terminate too.
        remaining = abs(n)
        total = remaining * 0
        # Keep going until *every* element is used up; stopping as soon as one
        # element hits zero would drop the leading digits of longer numbers.
        while remaining.any():
            total += remaining % 10
            remaining //= 10
        return total


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)
Source Link

First program in Python for data analysis

I'm just starting my adventure with Python, and I wanted to share my first project with you and ask for feedback and advice. This is a script for my friend to automate all calculations and database operations.

What my code does:

  1. Merge together the csv files in the specified folder forming two sets.
  2. Calculate the difference in days between the two date columns.
  3. Filter the data over a given date range.
  4. Check that the IDs from data frame L are not part of data frame M.
  5. Create output tables where the results will be stored.
  6. Calculate, for the most part, the averages of individual columns after preparing the data.
  7. Save the results in a table.

Code:

import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

# Names of the CSV files to load; filled in by the script below.
listOfFiles = []
# Two independent, initially empty tables: tempDF collects the files whose
# name starts with 'L', tempDF2 those whose name starts with 'M'.
tempDF, tempDF2 = pd.DataFrame(), pd.DataFrame()


def get_filenames():
    """Ask the user to pick a folder and return the chosen path.

    Despite its name, this opens a *directory* chooser
    (``filedialog.askdirectory``) and returns whatever that dialog returns.

    NOTE(review): when the dialog is cancelled the result is presumably an
    empty value - callers should check before passing it to ``os.chdir``.
    """
    root = Tk()
    # Hide the root window so only the dialog itself is shown.
    root.withdraw()
    try:
        # The prompt used to say "file" and printed a literal "\n"; the
        # dialog selects a folder, so say so and emit a real line break.
        print("Initializing Dialogue... \nPlease select a folder.")
        return filedialog.askdirectory(parent=root)
    finally:
        # Release the hidden root window once the dialog is closed.
        root.destroy()


# Work inside the folder chosen by the user so relative CSV names resolve.
choosen_dir = get_filenames()

os.chdir(choosen_dir)
listOfFiles.extend(glob.glob("*.csv"))

# Collect the parsed files first and concatenate once per group:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop re-copies all previously loaded rows on every iteration.
l_frames = []
m_frames = []
for file in listOfFiles:
    if file.startswith('L'):
        l_frames.append(pd.read_csv(file, sep=';'))
    elif file.startswith('M'):
        m_frames.append(pd.read_csv(file, sep=';'))
    else:
        # File name matches neither group; it is reported and skipped.
        print('404')

# Leave the (empty) accumulators untouched when a group has no files,
# because pd.concat raises on an empty list.
if l_frames:
    tempDF = pd.concat(l_frames, sort=False)
if m_frames:
    tempDF2 = pd.concat(m_frames, sort=False)

DFL = tempDF
DFM = tempDF2


class ToDate:
    """Holder for the column-to-datetime conversion helper."""

    def change_to_date(self, a, b):
        """Replace column ``b`` of DataFrame ``a`` with its datetime form.

        The conversion is done by ``pd.to_datetime`` and written back into
        ``a`` (the frame is modified in place; nothing is returned).
        """
        converted = pd.to_datetime(a[b])
        a[b] = converted


# Parse both date columns of both tables so they can be subtracted and
# compared against the user-supplied range.
dat = ToDate()
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')

# M_WAGUR is the gap between the two dates as a number of days
# (timedelta divided by one day). Note the operand order differs per table.
DFL['M_WAGUR'] = (DFL['LDOP'] - DFL['LDUR']) / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = (DFM['LDUR'] - DFM['LDOP']) / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

# Keep only the rows inside the inclusive [date1, date2] range; L is
# filtered on LDOP, M on LDUR.
DFL = DFL[DFL['LDOP'].between(date1, date2)]
DFM = DFM[DFM['LDUR'].between(date1, date2)]

# DFM_BL: the filtered M rows whose ID1 does not occur in the filtered L rows.
ListL = DFL['ID1'].tolist()
DFM_BL = DFM[~DFM['ID1'].isin(ListL)]

def createDT(name):
    """Create the empty one-row result table for one group.

    The table has a single row (index label 1) whose cells are all missing
    except 'Race', which is set to ``name``. The column names match the keys
    that ``all_count`` later fills in, so no duplicate columns appear.

    Parameters:
        name: value stored in the 'Race' column.

    Returns:
        The new ``pandas.DataFrame``.
    """
    columns = [
        'Race', 'number of individuals', 'littering youngsters',
        'first litter', 'more than first litter',
        'amount_21', 'lose_21', 'nip_count',
        'age', 'between_litter',
    ]
    temp = pd.DataFrame(columns=columns, index=[1])
    temp['Race'] = name
    return temp

def sortedDT(DT, col, number):
    """Return the rows of ``DT`` whose ``col`` value equals ``number``.

    Despite the name, nothing is sorted: this is a plain equality filter
    that keeps the original row order and index labels.
    """
    matches = DT[col] == number
    return DT[matches]

def months(DT, col, col2):
    """Return the rows of ``DT`` where columns ``col`` and ``col2`` differ."""
    differs = DT[col] != DT[col2]
    return DT[differs]


def birth(DFL, DFM):
    """Build the litter table with absolute and percentage losses.

    Takes the 'LIL11' and 'LIL21' columns of ``DFL`` followed by those of
    ``DFM`` (only ``DFM`` when ``DFL`` has no rows) and adds two columns:

    * 'loses' - ``LIL11 - LIL21``
    * 'ST21'  - 'loses' as a percentage of 'LIL11'

    The result is a new frame; neither input is modified. Original index
    labels are kept, so labels may repeat after the two parts are stacked.

    NOTE(review): rows with ``LIL11 == 0`` yield inf/NaN in 'ST21' - confirm
    whether such rows can occur.
    """
    columns = ['LIL11', 'LIL21']
    if len(DFL) > 0:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        pigs = pd.concat([DFL[columns], DFM[columns]])
    else:
        # Copy so the new columns are not written into a slice of DFM.
        pigs = DFM[columns].copy()
    pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
    pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
    return pigs


def sum_digits(n):
    """Return the sum of the decimal digits of ``n``, element by element.

    ``n`` may be a pandas Series, a NumPy array or a plain integer; the
    result has the same shape. The sign is ignored and missing values (NaN)
    stay missing.

    Differences from the previous implementation:

    * the loop runs until *every* element is used up (it used to stop as
      soon as *any* element reached zero, under-counting longer numbers);
    * the argument is no longer modified in place;
    * empty input returns an empty result instead of looping forever.
    """
    # abs() creates a new object, so the caller's data is never touched.
    remaining = abs(n)
    # Start from zeros of the same shape so the result type matches the input
    # even when the loop body never runs.
    total = remaining * 0
    # "> 0" is False for NaN, so missing values cannot keep the loop alive.
    while np.asarray(remaining > 0).any():
        total = total + remaining % 10
        remaining = remaining // 10
    return total


def all_count(DFL, DFM, DFM_BL, n, new):
    """Fill the result table ``new`` with the summary figures of one group.

    Parameters:
        DFL, DFM: the two data tables of the group; both are passed to
            ``birth`` and their 'M_WAGUR' columns are averaged.
        DFM_BL: table whose distinct 'ID1' rows are counted (only used when
            ``DFL`` has rows).
        n: values handed to ``sum_digits`` (only used when ``DFL`` has rows).
        new: result table; its columns are assigned in place.

    Returns:
        ``new`` itself, after the assignments.

    When ``DFL`` is empty, the figures that need ``DFL`` are set to "-" and
    the number of individuals is taken from the distinct 'LPROS1' rows of
    ``DFM`` instead.
    """

    def mean_2dp(series):
        # Mean of a column rounded to two decimals.
        return np.around(series.mean(), decimals=2)

    if len(DFL) == 0:
        litters = birth(DFL, DFM)
        distinct_m = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(distinct_m)
        new['littering youngsters'] = mean_2dp(litters['LIL11'])
        new['amount_21'] = mean_2dp(litters['LIL21'])
        new['lose_21'] = mean_2dp(litters['ST21'])
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"
        return new

    digit_sums = sum_digits(n)
    litters = birth(DFL, DFM)
    distinct_bl = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
    l_rows = len(DFL)
    new['first litter'] = l_rows
    new['more than first litter'] = l_rows + len(DFM)
    new['number of individuals'] = l_rows + len(distinct_bl)
    new['littering youngsters'] = mean_2dp(litters['LIL11'])
    new['amount_21'] = mean_2dp(litters['LIL21'])
    new['lose_21'] = mean_2dp(litters['ST21'])
    new['age'] = np.around(DFL['M_WAGUR'].mean())
    new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
    new['nip_count'] = mean_2dp(digit_sums)
    return new

# Build the result row for the group whose ID1 equals 10.
W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL,'ID1',10).copy()
M_W1R43 = sortedDT(DFM,'ID1',10).copy()
BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
# Fixes: the second argument was the undefined name ``MW1R43``; and the
# digit column was read from W1R43, the freshly created result table,
# which has no 'LSUT1' column.
# NOTE(review): 'LSUT1' is presumably a column of the L data - confirm.
all_count(L_W1R43, M_W1R43, BL_W1R43, L_W1R43['LSUT1'], W1R43)

# One result table per group; append further tables to this list.
# (A literal ``...`` inside pd.concat raises TypeError.)
results = [W1R43]
last = pd.concat(results)
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)

In the future, I would like to transfer this code to Django as one of the tools.

Please advise me on what I should change and what I should avoid. If there is something I did correctly in this code, please let me know that too.