Dedent over-indentation.

Source Link

edited Jan 7, 2020 at 13:16

44.5k
7
80
158

    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        Tk().withdraw()
        print("Initializing Dialogue... \\nPlease select a file.")
        tk_filenames = filedialog.askdirectory()
        tempDir = tk_filenames
        return tempDir


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate():

        def change_to_date(self, a, b):
            a[b] = pd.to_datetime(a[b])


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                                   'first litter.','more than first litter',
                                   'amount_21','lose_21','nip_count',
                                   'age','between_litter'],index =[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        newDT = DT[DT[col] == number]
        return newDT

    def months(DT, col, col2):
        newDT = DT[DT[col] != DT[col2]]
        return newDT


    def birth(DFL, DFM):
        if len(DFL) > 0:
            pigs = DFL[['LIL11', 'LIL21']]
            pigs2 = DFM[['LIL11', 'LIL21']]
            pigs = pigs.append(pigs2)
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs
        else:
            pigs = DFM[['LIL11', 'LIL21']]
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs


    def sum_digits(n):
        s = 0
        while n.all():
            s += n % 10
            n //= 10
        return s


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)

    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        Tk().withdraw()
        print("Initializing Dialogue... \\nPlease select a file.")
        tk_filenames = filedialog.askdirectory()
        tempDir = tk_filenames
        return tempDir


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate():

        def change_to_date(self, a, b):
            a[b] = pd.to_datetime(a[b])


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                                   'first litter.','more than first litter',
                                   'amount_21','lose_21','nip_count',
                                   'age','between_litter'],index =[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        newDT = DT[DT[col] == number]
        return newDT

    def months(DT, col, col2):
        newDT = DT[DT[col] != DT[col2]]
        return newDT


    def birth(DFL, DFM):
        if len(DFL) > 0:
            pigs = DFL[['LIL11', 'LIL21']]
            pigs2 = DFM[['LIL11', 'LIL21']]
            pigs = pigs.append(pigs2)
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs
        else:
            pigs = DFM[['LIL11', 'LIL21']]
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs


    def sum_digits(n):
        s = 0
        while n.all():
            s += n % 10
            n //= 10
        return s


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)

import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

listOfFiles = []
tempDF = pd.DataFrame()
tempDF2 = pd.DataFrame()


def get_filenames():
    Tk().withdraw()
    print("Initializing Dialogue... \\nPlease select a file.")
    tk_filenames = filedialog.askdirectory()
    tempDir = tk_filenames
    return tempDir


choosen_dir = get_filenames()

os.chdir(choosen_dir)
for file in glob.glob("*.csv"):
    listOfFiles.append(file)

for file in listOfFiles:
    if file.startswith('L'):
        Table = pd.read_csv(file, sep=';')
        DF1 = pd.DataFrame(Table)
        tempDF = tempDF.append(DF1, sort=False)
    elif file.startswith('M'):
        Table2 = pd.read_csv(file, sep=';')
        DF2 = pd.DataFrame(Table2)
        tempDF2 = tempDF2.append(DF2, sort=False)
    else:
        print('404')

DFL = tempDF
DFM = tempDF2


class ToDate():

    def change_to_date(self, a, b):
        a[b] = pd.to_datetime(a[b])


dat = ToDate()
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')
DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

DFM_BL = DFM
ListL = DFL.ID1.tolist()
DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

def createDT(name):
    temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                               'first litter.','more than first litter',
                               'amount_21','lose_21','nip_count',
                               'age','between_litter'],index =[1])
    temp['Race'] = name
    return temp

def sortedDT(DT, col, number):
    newDT = DT[DT[col] == number]
    return newDT

def months(DT, col, col2):
    newDT = DT[DT[col] != DT[col2]]
    return newDT


def birth(DFL, DFM):
    if len(DFL) > 0:
        pigs = DFL[['LIL11', 'LIL21']]
        pigs2 = DFM[['LIL11', 'LIL21']]
        pigs = pigs.append(pigs2)
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs
    else:
        pigs = DFM[['LIL11', 'LIL21']]
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


def sum_digits(n):
    s = 0
    while n.all():
        s += n % 10
        n //= 10
    return s


def all_count(DFL, DFM, DFM_BL, n, new):
    if len(DFL) > 0:
        s = sum_digits(n)
        pigs = birth(DFL, DFM)
        DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
        new['first litter'] = len(DFL)
        new['more than first litter'] = len(DFL) + len(DFM)
        new['number of individuals'] = len(DFL) + len(DFM_BL)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = np.around(DFL['M_WAGUR'].mean())
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = np.around(s.mean(), decimals=2)

        return new

    else:
        pigs = birth(DFL, DFM)
        dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(dfm)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"

        return new

W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL,'ID1',10).copy()
M_W1R43 = sortedDT(DFM,'ID1',10).copy()
BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

last = pd.concat([W1R43, ... ])
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)

add identation and python syntax

Source Link

edit approved Jan 7, 2020 at 12:21

Raphael

160
5

Code:

import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

listOfFiles = []
tempDF = pd.DataFrame()
tempDF2 = pd.DataFrame()


def get_filenames():
    Tk().withdraw()
    print("Initializing Dialogue... \\nPlease select a file.")
    tk_filenames = filedialog.askdirectory()
    tempDir = tk_filenames
    return tempDir


choosen_dir = get_filenames()

os.chdir(choosen_dir)
for file in glob.glob("*.csv"):
    listOfFiles.append(file)

for file in listOfFiles:
    if file.startswith('L'):
        Table = pd.read_csv(file, sep=';')
        DF1 = pd.DataFrame(Table)
        tempDF = tempDF.append(DF1, sort=False)
    elif file.startswith('M'):
        Table2 = pd.read_csv(file, sep=';')
        DF2 = pd.DataFrame(Table2)
        tempDF2 = tempDF2.append(DF2, sort=False)
    else:
        print('404')

DFL = tempDF
DFM = tempDF2


class ToDate():

    def change_to_date(self, a, b):
        a[b] = pd.to_datetime(a[b])


dat = ToDate()
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')
DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

DFM_BL = DFM
ListL = DFL.ID1.tolist()
DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

def createDT(name):
    temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                               'first litter.','more than first litter',
                               'amount_21','lose_21','nip_count',
                               'age','between_litter'],index =[1])
temp['Race'] = name
return temp

def sortedDT(DT, col, number):
    newDT = DT[DT[col] == number]
    return newDT

def months(DT, col, col2):
    newDT = DT[DT[col] != DT[col2]]
    return newDT


def birth(DFL, DFM):
    if len(DFL) > 0:
        pigs = DFL[['LIL11', 'LIL21']]
        pigs2 = DFM[['LIL11', 'LIL21']]
        pigs = pigs.append(pigs2)
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs
    else:
        pigs = DFM[['LIL11', 'LIL21']]
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


def sum_digits(n):
    s = 0
    while n.all():
        s += n % 10
        n //= 10
    return s


def all_count(DFL, DFM, DFM_BL, n, new):
    if len(DFL) > 0:
        s = sum_digits(n)
        pigs = birth(DFL, DFM)
        DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
        new['first litter'] = len(DFL)
        new['more than first litter'] = len(DFL) + len(DFM)
        new['number of individuals'] = len(DFL) + len(DFM_BL)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = np.around(DFL['M_WAGUR'].mean())
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = np.around(s.mean(), decimals=2)

        return new

    else:
        pigs = birth(DFL, DFM)
        dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(dfm)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"

        return new

W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL,'ID1',10).copy()
M_W1R43 = sortedDT(DFM,'ID1',10).copy()
BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

last = pd.concat([W1R43, ... ])
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)

    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        Tk().withdraw()
        print("Initializing Dialogue... \\nPlease select a file.")
        tk_filenames = filedialog.askdirectory()
        tempDir = tk_filenames
        return tempDir


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate():

        def change_to_date(self, a, b):
            a[b] = pd.to_datetime(a[b])


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                                   'first litter.','more than first litter',
                                   'amount_21','lose_21','nip_count',
                                   'age','between_litter'],index =[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        newDT = DT[DT[col] == number]
        return newDT

    def months(DT, col, col2):
        newDT = DT[DT[col] != DT[col2]]
        return newDT


    def birth(DFL, DFM):
        if len(DFL) > 0:
            pigs = DFL[['LIL11', 'LIL21']]
            pigs2 = DFM[['LIL11', 'LIL21']]
            pigs = pigs.append(pigs2)
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs
        else:
            pigs = DFM[['LIL11', 'LIL21']]
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs


    def sum_digits(n):
        s = 0
        while n.all():
            s += n % 10
            n //= 10
        return s


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)

Code:

import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

listOfFiles = []
tempDF = pd.DataFrame()
tempDF2 = pd.DataFrame()


def get_filenames():
    Tk().withdraw()
    print("Initializing Dialogue... \\nPlease select a file.")
    tk_filenames = filedialog.askdirectory()
    tempDir = tk_filenames
    return tempDir


choosen_dir = get_filenames()

os.chdir(choosen_dir)
for file in glob.glob("*.csv"):
    listOfFiles.append(file)

for file in listOfFiles:
    if file.startswith('L'):
        Table = pd.read_csv(file, sep=';')
        DF1 = pd.DataFrame(Table)
        tempDF = tempDF.append(DF1, sort=False)
    elif file.startswith('M'):
        Table2 = pd.read_csv(file, sep=';')
        DF2 = pd.DataFrame(Table2)
        tempDF2 = tempDF2.append(DF2, sort=False)
    else:
        print('404')

DFL = tempDF
DFM = tempDF2


class ToDate():

    def change_to_date(self, a, b):
        a[b] = pd.to_datetime(a[b])


dat = ToDate()
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')
DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

DFM_BL = DFM
ListL = DFL.ID1.tolist()
DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

def createDT(name):
    temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                               'first litter.','more than first litter',
                               'amount_21','lose_21','nip_count',
                               'age','between_litter'],index =[1])
temp['Race'] = name
return temp

def sortedDT(DT, col, number):
    newDT = DT[DT[col] == number]
    return newDT

def months(DT, col, col2):
    newDT = DT[DT[col] != DT[col2]]
    return newDT


def birth(DFL, DFM):
    if len(DFL) > 0:
        pigs = DFL[['LIL11', 'LIL21']]
        pigs2 = DFM[['LIL11', 'LIL21']]
        pigs = pigs.append(pigs2)
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs
    else:
        pigs = DFM[['LIL11', 'LIL21']]
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


def sum_digits(n):
    s = 0
    while n.all():
        s += n % 10
        n //= 10
    return s


def all_count(DFL, DFM, DFM_BL, n, new):
    if len(DFL) > 0:
        s = sum_digits(n)
        pigs = birth(DFL, DFM)
        DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
        new['first litter'] = len(DFL)
        new['more than first litter'] = len(DFL) + len(DFM)
        new['number of individuals'] = len(DFL) + len(DFM_BL)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = np.around(DFL['M_WAGUR'].mean())
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = np.around(s.mean(), decimals=2)

        return new

    else:
        pigs = birth(DFL, DFM)
        dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(dfm)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"

        return new

W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL,'ID1',10).copy()
M_W1R43 = sortedDT(DFM,'ID1',10).copy()
BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

last = pd.concat([W1R43, ... ])
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)

    import re
    from datetime import datetime
    import pandas as pd
    import numpy as np
    from tkinter import filedialog
    from tkinter import *
    import os, sys
    import glob
    from matplotlib.backends.backend_pdf import PdfPages
    import time

    listOfFiles = []
    tempDF = pd.DataFrame()
    tempDF2 = pd.DataFrame()


    def get_filenames():
        Tk().withdraw()
        print("Initializing Dialogue... \\nPlease select a file.")
        tk_filenames = filedialog.askdirectory()
        tempDir = tk_filenames
        return tempDir


    choosen_dir = get_filenames()

    os.chdir(choosen_dir)
    for file in glob.glob("*.csv"):
        listOfFiles.append(file)

    for file in listOfFiles:
        if file.startswith('L'):
            Table = pd.read_csv(file, sep=';')
            DF1 = pd.DataFrame(Table)
            tempDF = tempDF.append(DF1, sort=False)
        elif file.startswith('M'):
            Table2 = pd.read_csv(file, sep=';')
            DF2 = pd.DataFrame(Table2)
            tempDF2 = tempDF2.append(DF2, sort=False)
        else:
            print('404')

    DFL = tempDF
    DFM = tempDF2


    class ToDate():

        def change_to_date(self, a, b):
            a[b] = pd.to_datetime(a[b])


    dat = ToDate()
    dat.change_to_date(DFL, 'LDUR')
    dat.change_to_date(DFL, 'LDOP')
    dat.change_to_date(DFM, 'LDOP')
    dat.change_to_date(DFM, 'LDUR')
    DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
    DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
    DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
    DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

    date1 = input('Date range from YYYY-MM-DD')
    date2 = input('Date range to YYYY-MM-DD')

    DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
    DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

    DFM_BL = DFM
    ListL = DFL.ID1.tolist()
    DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

    def createDT(name):
        temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                                   'first litter.','more than first litter',
                                   'amount_21','lose_21','nip_count',
                                   'age','between_litter'],index =[1])
        temp['Race'] = name
        return temp

    def sortedDT(DT, col, number):
        newDT = DT[DT[col] == number]
        return newDT

    def months(DT, col, col2):
        newDT = DT[DT[col] != DT[col2]]
        return newDT


    def birth(DFL, DFM):
        if len(DFL) > 0:
            pigs = DFL[['LIL11', 'LIL21']]
            pigs2 = DFM[['LIL11', 'LIL21']]
            pigs = pigs.append(pigs2)
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs
        else:
            pigs = DFM[['LIL11', 'LIL21']]
            pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
            pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
            return pigs


    def sum_digits(n):
        s = 0
        while n.all():
            s += n % 10
            n //= 10
        return s


    def all_count(DFL, DFM, DFM_BL, n, new):
        if len(DFL) > 0:
            s = sum_digits(n)
            pigs = birth(DFL, DFM)
            DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
            new['first litter'] = len(DFL)
            new['more than first litter'] = len(DFL) + len(DFM)
            new['number of individuals'] = len(DFL) + len(DFM_BL)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = np.around(DFL['M_WAGUR'].mean())
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = np.around(s.mean(), decimals=2)

            return new

        else:
            pigs = birth(DFL, DFM)
            dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
            new['first litter'] = "-"
            new['more than first litter'] = len(DFM)
            new['number of individuals'] = len(dfm)
            new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
            new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
            new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
            new['age'] = "-"
            new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
            new['nip_count'] = "-"

            return new

    W1R43 = createDT('W1R43')
    L_W1R43 = sortedDT(DFL,'ID1',10).copy()
    M_W1R43 = sortedDT(DFM,'ID1',10).copy()
    BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
    all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

    last = pd.concat([W1R43, ... ])
    last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
    last.to_html('Results.html',index=False)

Source Link

asked Jan 7, 2020 at 11:51

Kepasere

43
3

First program in Python for data analysis

I'm just starting my adventure with python and I wanted to share my first project with you and ask for feedback and advice. This is a script for my friend to automate all calculations and database operations.

What my code does:

Merge together the csv files in the specified folder forming two sets.
Calculate the difference in days between the two date columns.
Filter the data over a given date range.
Checking that the id from the data frame L is not part of the M.
Creates output tables where the results will be stored.
Mainly calculation of the average of individual columns after preparation
Saving the results in a table

Code:

import re
from datetime import datetime
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
from matplotlib.backends.backend_pdf import PdfPages
import time

listOfFiles = []
tempDF = pd.DataFrame()
tempDF2 = pd.DataFrame()


def get_filenames():
    Tk().withdraw()
    print("Initializing Dialogue... \\nPlease select a file.")
    tk_filenames = filedialog.askdirectory()
    tempDir = tk_filenames
    return tempDir


choosen_dir = get_filenames()

os.chdir(choosen_dir)
for file in glob.glob("*.csv"):
    listOfFiles.append(file)

for file in listOfFiles:
    if file.startswith('L'):
        Table = pd.read_csv(file, sep=';')
        DF1 = pd.DataFrame(Table)
        tempDF = tempDF.append(DF1, sort=False)
    elif file.startswith('M'):
        Table2 = pd.read_csv(file, sep=';')
        DF2 = pd.DataFrame(Table2)
        tempDF2 = tempDF2.append(DF2, sort=False)
    else:
        print('404')

DFL = tempDF
DFM = tempDF2


class ToDate():

    def change_to_date(self, a, b):
        a[b] = pd.to_datetime(a[b])


dat = ToDate()
dat.change_to_date(DFL, 'LDUR')
dat.change_to_date(DFL, 'LDOP')
dat.change_to_date(DFM, 'LDOP')
dat.change_to_date(DFM, 'LDUR')
DFL['M_WAGUR'] = DFL['LDOP'] - DFL['LDUR']
DFM['M_WAGUR'] = DFM['LDUR'] - DFM['LDOP']
DFL['M_WAGUR'] = DFL['M_WAGUR'] / np.timedelta64(1, 'D')
DFM['M_WAGUR'] = DFM['M_WAGUR'] / np.timedelta64(1, 'D')

date1 = input('Date range from YYYY-MM-DD')
date2 = input('Date range to YYYY-MM-DD')

DFL = DFL[(DFL['LDOP'] >= date1) & (DFL['LDOP'] <= date2)]
DFM = DFM[(DFM['LDUR'] >= date1) & (DFM['LDUR'] <= date2)]

DFM_BL = DFM
ListL = DFL.ID1.tolist()
DFM_BL = DFM_BL[~DFM_BL.ID1.isin(ListL)]

def createDT(name):
    temp = pd.DataFrame(columns = ['Race','number of individuals','littering         youngsters',
                               'first litter.','more than first litter',
                               'amount_21','lose_21','nip_count',
                               'age','between_litter'],index =[1])
temp['Race'] = name
return temp

def sortedDT(DT, col, number):
    newDT = DT[DT[col] == number]
    return newDT

def months(DT, col, col2):
    newDT = DT[DT[col] != DT[col2]]
    return newDT


def birth(DFL, DFM):
    if len(DFL) > 0:
        pigs = DFL[['LIL11', 'LIL21']]
        pigs2 = DFM[['LIL11', 'LIL21']]
        pigs = pigs.append(pigs2)
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs
    else:
        pigs = DFM[['LIL11', 'LIL21']]
        pigs['loses'] = pigs['LIL11'] - pigs['LIL21']
        pigs['ST21'] = (pigs['loses'] / pigs['LIL11']) * 100
        return pigs


def sum_digits(n):
    s = 0
    while n.all():
        s += n % 10
        n //= 10
    return s


def all_count(DFL, DFM, DFM_BL, n, new):
    if len(DFL) > 0:
        s = sum_digits(n)
        pigs = birth(DFL, DFM)
        DFM_BL = DFM_BL.drop_duplicates(subset='ID1', keep="first").copy()
        new['first litter'] = len(DFL)
        new['more than first litter'] = len(DFL) + len(DFM)
        new['number of individuals'] = len(DFL) + len(DFM_BL)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = np.around(DFL['M_WAGUR'].mean())
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = np.around(s.mean(), decimals=2)

        return new

    else:
        pigs = birth(DFL, DFM)
        dfm = DFM.drop_duplicates(subset='LPROS1', keep="first").copy()
        new['first litter'] = "-"
        new['more than first litter'] = len(DFM)
        new['number of individuals'] = len(dfm)
        new['littering youngsters'] = np.around(pigs['LIL11'].mean(), decimals=2)
        new['amount_21'] = np.around(pigs['LIL21'].mean(), decimals=2)
        new['lose_21'] = np.around(pigs['ST21'].mean(), decimals=2)
        new['age'] = "-"
        new['between_litter'] = np.around(DFM['M_WAGUR'].mean())
        new['nip_count'] = "-"

        return new

W1R43 = createDT('W1R43')
L_W1R43 = sortedDT(DFL,'ID1',10).copy()
M_W1R43 = sortedDT(DFM,'ID1',10).copy()
BL_W1R43 = sortedDT(DFM_BL,'ID1',10).copy()
all_count(L_W1R43,MW1R43,BL_W1R43,W1R43['LSUT1'],W1R43)

last = pd.concat([W1R43, ... ])
last.to_csv('Results.txt', header=True, index=False, sep='\t', mode='w')
last.to_html('Results.html',index=False)

In the future, I would like to transfer this code to Django as one of the tools.

Please advise me what I should change, what I should avoid and if there is something correct in this code you can also let me know.

Stack Exchange Network

Return to Question

First program in Python for data analysis