Skip to main content
added 4 characters in body
Source Link
toolic
  • 15.8k
  • 5
  • 29
  • 217

I have written a pythonPython script using regex to extrapolate a poorly formatted text file data table into an actual data table. I am currently printing the output using . C:\my file location\file.csv. C:\my file location\file.csv in the command prompt. Is there a better way to cause my script to run for an entire directory of files and programmatically output each to a .csv.csv file?

Here is the code:

import re

data = open('C:\file.txt', 'rU')

output = [['DEALER CODE', 'DEALER NAME', 'DEALER PHONE', 'DATE', 'CHG DESCRIPTION', 'VIN', 'CURRENT', 'OVER 30-DAYS', 'OVER 60-DAYS', 'OVER 90-DAYS', 'OVER 120-DAYS', 'AGE']]

for line in data:
    dealer_code = re.search(r'(\w\d\d\d\d\d  )', line)
    dealer_code_phone_search = re.search(r'(\d\d\d-\d\d\d-\d\d\d\d)', line)
    
    if dealer_code and "/" not in line:
        line = line.replace('\n','')
        dealer_code_number = line.split()[0]
        if dealer_code_phone_search:
            dealer_code_phone = dealer_code_phone_search.group(0)
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip().split(dealer_code_phone)[0].strip()
        else:
            dealer_code_phone = ''
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip()


    elif "/" in line:
        line = line.replace('\n','')
        vin = re.search(r'(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w)', line)
        if vin:
            date = line.split(vin.group(0))[0].split()[0]
            descp = ' '.join(line.split(vin.group(0))[0].split()[1:])
            rest = line.split(vin.group(0))[1].split()
            vin = vin.group(0)

            new_line = [dealer_code_number, dealer_name, dealer_code_phone, date, descp, vin]
            for i in rest:
                new_line.append(i)

            output.append(new_line)


for line in output:
    print(','.join(line))

I have written a python script using regex to extrapolate a poorly formatted text file data table into an actual data table. I am currently printing the output using . C:\my file location\file.csv in the command prompt. Is there a better way to cause my script to run for an entire directory of files and programmatically output each to a .csv file?

Here is the code:

import re

data = open('C:\file.txt', 'rU')

output = [['DEALER CODE', 'DEALER NAME', 'DEALER PHONE', 'DATE', 'CHG DESCRIPTION', 'VIN', 'CURRENT', 'OVER 30-DAYS', 'OVER 60-DAYS', 'OVER 90-DAYS', 'OVER 120-DAYS', 'AGE']]

for line in data:
    dealer_code = re.search(r'(\w\d\d\d\d\d  )', line)
    dealer_code_phone_search = re.search(r'(\d\d\d-\d\d\d-\d\d\d\d)', line)
    
    if dealer_code and "/" not in line:
        line = line.replace('\n','')
        dealer_code_number = line.split()[0]
        if dealer_code_phone_search:
            dealer_code_phone = dealer_code_phone_search.group(0)
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip().split(dealer_code_phone)[0].strip()
        else:
            dealer_code_phone = ''
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip()


    elif "/" in line:
        line = line.replace('\n','')
        vin = re.search(r'(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w)', line)
        if vin:
            date = line.split(vin.group(0))[0].split()[0]
            descp = ' '.join(line.split(vin.group(0))[0].split()[1:])
            rest = line.split(vin.group(0))[1].split()
            vin = vin.group(0)

            new_line = [dealer_code_number, dealer_name, dealer_code_phone, date, descp, vin]
            for i in rest:
                new_line.append(i)

            output.append(new_line)


for line in output:
    print(','.join(line))

I have written a Python script using regex to extrapolate a poorly formatted text file data table into an actual data table. I am currently printing the output using . C:\my file location\file.csv in the command prompt. Is there a better way to cause my script to run for an entire directory of files and programmatically output each to a .csv file?

Here is the code:

import re

data = open('C:\file.txt', 'rU')

output = [['DEALER CODE', 'DEALER NAME', 'DEALER PHONE', 'DATE', 'CHG DESCRIPTION', 'VIN', 'CURRENT', 'OVER 30-DAYS', 'OVER 60-DAYS', 'OVER 90-DAYS', 'OVER 120-DAYS', 'AGE']]

for line in data:
    dealer_code = re.search(r'(\w\d\d\d\d\d  )', line)
    dealer_code_phone_search = re.search(r'(\d\d\d-\d\d\d-\d\d\d\d)', line)
    
    if dealer_code and "/" not in line:
        line = line.replace('\n','')
        dealer_code_number = line.split()[0]
        if dealer_code_phone_search:
            dealer_code_phone = dealer_code_phone_search.group(0)
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip().split(dealer_code_phone)[0].strip()
        else:
            dealer_code_phone = ''
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip()


    elif "/" in line:
        line = line.replace('\n','')
        vin = re.search(r'(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w)', line)
        if vin:
            date = line.split(vin.group(0))[0].split()[0]
            descp = ' '.join(line.split(vin.group(0))[0].split()[1:])
            rest = line.split(vin.group(0))[1].split()
            vin = vin.group(0)

            new_line = [dealer_code_number, dealer_name, dealer_code_phone, date, descp, vin]
            for i in rest:
                new_line.append(i)

            output.append(new_line)


for line in output:
    print(','.join(line))
Tweeted twitter.com/StackCodeReview/status/1118031506646155264
deleted 53 characters in body
Source Link
John
  • 41
  • 2

I have written a python script using regex to extrapolate a poorly formatted text file data table into an actual data table. I am currently printing the output using . C:\my file location\file.csv in the command prompt. Is there a better way to cause my script to run for an entire directory of files and programmatically output each to a .csv file?

Here is the code:

import re

data = open('C:\\Users\\DrewCotten\\Downloads\\drew\\STL-INVENTORY-021319\file.txt', 'rU')

output = [['DEALER CODE', 'DEALER NAME', 'DEALER PHONE', 'DATE', 'CHG DESCRIPTION', 'VIN', 'CURRENT', 'OVER 30-DAYS', 'OVER 60-DAYS', 'OVER 90-DAYS', 'OVER 120-DAYS', 'AGE']]

for line in data:
    dealer_code = re.search(r'(\w\d\d\d\d\d  )', line)
    dealer_code_phone_search = re.search(r'(\d\d\d-\d\d\d-\d\d\d\d)', line)
    
    if dealer_code and "/" not in line:
        line = line.replace('\n','')
        dealer_code_number = line.split()[0]
        if dealer_code_phone_search:
            dealer_code_phone = dealer_code_phone_search.group(0)
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip().split(dealer_code_phone)[0].strip()
        else:
            dealer_code_phone = ''
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip()


    elif "/" in line:
        line = line.replace('\n','')
        vin = re.search(r'(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w)', line)
        if vin:
            date = line.split(vin.group(0))[0].split()[0]
            descp = ' '.join(line.split(vin.group(0))[0].split()[1:])
            rest = line.split(vin.group(0))[1].split()
            vin = vin.group(0)

            new_line = [dealer_code_number, dealer_name, dealer_code_phone, date, descp, vin]
            for i in rest:
                new_line.append(i)

            output.append(new_line)


for line in output:
    print(','.join(line))

I have written a python script using regex to extrapolate a poorly formatted text file data table into an actual data table. I am currently printing the output using . C:\my file location\file.csv in the command prompt. Is there a better way to cause my script to run for an entire directory of files and programmatically output each to a .csv file?

Here is the code:

import re

data = open('C:\\Users\\DrewCotten\\Downloads\\drew\\STL-INVENTORY-021319.txt', 'rU')

output = [['DEALER CODE', 'DEALER NAME', 'DEALER PHONE', 'DATE', 'CHG DESCRIPTION', 'VIN', 'CURRENT', 'OVER 30-DAYS', 'OVER 60-DAYS', 'OVER 90-DAYS', 'OVER 120-DAYS', 'AGE']]

for line in data:
    dealer_code = re.search(r'(\w\d\d\d\d\d  )', line)
    dealer_code_phone_search = re.search(r'(\d\d\d-\d\d\d-\d\d\d\d)', line)
    
    if dealer_code and "/" not in line:
        line = line.replace('\n','')
        dealer_code_number = line.split()[0]
        if dealer_code_phone_search:
            dealer_code_phone = dealer_code_phone_search.group(0)
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip().split(dealer_code_phone)[0].strip()
        else:
            dealer_code_phone = ''
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip()


    elif "/" in line:
        line = line.replace('\n','')
        vin = re.search(r'(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w)', line)
        if vin:
            date = line.split(vin.group(0))[0].split()[0]
            descp = ' '.join(line.split(vin.group(0))[0].split()[1:])
            rest = line.split(vin.group(0))[1].split()
            vin = vin.group(0)

            new_line = [dealer_code_number, dealer_name, dealer_code_phone, date, descp, vin]
            for i in rest:
                new_line.append(i)

            output.append(new_line)


for line in output:
    print(','.join(line))

I have written a python script using regex to extrapolate a poorly formatted text file data table into an actual data table. I am currently printing the output using . C:\my file location\file.csv in the command prompt. Is there a better way to cause my script to run for an entire directory of files and programmatically output each to a .csv file?

Here is the code:

import re

data = open('C:\file.txt', 'rU')

output = [['DEALER CODE', 'DEALER NAME', 'DEALER PHONE', 'DATE', 'CHG DESCRIPTION', 'VIN', 'CURRENT', 'OVER 30-DAYS', 'OVER 60-DAYS', 'OVER 90-DAYS', 'OVER 120-DAYS', 'AGE']]

for line in data:
    dealer_code = re.search(r'(\w\d\d\d\d\d  )', line)
    dealer_code_phone_search = re.search(r'(\d\d\d-\d\d\d-\d\d\d\d)', line)
    
    if dealer_code and "/" not in line:
        line = line.replace('\n','')
        dealer_code_number = line.split()[0]
        if dealer_code_phone_search:
            dealer_code_phone = dealer_code_phone_search.group(0)
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip().split(dealer_code_phone)[0].strip()
        else:
            dealer_code_phone = ''
            dealer_name = ''.join(line.split(dealer_code_number)[1:]).strip()


    elif "/" in line:
        line = line.replace('\n','')
        vin = re.search(r'(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w)', line)
        if vin:
            date = line.split(vin.group(0))[0].split()[0]
            descp = ' '.join(line.split(vin.group(0))[0].split()[1:])
            rest = line.split(vin.group(0))[1].split()
            vin = vin.group(0)

            new_line = [dealer_code_number, dealer_name, dealer_code_phone, date, descp, vin]
            for i in rest:
                new_line.append(i)

            output.append(new_line)


for line in output:
    print(','.join(line))
edited tags
Link
200_success
  • 145.6k
  • 22
  • 191
  • 481
Source Link
John
  • 41
  • 2
Loading