I was wondering if there's a way to iterate through each row in a CSV file using Pandas to identify whether a word is found in that row (similar to using grep on Linux systems). It doesn't matter which column the word is found in; as long as the word is found, the entire row will be parsed. I discovered the iterrows() function, but I read that it's very inefficient to use this method if the file is going to contain over 1000 rows, and my program could be reading over 100,000 rows. Any suggestions are greatly appreciated!
#Code was tested using Python v3.9.5
import os
import pandas as pd
def _make_export_dir(parent: str) -> str:
    """Create and return a new "File Parser Exports" folder inside *parent*.

    If the plain name is already taken, " (1)", " (2)", ... is appended until
    a free name is found.
    """
    base_name = "File Parser Exports"
    candidate = os.path.join(parent, base_name)
    suffix = 0
    while True:
        # Try to create the folder directly instead of checking first, so a
        # folder appearing between a check and the mkdir cannot break us.
        try:
            os.mkdir(candidate)
        except FileExistsError:
            suffix += 1
            candidate = os.path.join(parent, f"{base_name} ({suffix})")
        else:
            return candidate


def _rows_containing(frame: pd.DataFrame, word: str) -> pd.DataFrame:
    """Return the rows of *frame* in which at least one cell contains *word*.

    *frame* must hold only string columns. The match is a case-sensitive,
    literal (non-regex) substring test.
    """
    matches = pd.Series(False, index=frame.index, dtype=bool)
    # One vectorized pass per column; far cheaper than visiting every row
    # with iterrows() on files with 100,000+ rows.
    for column in frame.columns:
        hits = frame[column].str.contains(word, regex=False)
        matches = matches | hits.astype(bool)
    return frame.loc[matches]


def parse_row(grep_value: str) -> None:
    """Export every CSV row that contains *grep_value* in any column.

    Reads the CSV at the module-level ``import_file_path``, keeps only the
    rows where some cell contains ``grep_value`` (grep-like, case-sensitive
    literal substring), and writes them to ``Export.csv`` inside a newly
    created "File Parser Exports" folder under the module-level
    ``export_file_path``.

    Args:
        grep_value: Text to search for; it is converted with ``str()``
            before matching. An empty string matches every row.

    Raises:
        OSError: Propagated unchanged when the input file cannot be read or
            the export folder cannot be created (for example a missing
            parent directory).
    """
    # Read every cell as text and keep empty cells as "" so that matching is
    # done on the file's own text: numbers are not reformatted and missing
    # values cannot match as the string "nan".
    # NOTE(review): lineterminator="\n" is kept from the original call; on
    # files with CRLF line endings it presumably leaves a trailing "\r" in
    # the last column - confirm the expected input format.
    file_data = pd.read_csv(
        import_file_path,
        lineterminator="\n",
        dtype=str,
        keep_default_na=False,
    )
    matching_rows = _rows_containing(file_data, str(grep_value))

    # The folder is created only after reading and filtering succeeded, so a
    # failed read does not leave an empty export folder behind.
    export_dir = _make_export_dir(export_file_path)

    # The folder was just created by this call, so "Export.csv" cannot exist
    # yet and no file-name collision handling is needed.
    full_export_path = os.path.join(export_dir, "Export.csv")
    matching_rows.to_csv(full_export_path, index=False)

    print()
    print(f'File(s) exported to "{export_dir}"')
    print("Successfully completed!")
# Script configuration: parse_row() reads both paths as module-level globals.
import_file_path = r"C:\Users\importpath"
export_file_path = r"C:\Users\exportpath"

# Search term handed to parse_row().
grep_value = "The"
parse_row(grep_value)