Ok, so this ended up being a little bit longer/more complicated than I hoped. I will do my best to explain so that you can follow along. Please ask questions if you get lost or confused!
Code first, then explanations:
Option Explicit
Private Type TRecord
To As String
From As String
Subject As String
ReceivedDate As Date
InFolder As String
Size As String
Weekday As String
RecordDate As Date
Month As String
Year As String
Time As String
InOut As String
End Type
Sub New_Fix_Table()
' Be sure to add 'Option Explicit' at the top of your modules. This prevents undeclared variables from slipping through.
' Never use underscores in names. They have special meaning to the interpreter.
' table_size = Range("Table1").Rows.Count
' ## Not Needed due to UBound/LBound ##
' Dim tableSize As Long
' Be sure to also properly qualify you range references.
' TableSize = ActiveWorkbook.Range("Table1").Rows.Count - Without proper qualification, your Range is really ActiveWorkbook.Range
' tableSize = ThisWorkbook.Range("Table1").Rows.Count
' ## ##
'Goes through Table1 in "To" column and fixes the recipients
Dim i As Long
' For loops such as these, I prefer for loops
' Do While i <= table_size
' I strongly prefer arrays for this purpose. If it was my own project, I even would use classes, but one step at a time for now.
' Change this to point to the correct worksheet.
Dim inputSheet As Worksheet
Set inputSheet = ThisWorkbook.Worksheets("TargetSheet")
' If your data is in a table, then you can use this method instead of referring to the range.
Dim tableData As Variant
tableData = inputSheet.ListObjects(1).Range.value
' Now, here is a trick I use when processing table data in a much more efficient manner.
' This does require a reference to Microsoft Scripting Runtime
Dim headerIndices As Scripting.Dictionary
Set headerIndices = GetHeaderIndices(tableData)
' Now we have a dictionary where we can use a key and return the index position of that key
' This is where it gets a little bit tricky. If we encounter a row with multiple emails, we need to duplicate the records.
' Otherwise, we want to keep the records as is. For this task, collections to the rescue!
' Having declared a Record Type, I can now use the Type as a container for my data (without needing a class)
Dim record As TRecord
' The records collection will contain the created records
Dim records As Collection
Set records = New Collection
Dim i As Long
' We loop through arrays using LBound and Ubound (lower bound, upper bound). The '1' denotes rows, whereas '2' would denote columns.
' I add 1 to the lower bound so I can skip the header row.
For i = LBound(tableData, 1) + 1 To UBound(tableData, 1)
' Set all the properties of the record.
record.From = tableData(i, headerIndices("From"))
record.Subject = tableData(i, headerIndices("Subject"))
record.ReceivedDate = tableData(i, headerIndices("Received_Date"))
record.InFolder = tableData(i, headerIndices("In_Folder"))
record.Size = tableData(i, headerIndices("Size"))
record.Weekday = tableData(i, headerIndices("Weekday"))
record.RecordDate = tableData(i, headerIndices("Date"))
record.Month = tableData(i, headerIndices("Month"))
record.Year = tableData(i, headerIndices("Year"))
record.Time = tableData(i, headerIndices("Time"))
record.InOut = tableData(i, headerIndices("In/out"))
' Split the addresses. If there are multiple addresses we dont need to rewrite the record, we just need to adjust the To field.
Dim splitAddresses As Variant
If InStr(tableData(i, headerIndices("To")), ";") > 0 Then
splitAddresses = Split(tableData(i, headerIndices("To")), ";")
Dim j As Long
For j = LBound(splitAddresses) To UBound(splitAddresses)
If Len(splitAddresses(i)) > 1 Then
record.To = splitAddresses(i)
records.Add record
End If
Next
Else
record.To = tableData(i, headerIndices("To"))
records.Add record
End If
Next
' Now we have a colleciton of all of the records we need. Now, we need to translate those back into an array.
Dim outputData As Variant
' The row is 0 based so we can add headers, but the headerIndices dictionary is already 1-based, so we leave the columns 1 based.
' Admittedly, I would avoid a mis-match of bases for re-dimming an array, I am only doing it this way to prevent confusion.
ReDim outputData(0 To records.Count, 1 To headerIndices.Count)
' An array with the same base-dimensions would be one of the two following:
' ReDim outputData(0 To records.Count, 0 To headerIndices.Count - 1)
' ReDim outputData(1 To records.Count + 1, 1 To headerIndices.Count)
' You would then need to adjust the actual filling of the array as well.
i = LBound(outputData, 2)
Dim header As Variant
' Loop through all of the stored headers
For Each header In headerIndices.Keys
' The LBound here dynamically points to the header row.
outputData(LBound(outputData, 1), i) = header
Next
' This way we can dynamically fill in the array.
Set headerIndices = GetHeaderIndices(outputData)
i = LBound(outputData, 2) + 1
For Each record In records
outputData(i, headerIndices("To")) = record.To
outputData(i, headerIndices("From")) = record.From
outputData(i, headerIndices("Subject")) = record.Subject
outputData(i, headerIndices("Received_Date")) = record.ReceivedDate
outputData(i, headerIndices("In_Folder")) = record.InFolder
outputData(i, headerIndices("Size")) = record.Size
outputData(i, headerIndices("Weekday")) = record.Weekday
outputData(i, headerIndices("Date")) = record.RecordDate
outputData(i, headerIndices("Month")) = record.Month
outputData(i, headerIndices("Year")) = record.Year
outputData(i, headerIndices("Time")) = record.Time
outputData(i, headerIndices("In/out")) = record.InOut
Next
' Now we just have to put the output data somewhere. Let's reuse the sheet we pulled from.
OutputArray outputData, inputSheet, "Output_Data"
End Sub
Public Function GetHeaderIndices(ByVal InputData As Variant) As Scripting.Dictionary
If IsEmpty(InputData) Then Exit Function
Dim headerIndices As Scripting.Dictionary
Set headerIndices = New Scripting.Dictionary
headerIndices.CompareMode = TextCompare
Dim i As Long
For i = LBound(InputData, 2) To UBound(InputData, 2)
If Not headerIndices.Exists(Trim(InputData(LBound(InputData, 1), i))) Then _
headerIndices.Add Trim(InputData(LBound(InputData, 1), i)), i
Next
Set GetHeaderIndices = headerIndices
End Function
Public Sub OutputArray(ByVal InputArray As Variant, ByVal InputWorksheet As Worksheet, ByVal TableName As String)
Dim AddLengthH As Long
Dim AddLengthW As Long
If NumberOfArrayDimensions(InputArray) = 2 Then
If LBound(InputArray, 1) = 0 Then AddLengthH = 1
If LBound(InputArray, 2) = 0 Then AddLengthW = 1
Dim r As Range
If Not InputWorksheet Is Nothing Then
With InputWorksheet
.Cells.Clear
Set r = .Range("A1").Resize(UBound(InputArray, 1) + AddLengthH, UBound(InputArray, 2) + AddLengthW)
r.value = InputArray
.ListObjects.Add(xlSrcRange, r, , xlYes).Name = TableName
With .ListObjects(1).Sort
.header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
End With
End If
End If
End Sub
Initial Notes
The first thing that struck me about your code was that you literally have no variable declarations. Lines like :
cell_value = ...
Are pretty much the same as :
Dim cell_value as Variant
cell_value = ...
The only difference between the two is that the second is at least explicit about wanting a variant. The first is implicit.
First bit of advice avoid implicit commands as much as possible. The reason for this is quite simple, there is a tendency to think that the computer is magically doing something it shouldnt be, but really you told it to do exactly what it is doing and as a result, you have a bug that can be nearly invisible.
Consider for example :
myRange = Range("SomeRange")
This declares a myRange (which the reader expects to be a Range) and converts the range to an array. This in itself is confusing, but, even worse, I can still do :
Set myRange = Range("SomeRange")
And it is now a range reference (the only difference being the Set keyword). While it is easy to read the code and determine what is happening for us, you will inevitably lose a bug in there that you will have to search for.
Option Explicit to the Rescue!
Option Explicit is one of the best things in VBA. It is truly simple, but it makes the simplest of bugs super simple to prevent (and even simpler to find). With Option Explicit at the top of a module, the compiler will throw an error when a variable isnt declared.
' This won't compile. Note the minor (but potentially difficult to find) spelling error between Very and Vary.
Dim SomeVeryLongVariableName as Long
SomeVaryLongVariableName = 10
To make Option Explicit a breeze to use:
- Open the Developer Window
- Press
CTRL+T and then CTRL+O.
- Check the box for
Require Variable Declaration.
- While you're in there, I recommend going to the general tab and selecting
Break in Class Module under error trapping.
Qualifying References
One of the most common mistakes someone new to VBA will make is doing:
SomeVariable = Range("SomeRange")
' or
SomeVariable = Range("SomeRange").Value
There are two problems with the first version. The problem that is solved by the second version is that we specify the property we are accessing. The default property of a Range is .Value so we don't need .Value, but it is discouraged to implicitly access .Value.
The second issue is that we rely implicitly on ActiveSheet.Range("SomeRange"). This is a silent killer. I refuse to work with the Active Anything unless I absolutely must and even then, I prefer not to. It is always best to specifically call the object you are working with.
' This is literally better than not using a worksheet reference at all
Dim Foo as Worksheet
Set Foo = ActiveSheet
...many lines later...
DoSomething Foo
Why is this little change better? It is using the active sheet! While not ideal, it at least ensures that Foo points to the same worksheet unless we explicitly change the worksheet it is pointing to. It is stronger than the ActiveSheet reference.
Even better would be:
Dim Foo as Worksheet
Set Foo = ThisWorkbook.WorkSheets("SomeFoo")
The ThisWorkbook object is the workbook that is running the code, and by using a string argument as a call to the collection we can return the specific worksheet back.
Working on the Worksheet
Now lets get into the meat of the problem. Your code is slow because you are operating on the worksheet. Its that simple. A hundred calls to the Worksheet.Range will be slower than a hundred calls to Data(x, y). An array is faster, and is easier to use.
Even worse is when you not only access the cells by a range reference, but when you do :
EntireRow.Delete
Now you've really upset the worksheet. It has to update calculations, it has to resize stuff, fix formatting (if in a table), check number formatting, etc. It is a costly operation. If you're deleting a lot of rows...avoid it at all cost.
Enter the world of arrays. Not only are they fast but they are easy. Worksheet.Cells(1, 1) becomes Data(1, 1). Once you load in the data to the array (Data) you can manipulate, access, delete the values all you want. The worksheet doesn't care. It doesnt see what is happening to the same data it was previously responsible for.
Putting It Together
I am not going to go through the code line by line, particularly because I already provided in-line comments to help make the code a bit easier to read. This will be a broad explanation of what the code does.
- First, it loads in all the data from a
ListObject (excel Table)
into an array. That's the easy part.
- Once we have the data, we need to know the indices of the headers.
This makes manipulating the data much easier. It also allows you to
move columns around all you want without breaking the code (just
ensure the names are still there).
- Using a custom
Type we can store all of the data in a defined
structure. A Type is similar to a Struct in other languages. In
essence, it is a variable that has properties, but that isnt an
object. Thus, it cannot be New'ed up.
- Loop through all of the rows, and create new records for cells with
multiple addresses. Since the
Type cannot be New'ed, it will
retain old values. This means we dont need to re-create the entire
record for each new row. We just need to change the new values,
then add it to the collection.
- Once the collection is loaded with records, we can translate them
into a new array that is appropriately sized. No need to add/remove
rows. It is just the right size at creation.
- The
OutputArray method will take an array and a worksheet and it
will clear the cells on that worksheet, put the array onto the
worksheet, and then turn that output into a table. Point it where
you want the output to go, and it will do the rest.
I didn't test the code on my end (I didnt want to bother creating a test table), but I imagine it should run in a matter of seconds.
Note : Microsoft Scripting Runtime
To use the code as is, you will need to add a reference to the Microsoft Scripting Runtime Library. There are plenty of resources for adding references, but ask if you get lost.
RubberDuck. Use It. Love It. Prof-It
I tend to be shameless about my plugs for Rubberduck, largely because the head of the tool, Mat's Mug, is unabashed in his attempts to convert everyone on SO to using RD. That said, it is an amazing tool, especially for beginners, and it would make most of the above comments stupidly easy to implement. Honestly, it would.
Check it out here: http://rubberduckvba.com/.
Wrapping Up
Let me know how the code above works for you, and do your best to use it as an example for future projects. If you manage to implement even half of those suggestions you'll save yourself potentially months of costly learning experiences through failed projects. Best of luck!
EDIT: Use this code instead to fix the error from above. The error is caused by adding a custom-type to a collection (I have never used Types outside of a class before so I didn't think of the error in advance). This approach is slightly more advanced, but it shouldnt be too complex.
In a Class Module named 'Record'
Option Explicit
Private Type TRecord
ToField As String
FromField As String
Subject As String
ReceivedDate As Date
InFolder As String
Size As String
WeekDay As String
RecordDate As Date
Month As String
Year As String
Time As String
InOut As String
End Type
Private this As TRecord
Public Property Get ToField() As String
ToField = this.ToField
End Property
Public Property Get FromField() As String
FromField = this.FromField
End Property
Public Property Get Subject() As String
Subject = this.Subject
End Property
Public Property Get ReceivedDate() As Date
ReceivedDate = this.ReceivedDate
End Property
Public Property Get InFolder() As String
InFolder = this.InFolder
End Property
Public Property Get Size() As String
Size = this.Size
End Property
Public Property Get WeekDay() As String
WeekDay = this.WeekDay
End Property
Public Property Get RecordDate() As Date
RecordDate = this.RecordDate
End Property
Public Property Get Month() As String
Month = this.Month
End Property
Public Property Get Year() As String
Year = this.Year
End Property
Public Property Get Time() As String
Time = this.Time
End Property
Public Property Get InOut() As String
InOut = this.InOut
End Property
Public Property Let ToField(value As String)
this.ToField = value
End Property
Public Property Let FromField(value As String)
this.FromField = value
End Property
Public Property Let Subject(value As String)
this.Subject = value
End Property
Public Property Let ReceivedDate(value As Date)
this.ReceivedDate = value
End Property
Public Property Let InFolder(value As String)
this.InFolder = value
End Property
Public Property Let Size(value As String)
this.Size = value
End Property
Public Property Let WeekDay(value As String)
this.WeekDay = value
End Property
Public Property Let RecordDate(value As Date)
this.RecordDate = value
End Property
Public Property Let Month(value As String)
this.Month = value
End Property
Public Property Let Year(value As String)
this.Year = value
End Property
Public Property Let Time(value As String)
this.Time = value
End Property
Public Property Let InOut(value As String)
this.InOut = value
End Property
This class uses a code pattern I learned from Mat's Mug. Declare the Type for the class as a Private Type, then declare a private this that refers to that type. As a result, you have an organized Type to hold your variables, and you get intellisense.
Once you do that, you just need to open up the property accessors. In this case, I made everything public. This isnt good practice, but I am avoiding teaching you too much at once (I would prefer not to use a class as is, but it is the best approach at this point).
This Code Goes in Your Module
Option Explicit
Sub New_Fix_Table()
' Be sure to add 'Option Explicit' at the top of your modules. This prevents undeclared variables from slipping through.
' Never use underscores in names. They have special meaning to the interpreter.
' table_size = Range("Table1").Rows.Count
' ## Not Needed due to UBound/LBound ##
' Dim tableSize As Long
' Be sure to also properly qualify you range references.
' TableSize = ActiveWorkbook.Range("Table1").Rows.Count - Without proper qualification, your Range is really ActiveWorkbook.Range
' tableSize = ThisWorkbook.Range("Table1").Rows.Count
' ## ##
'Goes through Table1 in "To" column and fixes the recipients
' For loops such as these, I prefer for loops
' Do While i <= table_size
' I strongly prefer arrays for this purpose. If it was my own project, I even would use classes, but one step at a time for now.
' Change this to point to the correct worksheet.
Dim inputSheet As Worksheet
Set inputSheet = ThisWorkbook.Worksheets("TargetSheet")
' If your data is in a table, then you can use this method instead of referring to the range.
Dim tableData As Variant
tableData = inputSheet.ListObjects(1).Range.value
' Now, here is a trick I use when processing table data in a much more efficient manner.
' This does require a reference to Microsoft Scripting Runtime
Dim headerIndices As Scripting.Dictionary
Set headerIndices = GetHeaderIndices(tableData)
' Now we have a dictionary where we can use a key and return the index position of that key
' This is where it gets a little bit tricky. If we encounter a row with multiple emails, we need to duplicate the records.
' Otherwise, we want to keep the records as is. For this task, collections to the rescue!
' Having declared a Record Type, I can now use the Type as a container for my data (without needing a class)
Dim initialRecord As record
' The records collection will contain the created records
Dim records As Collection
Set records = New Collection
Dim i As Long
' We loop through arrays using LBound and Ubound (lower bound, upper bound). The '1' denotes rows, whereas '2' would denote columns.
' I add 1 to the lower bound so I can skip the header row.
For i = LBound(tableData, 1) + 1 To UBound(tableData, 1)
Set initialRecord = New record
' Set all the properties of the record.
initialRecord.FromField = tableData(i, headerIndices("From"))
initialRecord.Subject = tableData(i, headerIndices("Subject"))
initialRecord.ReceivedDate = tableData(i, headerIndices("Received_Date"))
initialRecord.InFolder = tableData(i, headerIndices("In_Folder"))
initialRecord.Size = tableData(i, headerIndices("Size"))
initialRecord.WeekDay = tableData(i, headerIndices("Weekday"))
initialRecord.RecordDate = tableData(i, headerIndices("Date"))
initialRecord.Month = tableData(i, headerIndices("Month"))
initialRecord.Year = tableData(i, headerIndices("Year"))
initialRecord.Time = tableData(i, headerIndices("Time"))
initialRecord.InOut = tableData(i, headerIndices("In/out"))
' Split the addresses. If there are multiple addresses we dont need to rewrite the record, we just need to adjust the To field.
Dim splitAddresses As Variant
If InStr(tableData(i, headerIndices("To")), ";") > 0 Then
splitAddresses = Split(tableData(i, headerIndices("To")), ";")
Dim j As Long
For j = LBound(splitAddresses) To UBound(splitAddresses)
If Len(splitAddresses(i)) > 1 Then
Dim splitRecord As record
Set splitRecord = New record
' Because of how objects are passed around, you cant copy a class through assignment. You must duplicate the properties manually into a new class.
splitRecord.FromField = initialRecord.FromField
splitRecord.Subject = initialRecord.Subject
splitRecord.ReceivedDate = initialRecord.ReceivedDate
splitRecord.InFolder = initialRecord.InFolder
splitRecord.Size = initialRecord.Size
splitRecord.WeekDay = initialRecord.WeekDay
splitRecord.RecordDate = initialRecord.RecordDate
splitRecord.Month = initialRecord.Month
splitRecord.Year = initialRecord.Year
splitRecord.Time = initialRecord.Time
splitRecord.InOut = initialRecord.InOut
initialRecord.ToField = splitAddresses(i)
records.Add splitRecord
End If
Next
Else
initialRecord.ToField = tableData(i, headerIndices("To"))
records.Add initialRecord
End If
Next
' Now we have a colleciton of all of the records we need. Now, we need to translate those back into an array.
Dim outputData As Variant
' The row is 0 based so we can add headers, but the headerIndices dictionary is already 1-based, so we leave the columns 1 based.
' Admittedly, I would avoid a mis-match of bases for re-dimming an array, I am only doing it this way to prevent confusion.
ReDim outputData(0 To records.Count, 1 To headerIndices.Count)
' An array with the same base-dimensions would be one of the two following:
' ReDim outputData(0 To records.Count, 0 To headerIndices.Count - 1)
' ReDim outputData(1 To records.Count + 1, 1 To headerIndices.Count)
' You would then need to adjust the actual filling of the array as well.
i = LBound(outputData, 2)
Dim header As Variant
' Loop through all of the stored headers
For Each header In headerIndices.Keys
' The LBound here dynamically points to the header row.
outputData(LBound(outputData, 1), i) = header
Next
' This way we can dynamically fill in the array.
Set headerIndices = GetHeaderIndices(outputData)
i = LBound(outputData, 2) + 1
Dim outputRecord As record
For Each initialRecord In records
outputData(i, headerIndices("To")) = outputRecord.ToField
outputData(i, headerIndices("From")) = outputRecord.FromField
outputData(i, headerIndices("Subject")) = outputRecord.Subject
outputData(i, headerIndices("Received_Date")) = outputRecord.ReceivedDate
outputData(i, headerIndices("In_Folder")) = outputRecord.InFolder
outputData(i, headerIndices("Size")) = outputRecord.Size
outputData(i, headerIndices("Weekday")) = outputRecord.WeekDay
outputData(i, headerIndices("Date")) = outputRecord.RecordDate
outputData(i, headerIndices("Month")) = outputRecord.Month
outputData(i, headerIndices("Year")) = outputRecord.Year
outputData(i, headerIndices("Time")) = outputRecord.Time
outputData(i, headerIndices("In/out")) = outputRecord.InOut
Next
' Now we just have to put the output data somewhere. Let's reuse the sheet we pulled from.
OutputArray outputData, inputSheet, "Output_Data"
End Sub
Public Function GetHeaderIndices(ByVal InputData As Variant) As Scripting.Dictionary
If IsEmpty(InputData) Then Exit Function
Dim headerIndices As Scripting.Dictionary
Set headerIndices = New Scripting.Dictionary
headerIndices.CompareMode = TextCompare
Dim i As Long
For i = LBound(InputData, 2) To UBound(InputData, 2)
If Not headerIndices.Exists(Trim(InputData(LBound(InputData, 1), i))) Then _
headerIndices.Add Trim(InputData(LBound(InputData, 1), i)), i
Next
Set GetHeaderIndices = headerIndices
End Function
Public Sub OutputArray(ByVal InputArray As Variant, ByVal InputWorksheet As Worksheet, ByVal TableName As String)
Dim AddLengthH As Long
Dim AddLengthW As Long
If NumberOfArrayDimensions(InputArray) = 2 Then
If LBound(InputArray, 1) = 0 Then AddLengthH = 1
If LBound(InputArray, 2) = 0 Then AddLengthW = 1
Dim r As Range
If Not InputWorksheet Is Nothing Then
With InputWorksheet
.Cells.Clear
Set r = .Range("A1").Resize(UBound(InputArray, 1) + AddLengthH, UBound(InputArray, 2) + AddLengthW)
r.value = InputArray
.ListObjects.Add(xlSrcRange, r, , xlYes).Name = TableName
With .ListObjects(1).Sort
.header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
End With
End If
End If
End Sub
The only main difference is that now we are using an object instead of a type, and we must manually copy the object any time we want to create a new one (whereas, with the Type, we just changed the To field).
Application.ScreenUpdatingandApplication.Calculationto speed up the processing - while somewhat useful, note that this recommendation doesn't make your code/logic any more efficient. Glad you took your question over here - you'll learn a ton of things Stack Overflow wouldn't bother showing you =) \$\endgroup\$