Traditionally, VBA developers rely on the FileSystemObject to retrieve files within a directory. I decided to test and compare different methods (Windows API, PowerQuery, PowerShell, and FileSystemObject) to determine which is the most efficient for large-scale file retrieval. Below are the results of my tests, along with the code implementations for each approach.
- Windows API: 18.86328 seconds to retrieve 512191 files from D:\
- PowerQuery: 27.48828 seconds to retrieve 512191 files from D:\
- PowerShell: 74.35938 seconds to retrieve 512191 files from D:\
- FileSystemObject: 212.4434 seconds to retrieve 512171 files from D:\
Windows API: 18.86328 seconds
Attribute VB_Name = "WinAPI"
Option Explicit
#If VBA7 Then
Private Declare PtrSafe Function FindFirstFile Lib "kernel32" Alias "FindFirstFileA" _
(ByVal lpFileName As String, lpFindFileData As WIN32_FIND_DATA) As LongPtr
Private Declare PtrSafe Function FindNextFile Lib "kernel32" Alias "FindNextFileA" _
(ByVal hFindFile As LongPtr, lpFindFileData As WIN32_FIND_DATA) As Long
Private Declare PtrSafe Function FindClose Lib "kernel32" (ByVal hFindFile As LongPtr) As Long
#Else
Private Declare Function FindFirstFile Lib "kernel32" Alias "FindFirstFileA" _
(ByVal lpFileName As String, lpFindFileData As WIN32_FIND_DATA) As Long
Private Declare Function FindNextFile Lib "kernel32" Alias "FindNextFileA" _
(ByVal hFindFile As Long, lpFindFileData As WIN32_FIND_DATA) As Long
Private Declare Function FindClose Lib "kernel32" (ByVal hFindFile As Long) As Long
#End If
Private Type FILETIME
dwLowDateTime As Long
dwHighDateTime As Long
End Type
Private Type WIN32_FIND_DATA
dwFileAttributes As Long
ftCreationTime As FILETIME
ftLastAccessTime As FILETIME
ftLastWriteTime As FILETIME
nFileSizeHigh As Long
nFileSizeLow As Long
dwReserved0 As Long
dwReserved1 As Long
cFileName As String * 260
cAlternate As String * 14
End Type
Function GetFilesWindowsAPIDictionary(RootFolder As String, Optional FileDictionary As Scripting.Dictionary) As Scripting.Dictionary
Dim hFind As LongPtr
Dim WFD As WIN32_FIND_DATA
Dim FileName As String
' Initialize the dictionary if not already initialized
If FileDictionary Is Nothing Then
Set FileDictionary = New Scripting.Dictionary
End If
' Ensure the folder path ends with a backslash
If Right(RootFolder, 1) <> "\" Then RootFolder = RootFolder & "\"
' Add wildcard to search all files and directories
hFind = FindFirstFile(RootFolder & "*.*", WFD)
If hFind <> -1 Then
Do
FileName = Left(WFD.cFileName, InStr(WFD.cFileName, vbNullChar) - 1)
' Skip the "." and ".." entries
If FileName <> "." And FileName <> ".." Then
' Check if it is a directory
If (WFD.dwFileAttributes And vbDirectory) = vbDirectory Then
' Recursive call for subdirectories
GetFilesWindowsAPIDictionary RootFolder & FileName, FileDictionary
Else
' Add the file to the dictionary
FileDictionary.Add RootFolder & FileName, ""
End If
End If
Loop While FindNextFile(hFind, WFD)
FindClose hFind
End If
' Return the dictionary of file paths
Set GetFilesWindowsAPIDictionary = FileDictionary
End Function
PowerQuery: 27.48828 seconds
Attribute VB_Name = "PowerQuery"
Option Explicit
Function GetFilesPowerQuery(RootFolder As String, QueryName As String, TableName As String)
On Error Resume Next
ActiveWorkbook.Queries(QueryName).Delete
On Error GoTo 0
ActiveWorkbook.Queries.Add Name:=QueryName, Formula:= _
"let" & vbNewLine & _
" Source = Folder.Files(" & Chr(34) & RootFolder & Chr(34) & ")," & vbNewLine & _
" FullFileName = Table.AddColumn(Source, ""FullFileName"", each [Folder Path] & [Name])," & vbNewLine & _
" Result = Table.SelectColumns(FullFileName, {""FullFileName""})" & vbNewLine & _
"in" & vbNewLine & _
" Result"
ActiveWorkbook.Worksheets.Add
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=" & Chr(34) & QueryName & Chr(34) & ";Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [" & QueryName & "]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.Name = TableName
.Refresh BackgroundQuery:=False
End With
GetFilesPowerQuery = ActiveSheet.ListObjects(TableName).DataBodyRange.Value
End Function
PowerShell: 74.35938 seconds
Attribute VB_Name = "PowerShell"
Option Explicit
Function GetFilesPowerShell(RootFolder As String) As Variant
Dim PowerShellScript As String
Dim PowerShellExec As Object
Dim Output As String
Dim Files() As String
' PowerShell script to list all files in the specified folder and its subfolders
PowerShellScript = "Get-ChildItem -Path '" & RootFolder & "' -Recurse -Force -File | Select-Object -ExpandProperty FullName"
' Create PowerShell object and execute the script with no window
Set PowerShellExec = CreateObject("WScript.Shell").Exec("powershell -command "" & { " & PowerShellScript & " }" & " -WindowStyle Hidden")
' Capture the output
Output = PowerShellExec.StdOut.ReadAll
' Split the output into an array (each line is a file path)
Files = Split(Output, vbCrLf)
' Return the array of file paths, removing any trailing empty element
If Len(Files(UBound(Files))) = 0 Then
ReDim Preserve Files(0 To UBound(Files) - 1)
End If
GetFilesPowerShell = Files
End Function
FileSystemObject: 212.4434 seconds
Attribute VB_Name = "FSO"
Option Explicit
Function GetFileDictionary(RootFolderPath As String, Optional FileDictionary As Scripting.Dictionary, Optional FSO As Scripting.FileSystemObject) As Scripting.Dictionary
If FSO Is Nothing Then
Set FileDictionary = New Scripting.Dictionary
Set FSO = New Scripting.FileSystemObject
End If
Dim SubFolder As Folder
For Each SubFolder In FSO.GetFolder(RootFolderPath).SubFolders
GetFileDictionary SubFolder.Path, FileDictionary, FSO
Next
Dim File As File
For Each File In FSO.GetFolder(RootFolderPath).Files
FileDictionary.Add File.Path, ""
Next
Set GetFileDictionary = FileDictionary
End Function
Test
Attribute VB_Name = "Test"
Option Explicit
Private Const RootFolder As String = "D:\"
Sub TestAll()
TestGetFilesWindowsAPIDictionary
TestGetFilesPowerQuery
TestGetFilesPowerShell
TestFileSystemObject
End Sub
Sub TestGetFilesWindowsAPIDictionary()
Dim FileDictionary As Scripting.Dictionary
Dim StartTime As Single
Dim EndTime As Single
Dim ElapsedTime As Single
' Start the timer
StartTime = Timer
' Execute the function and retrieve the file paths into the dictionary
Set FileDictionary = GetFilesWindowsAPIDictionary(RootFolder)
' End the timer
EndTime = Timer
' Calculate elapsed time
ElapsedTime = EndTime - StartTime
' Output the elapsed time and number of files retrieved
If Not FileDictionary Is Nothing Then
Debug.Print "Windows API: "; ElapsedTime & " seconds to retrieve "; FileDictionary.Count; " files from " & RootFolder
DictionaryDump FileDictionary
Else
Debug.Print "No files found or an error occurred."
End If
End Sub
Sub TestGetFilesPowerQuery()
Const QueryName As String = "TestPowerQueryGetFiles"
Const TableName As String = "TestPowerQueryGetFilesTable"
Dim StartTime As Single
Dim EndTime As Single
Dim ElapsedTime As Single
' Start the timer
StartTime = Timer
Dim Data
Data = GetFilesPowerQuery(RootFolder, QueryName, TableName)
' End the timer
EndTime = Timer
' Calculate elapsed tim
ElapsedTime = EndTime - StartTime
' Output the elapsed time
Debug.Print "PowerQuery: "; ElapsedTime & " seconds to retrieve "; UBound(Data) - LBound(Data) + 1; " files from " & RootFolder
End Sub
Sub TestGetFilesPowerShell()
Dim Results As Variant
Dim StartTime As Single
Dim EndTime As Single
Dim ElapsedTime As Single
' Start the timer
StartTime = Timer
' Execute the function and retrieve the file paths
Results = GetFilesPowerShell(RootFolder)
' End the timer
EndTime = Timer
' Calculate elapsed time
ElapsedTime = EndTime - StartTime
' Optionally, output the number of files retrieved
If IsArray(Results) Then
' Output the elapsed time
Debug.Print "PowerShell: "; ElapsedTime & " seconds to retrieve "; UBound(Results) - LBound(Results) + 1; " files from " & RootFolder
ArrayDump Results
Else
Debug.Print "No files found or an error occurred."
End If
End Sub
Sub TestFileSystemObject()
Dim StartTime As Single
Dim EndTime As Single
Dim ElapsedTime As Single
Dim FileDictionary As Scripting.Dictionary
' Start the timer
StartTime = Timer
' Execute the function and retrieve the file paths
Set FileDictionary = GetFileDictionary(RootFolder)
' End the timer
EndTime = Timer
' Calculate elapsed time
ElapsedTime = EndTime - StartTime
' Output the elapsed time
Debug.Print "FileSystemObject: "; ElapsedTime & " seconds to retrieve "; FileDictionary.Count; " files from " & RootFolder
DictionaryDump FileDictionary
End Sub
Sub ArrayDump(Results)
Dim Data
ReDim Data(1 To UBound(Results) - LBound(Results) + 1, 1 To 1)
Dim n As Long, Count As Long
For n = LBound(Results) To UBound(Results)
Count = Count + 1
Data(Count, 1) = Results(n)
Next
With Worksheets.Add
.Range("$A$1").Resize(UBound(Data)).Value = Data
End With
End Sub
Sub DictionaryDump(FileDictionary As Scripting.Dictionary)
Dim Data, Keys
Keys = FileDictionary.Keys
ReDim Data(1 To FileDictionary.Count, 1 To 1)
Dim n As Long
For n = 1 To FileDictionary.Count
Data(n, 1) = Keys(n - 1)
Next
With Worksheets.Add
.Range("$A$1").Resize(FileDictionary.Count).Value = Data
End With
End Sub
Notes
I'm adamant in my use of Pascal Case with the exception of lower case for counters. Feel free to refute it but I will not change. This is an on-going rant of mine spurred by the fact that Dim value changes Range().Value to Range().value.
I also tried unsuccessfully tested shelling Python without a py file.
Questions
- Why does the FileSystemObject method returns 20 fewer files than the other approaches?
- Is there a way to get around the DIR() 260-character limit?
- Can we do a file search using PowerQuery on MACOS?
- Are there any other candidates or a file search that I should consider?
- Should I add any tags other than VBA to this post?
Thanks in advance for your feedback.
GetFilesfrom VBA-FileTools? You need theincludeSubFoldersset toTrueand maybe set the hidden and system arguments as well. I am curious how long it takes compared to the others in your question. \$\endgroup\$