To make parsing some VBA code easier, I have written a few extension methods for the string type and put them in this StringExtensions class:
/// <summary>
/// Extension methods on <see cref="string"/> that help when parsing a single line of VBA code:
/// locating and removing a trailing comment, and blanking out string literals.
/// </summary>
[ComVisible(false)]
public static class StringExtensions
{
    /// <summary>Character that opens and closes a string literal.</summary>
    public static readonly char StringDelimiter = '"';

    /// <summary>Character that starts a comment.</summary>
    public static readonly char CommentMarker = '\'';

    /// <summary>
    /// Strips any trailing comment from specified line of code.
    /// </summary>
    /// <param name="line">The line of code to process.</param>
    /// <returns>
    /// Returns a new string without the trailing comment (and without the whitespace that preceded it),
    /// or <paramref name="line"/> itself when it contains no comment.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="line"/> is <c>null</c>.</exception>
    public static string StripTrailingComment(this string line)
    {
        int index;
        return line.HasComment(out index)
            ? line.Substring(0, index).TrimEnd()
            : line;
    }

    /// <summary>
    /// Returns a value indicating whether line of code is/contains a comment.
    /// </summary>
    /// <param name="line">The line of code to inspect.</param>
    /// <param name="index">
    /// Returns the start index of the comment string, including the comment marker,
    /// or -1 when the line has no comment.
    /// </param>
    /// <returns><c>true</c> if a comment marker exists outside of any string literal; otherwise <c>false</c>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="line"/> is <c>null</c>.</exception>
    public static bool HasComment(this string line, out int index)
    {
        if (line == null)
        {
            throw new System.ArgumentNullException("line");
        }

        // String literals are blanked out with same-length padding, so the first marker found
        // in the stripped text sits at the same position in the original line.
        // IndexOf scans the whole text, so a marker in the very last position is found as well.
        index = line.StripStringLiterals().IndexOf(CommentMarker);
        return index >= 0;
    }

    /// <summary>
    /// Strips all string literals from a line of code or instruction.
    /// Replaces string literals with whitespace characters, to maintain original length.
    /// </summary>
    /// <remarks>
    /// Inside a literal, two consecutive delimiters are treated as one escaped delimiter and do not
    /// end the literal. Quoted spans are blanked wherever they occur in the line; an unterminated
    /// literal is blanked up to the end of the line.
    /// </remarks>
    /// <param name="line">The line of code to process.</param>
    /// <returns>Returns a new string, stripped of all string literals and string delimiters.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="line"/> is <c>null</c>.</exception>
    public static string StripStringLiterals(this string line)
    {
        if (line == null)
        {
            throw new System.ArgumentNullException("line");
        }

        var builder = new StringBuilder(line.Length);
        var isInsideString = false;

        for (int cursor = 0; cursor < line.Length; cursor++)
        {
            var current = line[cursor];
            if (current != StringDelimiter)
            {
                builder.Append(isInsideString ? ' ' : current);
                continue;
            }

            var isEscapedDelimiter = isInsideString
                && cursor + 1 < line.Length
                && line[cursor + 1] == StringDelimiter;

            if (isEscapedDelimiter)
            {
                // Consume both delimiters of the escaped pair in one step, so the second one
                // can never be mistaken for the closing (or an opening) delimiter.
                builder.Append(' ');
                cursor++;
            }
            else
            {
                // A lone delimiter either opens a literal or closes the current one.
                isInsideString = !isInsideString;
            }

            // Delimiters themselves are always replaced, keeping the length unchanged.
            builder.Append(' ');
        }

        return builder.ToString();
    }
}
This works as intended, at least according to the simple unit tests I've written for it:
// A single string literal is replaced by the same number of spaces.
[TestMethod]
public void StripsStringLiteral()
{
    // Arrange
    const string literal = "\"Hello, World!\"";
    var code = "Debug.Print " + literal;
    var expected = "Debug.Print " + new string(' ', literal.Length);

    // Act
    var actual = code.StripStringLiterals();

    // Assert
    Assert.AreEqual(expected, actual);
}
// Every string literal on the line is blanked out; the code between them is kept.
[TestMethod]
public void StripsAllStringLiterals()
{
    // Arrange
    const string literal = "\"Hello, World!\"";
    var blank = new string(' ', literal.Length);
    var code = "Debug.Print " + literal + " & " + literal;
    var expected = "Debug.Print " + blank + " & " + blank;

    // Act
    var actual = code.StripStringLiterals();

    // Assert
    Assert.AreEqual(expected, actual);
}
// A line that starts with the comment marker is a comment beginning at index 0.
[TestMethod]
public void IsComment()
{
    var instruction = "'Debug.Print mwahaha this is just a comment.";

    int index;
    var result = instruction.HasComment(out index);

    Assert.IsTrue(result);
    // Assert.AreEqual takes (expected, actual); in this order a failure message reports the values correctly.
    Assert.AreEqual(0, index);
}
// A comment marker inside a string literal is ignored; the real trailing comment is located.
[TestMethod]
public void HasComment()
{
    // Arrange
    const string trailingComment = "'but this is one.";
    var code = "Debug.Print \"'this isn't a comment\" " + trailingComment;

    // Act
    int commentStart;
    var found = code.HasComment(out commentStart);

    // Assert
    Assert.IsTrue(found);
    Assert.AreEqual(trailingComment, code.Substring(commentStart));
}
There has to be a better way to implement this code - and perhaps even to test it.
Anything weird in sight?