Over the past couple of weeks I had been writing a utility for a friend of mine that involves parsing a CSV file. One of the fields in the file was for song names. Song names, as you know, can contain commas in them, so a simple split on the comma was not viable.
The great thing about CSV files is that when a field itself contains a comma, the entire field is "surrounded, in quotation marks". This feature allows us to write code that splits on the commas while ignoring those between quotes, and adds the results to a List<string>.
Here is the code I wrote to accomplish this. You will want to read the CSV file line by line and pass each line into the ParseLine(string text) method.
/// <summary>
/// Scanner state tracked by GetWord while it reads the characters of one field.
/// </summary>
private enum EntryType
{
/// <summary>Reading unquoted text; a comma ends the field.</summary>
Word,
/// <summary>
/// NOTE(review): no visible code ever assigns this state; confirm whether it is still needed.
/// </summary>
Comma,
/// <summary>Reading inside quotation marks; commas are kept as part of the field.</summary>
Quote
}
/// <summary>
/// Splits one line of CSV text into its fields, keeping commas that appear
/// inside quoted fields.
/// </summary>
/// <param name="text">A single line read from a CSV file.</param>
/// <returns>
/// The fields of the line, in order. An empty line yields an empty list.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="text"/> is null.
/// </exception>
public IList<string> ParseLine(string text)
{
    if (text == null)
    {
        throw new System.ArgumentNullException("text");
    }

    IList<string> vals = new List<string>();

    // Nothing to parse; also keeps the trailing-comma check below from
    // indexing position -1 of an empty string.
    if (text.Length == 0)
    {
        return vals;
    }

    int i = 0;
    while (i < text.Length)
    {
        vals.Add(GetWord(text, ref i));
    }

    // GetWord consumes the comma that ends a field, so when the line itself
    // ends with a comma the loop exits before the final (empty) field is
    // produced. Add that field here.
    if (text[text.Length - 1] == ',')
    {
        vals.Add(string.Empty);
    }

    return vals;
}
/// <summary>
/// Reads a single field from <paramref name="text"/>, starting at
/// <paramref name="position"/>.
/// </summary>
/// <param name="text">The CSV line being parsed.</param>
/// <param name="position">
/// Index of the first character of the field. On return it points just past
/// the comma that ended the field, or equals <c>text.Length</c> when the end
/// of the line was reached.
/// </param>
/// <returns>
/// The field's content with the surrounding quotation marks removed. Inside
/// a quoted section commas are kept, and a doubled quotation mark (<c>""</c>)
/// is read as one literal quotation mark.
/// </returns>
private string GetWord(string text, ref int position)
{
    EntryType entry = EntryType.Word;
    System.Text.StringBuilder word = new System.Text.StringBuilder();

    while (position < text.Length)
    {
        char letter = text[position];
        position++;

        switch (letter)
        {
            case ',':
                if (entry == EntryType.Quote)
                {
                    // A comma inside quotes is data, not a delimiter.
                    word.Append(letter);
                    break;
                }

                // Delimiter: the field is complete and the comma is consumed.
                return word.ToString();

            case '"':
                if (entry != EntryType.Quote)
                {
                    entry = EntryType.Quote; // opening quote
                }
                else if (position < text.Length && text[position] == '"')
                {
                    // Doubled quote inside a quoted section is an escaped quote.
                    word.Append('"');
                    position++;
                }
                else
                {
                    // Closing quote. Keep scanning instead of returning so the
                    // delimiter comma that follows is consumed by this call;
                    // returning here would make the next call see that comma
                    // first and report a phantom empty field.
                    entry = EntryType.Word;
                }
                break;

            default:
                word.Append(letter);
                break;
        }
    }

    // End of line reached without a delimiter: this is the last field.
    return word.ToString();
}
Of course, a week after I wrote this, my Google Reader presents me with a post called LINQ to CSV using DynamicObject and TextFieldParser. It looks like I'll try refactoring the utility at some point in the near future.
But before that, I have at least one more post I wish to write about this utility.