//-----------------------------------------------------------------------
//
// Copyright (c) 2010 Deltares. All rights reserved.
//
// B.S.T. The
// tom.the@deltares.nl
// 04-11-2010
// n.a.
//-----------------------------------------------------------------------
namespace Deltares.Dam.Data
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
public static class CsvParser
{
/// <summary>
/// Splits the header line of a CSV import file into its individual column names.
/// </summary>
/// <param name="header">The header line; checked with ThrowHelper.ThrowIfStringArgumentNullOrEmpty</param>
/// <param name="splitter">The regular expression used to split the line; both the instance and its pattern text are checked</param>
/// <returns>An array of strings containing the header names</returns>
public static string[] ParseHeader(string header, Regex splitter)
{
    // Guard order matters for which message is reported: header text first, then the splitter.
    ThrowHelper.ThrowIfStringArgumentNullOrEmpty(header, StringResourceNames.CsvHeaderEmptyOrNull);
    ThrowHelper.ThrowIfArgumentNull(splitter, StringResourceNames.CsvSplitterPatternNotValid);

    // Regex.ToString() yields the pattern text the regex was constructed with.
    string pattern = splitter.ToString();
    ThrowHelper.ThrowIfStringArgumentNullOrEmpty(pattern, StringResourceNames.CsvSplitterPatternNotValid);

    string[] columnNames = splitter.Split(header);
    return columnNames;
}
/// <summary>
/// This regex splits comma or semicolon separated lists of optionally quoted strings. It handles quoted delimiters and escaped quotes.
/// Whitespace inside quotes is preserved, outside is eaten.
/// See: http://regexlib.com/REDetails.aspx?regexp_id=1325
/// </summary>
public static string CsvSplitPatternCommaAndSemicolonCombined =
@"\s*[;,]\s*(?!(?<=(?:^|[;,])\s*""(?:[^""]|""""|\\"")*[;,]\s*)(?:[^""]|""""|\\"")*""\s*(?:[;,]|$))";
/// <summary>
/// Same pattern as <see cref="CsvSplitPatternCommaAndSemicolonCombined"/>, with the delimiter
/// character class narrowed to the semicolon only. The overloads of LoadFromCsvFile in this
/// class that take no split pattern use this one.
/// </summary>
public static string CsvSplitPatternSemicolonOnly =
@"\s*[;]\s*(?!(?<=(?:^|[;])\s*""(?:[^""]|""""|\\"")*[;]\s*)(?:[^""]|""""|\\"")*""\s*(?:[;]|$))";
/// <summary>
/// Same pattern as <see cref="CsvSplitPatternCommaAndSemicolonCombined"/>, with the delimiter
/// character class narrowed to the comma only.
/// </summary>
public static string CsvSplitPatternCommaOnly =
@"\s*[,]\s*(?!(?<=(?:^|[,])\s*""(?:[^""]|""""|\\"")*[,]\s*)(?:[^""]|""""|\\"")*""\s*(?:[,]|$))";
/// <summary>
/// Loads entities of a specific type from a CSV file, asking for the first line to be read as the header.
/// </summary>
/// <typeparam name="T">The type to construct</typeparam>
/// <param name="setter">The object materializer (property setters)</param>
/// <param name="fileName">The file name to parse</param>
/// <returns>A sequence containing materialized instances of the type to construct from the CSV data</returns>
public static IEnumerable LoadFromCsvFile(this ObjectMaterializer setter, string fileName)
where T : new()
{
    // This convenience overload always requests header handling from the bool overload.
    const bool firstLineHoldsHeaders = true;
    return LoadFromCsvFile(setter, fileName, firstLineHoldsHeaders);
}
/// <summary>
/// Loads entities of a specific type from a CSV file. Also returns the parsed headers.
/// </summary>
/// <remarks>
/// The file is opened here once to read the header line (split with the semicolon-only pattern);
/// the data itself is loaded by the overload taking a header flag, which opens the file again.
/// </remarks>
/// <typeparam name="T">The type to construct</typeparam>
/// <param name="setter">The object materializer (property setters)</param>
/// <param name="fileName">The file name to parse</param>
/// <param name="headers">Receives the header names of the first line, or an empty array when the file has no lines</param>
/// <returns>A sequence containing materialized instances of the type to construct from the CSV data</returns>
public static IEnumerable LoadFromCsvFile(this ObjectMaterializer setter, string fileName, ref string[] headers)
where T : new()
{
    // Start from an empty header list; it is only replaced when a first line exists.
    headers = new string[0];

    var headerSplitter = new Regex(CsvSplitPatternSemicolonOnly, RegexOptions.IgnoreCase | RegexOptions.Compiled);
    using (var reader = new StreamReader(fileName))
    {
        // Only the first line is of interest here, so a single check replaces a loop.
        if (!reader.EndOfStream)
        {
            headers = ParseHeader(reader.ReadLine(), headerSplitter);
        }
    }

    // Do the actual load of the data
    const bool firstLineHoldsHeaders = true;
    return LoadFromCsvFile(setter, fileName, firstLineHoldsHeaders);
}
/// <summary>
/// Loads entities of a specific type from a CSV file using a caller-supplied split pattern,
/// asking for the first line to be read as the header.
/// </summary>
/// <typeparam name="T">The type to construct</typeparam>
/// <param name="setter">The object materializer (property setters)</param>
/// <param name="fileName">The file name to parse</param>
/// <param name="splitPattern">The regular expression for splitting the items in a string</param>
/// <returns>A sequence containing materialized instances of the type to construct from the CSV data</returns>
public static IEnumerable LoadFromCsvFile(this ObjectMaterializer setter, string fileName, string splitPattern)
where T : new()
{
    // This convenience overload always requests header handling from the full overload.
    const bool firstLineHoldsHeaders = true;
    return LoadFromCsvFile(setter, fileName, firstLineHoldsHeaders, splitPattern);
}
/// <summary>
/// Loads entities of a specific type from a CSV file, splitting each line with the
/// semicolon-only pattern.
/// </summary>
/// <typeparam name="T">The type to construct</typeparam>
/// <param name="setter">The object materializer (property setters)</param>
/// <param name="fileName">The file name to parse</param>
/// <param name="headerInfoInFirstLine">Indicates if the first line in the CSV file contains the header info</param>
/// <returns>A sequence containing materialized instances of the type to construct from the CSV data</returns>
public static IEnumerable LoadFromCsvFile(this ObjectMaterializer setter, string fileName, bool headerInfoInFirstLine)
where T : new()
{
    // Forward the caller's flag. It used to be replaced by a hard-coded 'true', so a file
    // without a header line had its first data row consumed as the header.
    return LoadFromCsvFile(setter, fileName, headerInfoInFirstLine, CsvSplitPatternSemicolonOnly);
}
/// <summary>
/// Loads entities of a specific type from a CSV file.
/// </summary>
/// <remarks>
/// This method is an iterator: argument validation, opening the file and parsing all happen
/// lazily, while the returned sequence is being enumerated. One entity is produced per data
/// line. When a header line is used, each item is applied through the setter registered under
/// its column name and columns without a mapping are skipped; without a header line the
/// setters are looked up by column index.
/// </remarks>
/// <typeparam name="T">The type to construct</typeparam>
/// <param name="materializer">The object materializer (property setters)</param>
/// <param name="fileName">The file name to parse</param>
/// <param name="headerInfoInFirstLine">Indicates if the first line in the CSV file contains the header info</param>
/// <param name="splitPattern">The regular expression for splitting the items in a string</param>
/// <returns>A sequence containing materialized instances of the type to construct from the CSV data</returns>
/// <exception cref="CsvParserException">
/// Thrown during enumeration when a required mapping key is missing from the header, when a
/// line has a different number of items than the first parsed line, or when applying a setter fails.
/// </exception>
public static IEnumerable LoadFromCsvFile(this ObjectMaterializer materializer, string fileName, bool headerInfoInFirstLine, string splitPattern)
where T : new()
{
    ThrowHelper.ThrowIfStringArgumentNullOrEmpty(fileName, StringResourceNames.CsvFileNotValid);
    ThrowHelper.ThrowIfFileNotExist(fileName, StringResourceNames.CsvFileNotFound);
    ThrowHelper.ThrowIfStringArgumentNullOrEmpty(splitPattern, StringResourceNames.CsvSplitterPatternNotValid);
    ThrowHelper.ThrowIfArgumentNull(materializer, StringResourceNames.CsvObjectMaterializerNotValid);
    // The condition passed here tests for a materializer without any registered mapping.
    ThrowHelper.ThrowWhenConditionIsTrue(
        materializer,
        StringResourceNames.CsvObjectMaterializerNotValid,
        c => materializer.Count == 0,
        r => new CsvParserException(ThrowHelper.GetResourceString(r)));

    var splitter = new Regex(splitPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
    using (var sr = new StreamReader(fileName))
    {
        var headers = new string[] { };
        var headerInfoHasBeenParsed = false;

        // The item count of the first parsed line (header or first data row) is the
        // reference every following line is checked against.
        var firstLineItemCount = 0;

        while (!sr.EndOfStream)
        {
            if (headerInfoInFirstLine && !headerInfoHasBeenParsed)
            {
                headers = ParseHeader(sr.ReadLine(), splitter);
                headerInfoHasBeenParsed = true;
                firstLineItemCount = headers.Length;

                // Every mapping marked as required must have a matching column in the header.
                foreach (string key in materializer.MappingKeys)
                {
                    if (materializer.IsRequired(key) && !headers.Contains(key))
                    {
                        throw new CsvParserException(string.Format("The mapped key '{0}' doesn't exist in the header of csv file '{1}'", key, fileName));
                    }
                }

                continue;
            }

            var items = splitter.Split(sr.ReadLine());
            if (firstLineItemCount == 0)
            {
                firstLineItemCount = items.Length;
            }

            // TODO: Add a message to some log that this line could not be parsed
            // For now just throw an exception
            if (items.Length != firstLineItemCount)
            {
                throw new CsvParserException(string.Format("The number of items parsed in the current line don't match with the number of the first line in csv file '{0}'", fileName));
            }

            var entity = new T();
            try
            {
                if (headers.Length > 0)
                {
                    for (var i = 0; i < items.Length; i++)
                    {
                        // Only call the materializer when there is a match between the
                        // columns declared in the csv and the materializer
                        if (materializer.MappingKeys.Contains(headers[i]))
                        {
                            materializer[headers[i]](entity, items[i]);
                        }
                    }
                }
                else
                {
                    // No header available: address the setters by column position.
                    for (var i = 0; i < items.Length; i++)
                    {
                        materializer[i](entity, items[i]);
                    }
                }
            }
            catch (Exception e)
            {
                // The quote around the file name is now closed; the original format string left it open.
                throw new CsvParserException(String.Format("Error parsing CSV file. Check if no empty lines or columns are defined in file '{0}'. (Parser error is: '{1}')", fileName, e.Message));
            }

            // 'yield return' cannot appear inside a try block that has a catch clause,
            // so the entity is handed out after the guarded section.
            yield return entity;
        }
    }
}
/// <summary>
/// Enumerates the lines of a <see cref="StreamReader"/> one at a time, starting at the
/// reader's current position.
/// </summary>
/// <param name="source">The reader to pull lines from</param>
/// <returns>A lazily evaluated sequence of the lines returned by ReadLine, ending when ReadLine returns null</returns>
/// <exception cref="ArgumentNullException">Thrown at call time when <paramref name="source"/> is null</exception>
public static IEnumerable Lines(this StreamReader source)
{
    // Validate outside the iterator block: code inside an iterator does not run until the
    // first MoveNext, which would postpone this exception to the first enumeration.
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    return LinesIterator(source);
}

// Iterator part of Lines: yields each line until the reader reports the end of the stream.
private static IEnumerable LinesIterator(StreamReader source)
{
    string line;
    while ((line = source.ReadLine()) != null)
    {
        yield return line;
    }
}
}
}