//-----------------------------------------------------------------------
//
// Copyright (c) 2010 Deltares. All rights reserved.
//
// B.S.T. The
// tom.the@deltares.nl
// 04-11-2010
// n.a.
//-----------------------------------------------------------------------
namespace Deltares.Dam.Data
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.IO;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Helpers for reading delimiter separated (csv) files and turning each
    /// data row into an object by means of an <c>ObjectMaterializer</c>.
    /// </summary>
    public static class CsvParser
    {
        /// <summary>
        /// Parses the header of a csv import file.
        /// </summary>
        /// <param name="header">The header string</param>
        /// <param name="splitter">The regular expression used to split the header into names</param>
        /// <returns>An array of strings containing the header names</returns>
        /// <remarks>
        /// The header, the splitter and the splitter's pattern text are all
        /// checked through <c>ThrowHelper</c> before the split is performed.
        /// </remarks>
        public static string[] ParseHeader(string header, Regex splitter)
        {
            ThrowHelper.ThrowIfStringArgumentNullOrEmpty(header, StringResourceNames.CsvHeaderEmptyOrNull);
            ThrowHelper.ThrowIfArgumentNull(splitter, StringResourceNames.CsvSplitterPatternNotValid);
            ThrowHelper.ThrowIfStringArgumentNullOrEmpty(splitter.ToString(), StringResourceNames.CsvSplitterPatternNotValid);

            return splitter.Split(header);
        }

        /// <summary>
        /// This regex splits comma or semicolon separated lists of optionally quoted strings.
        /// It handles quoted delimiters and escaped quotes.
        /// Whitespace inside quotes is preserved, outside is eaten.
        /// See: http://regexlib.com/REDetails.aspx?regexp_id=1325
        /// </summary>
        public static string CsvSplitPatternCommaAndSemicolonCombined = @"\s*[;,]\s*(?!(?<=(?:^|[;,])\s*""(?:[^""]|""""|\\"")*[;,]\s*)(?:[^""]|""""|\\"")*""\s*(?:[;,]|$))";

        /// <summary>
        /// Same pattern as <see cref="CsvSplitPatternCommaAndSemicolonCombined"/>, but with
        /// the semicolon as the only delimiter.
        /// </summary>
        public static string CsvSplitPatternSemicolonOnly = @"\s*[;]\s*(?!(?<=(?:^|[;])\s*""(?:[^""]|""""|\\"")*[;]\s*)(?:[^""]|""""|\\"")*""\s*(?:[;]|$))";

        /// <summary>
        /// Same pattern as <see cref="CsvSplitPatternCommaAndSemicolonCombined"/>, but with
        /// the comma as the only delimiter.
        /// </summary>
        public static string CsvSplitPatternCommaOnly = @"\s*[,]\s*(?!(?<=(?:^|[,])\s*""(?:[^""]|""""|\\"")*[,]\s*)(?:[^""]|""""|\\"")*""\s*(?:[,]|$))";

        /// <summary>
        /// Loads entities of a specific type from a csv file. The first line is treated
        /// as the header line and the semicolon-only split pattern is used.
        /// </summary>
        /// <typeparam name="T">The type to construct</typeparam>
        /// <param name="setter">The object materializer (property setters)</param>
        /// <param name="fileName">The file name to parse</param>
        /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
        public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName) where T : new()
        {
            return LoadFromCsvFile(setter, fileName, true);
        }

        /// <summary>
        /// Loads entities of a specific type from a csv file. Also returns the parsed headers.
        /// </summary>
        /// <typeparam name="T">The type to construct</typeparam>
        /// <param name="setter">The object materializer (property setters)</param>
        /// <param name="fileName">The file name to parse</param>
        /// <param name="headers">
        /// Receives the names found on the first line of the file (split with
        /// <see cref="CsvSplitPatternSemicolonOnly"/>), or an empty array when the file
        /// contains no lines.
        /// </param>
        /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
        /// <remarks>
        /// The header line is read immediately by this call. The data itself is loaded
        /// by the header-aware overload, which opens the file a second time when the
        /// returned sequence is enumerated.
        /// </remarks>
        public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName, ref string[] headers) where T : new()
        {
            // Assemble headers that are available in headerline
            headers = new string[] { };
            var splitter = new Regex(CsvSplitPatternSemicolonOnly, RegexOptions.IgnoreCase | RegexOptions.Compiled);

            using (var sr = new StreamReader(fileName))
            {
                // Only the first line is of interest here; an empty file leaves
                // the headers array empty.
                if (!sr.EndOfStream)
                {
                    headers = ParseHeader(sr.ReadLine(), splitter);
                }
            }

            // Do the actual load of the data
            return LoadFromCsvFile(setter, fileName, true);
        }

        /// <summary>
        /// Loads entities of a specific type from a csv file, using a custom split pattern.
        /// The first line is treated as the header line.
        /// </summary>
        /// <typeparam name="T">The type to construct</typeparam>
        /// <param name="setter">The object materializer (property setters)</param>
        /// <param name="fileName">The file name to parse</param>
        /// <param name="splitPattern">The regular expression for splitting the items in a string</param>
        /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
        public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName, string splitPattern) where T : new()
        {
            return LoadFromCsvFile(setter, fileName, true, splitPattern);
        }

        /// <summary>
        /// Loads entities of a specific type from a csv file, using the semicolon-only split pattern.
        /// </summary>
        /// <typeparam name="T">The type to construct</typeparam>
        /// <param name="setter">The object materializer (property setters)</param>
        /// <param name="fileName">The file name to parse</param>
        /// <param name="headerInfoInFirstLine">Indicates if the first line in the csv file contains the header info</param>
        /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
        public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName, bool headerInfoInFirstLine) where T : new()
        {
            // Forward the caller's choice; it must not be replaced by a fixed value.
            return LoadFromCsvFile(setter, fileName, headerInfoInFirstLine, CsvSplitPatternSemicolonOnly);
        }

        /// <summary>
        /// Loads entities of a specific type from a csv file.
        /// </summary>
        /// <typeparam name="T">The type to construct</typeparam>
        /// <param name="materializer">The object materializer (property setters)</param>
        /// <param name="fileName">The file name to parse</param>
        /// <param name="headerInfoInFirstLine">Indicates if the first line in the csv file contains the header info</param>
        /// <param name="splitPattern">The regular expression for splitting the items in a string</param>
        /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
        /// <exception cref="CsvParserException">
        /// Thrown when the materializer has no mappings, when a required mapped key is
        /// missing from the header, when a row has a different number of items than the
        /// first line, or when the materializer fails on a row.
        /// </exception>
        /// <remarks>
        /// This is an iterator method: argument validation, opening the file and all
        /// parsing are deferred until the returned sequence is enumerated, and the file
        /// is read again on every enumeration.
        /// When a header is present, columns are matched to the materializer by header
        /// name and unmapped columns are skipped; without a header, columns are matched
        /// by position.
        /// </remarks>
        public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> materializer, string fileName, bool headerInfoInFirstLine, string splitPattern) where T : new()
        {
            ThrowHelper.ThrowIfStringArgumentNullOrEmpty(fileName, StringResourceNames.CsvFileNotValid);
            ThrowHelper.ThrowIfFileNotExist(fileName, StringResourceNames.CsvFileNotFound);
            ThrowHelper.ThrowIfStringArgumentNullOrEmpty(splitPattern, StringResourceNames.CsvSplitterPatternNotValid);
            ThrowHelper.ThrowIfArgumentNull(materializer, StringResourceNames.CsvObjectMaterializerNotValid);
            ThrowHelper.ThrowWhenConditionIsTrue(
                materializer,
                StringResourceNames.CsvObjectMaterializerNotValid,
                c => materializer.Count == 0,
                r => new CsvParserException(ThrowHelper.GetResourceString(r)));

            var splitter = new Regex(splitPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);

            using (var sr = new StreamReader(fileName))
            {
                var headers = new string[] { };
                var headerInfoHasBeenParsed = false;

                // The first line item count will be used as an extra check for each
                // next row to parse. If the count doesn't match, an exception is thrown.
                var firstLineItemCount = 0;

                while (!sr.EndOfStream)
                {
                    if (headerInfoInFirstLine && !headerInfoHasBeenParsed)
                    {
                        headers = ParseHeader(sr.ReadLine(), splitter);
                        headerInfoHasBeenParsed = true;
                        firstLineItemCount = headers.Length;

                        // Every key the materializer marks as required must be
                        // present as a column in the header.
                        foreach (string key in materializer.MappingKeys)
                        {
                            if (materializer.IsRequired(key) && !headers.Contains(key))
                            {
                                throw new CsvParserException(string.Format("The mapped key '{0}' doesn't exist in the header of csv file '{1}'", key, fileName));
                            }
                        }

                        continue;
                    }

                    var items = splitter.Split(sr.ReadLine());

                    // Without a header line the first data row defines the expected item count.
                    if (firstLineItemCount == 0)
                    {
                        firstLineItemCount = items.Length;
                    }

                    // TODO: Add a message to some log that this line could not be parsed
                    // For now just throw an exception
                    if (items.Length != firstLineItemCount)
                    {
                        throw new CsvParserException(string.Format("The number of items parsed in the current line don't match with the number of the first line in csv file '{0}'", fileName));
                    }

                    var entity = new T();
                    try
                    {
                        if (headers.Length > 0)
                        {
                            for (var i = 0; i < items.Length; i++)
                            {
                                // Only call the materializer when there is a match between the
                                // columns declared in the csv and the materializer
                                if (materializer.MappingKeys.Contains(headers[i]))
                                {
                                    materializer[headers[i]](entity, items[i]);
                                }
                            }
                        }
                        else
                        {
                            // No header available: address the setters by column position.
                            for (var i = 0; i < items.Length; i++)
                            {
                                materializer[i](entity, items[i]);
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        throw new CsvParserException(String.Format("Error parsing CSV file. Check if no empty lines or columns are defined in file '{0}'. (Parser error is: '{1}')", fileName, e.Message));
                    }

                    yield return entity;
                }
            }
        }

        /// <summary>
        /// Enumerates the remaining lines of a reader, one line at a time.
        /// </summary>
        /// <param name="source">The reader to read the lines from</param>
        /// <returns>A sequence with every line read until <c>ReadLine</c> returns null</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="source"/> is null</exception>
        /// <remarks>
        /// This is an iterator method, so the null check and the reading only happen
        /// once the returned sequence is enumerated.
        /// </remarks>
        public static IEnumerable<string> Lines(this StreamReader source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            string line;
            while ((line = source.ReadLine()) != null)
            {
                yield return line;
            }
        }
    }
}