// Copyright (C) Stichting Deltares 2026. All rights reserved.
//
// This file is part of the application DAM - UI.
//
// DAM - UI is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// All names, logos, and references to "Deltares" are registered trademarks of
// Stichting Deltares and remain full property of Stichting Deltares at all times.
// All rights reserved.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace Deltares.Dam.Data;

/// <summary>
/// Parses a csv file and returns a sequence of entities of a specific type
/// </summary>
public static class CsvParser
{
    /// <summary>
    /// This regex splits semicolon separated lists of optionally quoted strings. It handles quoted delimiters and escaped
    /// quotes.
    /// Whitespace inside quotes is preserved, outside is eaten.
    /// See: http://regexlib.com/REDetails.aspx?regexp_id=1325
    /// </summary>
    public static readonly string CsvSplitPatternSemicolonOnly = @"\s*[;]\s*(?!(?<=(?:^|[;])\s*""(?:[^""]|""""|\\"")*[;]\s*)(?:[^""]|""""|\\"")*""\s*(?:[;]|$))";

    /// <summary>
    /// Parses the header of a csv import file
    /// </summary>
    /// <param name="header">The header string</param>
    /// <param name="splitter">The regular expression used to split the header into its items</param>
    /// <returns>An array of strings containing the header names</returns>
    /// <exception cref="CsvParserException">
    /// Thrown when one of the argument checks fails; the message of the original exception is reused
    /// </exception>
    public static string[] ParseHeader(string header, Regex splitter)
    {
        try
        {
            ThrowHelper.ThrowIfStringArgumentNullOrEmpty(header, StringResourceNames.CsvHeaderNullOrEmpty);
            ThrowHelper.ThrowIfArgumentNull(splitter, "splitter", StringResourceNames.CsvSplitterPatternNotValid);
            ThrowHelper.ThrowIfStringArgumentNullOrEmpty(splitter.ToString(), StringResourceNames.CsvSplitterPatternNotValid);
        }
        catch (Exception e)
        {
            // Callers of the parser only have to deal with one exception type
            throw new CsvParserException(e.Message);
        }

        return splitter.Split(header);
    }

    /// <summary>
    /// Loads entities of a specific type from a csv file.
    /// The first line is treated as the header and items are split with <see cref="CsvSplitPatternSemicolonOnly"/>
    /// </summary>
    /// <typeparam name="T">The type to construct</typeparam>
    /// <param name="setter">The object materializer (property setters)</param>
    /// <param name="fileName">The file name to parse</param>
    /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
    public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName)
        where T : new()
    {
        return setter.LoadFromCsvFile(fileName, true);
    }

    /// <summary>
    /// Loads entities of a specific type from a csv file. Also returns the parsed headers
    /// </summary>
    /// <remarks>
    /// The header line is read immediately when this method is called; the data rows are
    /// only read when the returned sequence is enumerated.
    /// </remarks>
    /// <typeparam name="T">The type to construct</typeparam>
    /// <param name="setter">The object materializer (property setters)</param>
    /// <param name="fileName">The file name to parse</param>
    /// <param name="headers">
    /// Receives the header names found in the first line of the file, or an empty array when the file has no lines
    /// </param>
    /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
    public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName, ref string[] headers)
        where T : new()
    {
        // Assemble headers that are available in the header line
        headers = Array.Empty<string>();
        var splitter = new Regex(CsvSplitPatternSemicolonOnly, RegexOptions.IgnoreCase | RegexOptions.Compiled);
        using (var sr = new StreamReader(fileName))
        {
            // Only the first line is of interest here
            if (!sr.EndOfStream)
            {
                headers = ParseHeader(sr.ReadLine(), splitter);
            }
        }

        // Do the actual load of the data
        return setter.LoadFromCsvFile(fileName, true);
    }

    /// <summary>
    /// Loads entities of a specific type from a csv file.
    /// The first line is treated as the header
    /// </summary>
    /// <typeparam name="T">The type to construct</typeparam>
    /// <param name="setter">The object materializer (property setters)</param>
    /// <param name="fileName">The file name to parse</param>
    /// <param name="splitPattern">The regular expression for splitting the items in a string</param>
    /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
    public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName, string splitPattern)
        where T : new()
    {
        return setter.LoadFromCsvFile(fileName, true, splitPattern);
    }

    /// <summary>
    /// Loads entities of a specific type from a csv file.
    /// Items are split with <see cref="CsvSplitPatternSemicolonOnly"/>
    /// </summary>
    /// <typeparam name="T">The type to construct</typeparam>
    /// <param name="setter">The object materializer (property setters)</param>
    /// <param name="fileName">The file name to parse</param>
    /// <param name="headerInfoInFirstLine">Indicates if the first line in the csv file contains the header info</param>
    /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
    public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> setter, string fileName, bool headerInfoInFirstLine)
        where T : new()
    {
        // Forward the caller's choice; a hard-coded 'true' here would silently
        // consume the first data row as a header for files without a header line
        return setter.LoadFromCsvFile(fileName, headerInfoInFirstLine, CsvSplitPatternSemicolonOnly);
    }

    /// <summary>
    /// Loads entities of a specific type from a csv file
    /// </summary>
    /// <remarks>
    /// This method is an iterator: the argument checks and all file access are executed when
    /// the returned sequence is enumerated, not when the method is called.
    /// When header info is available, an item is only passed to the materializer if its column name
    /// is one of the mapping keys; without header info the items are passed by column index.
    /// </remarks>
    /// <typeparam name="T">The type to construct</typeparam>
    /// <param name="materializer">The object materializer (property setters)</param>
    /// <param name="fileName">The file name to parse</param>
    /// <param name="headerInfoInFirstLine">Indicates if the first line in the csv file contains the header info</param>
    /// <param name="splitPattern">The regular expression for splitting the items in a string</param>
    /// <returns>A sequence containing materialized instances of the type to construct from the csv data</returns>
    /// <exception cref="CsvParserException">
    /// Thrown when the materializer contains no mappings, when a required mapping key is missing in the header,
    /// when a line holds a different number of items than the first line, or when materializing an item fails
    /// </exception>
    public static IEnumerable<T> LoadFromCsvFile<T>(this ObjectMaterializer<T> materializer, string fileName, bool headerInfoInFirstLine, string splitPattern)
        where T : new()
    {
        ThrowHelper.ThrowIfStringArgumentNullOrEmpty(fileName, StringResourceNames.CsvFileNotValid);
        ThrowHelper.ThrowIfFileNotExist(fileName, StringResourceNames.CsvFileNotFound);
        ThrowHelper.ThrowIfStringArgumentNullOrEmpty(splitPattern, StringResourceNames.CsvSplitterPatternNotValid);
        ThrowHelper.ThrowIfArgumentNull(materializer, nameof(materializer), StringResourceNames.CsvObjectMaterializerNotValid);
        ThrowHelper.ThrowWhenConditionIsTrue(
            materializer,
            StringResourceNames.CsvObjectMaterializerNotValid,
            _ => materializer.Count == 0,
            r => new CsvParserException(ThrowHelper.GetResourceString(r)));

        var splitter = new Regex(splitPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
        using (var sr = new StreamReader(fileName))
        {
            string[] headers = Array.Empty<string>();
            var headerInfoHasBeenParsed = false;

            // The first line item count will be used as an extra check for each
            // next row to parse. If the count doesn't match, an exception is thrown
            var firstLineItemCount = 0;

            while (!sr.EndOfStream)
            {
                if (headerInfoInFirstLine && !headerInfoHasBeenParsed)
                {
                    headers = ParseHeader(sr.ReadLine(), splitter);
                    headerInfoHasBeenParsed = true;
                    firstLineItemCount = headers.Length;

                    // Every required mapping must have a matching column in the file
                    foreach (string key in materializer.MappingKeys)
                    {
                        if (materializer.IsRequired(key) && !headers.Contains(key))
                        {
                            throw new CsvParserException($"The mapped key '{key}' doesn't exist in the header of csv file '{fileName}'");
                        }
                    }

                    continue;
                }

                string[] items = splitter.Split(sr.ReadLine());

                // Without a header line the first data line determines the expected item count
                if (firstLineItemCount == 0)
                {
                    firstLineItemCount = items.Length;
                }

                // TODO: Add a message to some log that this line could not be parsed
                // For now just throw an exception
                if (items.Length != firstLineItemCount)
                {
                    throw new CsvParserException($"The number of items parsed in the current line don't match with the number of the first line in csv file '{fileName}'");
                }

                var entity = new T();
                try
                {
                    if (headers.Length > 0)
                    {
                        // items.Length equals headers.Length here (checked above),
                        // so indexing headers with the item index is safe
                        for (var i = 0; i < items.Length; i++)
                        {
                            // Only call the materializer when there is a match between the
                            // columns declared in the csv and the materializer
                            if (materializer.MappingKeys.Contains(headers[i]))
                            {
                                materializer[headers[i]](entity, items[i]);
                            }
                        }
                    }
                    else
                    {
                        for (var i = 0; i < items.Length; i++)
                        {
                            materializer[i](entity, items[i]);
                        }
                    }
                }
                catch (Exception e)
                {
                    throw new CsvParserException($"Error parsing CSV file. Check if no empty lines or columns are defined in file '{fileName}. (Parser error is: '{e.Message}')");
                }

                yield return entity;
            }
        }
    }
}