Open utterances-bot opened 2 years ago
The comment and code below was migrated from the Wordpress version of this post, and is from a commenter named DECLAN. I added the using directives and made a small code change from commenter AMELIAATLINWASIS so that it would compile. I have not tested the code.
Not a faster way but if the columns of the different csv files are partially mismatched then the below will solve the problem. Also included are the helper classes containing the methods to move the data from column to row-based format then to csv format.
using System.IO;
using System.Linq;
using System.Collections.Generic;
using System.Data;
using System.Text;
public static class CSVHelpers
{
public static void CombineCsvFiles(string sourceFolder, string destinationFile, string searchPattern = "*.csv", bool isMismatched = false)
{
// Specify wildcard search to match CSV files that will be combined
string[] filePaths = Directory.GetFiles(sourceFolder, searchPattern);
if (isMismatched)
CombineMisMatchedCsvFiles(filePaths, destinationFile);
else
CombineCsvFiles(filePaths, destinationFile);
}
public static void CombineCsvFiles(string[] filePaths, string destinationFile)
{
StreamWriter fileDest = new StreamWriter(destinationFile, true);
int i;
for (i = 0; i < filePaths.Length; i++)
{
var file = filePaths[i];
string[] lines = File.ReadAllLines(file);
// Skip header row for all but first file
if (i > 0)
{
lines = lines.Skip(1).ToArray();
}
foreach (string line in lines)
{
fileDest.WriteLine(line);
}
}
fileDest.Close();
}
public static void CombineMisMatchedCsvFiles(string[] filePaths, string destinationFile, char splitter = ',')
{
HashSet<string> combinedheaders = new HashSet<string>();
int i;
// aggregate headers
for (i = 0; i < filePaths.Length; i++)
{
string file = filePaths[i];
combinedheaders.UnionWith(File.ReadLines(file).First().Split(splitter));
}
var hdict = combinedheaders.ToDictionary(y => y, y => new List<object>());
string[] combinedHeadersArray = combinedheaders.ToArray();
for (i = 0; i < filePaths.Length; i++)
{
var fileheaders = File.ReadLines(filePaths[i]).First().Split(splitter);
var notfileheaders = combinedheaders.Except(fileheaders);
File.ReadLines(filePaths[i]).Skip(1).Select(line => line.Split(splitter)).ToList().ForEach(spline =>
{
for (int j = 0; j < fileheaders.Length; j++)
{
hdict[fileheaders[j]].Add(spline[j]);
}
foreach (string header in notfileheaders)
{
hdict[header].Add(null);
}
});
}
DataTable dt = hdict.ToDataTable();
dt.ToCSV(destinationFile);
}
}
public static class DataTableHelper
{
public static DataTable ToDataTable(this Dictionary<string, List<object>> dict)
{
DataTable dataTable = new DataTable();
dataTable.Columns.AddRange(dict.Keys.Select(c => new DataColumn(c)).ToArray());
for (int i = 0; i < dict.Values.Max(item => item.Count()); i++)
{
DataRow dataRow = dataTable.NewRow();
foreach (var key in dict.Keys)
{
if (dict[key].Count > i)
dataRow[key] = dict[key][i];
}
dataTable.Rows.Add(dataRow);
}
return dataTable;
}
public static void ToCSV(this DataTable dt, string destinationfile)
{
StringBuilder sb = new StringBuilder();
IEnumerable<string> columnNames = dt.Columns.Cast<DataColumn>().
Select(column => column.ColumnName);
sb.AppendLine(string.Join(",", columnNames));
foreach (DataRow row in dt.Rows)
{
IEnumerable<string> fields = row.ItemArray.Select(field => field.ToString());
sb.AppendLine(string.Join(",", fields));
}
File.WriteAllText(destinationfile, sb.ToString());
}
}
Combine CSV Files using C# | Chris Koester
Combine multiple CSV Files without duplicating header rows using C#
https://thankful-cliff-0ea3c960f.azurestaticapps.net/index.php/2017/01/27/combine-csv-files/