cjkoester / chris.koester.io.comments

Repo issues are used as comment system for chris.koester.io blog
0 stars 0 forks source link

Combine CSV Files using C# #5

Open utterances-bot opened 2 years ago

utterances-bot commented 2 years ago

Combine CSV Files using C# | Chris Koester

Combine multiple CSV Files without duplicating header rows using C#

https://thankful-cliff-0ea3c960f.azurestaticapps.net/index.php/2017/01/27/combine-csv-files/

cjkoester commented 2 years ago

The comment and code below was migrated from the Wordpress version of this post, and is from a commenter named DECLAN. I added the using directives and made a small code change from commenter AMELIAATLINWASIS so that it would compile. I have not tested the code.

Not a faster way but if the columns of the different csv files are partially mismatched then the below will solve the problem. Also included are the helper classes containing the methods to move the data from column to row-based format then to csv format.

using System.IO;
using System.Linq;
using System.Collections.Generic;
using System.Data;
using System.Text;

public static class CSVHelpers
{
    public static void CombineCsvFiles(string sourceFolder, string destinationFile, string searchPattern = "*.csv", bool isMismatched = false)
    {
        // Specify wildcard search to match CSV files that will be combined
        string[] filePaths = Directory.GetFiles(sourceFolder, searchPattern);
        if (isMismatched)
            CombineMisMatchedCsvFiles(filePaths, destinationFile);
        else
            CombineCsvFiles(filePaths, destinationFile);
    }
    public static void CombineCsvFiles(string[] filePaths, string destinationFile)
    {
        StreamWriter fileDest = new StreamWriter(destinationFile, true);
        int i;
        for (i = 0; i < filePaths.Length; i++)
        {
            var file = filePaths[i];
            string[] lines = File.ReadAllLines(file);
            // Skip header row for all but first file
            if (i > 0)
            {
                lines = lines.Skip(1).ToArray();
            }
            foreach (string line in lines)
            {
                fileDest.WriteLine(line);
            }
        }
        fileDest.Close();
    }
    public static void CombineMisMatchedCsvFiles(string[] filePaths, string destinationFile, char splitter = ',')
    {
        HashSet<string> combinedheaders = new HashSet<string>();
        int i;
        // aggregate headers
        for (i = 0; i < filePaths.Length; i++)
        {
            string file = filePaths[i];
            combinedheaders.UnionWith(File.ReadLines(file).First().Split(splitter));
        }
        var hdict = combinedheaders.ToDictionary(y => y, y => new List<object>());
        string[] combinedHeadersArray = combinedheaders.ToArray();
        for (i = 0; i < filePaths.Length; i++)
        {
            var fileheaders = File.ReadLines(filePaths[i]).First().Split(splitter);
            var notfileheaders = combinedheaders.Except(fileheaders);
            File.ReadLines(filePaths[i]).Skip(1).Select(line => line.Split(splitter)).ToList().ForEach(spline =>
            {
                for (int j = 0; j < fileheaders.Length; j++)
                {
                    hdict[fileheaders[j]].Add(spline[j]);
                }
                foreach (string header in notfileheaders)
                {
                    hdict[header].Add(null);
                }
            });
        }
        DataTable dt = hdict.ToDataTable();
        dt.ToCSV(destinationFile);
    }
}
public static class DataTableHelper
{
    public static DataTable ToDataTable(this Dictionary<string, List<object>> dict)
    {
        DataTable dataTable = new DataTable();
        dataTable.Columns.AddRange(dict.Keys.Select(c => new DataColumn(c)).ToArray());
        for (int i = 0; i < dict.Values.Max(item => item.Count()); i++)
        {
            DataRow dataRow = dataTable.NewRow();
            foreach (var key in dict.Keys)
            {
                if (dict[key].Count > i)
                    dataRow[key] = dict[key][i];
            }
            dataTable.Rows.Add(dataRow);
        }
        return dataTable;
    }
    public static void ToCSV(this DataTable dt, string destinationfile)
    {
        StringBuilder sb = new StringBuilder();
        IEnumerable<string> columnNames = dt.Columns.Cast<DataColumn>().
                                          Select(column => column.ColumnName);
        sb.AppendLine(string.Join(",", columnNames));
        foreach (DataRow row in dt.Rows)
        {
            IEnumerable<string> fields = row.ItemArray.Select(field => field.ToString());
            sb.AppendLine(string.Join(",", fields));
        }
        File.WriteAllText(destinationfile, sb.ToString());
    }
}