Open paillave opened 2 years ago
Here is a sample to read excel:
using System;
using System.Linq;
using Paillave.Etl.Core;
using Paillave.Etl.ExcelFile;
using Paillave.Etl.FileSystem;
using System.Data;
namespace Sample;
class Program
{
static void Main(string[] args)
{
// The most convenient way
StreamProcessRunner.Create<string>(triggerStream
=> triggerStream
.CrossApplyFolderFiles("Get all excel files", "*.xlsx")
.CrossApplyExcelSheets("Get all excel sheets")
.CrossApplyExcelRows("get rows", o => o
.UseMap(m => new
{
A = m.ToNumberColumn<int>("a", "."),
B = m.ToColumn("b"),
C = m.ToNumberColumn<int>("c", ".")
})
.HasColumnHeader("A1:C1")
.WithDataset("A2:C2"))
.Do("show on screen", i => Console.WriteLine($"First way: {i.A}\t{i.B}\t{i.C}")))
.ExecuteAsync(args[0])
.Wait();
// The fastest way to read a large excel file
// This RegisterProvider is required for fast excel reader
System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
StreamProcessRunner.Create<string>(triggerStream
=> triggerStream
.CrossApplyFolderFiles("Get all excel files", "*.xlsx")
.CrossApplyExcelDataTables("Get all excel data", dt => dt
.AsEnumerable()
.Skip(1)
.Select(i => new
{
A = i.Field<double>(0),
B = i.Field<string>(1),
C = i.Field<double>(2),
}))
.Do("show on screen", i => Console.WriteLine($"Second way: {i.A}\t{i.B}\t{i.C}")))
.ExecuteAsync(args[0])
.Wait();
}
}
Thanks for the reply. Exactly what I needed
Hi, is it possible to read only one sheet?
Like .CrossApplyExcelSheets("Get all excel sheets", [insert here name of sheet])
Hi, is it possible to read only one sheet?
Like .CrossApplyExcelSheets("Get all excel sheets", [insert here name of sheet])
Of course you can:
using System;
using System.Linq;
using Paillave.Etl.Core;
using Paillave.Etl.ExcelFile;
using Paillave.Etl.FileSystem;
using System.Data;
namespace Sample;
class Program
{
static void Main(string[] args)
{
// The most convenient way
StreamProcessRunner.Create<string>(triggerStream
=> triggerStream
.CrossApplyFolderFiles("Get all excel files", "*.xlsx")
.CrossApplyExcelSheets("Get all excel sheets")
.Where("Keep only the sheet I'm interested in", i => i.Name == "TheSheetNameImInterestedIn")
.CrossApplyExcelRows("get rows", o => o
.UseMap(m => new
{
A = m.ToNumberColumn<int>("a", "."),
B = m.ToColumn("b"),
C = m.ToNumberColumn<int>("c", ".")
})
.HasColumnHeader("A1:C1")
.WithDataset("A2:C2"))
.Do("show on screen", i => Console.WriteLine($"First way: {i.A}\t{i.B}\t{i.C}")))
.ExecuteAsync(args[0])
.Wait();
// The fastest way to read a large excel file
// This RegisterProvider is required for fast excel reader
System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
StreamProcessRunner.Create<string>(triggerStream
=> triggerStream
.CrossApplyFolderFiles("Get all excel files", "*.xlsx")
.CrossApplyExcelDataTables("Get all excel data", dt =>
{
if(dt.TableName != "TheSheetNameImInterestedIn")
{
// I recon this is the most terrible hack.
// In the next version, returning a null won't cause any exception.
return new []{new
{
A = 0d,
B = "",
C = 0d,
}}.Take(0);
}
return dt
.AsEnumerable()
.Skip(1)
.Select(i => new
{
A = i.Field<double>(0),
B = i.Field<string>(1),
C = i.Field<double>(2),
});
}
)
.Do("show on screen", i => Console.WriteLine($"Second way: {i.A}\t{i.B}\t{i.C}")))
.ExecuteAsync(args[0])
.Wait();
}
}
Thank you so much
Discussed in https://github.com/paillave/Etl.Net/discussions/375