sinshu / numflat

A numerical computation library for C#
MIT License
22 stars 3 forks source link

Add ICA #4

Closed sinshu closed 5 months ago

sinshu commented 9 months ago

https://github.com/accord-net/framework/blob/development/Sources/Accord.Statistics/Analysis/IndependentComponentAnalysis.cs

sinshu commented 6 months ago

コア部分のコード https://github.com/accord-net/framework/blob/1ab0cc0ba55bcc3d46f20e7bbe7224b58cd01854/Sources/Accord.Statistics/Analysis/IndependentComponentAnalysis.cs#L691

sinshu commented 6 months ago
sinshu commented 5 months ago

https://github.com/akcarsten/Independent_Component_Analysis

sinshu commented 5 months ago

This naive implementation seems to be working 😁✨

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

namespace NumFlat.MultivariateAnalyses
{
    public sealed class IndependentComponentAnalysis : IVectorToVectorTransform<double>
    {
        private PrincipalComponentAnalysis pca;
        private int componentCount;

        public IndependentComponentAnalysis(IReadOnlyList<Vec<double>> xs, int componentCount)
        {
            this.pca = xs.Pca();
            this.componentCount = componentCount;

            var scale = pca.EigenValues.Map(Math.Sqrt);
            var whiten = xs.Select(x => pca.Transform(x).PointwiseDiv(scale)).ToArray();

            Console.WriteLine(whiten.Covariance());

            xs = whiten;

            // Vectors stored as rows.
            // Each row corresponds to a component.
            var random = new Random(57);
            var w = new Mat<double>(componentCount, componentCount);
            foreach (ref var value in w.Memory.Span)
            {
                value = 2 * random.NextDouble() - 1;
            }
            Console.WriteLine(w);

            Console.ReadKey();

            for (var ite = 0; ite < 30; ite++)
            {
                var svd = w.Svd();
                var u = svd.U;
                var sinv = svd.S.Select(s => 1 / s).ToDiagonalMatrix();
                var k = u * sinv * u.Transpose();
                w = k * w;
                Console.WriteLine(w);

                for (var c = 0; c < componentCount; c++)
                {
                    var wc = w.Rows[c];
                    var wxs = xs.Select(x => wc * x).ToArray();
                    var gwxs = wxs.Select(G).ToArray();
                    var gpwxs = wxs.Select(Gp).ToArray();
                    var e1 = xs.Zip(gwxs, (x, g) => x * g).Mean();
                    var e2 = gpwxs.Average();
                    var wc2 = e1 - e2 * wc;
                    wc2.CopyTo(wc);
                }
            }

            using (var writer = new StreamWriter("ica.csv"))
            {
                foreach (var y in xs.Select(x => w * x))
                {
                    writer.WriteLine(string.Join(',', y));
                }
            }
        }

        public void Transform(in Vec<double> source, in Vec<double> destination)
        {
        }

        /// <inheritdoc/>
        public int SourceDimension => pca.SourceDimension;

        /// <inheritdoc/>
        public int DestinationDimension => componentCount;

        private static double G(double value)
        {
            return Math.Tanh(value);
        }

        private static double Gp(double value)
        {
            var tanh = Math.Tanh(value);
            return 1 - tanh * tanh;
        }
    }
}
sinshu commented 5 months ago

収束判定 https://github.com/scikit-learn/scikit-learn/blob/329a1cf4bd46664fbf81806cde82a27f44b760e0/sklearn/decomposition/_fastica.py#L124

デフォルト値 tol=1e-4

sinshu commented 5 months ago

Python の実装では fit したベクトル集合を transform すると、次元ごとの二乗和が 1 になるような正規化が掛かる。そのため fit 対象の点数が多いほど transform したときのスケーリングは小さくなる。

sinshu commented 5 months ago

DONE 😁