pgf-tikz / pgfplots

pgfplots - A TeX package to draw normal and/or logarithmic plots directly in TeX in two and three dimensions with a user-friendly interface and pgfplotstable - a TeX package to round and format numerical tables. Examples in manuals and/or on web site.
http://pgfplots.sourceforge.net/
196 stars 35 forks source link

linear regression log mode doesn't correctly compute exponential regressions #352

Closed JasonGross closed 3 years ago

JasonGross commented 4 years ago

I suspect the issue is that linear regression in log mode doesn't apply log-scaling on the variances (and thus is drastically overweighting points closer to zero), but it should. Here's a test-case:

\documentclass{article}
\usepackage[margin=0.5in]{geometry}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.17}
\usepackage{gnuplottex}
\usepackage{pgfkeys}
\usepackage{pgfplotstable}
\usepackage{xparse}
\usepackage{xstring}
\makeatletter
% https://tex.stackexchange.com/a/50113/2066
\newcommand*{\IsInteger}[3]{%
    \IfStrEq{#1}{ }{%
        #3% is a blank string
    }{%
    \IfInteger{#1}{#2}{#3}%
}%
}%
\newcommand{\pgftognucolumnset}[2]{%
    \IsInteger{\pgfkeysvalueof{#1}}{%
        % pgf 0-indexes columns, while gnuplot 1-indexes columns, so we add 1 to adjust
        \edef#2{\the\numexpr\pgfkeysvalueof{#1}+1\relax}%
    }{%
    \edef#2{(column("\pgfkeysvalueof{#1}"))}%
}%
}
\makeatletter
% \addplotexponentialregression[params for \addplot][default settings for a and b, also for x and y columns]{table file}
\NewDocumentCommand{\addplotexponentialregression}{ O{no markers} o m}{%
    \pgfkeyssetvalue{/addplotexponentialregression/x}{0}
    \pgfkeyssetvalue{/addplotexponentialregression/y}{1}
    \pgfkeyssetvalue{/addplotexponentialregression/a}{1}
    \pgfkeyssetvalue{/addplotexponentialregression/b}{1}
    \pgfkeys{/addplotexponentialregression/.cd,#2}
    \pgftognucolumnset{/addplotexponentialregression/x}{\@addplotexponentialregression@colx}%
    \pgftognucolumnset{/addplotexponentialregression/y}{\@addplotexponentialregression@coly}%
    \edef\@addplotexponentialregression@inita{\pgfkeysvalueof{/addplotexponentialregression/a}}%
    \edef\@addplotexponentialregression@initb{\pgfkeysvalueof{/addplotexponentialregression/b}}%
    \addplot [#1] gnuplot [raw gnuplot] { % allows arbitrary gnuplot commands
        f(x) = a*exp(b*x);     % Define the function to fit
        % Set reasonable starting values here
        a=\@addplotexponentialregression@inita;
        b=\@addplotexponentialregression@initb;
        fit f(x) '#3' u \@addplotexponentialregression@colx:\@addplotexponentialregression@coly\space via a,b; % Select the file
        stats '#3' u \@addplotexponentialregression@colx;
        plot [x=STATS_min:STATS_max] f(x);    % Specify the range to plot
        set print "#3-parameters.dat";  % Open a file to save the parameters
        print a, b;                  % Write the parameters to file
    };
    \pgfplotstableread{#3-parameters.dat}\parameters % Open the file Gnuplot wrote
    \pgfplotstablegetelem{0}{0}\of\parameters \xdef\pgfplotstableregressiona{\pgfplotsretval} % Get first element, save into \pgfplotstableregressiona
    \pgfplotstablegetelem{0}{1}\of\parameters \xdef\pgfplotstableregressionb{\pgfplotsretval}
}
\makeatother
\begin{document}
    %python3 -c 'import math, random; fact0 = (lambda fact0, n: n * fact0(fact0, n - 1) if n > 0 else 1); fact = (lambda n: fact0(fact0, n)); print("\n".join(f"{n:<2} {3*n*n+n+1:<10} {2*math.exp(0.5*n):<20} {2*fact(n):<15} {math.exp(n)+int(random.uniform(0,100)):<15}" for n in range(1, 12)))'
    \begin{filecontents*}{data.dat}
        n  q          e                    f               eapprox
        1  5          3.2974425414002564   2               97.71828182845904
        2  15         5.43656365691809     4               101.38905609893065
        3  31         8.963378140676129    12              87.08553692318768
        4  53         14.7781121978613     48              92.59815003314424
        5  81         24.364987921406946   240             194.4131591025766
        6  115        40.171073846375336   1440            410.4287934927351
        7  155        66.23090391738462    10080           1174.6331584284585
        8  201        109.19630006628847   80640           3032.9579870417283
        9  253        180.03426260104362   725760          8198.083927575384
        10 311        296.8263182051532    7257600         22124.465794806718
        11 375        489.38386452844077   79833600        59973.14171519782
    \end{filecontents*}
    \begin{tikzpicture}
    \begin{axis}[xlabel=$x$,ylabel=$y$,legend pos=north west,axis lines=left]
    \addplot[only marks,color=black] table[x=n,y=eapprox]{data.dat};
    \addplotexponentialregression[no markers,black][x=n,y=eapprox]{data.dat};
    \edef\gnua{\pgfplotstableregressiona}
    \edef\gnub{\pgfplotstableregressionb}
    \addplot[no markers,red,thick] table [x=n,y={create col/linear regression={y=eapprox,ymode=log}}]
    {data.dat};
    \edef\ra{\pgfplotstableregressiona}
    \edef\rb{\pgfplotstableregressionb}
    \addlegendentry{data}
    \addlegendentry{
        $\pgfmathprintnumber{\gnua}e^{\pgfmathprintnumber{\gnub}x}$
    }
    \addlegendentry{
        $\pgfmathprintnumber{\ra}e^{\pgfmathprintnumber{\rb}x}$
    }
    \end{axis}
    \end{tikzpicture}
    \begin{tikzpicture}
    \begin{axis}[xlabel=$x$,ylabel=$y$,legend pos=north west,axis lines=left,ymode=log]
    \addplot[only marks,color=black] table[x=n,y=eapprox]{data.dat};
    \addplotexponentialregression[no markers,black][x=n,y=eapprox]{data.dat};
    \edef\gnua{\pgfplotstableregressiona}
    \edef\gnub{\pgfplotstableregressionb}
    \addplot[no markers,red,thick] table [x=n,y={create col/linear regression={y=eapprox,ymode=log}}]
    {data.dat};
    \edef\ra{\pgfplotstableregressiona}
    \edef\rb{\pgfplotstableregressionb}
    \addlegendentry{data}
    \addlegendentry{
        $\pgfmathprintnumber{\gnua}e^{\pgfmathprintnumber{\gnub}x}$
    }
    \addlegendentry{
        $\pgfmathprintnumber{\ra}e^{\pgfmathprintnumber{\rb}x}$
    }
    \end{axis}
    \end{tikzpicture}
\end{document}

image

JasonGross commented 3 years ago

With #386 and a slight adjustment to the code above, we get

\documentclass{article}
\usepackage[margin=0.5in]{geometry}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.16}
\usepackage{gnuplottex}
\usepackage{pgfkeys}
\usepackage{pgfplotstable}
\usepackage{xparse}
\usepackage{xstring}
\makeatletter
% https://tex.stackexchange.com/a/50113/2066
\newcommand*{\IsInteger}[3]{%
  \IfStrEq{#1}{ }{%
    #3% is a blank string
  }{%
    \IfInteger{#1}{#2}{#3}%
  }%
}%
\newcommand{\pgftognucolumnset}[2]{%
  \IsInteger{\pgfkeysvalueof{#1}}{%
    % pgf 0-indexes columns, while gnuplot 1-indexes columns, so we add 1 to adjust
    \edef#2{\the\numexpr\pgfkeysvalueof{#1}+1\relax}%
  }{%
    \edef#2{(column("\pgfkeysvalueof{#1}"))}%
  }%
}
\makeatletter
% \addplotexponentialregression[params for \addplot][default settings for a and b, also for x and y columns]{table file}
\NewDocumentCommand{\addplotexponentialregression}{ O{no markers} o m}{%
  \pgfkeyssetvalue{/addplotexponentialregression/x}{0}
  \pgfkeyssetvalue{/addplotexponentialregression/y}{1}
  \pgfkeyssetvalue{/addplotexponentialregression/a}{1}
  \pgfkeyssetvalue{/addplotexponentialregression/b}{1}
  \pgfkeys{/addplotexponentialregression/.cd,#2}
  \pgftognucolumnset{/addplotexponentialregression/x}{\@addplotexponentialregression@colx}%
  \pgftognucolumnset{/addplotexponentialregression/y}{\@addplotexponentialregression@coly}%
  \edef\@addplotexponentialregression@inita{\pgfkeysvalueof{/addplotexponentialregression/a}}%
  \edef\@addplotexponentialregression@initb{\pgfkeysvalueof{/addplotexponentialregression/b}}%
  \addplot [#1] gnuplot [raw gnuplot] { % allows arbitrary gnuplot commands
    f(x) = exp(a*x+b);     % Define the function to fit
    % Set reasonable starting values here
    a=\@addplotexponentialregression@inita;
    b=\@addplotexponentialregression@initb;
    fit f(x) '#3' u \@addplotexponentialregression@colx:\@addplotexponentialregression@coly\space via a,b; % Select the file
    stats '#3' u \@addplotexponentialregression@colx;
    plot [x=STATS_min:STATS_max] f(x);    % Specify the range to plot
    set print "#3-parameters.dat";  % Open a file to save the parameters
    print a, b;                  % Write the parameters to file
  };
  \pgfplotstableread{#3-parameters.dat}\parameters % Open the file Gnuplot wrote
  \pgfplotstablegetelem{0}{0}\of\parameters \xdef\pgfplotstableregressiona{\pgfplotsretval} % Get first element, save into \pgfplotstableregressiona
  \pgfplotstablegetelem{0}{1}\of\parameters \xdef\pgfplotstableregressionb{\pgfplotsretval}
}
\makeatother
\begin{document}
%python3 -c 'import math, random; fact0 = (lambda fact0, n: n * fact0(fact0, n - 1) if n > 0 else 1); fact = (lambda n: fact0(fact0, n)); print("\n".join(f"{n:<2} {3*n*n+n+1:<10} {2*math.exp(0.5*n):<20} {2*fact(n):<15} {math.exp(n)+int(random.uniform(0,100)):<15}" for n in range(1, 12)))'
\begin{filecontents*}{data.dat}
  n  q          e                    f               eapprox
  1  5          3.2974425414002564   2               97.71828182845904
  2  15         5.43656365691809     4               101.38905609893065
  3  31         8.963378140676129    12              87.08553692318768
  4  53         14.7781121978613     48              92.59815003314424
  5  81         24.364987921406946   240             194.4131591025766
  6  115        40.171073846375336   1440            410.4287934927351
  7  155        66.23090391738462    10080           1174.6331584284585
  8  201        109.19630006628847   80640           3032.9579870417283
  9  253        180.03426260104362   725760          8198.083927575384
  10 311        296.8263182051532    7257600         22124.465794806718
  11 375        489.38386452844077   79833600        59973.14171519782
\end{filecontents*}
\begin{tikzpicture}
  \begin{axis}[xlabel=$x$,ylabel=$y$,legend pos=north west,axis lines=left]
    \addplot[only marks,color=black] table[x=n,y=eapprox]{data.dat};
    \addplotexponentialregression[no markers,black][x=n,y=eapprox]{data.dat};
    \edef\gnua{\pgfplotstableregressiona}
    \edef\gnub{\pgfplotstableregressionb}
    \tracingcommands=1\relax
    \tracingmacros=1\relax
    \addplot[no markers,red,thick,smooth] table [x=n,y={create col/linear regression={y=eapprox,ymode=log}}]
            {data.dat};
            \edef\ra{\pgfplotstableregressiona}
            \edef\rb{\pgfplotstableregressionb}
            \addlegendentry{data}
            \addlegendentry{
              $e^{\pgfmathprintnumber{\gnua}x + \pgfmathprintnumber{\gnub}}$
            }
            \addlegendentry{
              $e^{\pgfmathprintnumber{\ra}x + \pgfmathprintnumber{\rb}}$
            }
  \end{axis}
\end{tikzpicture}
\begin{tikzpicture}
  \begin{axis}[xlabel=$x$,ylabel=$y$,legend pos=north west,axis lines=left,ymode=log]
    \addplot[only marks,color=black] table[x=n,y=eapprox]{data.dat};
    \addplotexponentialregression[no markers,black][x=n,y=eapprox]{data.dat};
    \edef\gnua{\pgfplotstableregressiona}
    \edef\gnub{\pgfplotstableregressionb}
    \addplot[no markers,red,thick] table [x=n,y={create col/linear regression={y=eapprox,ymode=log}}]
            {data.dat};
            \edef\ra{\pgfplotstableregressiona}
            \edef\rb{\pgfplotstableregressionb}
            \addlegendentry{data}
            \addlegendentry{
              $e^{\pgfmathprintnumber{\gnua}x + \pgfmathprintnumber{\gnub}}$
            }
            \addlegendentry{
              $e^{\pgfmathprintnumber{\ra}x + \pgfmathprintnumber{\rb}}$
            }
  \end{axis}
\end{tikzpicture}
\end{document}

image

which is not perfect, but is much, much better