amv-dev / yata

Yet Another Technical Analysis library [for Rust]
Apache License 2.0
321 stars 49 forks source link

Questionable accuracy #40

Open YaTaUser opened 7 months ago

YaTaUser commented 7 months ago

When inspecting some suspect behavior in my application it turned out moving average values depend on the calculation length. For example, calculating an EMA20 over 500 numbers gives a different last moving average value than calculating the same average over only its last 200 numbers.

Being a programmer, I wrote some code to investigate this:

use yata::core::{ Method, MovingAverageConstructor };
use yata::helpers::MA;
use std::str::FromStr;

/// Experiment: for every moving-average kind and window length, run the
/// average over the trailing part of `prices` several times, each time
/// starting from a different point in history, and print the last five
/// outputs of each run so the runs can be compared line by line.
fn main() {
  // Fixed price history (600 samples) shared by every run.
  let prices = [
    195.78, 196.10, 196.48, 196.96, 194.46, 185.14, 182.48, 185.10, 189.12, 188.98,
    191.88, 191.34, 191.74, 197.80, 190.50, 192.62, 190.60, 190.06, 188.98, 190.50,
    189.36, 188.48, 188.50, 195.32, 195.80, 199.00, 193.50, 189.40, 184.70, 192.30,
    196.30, 206.30, 205.50, 199.46, 194.28, 194.76, 194.88, 192.50, 187.54, 187.08,
    189.70, 187.50, 185.90, 185.00, 185.12, 187.78, 188.98, 186.64, 187.00, 185.14,
    181.48, 177.22, 175.28, 171.94, 167.22, 165.50, 162.50, 164.70, 165.68, 169.26,
    167.46, 184.34, 184.90, 181.64, 181.30, 182.82, 182.50, 182.40, 181.20, 170.60,
    174.24, 174.28, 176.66, 176.90, 178.78, 178.76, 177.00, 178.00, 181.84, 187.28,
    187.30, 189.72, 189.00, 189.00, 189.90, 187.80, 190.64, 194.74, 191.10, 189.04,
    191.62, 188.00, 185.50, 178.70, 179.50, 179.90, 186.82, 185.66, 184.02, 186.02,
    183.22, 182.26, 177.04, 175.96, 179.00, 188.14, 184.20, 181.90, 182.10, 187.82,
    186.38, 184.12, 181.50, 168.50, 189.16, 176.00, 181.76, 179.50, 176.72, 162.00,
    159.04, 151.00, 139.00, 132.82, 142.60, 151.50, 144.00, 154.00, 148.20, 157.66,
    158.90, 155.38, 153.18, 154.52, 156.74, 152.52, 152.62, 151.58, 154.50, 159.46,
    160.88, 157.64, 158.92, 160.30, 156.08, 153.02, 152.78, 151.48, 148.80, 150.00,
    151.68, 148.50, 154.06, 154.72, 149.46, 151.22, 147.80, 148.58, 149.00, 148.64,
    151.52, 152.52, 145.88, 145.18, 144.56, 144.10, 143.60, 146.18, 146.82, 144.02,
    146.48, 148.00, 146.50, 148.30, 154.50, 155.88, 160.18, 158.44, 156.84, 158.70,
    158.12, 156.30, 148.32, 146.84, 146.30, 146.80, 143.48, 142.10, 144.20, 144.10,
    143.86, 140.00, 140.18, 140.50, 138.30, 133.00, 126.10, 129.02, 127.98, 122.74,
    122.48, 126.88, 130.18, 129.58, 127.82, 127.42, 133.74, 134.70, 142.28, 139.92,
    135.50, 132.50, 131.90, 131.06, 133.00, 137.40, 138.44, 137.80, 140.12, 144.48,
    143.80, 145.00, 145.30, 142.00, 147.14, 147.50, 148.60, 150.40, 150.92, 147.30,
    147.54, 145.00, 138.02, 141.42, 142.80, 142.68, 139.80, 143.30, 148.64, 137.48,
    142.32, 145.00, 144.94, 147.00, 149.50, 146.50, 149.28, 151.34, 149.00, 150.98,
    147.50, 147.38, 148.38, 145.40, 145.44, 147.80, 139.32, 139.90, 135.00, 138.80,
    129.62, 125.90, 129.20, 130.62, 130.48, 131.82, 127.84, 123.44, 123.62, 122.60,
    126.90, 126.50, 130.48, 131.50, 129.22, 129.34, 131.00, 131.44, 131.00, 130.56,
    127.50, 129.04, 130.88, 132.50, 129.62, 128.52, 133.02, 137.36, 135.60, 134.46,
    140.92, 143.90, 144.00, 142.96, 138.12, 140.06, 141.02, 139.00, 140.20, 137.78,
    137.28, 135.80, 135.86, 138.60, 141.22, 138.38, 140.00, 140.88, 139.18, 136.10,
    137.00, 137.20, 136.24, 137.16, 136.44, 133.76, 121.10, 121.50, 119.82, 120.18,
    115.04, 117.06, 115.88, 114.96, 116.04, 117.20, 120.04, 122.56, 126.00, 127.42,
    129.62, 127.50, 127.80, 128.32, 127.82, 125.72, 126.00, 125.50, 124.06, 124.42,
    125.40, 125.44, 123.76, 125.06, 126.20, 126.00, 127.20, 129.00, 132.00, 131.20,
    131.32, 130.50, 129.60, 130.00, 128.76, 129.16, 130.60, 131.98, 130.30, 132.46,
    132.50, 130.80, 131.76, 132.56, 128.32, 129.00, 130.10, 128.00, 127.82, 142.20,
    139.32, 138.76, 140.24, 133.92, 134.70, 128.70, 129.40, 125.56, 122.00, 120.20,
    121.24, 123.36, 123.50, 120.12, 120.48, 122.60, 122.50, 123.50, 125.60, 126.20,
    126.50, 125.68, 124.14, 125.50, 125.50, 126.84, 128.66, 127.54, 126.26, 124.28,
    122.24, 122.20, 121.96, 120.32, 121.50, 123.00, 124.20, 123.44, 125.12, 125.52,
    127.64, 127.56, 127.26, 120.00, 119.98, 119.66, 118.12, 116.18, 118.24, 120.80,
    120.20, 120.52, 119.72, 118.82, 120.40, 120.02, 118.20, 119.08, 121.72, 122.38,
    123.90, 124.70, 126.02, 130.00, 131.34, 131.10, 130.46, 128.30, 127.50, 126.58,
    123.84, 123.04, 122.52, 123.00, 120.98, 122.72, 123.28, 123.94, 124.00, 122.90,
    123.08, 121.00, 122.16, 122.52, 123.30, 123.40, 123.64, 122.44, 122.14, 123.02,
    123.50, 123.90, 124.62, 126.08, 125.60, 121.50, 120.00, 121.26, 120.66, 118.80,
    118.50, 119.44, 117.92, 118.28, 117.84, 117.48, 116.78, 116.90, 115.50, 115.00,
    113.98, 114.08, 115.12, 114.02, 114.26, 111.82, 112.82, 113.30, 114.16, 113.70,
    110.48, 108.60, 107.56, 108.00, 107.00, 106.72, 107.74, 108.78, 108.66, 108.38,
    109.34, 109.50, 109.20, 109.52, 111.30, 112.10, 113.90, 113.04, 109.62, 108.40,
    108.96, 109.46, 108.30, 106.22, 107.12, 106.54, 105.90, 106.44, 107.86, 110.00,
    108.70, 107.68, 107.92, 108.80, 106.00, 104.46, 102.76, 102.14, 102.12,  99.87,
    100.06,  99.52,  99.40, 100.56, 101.00, 103.98, 105.50, 104.32, 103.70, 104.50,
    103.96, 104.02, 105.58, 108.48, 109.66, 108.30, 109.60, 108.74, 107.08, 107.00,
    106.06, 107.00, 105.50, 104.26, 107.38, 107.20, 106.92, 107.36, 110.42, 113.74,
    113.00, 115.02, 116.40, 115.36, 116.50, 116.42, 115.10, 114.22, 114.30, 112.20,
    112.26, 112.36, 112.62, 112.06, 112.32, 112.86, 111.42, 111.96, 112.50, 114.20,
    113.30, 113.98, 113.30, 112.18, 111.16, 109.94, 108.62, 108.06, 107.50, 108.30,
    115.68, 115.64, 114.90, 115.00, 116.38, 116.74, 119.66, 119.16, 120.14, 119.68,
  ];

  // Moving-average kinds under test, as accepted by `MA::from_str`.
  let ma_names = [
    "sma", "wma", "ema", "dma", "tma", "dema", "tema", "smm", "linreg",
    "swma", "hma", "rma", "trima", "wsma", "vidya"
  ];

  for ma_name in ma_names {
    for ma_length in [10, 20, 100] {
      println!("{} {}:", ma_name, ma_length);

      for calc_length in [10, 20, 100, 400] {
        // Index of the first sample used by this run; every run ends at the
        // last element of `prices`, only the starting point differs.
        let start = prices.len() - calc_length - ma_length - 1;
        let seed = prices[start];

        let spec = format!("{}-{}", ma_name, ma_length);
        let mut ma = MA::from_str(&spec).unwrap().init(seed).unwrap();

        // The series begins with the raw seed price, followed by one output
        // per sample from `start` (inclusive, so the seed is fed once more)
        // to the end of the history.
        let mut ma_result = vec![seed];
        ma_result.extend(prices[start..].iter().map(|price| ma.next(price)));

        // Only the five most recent values are reported.
        let tail = &ma_result[ma_result.len() - 5..];
        println!(
          "{:3.4} {:3.4} {:3.4} {:3.4} {:3.4}",
          tail[0], tail[1], tail[2], tail[3], tail[4]
        );
      }
    }
  }
}

Lo and behold, this code confirms my observation. The list this code prints out shows good accuracy for some moving averages ...

sma 10:
112.6820 113.7860 114.8960 116.1600 117.2980
112.6820 113.7860 114.8960 116.1600 117.2980
112.6820 113.7860 114.8960 116.1600 117.2980
112.6820 113.7860 114.8960 116.1600 117.2980

... still reasonable accuracy for others ...

wma 20:
113.0019 113.6802 114.2716 114.9221 115.4924
113.0018 113.6801 114.2715 114.9220 115.4923
113.0015 113.6798 114.2712 114.9217 115.4920
112.9991 113.6774 114.2688 114.9193 115.4896

... and not really usable numbers (deviation > 1%) for some:

ema 100:
110.8011 110.9766 111.1386 111.3169 111.4825
110.9291 111.1020 111.2615 111.4373 111.6006
111.6408 111.7996 111.9454 112.1076 112.2576
111.7380 111.8949 112.0388 112.1992 112.3473
[...]
vidya 20:
112.1891 112.4278 112.6161 112.8354 112.9860
113.1411 113.3494 113.5119 113.7051 113.8366
110.5090 110.8014 111.0352 111.3006 111.4850
110.9301 111.2091 111.4314 111.6853 111.8612

My current working hypothesis is that the mathematical shortcuts taken when calculating moving averages accumulate numerical inaccuracies: thousands of tiny errors end up as substantial ones. While calculating the next value from the previous one is certainly faster, accurate results would require calculating "fresh" values from the values in the moving average window, ignoring the previous moving average value.

amv-dev commented 7 months ago

Technically speaking, the exponential moving average family uses all the historical values you provide and does not have a fixed-size window (like SMA, WMA, etc. do). Something similar happens with VIDYA, which carries its force coefficient through the entire history. So there is nothing wrong with this kind of behavior.

YaTaUser commented 7 months ago

My impression of this crate is, it's less for demonstrating mathematical exercises, but for giving reliable and usable moving averages. For example, this is Vidya-100 over 200 candles (last 100 shown, last value ~15'830):

Screenshot 2024-02-07 at 16-08-45 Eispickel

The same over entire DAX history (currently 7640 candles), last value is ~14'540:

Screenshot 2024-02-07 at 16-19-39 Eispickel

Yahoo! Finance reports a latest value of that moving average of 16'235:

image

While I think I understand the technical strategy, the above findings don't look like I can draw reliable conclusions from the graphs calculated. Which means, math algorithms need adjustments.

amv-dev commented 6 months ago

I don't know what you mean by "reliable and usable moving averages", but all the methods are pure mathematical operations, with all the pros and cons that entails. You may like it or not, but there is nothing to change. There is nothing to "adjust". Also, I don't know what algorithm is behind Yahoo Finance. Here are two different VIDYAs from TradingView on all the data they have. As you can see, both lines are pretty close to what is drawn on your second chart. You can also investigate their implementation and Yata's implementation. tradingview

YaTaUser commented 6 months ago

Perhaps we can at least agree that YaTa is typically used for analysis of trading prices. Here "reliable" means that calculated averages depend only on prices in history (and sometimes trading volume), not on unrelated factors like the length of price history looked at.

Another thing is that many market participants look at such moving averages for their analysis and following on that, their trading decisions. Which means, a moving average calculated by YaTa should be the same as what other analysis softwares do. If professional trader Smith at Goldman Sachs sees EMA200 at a particular price, you and me should see the very same. This way these averages get useful.

Yahoo calculates averages right in the browser. I tried to decipher that, but it's heavily obfuscated. A pity, because that is the only software I've seen so far which calculates a Vidya which makes sense. By nature, moving averages have crossing points with the not averaged price, which implementations of TradingView and YaTa don't.

Looking around even more, TA-Lib appears to be kind of a gold standard for technical analysis. It does have extra code for dealing with different history lengths:

4.2 Unstable Period

Some TA functions provides different results depending of the "starting point" of the data being involve. This is often referred as a function having memories. An example of such function is the Exponential Moving Average. It is possible to control the unstable period (the amount of data to strip off) with _TASetUnstablePeriod and _TAGetUnstablePeriod.

I've yet to find out what these two functions do.

Notably, TA-Lib features a whole lot of moving averages, but not Vidya. It does support TRIMA and EMA, so I currently try to get the above test code working with that library, in order to compare results.

amv-dev commented 6 months ago

Perhaps we can at least agree that YaTa is typically used for analysis of trading prices.

No. I personally use moving averages for other technical purposes that have no connection with trading at all. A moving average itself is a pure mathematical construction, unrelated to economics or trading. Therefore it does not give any guarantees except mathematical consistency.

Here "reliable" means that calculated averages depend only on prices in history (and sometimes trading volume), not on unrelated factors like the length of price history looked at.

This is totally wrong. The core idea behind EMA is to use all historical data:

An exponential moving average (EMA), also known as an exponentially weighted moving average is a first-order infinite impulse response filter that applies weighting factors which decrease exponentially. The weighting for each older datum decreases exponentially, never reaching zero.

So what is the unstable period for this kind of MA? It depends on the target precision you need. For some, 5% is enough; for others, 0.1%.

Again this library gives no guarantees, that values calculated using Yata will be exactly the same, as somewhere else, because there are many other factors that may affect the final result.

For example, what values are used when there are no actual values in the history? Zero? The previous value? NaN? Or is the period just skipped? Each variant may (and with the EMA family will) give different results. Again, what values are used before the very first historical value? In Yata it is guaranteed that each method and indicator is built on the concept that the previous values are an infinite series of values equal to the first historical value (that's why you need to provide a first value when building a method or indicator). Other libraries may give other guarantees. Some of them use zero values in such cases (AFAIK, TradingView). Some MAs have a fixed unstable period equal to their length (SMA, WMA, MMA, etc.), so this question is not so important for them. But again, EMA uses all the history by its definition. Calculation of the unstable period for this kind of method/indicator is not covered by this library (yet).

YaTaUser commented 6 months ago

Again this library gives no guarantees, that values calculated using Yata will be exactly the same, as somewhere else

Somehow you try to explain this crate is useless. Even in the technical world users usually want matching results across different ways of computing data by particular algorithms.

Anyways, here's code for doing moving averages with both, YaTa in Rust and TA-Lib in C, in the same folder:

ta-lib-test.tar.gz

The bolder among us unpack it and just run ./compare.sh. Else doing make and cargo build separately and fixing what's missing is a better idea.

About that _TASetUnstablePeriod mentioned earlier, it's described here: https://ta-lib.github.io/d_api/ta_setunstableperiod.html It describes what the issue with EMA/DEMA/TEMA is and how one can set the unstable period for these averages. TA-Lib skips results during the unstable period, one gets only sufficiently stable results.

I was about to write an evaluation of comparison results. As you now stated correctness doesn't matter here, that's probably pointless.

amv-dev commented 6 months ago

About that _TASetUnstablePeriod mentioned earlier, it's described here: https://ta-lib.github.io/d_api/ta_setunstableperiod.html It describes what the issue with EMA/DEMA/TEMA is and how one can set the unstable period for these averages. TA-Lib skips results during the unstable period, one gets only sufficiently stable results.

Now after I told you three times right about the same thing you found it somewhere else. Great.

So, what are your actual suggestions? Do you want me to add another skip implementation just because... it was done in a C lib?

I was about to write an evaluation of comparison results. As you now stated correctness doesn't matter here, that's probably pointless.

I didn't say that.

n0tgod commented 5 months ago

@amv-dev Hi! How can I contact you?

amv-dev commented 5 months ago

@amv-dev Hi! How can I contact you?

you can write a letter to amv-dev@protonmail.com