Open julienrueter opened 6 years ago
import numpy as np
import pylab
# Hard-coded snapshot of daily Bitcoin prices, described by the author as covering
# the 365 days before the script was written.
# NOTE(review): currency is not stated here (presumably USD) and the values appear
# to be ordered oldest to newest, one entry per day — confirm against the data source.
btc_price = np.array([901.794, 888.635, 890.932, 922.625, 930.491, 923.679, 909.375, 898.577, 906.981, 920.549, 922.622, 922.771, 921.125, 941.198, 974.957, 993.804, 1013.64, 1025.74, 1034.65, 1039.16, 1058.07, 1050.5, 1074.72, 970.852, 1002.01, 1004.24, 992.074, 1003.21, 1003.65, 1017.93, 1034.6, 1056.87, 1044.41, 1055.49, 1103.93, 1107.3, 1138.92, 1159.65, 1170.05, 1160.19, 1166.51, 1175.4, 1194.09, 1224.9, 1279.5, 1266.24, 1252.98, 1273.7, 1219.51, 1203.96, 1172.79, 1192.81, 1184.95, 1179.9, 1235.38, 1232.11, 1251.13, 1227.21, 1159.92, 1088.41, 1007.05, 1056.94, 1098.27, 1065.67, 1055.08, 982.209, 925.862, 982.121, 1011.44, 1064.16, 1053.31, 1025.39, 1041.74, 1083.86, 1082.16, 1128.58, 1135.66, 1124.41, 1159.78, 1173.72, 1183.98, 1178.89, 1186.64, 1196.99, 1198.42, 1185.01, 1163.22, 1185.53, 1177.18, 1173.4, 1216.91, 1208.31, 1216.64, 1224.21, 1218.76, 1220.78, 1222.4, 1261.74, 1277.44, 1293.52, 1310.59, 1315.48, 1316.88, 1372.39, 1466.32, 1465.37, 1543.12, 1606.32, 1570.62, 1569.09, 1661.17, 1827.47, 1741.45, 1838.73, 1809.64, 1762.47, 1802.03, 1756.81, 1742.49, 1826.54, 1839.17, 1957.02, 1994.14, 2058.74, 2212.54, 2258.93, 2458.2, 2729.71, 2457.24, 2007.24, 2234.27, 2197.8, 2286.44, 2201.21, 2435.48, 2412.8, 2505.07, 2526.52, 2576.8, 2898.3, 2838.14, 2749.44, 2850.36, 2796.87, 2914.06, 2794.05, 2736.73, 2608.5, 2264.09, 2480.87, 2592.96, 2561.17, 2584.1, 2643.21, 2729.53, 2712.12, 2744.13, 2694.57, 2658.48, 2520.31, 2520.75, 2532.09, 2549.52, 2508.69, 2435.93, 2494.07, 2613.7, 2557.76, 2581.69, 2559.6, 2514.31, 2579.24, 2496.37, 2404.25, 2410.17, 2348.28, 2195.55, 2042.68, 1892.59, 2178.64, 2352.79, 2280.52, 2699.62, 2645.96, 2785.17, 2758.94, 2757.5, 2571.07, 2527.5, 2671.57, 2809.01, 2729.69, 2752.85, 2872.31, 2713.63, 2711.93, 2802.24, 2899.33, 3253.94, 3218.02, 3374.01, 3419.01, 3345.37, 3391.23, 3654.33, 3883.17, 4062.87, 4306.43, 4181.96, 4374.59, 4349.1, 4128.86, 4193.7, 4087.66, 4001.74, 4100.52, 4151.52, 4334.68, 4371.6, 4352.4, 4383.28, 4387.51, 
4555.86, 4557.24, 4718.37, 4910.47, 4631.97, 4539.47, 4157.95, 4453.47, 4533.17, 4575.35, 4145.38, 4203.61, 4209.58, 4207.6, 4079.7, 3912.01, 3343.47, 3716.3, 3564.06, 3645.36, 4000.31, 3912.91, 3858.69, 3670.53, 3608.36, 3782.44, 3700.48, 3935.72, 3909.23, 4162.55, 4170.8, 4212.62, 4335.09, 4417.06, 4420.15, 4312.44, 4247.14, 4320.53, 4362.94, 4431.99, 4614.8, 4826.02, 4778.94, 4846.95, 5509.22, 5618.17, 5835.97, 5669.7, 5606.32, 5521.39, 5634.66, 5697.31, 6074.33, 6058.24, 6040.02, 5696.78, 5430.46, 5745.22, 5956.62, 5840.19, 5739.39, 6129.01, 6144.72, 6423.43, 6883.19, 7042.21, 7073.37, 7404.87, 7384.71, 7119.88, 7331.84, 7391.52, 7276.36, 6837.83, 6172.27, 5959.35, 6689.06, 6796.43, 7203.09, 7894.08, 7542.92, 7841.41, 8012.17, 8221.34, 8126.75, 8231.85, 7953.04, 8208.21, 8841.21, 9671.47, 9750.23, 10271.1, 10618.3, 9895.34, 11157.8, 11054.5, 11420.8, 11716.3, 12269.2, 14286.0, 17839.9, 16065.0, 14228.7, 16556.2, 17059.3, 16855.2, 16255.9, 17766.3, 17672.2, 19345.6, 18621.3, 19053.0, 17040.4, 16674.1, 14895.1, 14427.6, 13732.5, 13565.8, 14140.3, 16282.9, 14750.1, 15167.8, 13538.0, 13529.8, 13760.1, 13795.7, 15288.6, 15396.8, 15809.6, 17355.6, 17151.9, 16141.8, 15362.3, 14434.0, 14521.7, 13608.5, 14285.9, 14360.4, 13747.8, 13390.0, 10954.9, 11887.5, 11644.1])
# Day-over-day price ratio: element t-1 of the result is price[t] / price[t-1],
# so the result has one element fewer than btc_price.
change_in_price = np.divide(btc_price[1:], btc_price[:-1])
# Modelling assumption: the logarithm of that ratio (the log return)
# is normally distributed.
log_price_change = np.log(change_in_price)
def gauss(x, m, sig):
    """Evaluate the normal (Gaussian) probability density at ``x``.

    Despite its name, ``sig`` is the *variance* of the distribution, not the
    standard deviation.

    Args:
        x: Point(s) at which to evaluate the density; a scalar or NumPy array.
        m: Mean of the distribution.
        sig: Variance of the distribution; must be positive, since it is
            used as a divisor.

    Returns:
        The density value(s), following NumPy broadcasting of the inputs.
    """
    y = np.exp(-(x - m) ** 2 / (2 * sig)) / (2 * np.pi * sig) ** 0.5
    return y
def simple_model(m, sig, log_price_change):
    """Draw a Gaussian random sample as long as the observed series.

    This is the whole "model": it ignores the order of the observations and
    only uses their number to decide how many values to draw.

    Args:
        m: Mean of the normal distribution to sample from.
        sig: Variance of the distribution; its square root is passed to
            ``np.random.normal`` as the standard deviation.
        log_price_change: Observed series; only ``len()`` of it is used.

    Returns:
        A 1-D NumPy array of ``len(log_price_change)`` random draws.
    """
    random_sample = np.random.normal(m, sig ** 0.5, len(log_price_change))
    return random_sample
def calculate_scores(log_price_change, random_sample):
    """Score how well the sign of a model sample matches the real data.

    A value greater than zero counts as "up" (positive class); everything
    else counts as "not up". Element ``i`` of ``random_sample`` is the
    prediction for element ``i`` of ``log_price_change``.

    Args:
        log_price_change: Observed log price changes (ground truth).
        random_sample: Predicted log price changes. Only the first
            ``len(random_sample)`` observations are compared.

    Returns:
        A ``(precision, recall, f1_score)`` tuple of floats. A score whose
        denominator is zero (e.g. precision when nothing was predicted
        "up") is reported as ``0.0`` instead of NaN.

    Raises:
        IndexError: If ``log_price_change`` is shorter than
            ``random_sample``.
    """
    predicted = np.asarray(random_sample)
    actual = np.asarray(log_price_change)
    if len(actual) < len(predicted):
        raise IndexError(
            "log_price_change has fewer elements than random_sample"
        )

    predicted_up = predicted > 0
    actual_up = actual[:len(predicted)] > 0

    # Confusion-matrix cells needed for the scores; plain ints so the
    # divisions below are ordinary Python float divisions.
    true_pos = int(np.count_nonzero(predicted_up & actual_up))
    false_pos = int(np.count_nonzero(predicted_up & ~actual_up))
    false_neg = int(np.count_nonzero(~predicted_up & actual_up))

    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg
    precision = true_pos / predicted_pos if predicted_pos else 0.0
    recall = true_pos / actual_pos if actual_pos else 0.0

    score_sum = precision + recall
    f1_score = 2 * precision * recall / score_sum if score_sum else 0.0
    return precision, recall, f1_score
# Evaluation grid: the observed data range padded by 0.1 on each side
x = np.linspace(min(log_price_change) - 0.1, max(log_price_change) + 0.1, 100)
# Mean of log_price_change
mean_log_price_change = np.mean(log_price_change)
# Variance of log_price_change
var_log_price_change = np.var(log_price_change)
# Gaussian density with the same mean and variance as the data
distribution = gauss(x, mean_log_price_change, var_log_price_change)
# Plot the fitted Gaussian against a normalised histogram of the real data.
# `density=True` replaces the old `normed=True` keyword, which current
# matplotlib releases no longer accept.
pylab.hist(log_price_change, 20, density=True, label="Hist of Log Price Change")
pylab.plot(x, distribution, label="Gaussian")
pylab.legend()
pylab.show()
# Draw one model sample and compare it with the real data.
# NOTE: the price plot flatters the model, because every prediction restarts
# from the true price of the previous day, so earlier errors never accumulate.
random_sample = simple_model(
    mean_log_price_change, var_log_price_change, log_price_change
)
# Top panel: real price vs. one-step-ahead model price
pylab.subplot(2, 1, 1)
pylab.plot(btc_price[1:], label="Bitcoin Price")
# Undo the log on the model sample and scale by the true price at time t-1
pylab.plot(np.exp(random_sample) * btc_price[:-1], label="Model Price")
pylab.legend()
# Bottom panel: real vs. sampled log price change, which gives a more honest
# picture of how well the prediction matches
pylab.subplot(2, 1, 2)
pylab.plot(log_price_change, label="Log Price Change")
pylab.plot(random_sample, label="Model Sample Data")
pylab.legend()
pylab.show()
# Score the sample drawn above against the real data
template = "Precision: %.4f | Recall: %.4f | F1 Score: %.4f"
scores = calculate_scores(log_price_change, random_sample)
print(template % scores)
print("-" * 20)
# Draw several fresh samples to show how much the scores vary between draws
for i in range(10):
    random_sample = simple_model(
        mean_log_price_change, var_log_price_change, log_price_change
    )
    scores = calculate_scores(log_price_change, random_sample)
    print("Sample", i, template % scores)
# Mean and standard deviation of the scores over 1000 independent samples
template_mean = "Mean Precision: %.4f | Mean Recall: %.4f | Mean F1 Score: %.4f"
template_std = "STD Precision: %.4f | STD Recall: %.4f | STD F1 Score: %.4f"
sample_scores = []
for i in range(1000):
    random_sample = simple_model(
        mean_log_price_change, var_log_price_change, log_price_change
    )
    scores = calculate_scores(log_price_change, random_sample)
    sample_scores.append(list(scores))
# One row per sample; columns are precision, recall, F1
sample_scores = np.array(sample_scores)
print(template_mean % tuple(np.mean(sample_scores, axis=0).tolist()))
print(template_std % tuple(np.std(sample_scores, axis=0).tolist()))
# Free-running prediction: start at the first real price and compound the
# sampled daily factors, so the model's error accumulates over time instead
# of being reset by the true price at every step.
prediction = [btc_price[0]]
for i in np.exp(random_sample):
    prediction.append(i * prediction[-1])
pylab.plot(btc_price, label="Bitcoin Price")
pylab.plot(prediction, label="Model Price with accumulating Error")
pylab.legend()
pylab.show()
Hi Siraj, I've experimented with RNN/NN/CNN/... models for crypto price prediction in the past, and most of the time these models just learned the global statistical distribution of the dataset, because in the short term only the random walk model applies and nothing more. To illustrate this, I built a simple Python script that fits a Gaussian distribution to the data and draws random samples from it in order to evaluate this simple price prediction model.
The result is that this simple "model" achieves a precision, recall and F1 score similar to (i.e. within the interval of mean + 2*std of) those of the RNN that uses just the price as input and nothing else. Best regards, Julien
Result Simple Gaussian Model: Mean Precision: 0.5823 | Mean Recall: 0.5561 | Mean F1 Score: 0.5686 STD Precision: 0.0227 | STD Recall: 0.0348 | STD F1 Score: 0.0266