Open nassuphis opened 6 years ago
to produce the above using ggplot2's facet_grid(), we need the postition information to be available for every (date,pair,stock,value) tuple.
if we have a long-format dataset, wei might convert it into wide as follows:
ext_ptf<-ptf[,
c(
list(stat=stat,value=value),
data.table(matrix(value,nrow=1,dimnames=list(NULL,stat))[rep(1,length(stat)),])
),
keyby=c("date","stock")
][order(date)]
and ptf is the result of melting 3 matrices: positions, pnl, tret:
all<-data.table(
date=as.Date(rownames(positions),format="%Y-%m-%d"),
e=apply(positions,2,function(x)rescale(x,from=c(-1,1)*max(abs(x)),c(-1,1))),
p=apply(pnl,2,function(x)rescale(cumsum(x),from=c(-1,1)*max(abs(cumsum(x))),c(-1,1))),
r=apply(tret,2,function(x)rescale(cumprod(1+x),c(-1,1)))
)
all_groups<-list(
position=paste0("e.",colnames(positions)),
pnl=paste0("p.",colnames(pnl)),
price=paste0("r.",colnames(tret))
)
ptf<-melt(
melt(data=all,id.vars="date",measure.vars=all_groups,variable.name = "stock"),
id.vars=c("date","stock"),measure.vars=names(all_groups),variable.name = "stat"
)[,.(
date=date,
stock=colnames(positions)[as.integer(stock)],
stat=stat,
value=value
)]
data.table
will concatenate matrices and prefix column names with names assignment in function call:
all<-data.table(
date=as.Date(rownames(positions),format="%Y-%m-%d"),
e=apply(positions,2,function(x)rescale(x,from=c(-1,1)*max(abs(x)),c(-1,1))),
p=apply(pnl,2,function(x)rescale(cumsum(x),from=c(-1,1)*max(abs(cumsum(x))),c(-1,1))),
r=apply(tret,2,function(x)rescale(cumprod(1+x),c(-1,1)))
)
all three matrices have the same column names, we get these prefixed with e
, p
, r
in the
above case.
note that re-scaling can only be done conveniently in matrix format. the reason for this is we can apply column-wise vector operations to scale numbers well.
and melt will convert column names into factors:
ptf<-melt(
melt(data=all,id.vars="date",measure.vars=all_groups,variable.name = "stock"),
id.vars=c("date","stock"),measure.vars=names(all_groups),variable.name = "stat"
)[,.(
date=date,
stock=colnames(positions)[as.integer(stock)],
stat=stat,
value=value
)]
a better way would be to have 2 groups, one over all factors and another with a repetition of the common factor we want to see:
dt<-data.table(
x=paste0("x",1:100),
y=paste0("y",1:100),
w=runif(100),
v=runif(100),
u=runif(100)
)
m1<-melt(dt,id.vars=c("x","y"),measure.vars=list(set1=c("w","v"),set2=c("u","u")))
> m1<-melt(dt,id.vars=c("x","y"),measure.vars=list(set1=c("w","v"),set2=c("u","u")))
> m1
x y variable set1 set2
1: x1 y1 1 0.88648912 0.8678249
2: x2 y2 1 0.49355149 0.5756011
3: x3 y3 1 0.63612888 0.5217135
4: x4 y4 1 0.04769907 0.2821188
5: x5 y5 1 0.65018871 0.1497894
---
196: x96 y96 2 0.10684100 0.6873902
197: x97 y97 2 0.67042024 0.2436080
198: x98 y98 2 0.09169133 0.5508345
199: x99 y99 2 0.51975673 0.3041294
200: x100 y100 2 0.18278881 0.1893799
> m1[x=="x1" & y=="y1"]
x y variable set1 set2
1: x1 y1 1 0.8864891 0.8678249
2: x1 y1 2 0.6699675 0.8678249
for the portfolio data:
> names(ptf)
[1] "date" "bucket" "security" "security_units" "market_value" "open"
[7] "close" "high" "low" "tret" "ticker"
melt used to get stat and a common position across all:
m2<-melt(ptf,id.vars=c("date","bucket","ticker"),measure.vars=list(
stat=c("market_value","close","tret"),
pos=rep("security_units",3)
))
> m2
date bucket ticker variable stat pos
1: 2017-01-02 ABC_PAIR_03 SXNP 1 -5.046934e+05 -1355
2: 2017-01-02 ABC_PAIR_06 CTEC LN 1 -7.644911e+05 -327125
3: 2017-01-02 ABC_PAIR_07 MC 1 -1.107384e+06 -7200
4: 2017-01-02 ABC_PAIR_07 RI 1 6.928331e+05 7800
5: 2017-01-02 AC_PAIR_53 RDSA 1 1.008752e+07 452609
---
401627: 2018-05-01 MC_PAIR_53 ELTA 3 1.144600e-02 163672
401628: 2018-05-01 MC_PAIR_79 BDEV 3 1.182800e-02 -75000
401629: 2018-05-01 MC_PAIR_79 CSP 3 1.336300e-02 319391
401630: 2018-05-01 MC_PAIR_79 MCS 3 -2.216000e-03 -192001
401631: 2018-05-01 MC_PAIR_96 RWI 3 9.777000e-03 431406
securities<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/exposure_securities.csv")
ptf<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/ptf.csv")[date>"2017-01-01"]
ptf$date<-as.Date(ptf$date,format="%Y-%m-%d")
i<-match(ptf$security,securities$exposure_security_external_id)
ptf$ticker<-gsub(" Equity","",securities$security_ticker[i])
scale2unit<-function(x)rescale(x,from=c(-1,1)*max(1,max(abs(scrub(x)))),to=c(-1,1))
and the plot would look like this:
g1 <- melt(
ptf[bucket=="GJ_PAIR_19"][TRUE,.(
date=date,
bucket=bucket,
security_units=security_units,
market_value=scale2unit(market_value),
close=scale2unit(close),
tret=scale2unit(cumsum(market_value*tret))
),keyby=ticker],
id.vars=c("date","bucket","ticker"),
measure.vars=list(
what=c("market_value","close","tret"),
pos=rep("security_units",3)
),
variable.name="select"
) %>% ggplot() +
ylim(-1.5, 1.5) +
geom_line(
mapping=aes(
x=date,
y=what,
group=interaction(ticker,select),
color=ticker
),
size=1,
show.legend=FALSE
)+
scale_x_date(labels = date_format("%Y-%m-%d")) +
theme(
axis.text.x=element_text(angle=50,size=5,vjust=0.5),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
strip.text.y = element_text(size = 6,angle=90)
)+
facet_grid(
cols=vars(select),
rows=vars(ticker),
labeller=labeller(
select=setNames(
c("market_value","close","tret"),
order(c("market_value","close","tret"))
)
)
)
set to
securities<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/exposure_securities.csv")
ptf<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/ptf.csv")[date>"2017-01-01"]
ptf$date<-as.Date(ptf$date,format="%Y-%m-%d")
i<-match(ptf$security,securities$exposure_security_external_id)
ptf$ticker<-gsub(" Equity","",securities$security_ticker[i])
scale2unit<-function(x)rescale(x,from=c(-1,1)*max(1,max(abs(scrub(x)))),to=c(-1,1))
df<-ptf[bucket=="GJ_PAIR_19"][TRUE,.(
date=date,
bucket=bucket,
security_units=scrub(security_units),
market_value=scale2unit(scrub(market_value)),
close=scale2unit(close),
tret=scale2unit(cumsum(scrub(market_value)*scrub(tret)))
),keyby=ticker]
dfm <- melt(
df,
id.vars=c("date","bucket","ticker"),
measure.vars=list(
what=c("market_value","close","tret"),
pos=rep("security_units",3)
),
variable.name="select"
)
this data: dfm.zip
and this code:
g1 <- dfm %>% ggplot() +
ylim(-1.5, 1.5) +
geom_line(
mapping=aes(
x=date,
y=what,
group=interaction(ticker,select),
color=ticker
),
size=1,
show.legend=FALSE
)+
scale_x_date(labels = date_format("%Y-%m-%d")) +
theme(
axis.text.x=element_text(angle=50,size=5,vjust=0.5),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
strip.text.y = element_text(size = 6,angle=90)
)+
facet_grid(
cols=vars(select),
rows=vars(ticker),
labeller=labeller(
select=setNames(
c("market_value","close","tret"),
order(c("market_value","close","tret"))
)
)
)
produces this plot:
there are some issues with this: the DAX data looks wrong:
the market value for DAX is positive on the plot but this is a a hedge, and hence short, so -ve market value.
the issue was "select" ids are in the sequence they were specified, so
g1+facet_grid(
cols=vars(select),
rows=vars(ticker),
labeller=labeller(
select=setNames(
c("market_value","close","tret"),
c("1","2","3")
)
)
)
and ptf contents are not sorted by date, so cummulative sums dont work.
df<-ptf[bucket=="GJ_PAIR_19"][TRUE,.(
date=sort(date),
bucket=bucket,
security_units=scrub(security_units)[order(date)],
market_value=scale2unit(scrub(market_value))[order(date)],
close=rescale(close[order(date)],to=c(-1,1)),
tret=local({
i<-order(date)
scale2unit(cumsum((scrub(market_value[i])*scrub(tret[i])/100)))
})
),keyby=ticker]
dfm <- melt(
df,
id.vars=c("date","bucket","ticker"),
measure.vars=list(
what=c("market_value","close","tret"),
pos=rep("security_units",3)
),
variable.name="select"
)
now plot looks better:
and finally:
require(ggplot2)
require(grid)
require(dplyr)
require(lubridate)
securities<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/exposure_securities.csv")
ptf<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/ptf.csv")[date>"2017-01-01"]
ptf$date<-as.Date(ptf$date,format="%Y-%m-%d")
i<-match(ptf$security,securities$exposure_security_external_id)
ptf$ticker<-gsub(" Equity","",securities$security_ticker[i])
scale2unit<-function(x)rescale(x,from=c(-1,1)*max(1,max(abs(scrub(x)))),to=c(-1,1))
df<-ptf[bucket=="DH_PAIR_42"][TRUE,.(
date=sort(date),
bucket=bucket,
security_units=scale2unit(scrub(security_units))[order(date)],
market_value=scale2unit(scrub(market_value))[order(date)],
close=rescale(close[order(date)],to=c(-1,1)),
tret=local({
i<-order(date)
scale2unit(cumsum((scrub(market_value[i])*scrub(tret[i])/100)))
})
),keyby=ticker]
dfm <- melt(
df,
id.vars=c("date","bucket","ticker"),
measure.vars=list(
what=c("market_value","close","tret"),
pos=rep("security_units",3)
),
variable.name="select"
)
g1 <- dfm %>% ggplot() +
ylim(-1.5, 1.5) +
geom_hline(aes(yintercept=0),size=0.25,color=rgb(0,0,0,0.5),show.legend=FALSE)+
geom_point(
mapping=aes(
x=date,
y=what,
group=interaction(ticker,select),
color=pos, #as.character(sign(position)),
size=1
),
show.legend=FALSE
) +
scale_colour_gradient2(
low = rgb(1,0,0,0.5),
mid = rgb(1,1,1,0.5),
high = rgb(0,1,0,0.5)
)+
geom_line(
mapping=aes(
x=date,
y=what,
group=interaction(ticker,select)
),
size=1,
color="black",
show.legend=FALSE
)+
scale_x_date(labels = date_format("%Y-%m-%d")) +
theme(
axis.text.x=element_text(angle=50,size=5,vjust=0.5),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
strip.text.y = element_text(size = 6,angle=90)
)
g2 <- g1+facet_grid(
cols=vars(select),
rows=vars(ticker),
labeller=labeller(select=c("1"="market_value","2"="close","3"="tret"))
)