satrapade / pairs

1 stars 1 forks source link

pair detail plot #32

Open nassuphis opened 6 years ago

nassuphis commented 6 years ago

image

nassuphis commented 6 years ago

to produce the above using ggplot2's facet_grid(), we need the postition information to be available for every (date,pair,stock,value) tuple.

if we have a long-format dataset, wei might convert it into wide as follows:

ext_ptf<-ptf[,
  c(
    list(stat=stat,value=value),
    data.table(matrix(value,nrow=1,dimnames=list(NULL,stat))[rep(1,length(stat)),])
  ),
  keyby=c("date","stock")
][order(date)]
nassuphis commented 6 years ago

and ptf is the result of melting 3 matrices: positions, pnl, tret:

all<-data.table(
  date=as.Date(rownames(positions),format="%Y-%m-%d"),
  e=apply(positions,2,function(x)rescale(x,from=c(-1,1)*max(abs(x)),c(-1,1))),
  p=apply(pnl,2,function(x)rescale(cumsum(x),from=c(-1,1)*max(abs(cumsum(x))),c(-1,1))),
  r=apply(tret,2,function(x)rescale(cumprod(1+x),c(-1,1)))
)

all_groups<-list(
  position=paste0("e.",colnames(positions)),
  pnl=paste0("p.",colnames(pnl)),
  price=paste0("r.",colnames(tret))
)

ptf<-melt(
  melt(data=all,id.vars="date",measure.vars=all_groups,variable.name = "stock"),
  id.vars=c("date","stock"),measure.vars=names(all_groups),variable.name = "stat"
)[,.(
  date=date,
  stock=colnames(positions)[as.integer(stock)],
  stat=stat,
  value=value
)]
nassuphis commented 6 years ago

data.table will concatenate matrices and prefix column names with names assignment in function call:

all<-data.table(
  date=as.Date(rownames(positions),format="%Y-%m-%d"),
  e=apply(positions,2,function(x)rescale(x,from=c(-1,1)*max(abs(x)),c(-1,1))),
  p=apply(pnl,2,function(x)rescale(cumsum(x),from=c(-1,1)*max(abs(cumsum(x))),c(-1,1))),
  r=apply(tret,2,function(x)rescale(cumprod(1+x),c(-1,1)))
)

all three matrices have the same column names, we get these prefixed with e, p, r in the above case.

note that re-scaling can only be done conveniently in matrix format. the reason for this is we can apply column-wise vector operations to scale numbers well.

nassuphis commented 6 years ago

Wide format: image

vs Long format: image

more here:

wide vs long format discussion

nassuphis commented 6 years ago

and melt will convert column names into factors:

ptf<-melt(
  melt(data=all,id.vars="date",measure.vars=all_groups,variable.name = "stock"),
  id.vars=c("date","stock"),measure.vars=names(all_groups),variable.name = "stat"
)[,.(
  date=date,
  stock=colnames(positions)[as.integer(stock)],
  stat=stat,
  value=value
)]
nassuphis commented 6 years ago

a better way would be to have 2 groups, one over all factors and another with a repetition of the common factor we want to see:

dt<-data.table(
  x=paste0("x",1:100),
  y=paste0("y",1:100),
  w=runif(100),
  v=runif(100),
  u=runif(100)
)

m1<-melt(dt,id.vars=c("x","y"),measure.vars=list(set1=c("w","v"),set2=c("u","u")))
nassuphis commented 6 years ago
> m1<-melt(dt,id.vars=c("x","y"),measure.vars=list(set1=c("w","v"),set2=c("u","u")))
> m1
        x    y variable       set1      set2
  1:   x1   y1        1 0.88648912 0.8678249
  2:   x2   y2        1 0.49355149 0.5756011
  3:   x3   y3        1 0.63612888 0.5217135
  4:   x4   y4        1 0.04769907 0.2821188
  5:   x5   y5        1 0.65018871 0.1497894
 ---                                        
196:  x96  y96        2 0.10684100 0.6873902
197:  x97  y97        2 0.67042024 0.2436080
198:  x98  y98        2 0.09169133 0.5508345
199:  x99  y99        2 0.51975673 0.3041294
200: x100 y100        2 0.18278881 0.1893799
nassuphis commented 6 years ago
> m1[x=="x1" & y=="y1"]
    x  y variable      set1      set2
1: x1 y1        1 0.8864891 0.8678249
2: x1 y1        2 0.6699675 0.8678249
nassuphis commented 6 years ago

for the portfolio data:

> names(ptf)
 [1] "date"           "bucket"         "security"       "security_units" "market_value"   "open"          
 [7] "close"          "high"           "low"            "tret"           "ticker"        

melt used to get stat and a common position across all:

m2<-melt(ptf,id.vars=c("date","bucket","ticker"),measure.vars=list(
  stat=c("market_value","close","tret"),
   pos=rep("security_units",3)
))
> m2
              date      bucket  ticker variable          stat     pos
     1: 2017-01-02 ABC_PAIR_03    SXNP        1 -5.046934e+05   -1355
     2: 2017-01-02 ABC_PAIR_06 CTEC LN        1 -7.644911e+05 -327125
     3: 2017-01-02 ABC_PAIR_07      MC        1 -1.107384e+06   -7200
     4: 2017-01-02 ABC_PAIR_07      RI        1  6.928331e+05    7800
     5: 2017-01-02  AC_PAIR_53    RDSA        1  1.008752e+07  452609
    ---                                                              
401627: 2018-05-01  MC_PAIR_53    ELTA        3  1.144600e-02  163672
401628: 2018-05-01  MC_PAIR_79    BDEV        3  1.182800e-02  -75000
401629: 2018-05-01  MC_PAIR_79     CSP        3  1.336300e-02  319391
401630: 2018-05-01  MC_PAIR_79     MCS        3 -2.216000e-03 -192001
401631: 2018-05-01  MC_PAIR_96     RWI        3  9.777000e-03  431406
nassuphis commented 6 years ago
securities<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/exposure_securities.csv")

ptf<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/ptf.csv")[date>"2017-01-01"]
ptf$date<-as.Date(ptf$date,format="%Y-%m-%d")
i<-match(ptf$security,securities$exposure_security_external_id)
ptf$ticker<-gsub(" Equity","",securities$security_ticker[i])

scale2unit<-function(x)rescale(x,from=c(-1,1)*max(1,max(abs(scrub(x)))),to=c(-1,1))
nassuphis commented 6 years ago

and the plot would look like this:

g1 <- melt(
        ptf[bucket=="GJ_PAIR_19"][TRUE,.(
          date=date,
          bucket=bucket,
          security_units=security_units,
          market_value=scale2unit(market_value),
          close=scale2unit(close),
          tret=scale2unit(cumsum(market_value*tret))
        ),keyby=ticker],
        id.vars=c("date","bucket","ticker"),
        measure.vars=list(
          what=c("market_value","close","tret"),
          pos=rep("security_units",3)
        ),
        variable.name="select"
      ) %>% ggplot() +
      ylim(-1.5, 1.5) +
      geom_line(
        mapping=aes(
          x=date,
          y=what,
          group=interaction(ticker,select),
          color=ticker
        ),
        size=1,
        show.legend=FALSE
      )+
      scale_x_date(labels = date_format("%Y-%m-%d")) +
      theme(
        axis.text.x=element_text(angle=50,size=5,vjust=0.5),
        axis.ticks.y = element_blank(),    
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        strip.text.y = element_text(size = 6,angle=90)
      )+
      facet_grid(
        cols=vars(select),
        rows=vars(ticker),
        labeller=labeller(
          select=setNames(
            c("market_value","close","tret"),
            order(c("market_value","close","tret"))
          )
        )
      )
nassuphis commented 6 years ago

image

nassuphis commented 6 years ago

set to

securities<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/exposure_securities.csv")

ptf<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/ptf.csv")[date>"2017-01-01"]
ptf$date<-as.Date(ptf$date,format="%Y-%m-%d")
i<-match(ptf$security,securities$exposure_security_external_id)
ptf$ticker<-gsub(" Equity","",securities$security_ticker[i])

scale2unit<-function(x)rescale(x,from=c(-1,1)*max(1,max(abs(scrub(x)))),to=c(-1,1))

df<-ptf[bucket=="GJ_PAIR_19"][TRUE,.(
  date=date,
  bucket=bucket,
  security_units=scrub(security_units),
  market_value=scale2unit(scrub(market_value)),
  close=scale2unit(close),
  tret=scale2unit(cumsum(scrub(market_value)*scrub(tret)))
),keyby=ticker]

dfm <- melt(
        df,
        id.vars=c("date","bucket","ticker"),
        measure.vars=list(
          what=c("market_value","close","tret"),
          pos=rep("security_units",3)
        ),
        variable.name="select"
)
nassuphis commented 6 years ago

this data: dfm.zip

and this code:

g1 <-  dfm %>% ggplot() +
      ylim(-1.5, 1.5) +
      geom_line(
        mapping=aes(
          x=date,
          y=what,
          group=interaction(ticker,select),
          color=ticker
        ),
        size=1,
        show.legend=FALSE
      )+
      scale_x_date(labels = date_format("%Y-%m-%d")) +
      theme(
        axis.text.x=element_text(angle=50,size=5,vjust=0.5),
        axis.ticks.y = element_blank(),    
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        strip.text.y = element_text(size = 6,angle=90)
      )+
      facet_grid(
        cols=vars(select),
        rows=vars(ticker),
        labeller=labeller(
          select=setNames(
            c("market_value","close","tret"),
            order(c("market_value","close","tret"))
          )
        )
      )

produces this plot: image

nassuphis commented 6 years ago

there are some issues with this: the DAX data looks wrong: image

nassuphis commented 6 years ago

the market value for DAX is positive on the plot but this is a a hedge, and hence short, so -ve market value.

nassuphis commented 6 years ago

the issue was "select" ids are in the sequence they were specified, so

g1+facet_grid(
        cols=vars(select),
        rows=vars(ticker),
        labeller=labeller(
          select=setNames(
            c("market_value","close","tret"),
            c("1","2","3")
          )
        )
      )
nassuphis commented 6 years ago

and ptf contents are not sorted by date, so cummulative sums dont work.

df<-ptf[bucket=="GJ_PAIR_19"][TRUE,.(
  date=sort(date),
  bucket=bucket,
  security_units=scrub(security_units)[order(date)],
  market_value=scale2unit(scrub(market_value))[order(date)],
  close=rescale(close[order(date)],to=c(-1,1)),
  tret=local({
    i<-order(date)
    scale2unit(cumsum((scrub(market_value[i])*scrub(tret[i])/100)))
  })
),keyby=ticker]

dfm <- melt(
        df,
        id.vars=c("date","bucket","ticker"),
        measure.vars=list(
          what=c("market_value","close","tret"),
          pos=rep("security_units",3)
        ),
        variable.name="select"
)
nassuphis commented 6 years ago

fixed data dfm.zip

nassuphis commented 6 years ago

now plot looks better:

image

nassuphis commented 6 years ago

and finally:

require(ggplot2)
require(grid)
require(dplyr)
require(lubridate)

securities<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/exposure_securities.csv")

ptf<-fread("N:/Depts/Share/UK Alpha Team/Analytics/db_cache/ptf.csv")[date>"2017-01-01"]
ptf$date<-as.Date(ptf$date,format="%Y-%m-%d")
i<-match(ptf$security,securities$exposure_security_external_id)
ptf$ticker<-gsub(" Equity","",securities$security_ticker[i])

scale2unit<-function(x)rescale(x,from=c(-1,1)*max(1,max(abs(scrub(x)))),to=c(-1,1))

df<-ptf[bucket=="DH_PAIR_42"][TRUE,.(
  date=sort(date),
  bucket=bucket,
  security_units=scale2unit(scrub(security_units))[order(date)],
  market_value=scale2unit(scrub(market_value))[order(date)],
  close=rescale(close[order(date)],to=c(-1,1)),
  tret=local({
    i<-order(date)
    scale2unit(cumsum((scrub(market_value[i])*scrub(tret[i])/100)))
  })
),keyby=ticker]

dfm <- melt(
        df,
        id.vars=c("date","bucket","ticker"),
        measure.vars=list(
          what=c("market_value","close","tret"),
          pos=rep("security_units",3)
        ),
        variable.name="select"
)

g1 <-  dfm %>% ggplot() +
      ylim(-1.5, 1.5) +
      geom_hline(aes(yintercept=0),size=0.25,color=rgb(0,0,0,0.5),show.legend=FALSE)+
      geom_point(
        mapping=aes(
          x=date,
          y=what,
          group=interaction(ticker,select),
          color=pos, #as.character(sign(position)),
          size=1
        ),
        show.legend=FALSE
      ) +  
      scale_colour_gradient2(
        low = rgb(1,0,0,0.5), 
        mid = rgb(1,1,1,0.5), 
        high = rgb(0,1,0,0.5)
      )+
      geom_line(
        mapping=aes(
          x=date,
          y=what,
          group=interaction(ticker,select)
        ),
        size=1,
        color="black",
        show.legend=FALSE
      )+
      scale_x_date(labels = date_format("%Y-%m-%d")) +
      theme(
        axis.text.x=element_text(angle=50,size=5,vjust=0.5),
        axis.ticks.y = element_blank(),    
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        strip.text.y = element_text(size = 6,angle=90)
      )

g2 <-   g1+facet_grid(
          cols=vars(select),
          rows=vars(ticker),
          labeller=labeller(select=c("1"="market_value","2"="close","3"="tret"))
        )
nassuphis commented 6 years ago

image