MaayanLab / enrichr_issues

5 stars 3 forks source link

Error running Speedrichr with background in R #64

Open ycl6 opened 1 year ago

ycl6 commented 1 year ago

Hi all, I am trying the R code shared by @lachmann12 in https://github.com/MaayanLab/enrichr_issues/issues/11#issuecomment-1520203975, but I encounter an HTTP Status 500 – Internal Server Error.

In the code below, I am using the same genelist, backgroundgenes and backgroundType inputs that are provided in the Enrichr API documentation. The code uses the same method, URL and parameters as those shown in Enrichr's API documentation to make API requests. How can I make this work in R? Thanks.

    library(jsonlite)
    library(httr)

    # Root URL of the Speedrichr API; each endpoint name is appended to it below.
    baseurl <- "https://maayanlab.cloud/speedrichr/api/"

    # Upload gene set
    # Posts the gene symbols and a description to the addList endpoint and
    # parses the JSON reply into `data`.
    genelist <- c("PHF14","RBM3","MSL1","PHF21A","ARL10","INSR","JADE2","P2RX7","LINC00662","CCDC101",
                  "PPM1B","KANSL1L","CRYZL1","ANAPC16","TMCC1","CDH8","RBM11","CNPY2","HSPA1L","CUL2",
                  "PLBD2","LARP7","TECPR2","ZNF302","CUX1","MOB2","CYTH2","SEC22C","EIF4E3","ROBO2",
                  "ADAMTS9-AS2","CXXC1","LINC01314","ATF7","ATP5F1")
    description <- "Example gene list"

    ENRICHR_URL <- paste0(baseurl, "addList")
    # NOTE(review): `genelist` is a character vector of length > 1 used as a
    # single body field. The warning recorded below says only the first element
    # is kept, so presumably the full gene list is not uploaded - confirm.
    payload <- list(list = genelist, description = description)
    response <- POST(ENRICHR_URL, body = payload)
    #> Warning in charToRaw(enc2utf8(val)): argument should be a character vector of length 1
    #> all but the first element will be ignored
    data <- fromJSON(content(response, as = "text"))
    print(toJSON(data, pretty = TRUE))
    #> {
    #>   "userListId": [76095504],
    #>   "shortId": ["4892010"]
    #> }

    # Upload background
    # Posts the background gene symbols to the addbackground endpoint and parses
    # the JSON reply into `data_back`. The background includes the genes of
    # `genelist` (the PHF14 ... ATP5F1 run in the middle of the vector).
    backgroundgenes <- c("NSUN3","POLRMT","NLRX1","SFXN5","ZC3H12C","SLC25A39","ARSG","DEFB29","PCMTD2",
                         "ACAA1A","LRRC1","2810432D09RIK","SEPHS2","SAC3D1","TMLHE","LOC623451","TSR2",
                         "PLEKHA7","GYS2","ARHGEF12","HIBCH","LYRM2","ZBTB44","ENTPD5","RAB11FIP2","LIPT1",
                         "INTU","ANXA13","KLF12","SAT2","GAL3ST2","VAMP8","FKBPL","AQP11","TRAP1","PMPCB",
                         "TM7SF3","RBM39","BRI3","KDR","ZFP748","NAP1L1","DHRS1","LRRC56","WDR20A","STXBP2",
                         "KLF1","UFC1","CCDC16","9230114K14RIK","RWDD3","2610528K11RIK","ACO1","CABLES1",
                         "LOC100047214","YARS2","LYPLA1","KALRN","GYK","ZFP787","ZFP655","RABEPK","ZFP650",
                         "4732466D17RIK","EXOSC4","WDR42A","GPHN","2610528J11RIK","1110003E01RIK","MDH1",
                         "1200014M14RIK","AW209491","MUT","1700123L14RIK","2610036D13RIK","PHF14","RBM3",
                         "MSL1","PHF21A","ARL10","INSR","JADE2","P2RX7","LINC00662","CCDC101","PPM1B",
                         "KANSL1L","CRYZL1","ANAPC16","TMCC1","CDH8","RBM11","CNPY2","HSPA1L","CUL2",
                         "PLBD2","LARP7","TECPR2","ZNF302","CUX1","MOB2","CYTH2","SEC22C","EIF4E3","ROBO2",
                         "ADAMTS9-AS2","CXXC1","LINC01314","ATF7","ATP5F1","COX15","TMEM30A","NSMCE4A",
                         "TM2D2","RHBDD3","ATXN2","NFS1","3110001I20RIK","BC038156","C330002I19RIK",
                         "ZFYVE20","POLI","TOMM70A","LOC100047782","2410012H22RIK","RILP","A230062G08RIK",
                         "PTTG1IP","RAB1","AFAP1L1","LYRM5","2310026E23RIK","SLC7A6OS","MAT2B",
                         "4932438A13RIK","LRRC8A","SMO","NUPL2")

    ENRICHR_URL <- paste0(baseurl, "addbackground")
    # NOTE(review): as with the gene list, a length > 1 vector is used as one
    # body field; the warning recorded below says only the first element is
    # kept - confirm the background is not uploaded in full.
    payload <- list(background = backgroundgenes)
    response <- POST(ENRICHR_URL, body = payload)
    #> Warning in charToRaw(enc2utf8(val)): argument should be a character vector of length 1
    #> all but the first element will be ignored
    data_back <- fromJSON(content(response, as = "text"))
    print(toJSON(data_back, pretty = TRUE))
    #> {
    #>   "backgroundid": ["4722cf4"]
    #> }

    # Check available libraries
    # Fetches the listlibs endpoint, prints how many entries its "library"
    # element has, and checks that the library chosen for enrichment is present.
    ENRICHR_URL <- paste0(baseurl, "listlibs")
    response <- GET(ENRICHR_URL)
    data_libs <- fromJSON(content(response, as = "text"))

    print(length(data_libs[["library"]]))
    #> [1] 222

    # Library name sent later as the `backgroundType` field.
    library_name <- "ChEA_2022"
    print("ChEA_2022" %in% data_libs[["library"]])
    #> [1] TRUE

    # Get enrichment results
    # Builds the backgroundenrich request from the IDs returned by the two
    # uploads plus the chosen library name, then auto-prints the payload.
    ENRICHR_URL <- paste0(baseurl, "backgroundenrich")
    payload <- list(
      userListId = data$userListId,
      backgroundid = data_back$backgroundid,
      backgroundType = library_name
    )
    payload
    #> $userListId
    #> [1] 76095504
    #> 
    #> $backgroundid
    #> [1] "4722cf4"
    #> 
    #> $backgroundType
    #> [1] "ChEA_2022"

    # Send the enrichment request and auto-print the response object; the
    # recorded output below shows the server answering with Status 500.
    response <- POST(ENRICHR_URL, body = payload)
    response
    #> Response [https://maayanlab.cloud/speedrichr/api/backgroundenrich]
    #>   Date: 2023-06-11 13:42
    #>   Status: 500
    #>   Content-Type: text/html;charset=utf-8
    #>   Size: 841 B
    #> <!doctype html><html lang="en"><head><title>HTTP Status 500 – Internal Server...

    #data_ben <- fromJSON(content(response, as = "text"))
    #print(toJSON(data_ben, pretty = TRUE))

Created on 2023-06-11 with reprex v2.0.2

jeevangelista commented 1 year ago

Hi @ycl6, you need to concatenate your list:

# Collapse the gene symbols into one newline-separated string so the `list` field is a single value.
payload <- list(list = paste(genelist, collapse="\n"), description = description)
 response <- POST(ENRICHR_URL, body = payload)

and

    # Collapse the background symbols into one newline-separated string so the `background` field is a single value.
    payload <- list(background = paste(backgroundgenes, collapse="\n"))
     response <- POST(ENRICHR_URL, body = payload)

Let me know if this resolved your issue

ycl6 commented 1 year ago

Hi @jeevangelista

Thanks. For some reason I couldn't get addList and addbackground to work over the weekend when trying with the symbols concatenated; they always returned an error status code.

Now all three steps work, although in my test example some of the lines in the results contain Infinity, and numeric values such as Inf and NaN are not supported in JSON. So I replaced all the occurrences with a character value.

I wonder if this is something you will take care of in the Enrichr backend when returning the results via the API (i.e. to return these types of values as character), or shall I make an adjustment in my R code to convert them to character (like below) in order to use the fromJSON function to convert the JSON object to an R object? I suppose the downside of the first option is that regular users might not be aware of the wrong representation of Infinity in their R object if this is changed in Enrichr, but the incompatibility of special numeric values and JSON means the substitution needs to be done at the users' end.

    library(jsonlite)
    library(httr)

    baseurl <- "https://maayanlab.cloud/speedrichr/api/"

    # Upload gene set
    # Posts the gene symbols and a description to the addList endpoint and
    # parses the JSON reply into `data`.
    genelist <- c("PHF14","RBM3","MSL1","PHF21A","ARL10","INSR","JADE2","P2RX7","LINC00662","CCDC101",
                  "PPM1B","KANSL1L","CRYZL1","ANAPC16","TMCC1","CDH8","RBM11","CNPY2","HSPA1L","CUL2",
                  "PLBD2","LARP7","TECPR2","ZNF302","CUX1","MOB2","CYTH2","SEC22C","EIF4E3","ROBO2",
                  "ADAMTS9-AS2","CXXC1","LINC01314","ATF7","ATP5F1")
    description <- "Example gene list"

    ENRICHR_URL <- paste0(baseurl, "addList")
    # A body field must be a single string, so the symbols are collapsed into
    # one newline-separated value before posting.
    payload <- list(list = paste(genelist, collapse = "\n"), description = description)
    response <- POST(ENRICHR_URL, body = payload)
    # Stop with the HTTP status on failure rather than letting fromJSON() fail
    # later on a non-JSON error page.
    stop_for_status(response)
    # State the encoding explicitly so content() does not have to guess it.
    data <- fromJSON(content(response, as = "text", encoding = "UTF-8"))
    print(toJSON(data, pretty = TRUE))
    #> {
    #>   "userListId": [667152768],
    #>   "shortId": ["27c3f180"]
    #> }

    # Upload background
    # Background gene symbols for the enrichment test. The vector includes the
    # genes of `genelist` (the PHF14 ... ATP5F1 run in the middle).
    backgroundgenes <- c("NSUN3","POLRMT","NLRX1","SFXN5","ZC3H12C","SLC25A39","ARSG","DEFB29","PCMTD2",
                         "ACAA1A","LRRC1","2810432D09RIK","SEPHS2","SAC3D1","TMLHE","LOC623451","TSR2",
                         "PLEKHA7","GYS2","ARHGEF12","HIBCH","LYRM2","ZBTB44","ENTPD5","RAB11FIP2","LIPT1",
                         "INTU","ANXA13","KLF12","SAT2","GAL3ST2","VAMP8","FKBPL","AQP11","TRAP1","PMPCB",
                         "TM7SF3","RBM39","BRI3","KDR","ZFP748","NAP1L1","DHRS1","LRRC56","WDR20A","STXBP2",
                         "KLF1","UFC1","CCDC16","9230114K14RIK","RWDD3","2610528K11RIK","ACO1","CABLES1",
                         "LOC100047214","YARS2","LYPLA1","KALRN","GYK","ZFP787","ZFP655","RABEPK","ZFP650",
                         "4732466D17RIK","EXOSC4","WDR42A","GPHN","2610528J11RIK","1110003E01RIK","MDH1",
                         "1200014M14RIK","AW209491","MUT","1700123L14RIK","2610036D13RIK","PHF14","RBM3",
                         "MSL1","PHF21A","ARL10","INSR","JADE2","P2RX7","LINC00662","CCDC101","PPM1B",
                         "KANSL1L","CRYZL1","ANAPC16","TMCC1","CDH8","RBM11","CNPY2","HSPA1L","CUL2",
                         "PLBD2","LARP7","TECPR2","ZNF302","CUX1","MOB2","CYTH2","SEC22C","EIF4E3","ROBO2",
                         "ADAMTS9-AS2","CXXC1","LINC01314","ATF7","ATP5F1","COX15","TMEM30A","NSMCE4A",
                         "TM2D2","RHBDD3","ATXN2","NFS1","3110001I20RIK","BC038156","C330002I19RIK",
                         "ZFYVE20","POLI","TOMM70A","LOC100047782","2410012H22RIK","RILP","A230062G08RIK",
                         "PTTG1IP","RAB1","AFAP1L1","LYRM5","2310026E23RIK","SLC7A6OS","MAT2B",
                         "4932438A13RIK","LRRC8A","SMO","NUPL2")

    # Post the background to the addbackground endpoint and parse the JSON
    # reply into `data_back`.
    ENRICHR_URL <- paste0(baseurl, "addbackground")
    # A body field must be a single string, so the symbols are collapsed into
    # one newline-separated value before posting.
    payload <- list(background = paste(backgroundgenes, collapse = "\n"))
    response <- POST(ENRICHR_URL, body = payload)
    # Stop with the HTTP status on failure rather than letting fromJSON() fail
    # later on a non-JSON error page.
    stop_for_status(response)
    # State the encoding explicitly so content() does not have to guess it.
    data_back <- fromJSON(content(response, as = "text", encoding = "UTF-8"))
    print(toJSON(data_back, pretty = TRUE))
    #> {
    #>   "backgroundid": ["5e9d8561"]
    #> }

    # Check available libraries
    # Fetches the listlibs endpoint, prints how many entries its "library"
    # element has, and checks that the library chosen for enrichment is present.
    ENRICHR_URL <- paste0(baseurl, "listlibs")
    response <- GET(ENRICHR_URL)
    # Stop with the HTTP status on failure instead of parsing an error page.
    stop_for_status(response)
    data_libs <- fromJSON(content(response, as = "text", encoding = "UTF-8"))

    print(length(data_libs[["library"]]))
    #> [1] 222

    # Library name sent later as the `backgroundType` field. The membership
    # check uses the variable so it cannot drift from the value actually sent.
    library_name <- "ChEA_2022"
    print(library_name %in% data_libs[["library"]])
    #> [1] TRUE

    # Get enrichment results
    # Assemble the backgroundenrich request fields from the IDs returned by the
    # two uploads plus the chosen library name, then display them.
    ENRICHR_URL <- paste0(baseurl, "backgroundenrich")
    payload <- list(
      userListId = data[["userListId"]],
      backgroundid = data_back[["backgroundid"]],
      backgroundType = library_name
    )
    print(payload)
    #> $userListId
    #> [1] 667152768
    #> 
    #> $backgroundid
    #> [1] "5e9d8561"
    #> 
    #> $backgroundType
    #> [1] "ChEA_2022"

    response <- POST(ENRICHR_URL, body = payload)
    # Surface HTTP failures (e.g. a 500 error page) before attempting to parse.
    stop_for_status(response)

    # Infinity and NaN are not permitted in JSON
    # The response body contains bare Infinity tokens (and possibly -Infinity
    # or NaN), so calling fromJSON() on the raw text fails with
    # "lexical error: invalid char in json text".
    #
    # Quote those tokens before parsing. The lookarounds only match a whole
    # token that sits where a JSON value can appear: after "[", ",", ":" or
    # whitespace, and before ",", "]" or "}". That covers the last element of
    # an array and a leading minus sign, and it narrows (but cannot fully rule
    # out) accidental matches inside string values such as term names.
    json_text <- content(response, as = "text", encoding = "UTF-8")
    json_text <- gsub("(?<=[\\[,:\\s])(-?)Infinity(?=\\s*[,\\]}])", "\"\\1Inf\"",
                      json_text, perl = TRUE)
    json_text <- gsub("(?<=[\\[,:\\s])NaN(?=\\s*[,\\]}])", "\"NaN\"",
                      json_text, perl = TRUE)
    data_ben <- fromJSON(json_text)

    # Print hits 5 to 10 of the ChEA_2022 results; in the output below, hits
    # 5 to 7 carry the substituted "Inf" values and hits 8 to 10 are numeric.
    print(toJSON(data_ben[['ChEA_2022']][5:10], pretty = TRUE))
    #> [
    #>   [
    #>     [5],
    #>     ["GLI1 17442700 ChIP-ChIP MESCs Mouse"],
    #>     [0.0153],
    #>     ["Inf"],
    #>     ["Inf"],
    #>     ["ROBO2", "PHF21A", "CDH8"],
    #>     [0.9317],
    #>     [0],
    #>     [0]
    #>   ],
    #>   [
    #>     [6],
    #>     ["BRD4 28847988 ChIP-Seq BCBL1 Human Blood Lymphoma"],
    #>     [0.0153],
    #>     ["Inf"],
    #>     ["Inf"],
    #>     ["CUX1", "PHF21A", "MSL1"],
    #>     [0.9317],
    #>     [0],
    #>     [0]
    #>   ],
    #>   [
    #>     [7],
    #>     ["POU2F1 35413990 ChIP-Seq Human ARposCRProstateCancer"],
    #>     [0.0153],
    #>     ["Inf"],
    #>     ["Inf"],
    #>     ["ANAPC16", "CRYZL1", "KANSL1L"],
    #>     [0.9317],
    #>     [0],
    #>     [0]
    #>   ],
    #>   [
    #>     [8],
    #>     ["FOXH1 21741376 ChIP-Seq EPCs Human"],
    #>     [0.0172],
    #>     [5.1207],
    #>     [20.8084],
    #>     ["RBM3", "ARL10", "CUX1", "CXXC1", "TMCC1", "RBM11"],
    #>     [0.9317],
    #>     [0],
    #>     [0]
    #>   ],
    #>   [
    #>     [9],
    #>     ["SMC3 22415368 ChIP-Seq MEFs Mouse"],
    #>     [0.0251],
    #>     [5.5556],
    #>     [20.4665],
    #>     ["ROBO2", "EIF4E3", "ANAPC16", "TMCC1", "PLBD2"],
    #>     [0.9317],
    #>     [0],
    #>     [0]
    #>   ],
    #>   [
    #>     [10],
    #>     ["OCT1 27270436 Chip-Seq PROSTATE Human"],
    #>     [0.0251],
    #>     [5.5556],
    #>     [20.4665],
    #>     ["P2RX7", "HSPA1L", "TMCC1", "TECPR2", "SEC22C"],
    #>     [0.9317],
    #>     [0],
    #>     [0]
    #>   ]
    #> ]

Created on 2023-06-12 with reprex v2.0.2