gosling-lang / gosling.js

Grammar of Scalable Linked Interactive Nucleotide Graphics
https://gosling.js.org
MIT License
166 stars 28 forks source link

Issue with Annotation Positioning Using Beddb in Gosling.js #1092

Open vineetver opened 4 days ago

vineetver commented 4 days ago

Hi, I'm encountering an issue while creating visualizations in Gosling.js using BED files processed through HiGlass. Specifically, the positions of the annotations in the visualization appear to be incorrect and out of order.

Problem Details

In the attached screenshot, a cCRE annotation with the following details:

Actual location: chr17:52497387-52497724
Visualized location: chr19:44907556-44907893
is being incorrectly displayed under chr19 instead of its actual genomic position on chr17.
Screenshot 2024-11-21 at 2 54 54 PM

Here’s what I’ve done:

  1. Aggregated the BED file:

    I used clodius to aggregate the BED file into a beddb format using the following command:

    clodius aggregate bedfile --delimiter $'\t' --chromsizes-filename ../hg38.chrom.sizes example.bed

  2. Ingested the BEDDB into the HiGlass server:

    python manage.py ingest_tileset --filename example.beddb --datatype bedlike --coordSystem hg38 --uid example-track --filetype beddb

My Gosling Spec:

Click me ```javascript const spec = { "arrangement": "vertical", "responsiveSize": { "width": true }, "views": [ { "arrangement": "vertical", "views": [ { "xDomain": {"chromosome": "chr19", "interval": [44906822, 44908822]}, "centerRadius": 0.1, "layout": "linear", "spacing": 0, "alignment": "stack", "tracks": [ { "alignment": "overlay", "title": "HiGlass", "data": { "url": "https://server.gosling-lang.org/api/v1/tileset_info/?d=gene-annotation", "type": "beddb", "genomicFields": [ {"index": 1, "name": "start"}, {"index": 2, "name": "end"} ], "valueFields": [ {"index": 5, "name": "strand", "type": "nominal"}, {"index": 3, "name": "name", "type": "nominal"} ], "exonIntervalFields": [ {"index": 12, "name": "start"}, {"index": 13, "name": "end"} ] }, "tracks": [ { "dataTransform": [ {"type": "filter", "field": "type", "oneOf": ["gene"]}, {"type": "filter", "field": "strand", "oneOf": ["+"]} ], "mark": "triangleRight", "x": {"field": "end", "type": "genomic", "axis": "top"}, "size": {"value": 15} }, { "dataTransform": [ {"type": "filter", "field": "type", "oneOf": ["gene"]} ], "mark": "text", "text": {"field": "name", "type": "nominal"}, "x": {"field": "start", "type": "genomic"}, "xe": {"field": "end", "type": "genomic"}, "style": {"dy": -15} }, { "dataTransform": [ {"type": "filter", "field": "type", "oneOf": ["gene"]}, {"type": "filter", "field": "strand", "oneOf": ["-"]} ], "mark": "triangleLeft", "x": {"field": "start", "type": "genomic"}, "size": {"value": 15}, "style": {"align": "right"} }, { "dataTransform": [ {"type": "filter", "field": "type", "oneOf": ["exon"]} ], "mark": "rect", "x": {"field": "start", "type": "genomic"}, "size": {"value": 15}, "xe": {"field": "end", "type": "genomic"} }, { "dataTransform": [ {"type": "filter", "field": "type", "oneOf": ["gene"]}, {"type": "filter", "field": "strand", "oneOf": ["+"]} ], "mark": "rule", "x": {"field": "start", "type": "genomic"}, "strokeWidth": {"value": 3}, "xe": {"field": "end", "type": "genomic"}, 
"style": {"linePattern": {"type": "triangleRight", "size": 5}} }, { "dataTransform": [ {"type": "filter", "field": "type", "oneOf": ["gene"]}, {"type": "filter", "field": "strand", "oneOf": ["-"]} ], "mark": "rule", "x": {"field": "start", "type": "genomic"}, "strokeWidth": {"value": 3}, "xe": {"field": "end", "type": "genomic"}, "style": {"linePattern": {"type": "triangleLeft", "size": 5}} } ], "row": {"field": "strand", "type": "nominal", "domain": ["+", "-"]}, "color": { "field": "strand", "type": "nominal", "domain": ["+", "-"], "range": ["#7585FF", "#FF8A85"] }, "visibility": [ { "operation": "less-than", "measure": "width", "threshold": "|xe-x|", "transitionPadding": 10, "target": "mark" } ], "opacity": {"value": 0.8}, "width": 350, "height": 100 }, { "alignment": "overlay", "title": "cCREs", "data": { "url": "https://higlass.genohub.org/api/v1/tileset_info/?d=ccre-updated-hg38", "type": "beddb", "genomicFields": [ { "index": 1, "name": "startpos" }, { "index": 2, "name": "endpos" } ], "valueFields": [ { "index": 1, "name": "start_position", "type": "nominal" }, { "index": 2, "name": "end_position", "type": "nominal" }, { "index": 0, "name": "chromosome", "type": "nominal" }, { "index": 3, "name": "elementId", "type": "nominal" }, { "index": 4, "name": "accession", "type": "nominal" }, { "index": 5, "name": "ccre", "type": "nominal" }, { "index": 6, "name": "ccre_full", "type": "nominal" } ] }, "dataTransform": [ { "type": "concat", "separator": "-", "newField": "region", "fields": ["chromosome", "start_position", "end_position"] } ], "tracks": [ { "mark": "point", "x": { "field": "startpos", "type": "genomic" }, "row": { "field": "ccre_full", "type": "nominal", "domain": [ "Promoter", "Proximal enhancer", "Distal enhancer", "Chromatin Accessible with CTCF", "Chromatin Accessible with H3K4me3", "Chromatin Accessible with TF", "Chromatin Accessible Only", "TF Only" ] }, "size": { "value": 8 }, "opacity": { "value": 0.8 } } ], "color": { "field": "ccre_full", 
"type": "nominal", "domain": [ "Promoter", "Proximal enhancer", "Distal enhancer", "Chromatin Accessible with CTCF", "Chromatin Accessible with H3K4me3", "Chromatin Accessible with TF", "Chromatin Accessible Only", "TF Only" ], "range": [ "red", "orange", "#ffbf00", "blue", "orange", "purple", "green", "pink" ], "legend": true }, "tooltip": [ { "field": "region", "type": "nominal", "alt": "Actual location" }, { "field": "startpos", "type": "genomic", "alt": "Start" }, { "field": "endpos", "type": "genomic", "alt": "End" }, { "field": "ccre_full", "type": "nominal", "alt": "cCRE Type" }, { "field": "accession", "type": "nominal", "alt": "Accession" }, { "field": "elementId", "type": "nominal", "alt": "Element ID" } ], "width": 900, "height": 180 } ] } ] } ] } ```

Questions

How does Gosling process BED files? Could there be an issue with how my BED file is processed or formatted?

Additional Steps for Accuracy: Are there any additional steps I should take (e.g., sorting, indexing) to ensure accurate visualization?

Reference Example: In the Semantic Lollipop Example, Gosling visualizes ClinVar data using .beddb.

Could you share: The process used to create the .beddb file for the example. Any configuration details specific to BED file processing.

Thank you for your guidance!

sehilyi commented 2 hours ago

Hi @vineetver, apologies for the delayed response.

Taking a quick look, I think unsorted chromosomes are likely the source of the issue. In the tileset info you shared (under chrom_names in https://higlass.genohub.org/api/v1/tileset_info/?d=ccre-updated-hg38), I see chrX appears before chr8.

Gosling attaches chromosomes end-to-end and calculates the absolute start position of each chromosome by summing the lengths of the preceding chromosomes (e.g., the start position of chr2 is the length of chr1). I suggest double-checking the order of chromosome names in ../hg38.chrom.sizes and example.bed.