Interactive Visualization of scRNA-seq Data Using D3.js

In this blog post, I will show how to use D3.js to build interactive visualizations of single-cell RNA sequencing (scRNA-seq) data.

About d3.js

D3.js is a JavaScript library for manipulating documents based on data. D3 helps you bring data to life using HTML, SVG, and CSS. D3’s emphasis on web standards gives you the full capabilities of modern browsers without tying yourself to a proprietary framework, combining powerful visualization components and a data-driven approach to DOM manipulation.

Download the latest version (6.7.0) here:

To link directly to the latest release, copy this snippet:

<script src="https://d3js.org/d3.v6.min.js"></script>

D3.js is able to visualize different types of data. Here is a gallery of visualizations using D3.

D3 start up

Drawing a graph using D3 is simple. In your HTML file, you can specify a div container to plot the chart.

 <div id="tsne"></div>

Then, we specify the parameters for the D3 chart

// Chart geometry. `margin` is the space reserved around the plotting area;
// `width` and `height` are the size of the plotting area itself.
// Each value gets its own declaration so that none of them leaks as an
// implicit global (which throws in strict mode / ES modules).
const margin = {
  top: 10,
  right: 30,
  bottom: 30,
  left: 60,
};
const width = 400;
const height = 400;

D3 can then bind to the div container and draw the chart inside it as follows:

// Create the <svg> element inside the #tsne container and an inner <g>
// shifted by the left/top margins. Note that `svg` ends up referring to that
// inner <g> (the result of the last .append), not to the <svg> element.
// The viewBox spans the same width and height as the width/height attributes
// so the drawing is not rescaled or clipped horizontally.
const svg = d3.select("#tsne")
  .append("svg")
  .attr("viewBox", [
    0,
    0,
    width + margin.left + margin.right,
    height + margin.top + margin.bottom,
  ])
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", `translate(${margin.left},${margin.top})`);

// Group that will hold one <path> per cell; the points are drawn as
// zero-length strokes with round caps, so no fill is needed.
const g = svg.append("g")
  .attr("fill", "none")
  .attr("stroke-linecap", "round");

Visualizing the t-SNE data

The t-SNE data can be prepared in the CSV format. The first column should be the barcode of the single cell. The second column should be the position of the x-coordinates, and the third column should be the position of the y-coordinates. Other attributes can be placed after the third column.

We use a CSVToArray function to convert the CSV text into a JavaScript array of rows.

/**
 * Parse a delimited string into an array of rows, each row being an array of
 * string fields.
 *
 * ref: http://stackoverflow.com/a/1293163/2343
 *
 * A field may be wrapped in double quotes; inside a quoted field two
 * consecutive double quotes stand for one literal quote, and delimiters or
 * line breaks are kept as part of the value. Rows are separated by \r\n, \n
 * or \r. A line break at the very end of the input produces a final row that
 * holds a single empty string.
 *
 * @param {string} strData - The delimited text to parse.
 * @param {string} [strDelimiter=","] - Field delimiter (a single character).
 *   Any falsy value falls back to the comma.
 * @returns {string[][]} The parsed rows.
 */
function CSVToArray(strData, strDelimiter) {
  const delimiter = strDelimiter || ",";

  // Escape only characters that are special in a regular expression (or in a
  // character class), so that letter delimiters are not turned into escape
  // sequences such as \t or \d.
  const escapedDelimiter = delimiter.replace(/[.*+?^${}()|[\]\\-]/g, "\\$&");

  const pattern = new RegExp(
    // Group 1: what precedes the field (delimiter, line break or start of input).
    "(" + escapedDelimiter + "|\\r?\\n|\\r|^)" +
      // Group 2: contents of a quoted field.
      "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
      // Group 3: contents of an unquoted field.
      "([^\"" + escapedDelimiter + "\\r\\n]*))",
    "g"
  );

  // Start with one empty row so the first field has somewhere to go.
  const rows = [[]];

  let match;
  while ((match = pattern.exec(strData)) !== null) {
    const matchedDelimiter = match[1];

    // Input that begins with the delimiter has an empty first field which the
    // pattern itself does not capture.
    if (match.index === 0 && matchedDelimiter === delimiter) {
      rows[0].push("");
    }

    // A non-empty separator that is not the field delimiter is a line break:
    // begin a new row.
    if (matchedDelimiter.length > 0 && matchedDelimiter !== delimiter) {
      rows.push([]);
    }

    // Compare against undefined rather than testing truthiness, so an empty
    // quoted field ("") is kept as an empty string.
    const value =
      match[2] !== undefined ? match[2].replace(/""/g, "\"") : match[3];
    rows[rows.length - 1].push(value);

    // exec() does not advance lastIndex after a zero-length match; step past
    // it manually so the loop always terminates.
    if (match[0].length === 0) {
      pattern.lastIndex += 1;
    }
  }

  return rows;
}

I have provided a sample dataset, which you can fetch from your JavaScript code using XMLHttpRequest.

// Parsed CSV rows (an array of arrays). It stays undefined until the request
// below has completed, so any code that reads `data` must run after onload.
let data;

const xhr = new XMLHttpRequest();
xhr.open(
  "GET",
  "https://raw.githubusercontent.com/Beiusxzw/Beiusxzw.github.io/master/src/tSNE.csv",
  true
);

// Attach the handlers before sending the request.
xhr.onload = function () {
  // onload also fires for HTTP error responses; only parse successful ones.
  if (xhr.status >= 200 && xhr.status < 300) {
    data = CSVToArray(xhr.responseText);
  } else {
    console.error(`Failed to load t-SNE data: HTTP ${xhr.status}`);
  }
};
xhr.onerror = function () {
  console.error("Failed to load t-SNE data: network error");
};

xhr.send();

Now we can plot the t-SNE data. The color of each point can be determined by the cell type, which is stored in the fourth column (index 3) of the sample data.

// Include <script src="https://d3js.org/d3-scale-chromatic.v1.min.js"></script> in your code!
// Ordinal color scale for cell types: the integer in column index 3 of each
// row is mapped to one of the ten colors of d3.schemeCategory10.
const cell_type_scale_ordinal = d3.scaleOrdinal()
  .domain(data.map((d) => Number.parseInt(d[3], 10)))
  .range(d3.schemeCategory10);

If you want to plot the expression level of genes, you may try

// Sequential color scale for expression values: 0 maps to white and 5 to blue.
const z = d3.scaleSequential()
  .domain([0, 5])
  .range(["white", "blue"]);

We also need to calculate the maximum coordinates, and build the axis for the chart:

// t-SNE coordinates of every row: x is column index 1, y is column index 2.
const tsne_x = data.map((d) => parseFloat(d[1]));
const tsne_y = data.map((d) => parseFloat(d[2]));

// Largest absolute value in a list, or 0 for an empty list. Values that do
// not parse as numbers (NaN) never compare greater and are skipped.
const maxAbs = (values) =>
  values.reduce((best, v) => (Math.abs(v) > best ? Math.abs(v) : best), 0);

const tsne_x_max = maxAbs(tsne_x);
const tsne_y_max = maxAbs(tsne_y);

// Both scales use a domain symmetric around 0 and 20% wider than the data.
const x = d3.scaleLinear()
  .domain([-tsne_x_max * 1.2, tsne_x_max * 1.2])
  .range([0, width]);

// The y range is flipped because SVG y coordinates grow downwards.
const y = d3.scaleLinear()
  .domain([-tsne_y_max * 1.2, tsne_y_max * 1.2])
  .range([height, 0]);

Here, x and y are the scales that map t-SNE coordinates to pixel positions in the chart.

Then, we bind the data to the svg element:

// Draw one <path> per row. Each path is a zero-length horizontal line
// ("h0") at the cell's position; together with the round line caps set on
// the parent group it renders as a dot whose diameter is the stroke width.
g.selectAll("path")
  .data(data)
  .join("path")
  .attr("d", (d) => `M${x(parseFloat(d[1]))},${y(parseFloat(d[2]))}h0`)
  // Color by cell type. The key is parsed to an integer so that it matches
  // the integer domain of cell_type_scale_ordinal.
  // To color by gene expression (last column) instead, use:
  //   .attr("stroke", (d) => z(parseFloat(d[d.length - 1])))
  .attr("stroke", (d) => cell_type_scale_ordinal(Number.parseInt(d[3], 10)))
  .attr("stroke-width", 4);

 

The above code will result in:

Color by cell type

Color by gene expression

A complete HTML file for the above example is available here.

Adding interactive modules

Single cell on hover

We can define two functions, mouseover and mouseleave, to display information about a single cell when the pointer hovers over its point in the t-SNE plot.

// Enlarge a point while the pointer is over it and restore it afterwards.
// With D3 v6 a listener receives (event, datum); both are forwarded
// unchanged to the user-defined mouseover / mouseleave callbacks.
g.selectAll("path")
  .on("mouseover", function (event, datum) {
    const point = d3.select(this);
    point
      .transition()
      .duration("100")
      .attr("stroke-width", 9);
    mouseover(event, datum);
  })
  .on("mouseout", function (event, datum) {
    const point = d3.select(this);
    point
      .transition()
      .duration("200")
      .attr("stroke-width", 4);
    mouseleave(event, datum);
  });

For example, you can add a new div container in your HTML file as a tooltip

<style>
  /* Floating tooltip box; pointer-events is disabled so the box never
     intercepts the mouse events of the points underneath it. */
  div.tooltip {
    position: absolute;
    text-align: center;
    width: max-content;
    height: min-content;
    padding: 2px;
    font: 12px sans-serif;
    background: lightsteelblue;
    border: 0px;
    border-radius: 8px;
    pointer-events: none;
  }
</style>
<div class="tooltip" id="tooltip"></div>

and bind to that container using d3

// Append the tooltip body to the #tooltip container.
const tooltip = d3.select("#tooltip")
  .append("div")
  .style("opacity", 1)
  // All classes are set in a single call: each .attr("class", …) replaces
  // the whole attribute, so separate calls would keep only the last class.
  .attr("class", "tooltip card row")
  .style("background-color", "rgba(0, 0, 0, 0.0)")
  .style("padding", "10px");

You can define custom mouseover and mouseout handlers to enable interactive visualization:

// Show the tooltip next to the pointer while it is over a point, and fade
// it out again when the pointer leaves. With D3 v6 a listener receives
// (event, datum): the pointer position comes from the event, and the row's
// first column is shown as the tooltip text.
g.selectAll("path")
  .on("mouseover", function (event, d) {
    d3.select(this).attr("stroke-width", 6);
    tooltip.transition()
      .duration(200)
      .style("opacity", 0.9);
    tooltip
      // .text() rather than .html(): the value comes from the downloaded
      // CSV and must not be interpreted as markup.
      .text(d[0])
      .style("left", `${event.clientX}px`)
      .style("top", `${event.clientY}px`);
  })
  .on("mouseout", function () {
    d3.select(this).attr("stroke-width", 4);
    tooltip.transition()
      .duration(500)
      .style("opacity", 0);
  });

 

Place your mouse on the scatter point to get information of the single cell.

Zooming

We may implement two kinds of zooming in the scatterplot. The first is an automated zooming by clusters in the scatterplot. We will define a zoom function and an update function to enable zooming in d3.

/**
 * Zoom event handler: applies the event's transform to the point group and
 * redraws both axes on correspondingly rescaled scales.
 *
 * @param {object} event - Zoom event; only its `transform` property is read.
 */
function zoomed(event) {
  const t = event.transform;
  // Divide by the zoom factor so the stroke width set here scales inversely
  // with the zoom level.
  const strokeWidth = 7 / t.k;
  g.attr("transform", t).attr("stroke-width", strokeWidth);

  const zoomedX = t.rescaleX(x);
  gx.call(xAxis, zoomedX);

  const zoomedY = t.rescaleY(y);
  gy.call(yAxis, zoomedY);
}

// Zoom behavior whose "zoom" events are handled by zoomed().
const zoom = d3.zoom()
  .on("zoom", zoomed);

// The chart is the DOM node behind `svg`, extended with an update() method
// that animates to the given zoom transform over 1.5 seconds.
const chart = Object.assign(svg.node(), {
  update(transform) {
    svg.transition()
      .duration(1500)
      .call(zoom.transform, transform);
  },
});

/**
 * Convenience wrapper around chart.update().
 *
 * @param {object} transform - Zoom transform to animate to.
 */
function update(transform) {
  chart.update(transform);
}

// One zoom transform per cell type, sorted by cell-type key. The column
// indices match the rest of the code: index 1 = x, index 2 = y,
// index 3 = cell type (index 0 holds the barcode).
const group_transforms = d3
  .groups(data, (d) => Number.parseInt(d[3], 10))
  .map(([key, cells]) => {
    // Pixel-space bounding box of the group. The y scale is inverted, so the
    // data minimum maps to the larger pixel value (y1).
    const [x0, x1] = d3.extent(cells, (d) => parseFloat(d[1])).map(x);
    const [y1, y0] = d3.extent(cells, (d) => parseFloat(d[2])).map(y);

    // Clamp the box to at least one pixel so that a group consisting of a
    // single point does not produce an infinite zoom factor.
    const spanX = Math.max(Math.abs(x1 - x0), 1);
    const spanY = Math.max(Math.abs(y1 - y0), 1);

    // Fit the box into the chart, zooming less aggressively for very small
    // groups (narrower than 10 pixels in either direction).
    const fit = Math.min(width / spanX, height / spanY);
    const k = (spanX < 10 || spanY < 10 ? 0.1 : 0.5) * fit;

    // Translate so that the centre of the box lands in the chart centre.
    const tx = (width - k * (x0 + x1)) / 2;
    const ty = (height - k * (y0 + y1)) / 2;
    return { key, value: d3.zoomIdentity.translate(tx, ty).scale(k) };
  })
  .sort((a, b) => a.key - b.key);

// Every entry is a zoom transform that can be passed straight to
// chart.update(): index 0 is the un-zoomed overview, followed by one
// transform per cell type.
const transforms = [
  d3.zoomIdentity,
  ...group_transforms.map((d) => d.value),
];
And finally we can call chart.update(transforms[i]); to enable zooming.

Violin plot using d3

We can also draw a violin plot using D3.js. The input data is called hist in the following sample code. hist is an array that stores, for each cell type, the binned expression values that determine the width of the violin; it can be computed from the data loaded earlier.


// Histogram bins of the expression values (last column) per cell type
// (column index 3). Only values greater than 0 are binned. Each entry is
// { key: <cell type>, value: <bins> }, and the bins array additionally
// carries the cluster id looked up in cluster_map_reverse.
const hist = Array.from(
  d3.rollup(
    data,
    (rows) => rows.map((d) => parseFloat(d[d.length - 1])),
    (d) => d[3]
  ),
  ([key, values]) => {
    const bin = d3.bin();
    const bin_value = bin(values.filter((v) => v > 0));
    bin_value.cluster_id = cluster_map_reverse[key];
    return { key, value: bin_value };
  }
);

// Create the <svg> for the violin plot inside #my-violin and keep a
// reference to an inner <g> shifted by the left/top margins.
// NOTE(review): margin-left is given as a unitless number; CSS lengths
// normally require a unit, so this style is probably ignored — confirm intent.
var violin = d3
  .select("#my-violin")
  .append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .style("margin-left", margin.left)
  .append("g")
  .attr("transform", `translate(${margin.left},${margin.top})`);

// Draw one violin per entry of hist: a <g> moved to the horizontal position
// of its group, containing a single area path built from the group's bins.
violin
  .selectAll("violin")
  .data(hist)
  .enter() // from here on: one element per group
  .append("g")
  // Shift each violin to its group's position on the x axis.
  .attr("transform", (group) => `translate(${box_x(group.key)} ,0)`)
  .append("path")
  .datum((group) => group.value) // from here on: the datum is the array of bins
  .style("stroke", "black")
  .style("stroke-width", "0.5")
  .style("fill", (bins) => cell_type_scale_ordinal(cluster_map[bins.cluster_id]))
  .style("opacity", "0.5")
  .attr(
    "d",
    d3.area()
      // The width at each bin is symmetric around the centre and
      // proportional to the number of values in the bin.
      .x0((bin) => xNum(-bin.length * 2))
      .x1((bin) => xNum(bin.length * 2))
      .y((bin) => box_y(bin.x0))
      // Smooths the outline to give the violin appearance; try d3.curveStep
      // to see the difference.
      .curve(d3.curveCatmullRom)
  );

This produces a violin plot in D3.

Axis labels of the rendered violin plot — numeric ticks: 0, 1, 2, 3, 4, 5; cell types: Naive CD4 T cells, Naive B cells, Th1 cells, Exhausted B cells, Switched memory B cells, Non-switched memory B cells, Follicular helper T cells, MAIT cells, Effector memory CD8 T cells, Th17 cells, Th1/Th17 cells, Vd2 gd T cells, Central memory CD8 T cells, Non-Vd2 gd T cells, Th2 cells, Terminal effector CD4 T cells, T regulatory cells, Plasmablasts, Terminal effector CD8 T cells.