prabushitha / gremlin-visualizer

visualize a graph network corresponding to a gremlin query
MIT License
233 stars 72 forks source link

Cosmos DB Graph proxy server #10

Open svdoever opened 4 years ago

svdoever commented 4 years ago

Hi @prabushitha, thanks for the great work on the visualizer. I had some issues with Microsoft Cosmos DB, because of the executed query ${query}${nodeLimitQuery}.dedup().as('node').project('id', 'label', 'properties', 'edges').by(__.id()).by(__.label()).by(__.valueMap().by(__.unfold())).by(__.outE().project('id', 'from', 'to', 'label', 'properties').by(__.id()).by(__.select('node').id()).by(__.inV().id()).by(__.label()).by(__.valueMap().by(__.unfold())).fold()).

It contains Gremlin constructs not supported by the Cosmos DB Graph implementation of Gremlin: by(__.valueMap().by(__.unfold())).

I created a version that works correctly for Cosmos DB Graph, in which I split the query into three different queries to get it working. I also added an additional endpoint to the service to request the number of incoming and outgoing edges of a node. Because Cosmos DB Graph exposes an authenticated endpoint, I configure the proxy server with an included config.js file:

config.js:

// Connection settings for the proxy server; proxy-server.js loads this module via require('./config').
var config = {}

// Gremlin websocket endpoint; replace <yourservername> with your own value.
config.endpoint = "wss://<yourservername>.gremlin.cosmosdb.azure.com:443/gremlin";
// Primary key; proxy-server.js passes it as the second argument of the SASL authenticator.
config.primaryKey = "<your primary key as can be found under keys in Azure>" 
// Database and collection names; proxy-server.js combines them into `/dbs/<database>/colls/<collection>`.
config.database = "<name of the database>";
config.collection = "<name of the collection>";
// Port the proxy server listens on.
config.port = 3001;

module.exports = config;

proxy-server.js:

const express = require('express');
const bodyParser = require('body-parser');
const Gremlin = require('gremlin');
const cors = require('cors');
const app = express();
const fs = require('fs');
const path = require('path');
const config = require('./config');

// Register the CORS middleware with the `credentials: true` option.
app.use(cors({
    credentials: true,
}));

// Parse application/json request bodies (the /query handler reads req.body.query and req.body.nodeLimit).
app.use(bodyParser.json());

// Each property has as value an array with length 1 - take first element
/**
 * Flattens a `valueMap()` style property map into a plain key/value object.
 *
 * Each property value is expected to be an array of length 1, so only the
 * first element is kept. Values that are not arrays are copied unchanged
 * (indexing a string with [0] would otherwise truncate it to one character),
 * and a missing map (null/undefined) yields an empty object instead of throwing.
 *
 * @param {Object|null|undefined} propInObj - property map, e.g. { name: ["marko"] }.
 * @returns {Object} new object with a single value per property key.
 */
function mapVertexPropertiesToObj(propInObj) {
    const propOutObj = {};
    if (propInObj == null) {
        return propOutObj;
    }
    for (const [key, value] of Object.entries(propInObj)) {
        propOutObj[key] = Array.isArray(value) ? value[0] : value;
    }
    return propOutObj;
}

/**
 * Converts a list of edge records into the edge objects sent to the front-end.
 *
 * Only `id`, `from`, `to`, `label` and `properties` are copied. An id that is
 * not already a string is serialized with JSON.stringify.
 *
 * @param {Array<Object>|null|undefined} edges - edge records; any falsy value is treated as "no edges".
 * @returns {Array<Object>} the converted edges, or an empty array when `edges` is falsy.
 */
function edgesToVisualizationStructure(edges) {
    if (!edges) {
        return [];
    }

    const toEdgeId = rawId => (typeof rawId === "string" ? rawId : JSON.stringify(rawId));

    return edges.map(({ id, from, to, label, properties }) => ({
        id: toEdgeId(id),
        from,
        to,
        label,
        properties,
    }));
}

/**
 * Converts a list of node records into the node objects sent to the front-end.
 *
 * For every node the `id` and `label` are copied, `properties` is passed
 * through mapVertexPropertiesToObj and `edges` through edgesToVisualizationStructure.
 *
 * @param {Array<Object>} nodeList - node records with id, label, properties and (optionally) edges.
 * @returns {Array<Object>} one `{ id, label, properties, edges }` object per input node.
 */
function nodesToVisualizationStructure(nodeList) {
    function toVisualizationNode(node) {
        const { id, label } = node;
        const properties = mapVertexPropertiesToObj(node.properties);
        const edges = edgesToVisualizationStructure(node.edges);
        return { id, label, properties, edges };
    }

    return nodeList.map(node => toVisualizationNode(node));
}

/**
 * Builds the query that projects id, label and properties (no edges) for
 * every element produced by the given Gremlin query.
 *
 * @param {string} query - the Gremlin query to extend.
 * @returns {string} the query with the projection steps appended.
 */
function makeSelfQuery(query) {
    // The whitespace inside the pieces is part of the generated query text.
    const projectionSteps = [
        "        as('node').",
        "        project('id', 'label', 'properties').",
        "        by(__.id()).",
        "        by(__.label()).",
        "        by(__.valueMap())",
        "    ",
    ];
    return [`${query}.`, ...projectionSteps].join("\n");
}

/**
 * Builds the query used by the /query handler for queries ending in `.in()`.
 *
 * Every resulting vertex is projected to id, label, properties and its
 * outgoing edges (`outE`); each edge gets `from` = the vertex itself and
 * `to` = the vertex at the other end (`inV`).
 *
 * Adapted from the upstream single query; `valueMap().by(__.unfold())` was
 * replaced by a plain `valueMap()`.
 *
 * @param {string} query - the Gremlin query to extend.
 * @param {number|string} [nodeLimit] - maximum number of nodes; ignored unless it is a positive integer.
 * @returns {string} the query with the optional limit and the projection steps appended.
 */
function makeInQuery(query, nodeLimit) {
    // Only a validated, normalized integer is interpolated into the query text.
    // Raw values such as true, 2.5 or "1e3" would otherwise yield an invalid `.limit(...)` step.
    const isLimitCandidate = typeof nodeLimit === 'number' || typeof nodeLimit === 'string';
    const limit = isLimitCandidate ? Number(nodeLimit) : NaN;
    const nodeLimitQuery = Number.isSafeInteger(limit) && limit > 0 ? `.limit(${limit})` : '';
    const theQuery = `${query}${nodeLimitQuery}.
        dedup().
        as('node').
        project('id', 'label', 'properties', 'edges').
        by(__.id()).
        by(__.label()).
        by(__.valueMap()).
        by(__.outE().as('outEdge').
            project('id', 'from', 'to', 'label', 'properties').
            by(__.id()).
            by(select('node').id()).
            by(__.inV().id()).
            by(__.label()).
            by(__.valueMap()).
            fold()
        )
    `;
    // Unused draft for the 'to' projection, kept for reference (it is what the 'outEdge' label is for):
    // coalesce(select('outEdge').inV().count().is(gt(0)).select('outEdge').inV().id(), constant("NO_TO_VERTEX"))
    return theQuery;
}

/**
 * Builds the query used by the /query handler for queries ending in `.out()`.
 *
 * Every resulting vertex is projected to id, label, properties and its
 * incoming edges (`inE`); each edge gets `from` = the vertex at the other
 * end (`outV`) and `to` = the vertex itself.
 *
 * Adapted from the upstream single query; `valueMap().by(__.unfold())` was
 * replaced by a plain `valueMap()`.
 *
 * @param {string} query - the Gremlin query to extend.
 * @param {number|string} [nodeLimit] - maximum number of nodes; ignored unless it is a positive integer.
 * @returns {string} the query with the optional limit and the projection steps appended.
 */
function makeOutQuery(query, nodeLimit) {
    // Only a validated, normalized integer is interpolated into the query text.
    // Raw values such as true, 2.5 or "1e3" would otherwise yield an invalid `.limit(...)` step.
    const isLimitCandidate = typeof nodeLimit === 'number' || typeof nodeLimit === 'string';
    const limit = isLimitCandidate ? Number(nodeLimit) : NaN;
    const nodeLimitQuery = Number.isSafeInteger(limit) && limit > 0 ? `.limit(${limit})` : '';
    const theQuery = `${query}${nodeLimitQuery}.
        dedup().
        as('node').
        project('id', 'label', 'properties', 'edges').
        by(__.id()).
        by(__.label()).
        by(__.valueMap()).
        by(__.inE().
            project('id', 'from', 'to', 'label', 'properties').
            by(__.id()).
            by(__.outV().id()).
            by(select('node').id()).
            by(__.label()).
            by(__.valueMap()).
            fold()
        )
    `;
    return theQuery;
}

/**
 * Submits a Gremlin query with the settings from config.js.
 *
 * A new client is created for every call and is closed again before the
 * function returns, so no connection is left open per request.
 * The query and its result are logged to the console.
 *
 * @param {string} query - the Gremlin query text to submit.
 * @returns {Promise<Object|null>} the result of client.submit, or null when the submit failed
 *     (the error is logged, not rethrown).
 */
async function executeQuery(query) {
    const authenticator = new Gremlin.driver.auth.PlainTextSaslAuthenticator(
        `/dbs/${config.database}/colls/${config.collection}`,
        config.primaryKey
    );

    const client = new Gremlin.driver.Client(
        config.endpoint,
        {
            authenticator,
            traversalsource: "g",
            rejectUnauthorized: true,
            mimeType: "application/vnd.gremlin-v2.0+json"
        }
    );

    console.log(query);
    try {
        const result = await client.submit(query, {});
        console.log(JSON.stringify(result, null, 2));
        return result;
    } catch (err) {
        console.error(err);
        return null;
    } finally {
        // Release the per-call connection; a failure to close must not mask the query outcome.
        try {
            await client.close();
        } catch (closeErr) {
            console.error(closeErr);
        }
    }
}

/**
 * POST /query
 *
 * Body: { query: string, nodeLimit?: number }.
 * Responds with a pretty-printed JSON array of nodes (each with its edges).
 *
 * - "sample:<name>" reads samples/<name>.json next to this file instead of querying a server.
 * - Queries ending in ".out()" / ".in()" are expanded with makeOutQuery / makeInQuery,
 *   all other queries with makeSelfQuery.
 *
 * A failing sample read or query is logged and results in an empty array.
 * Any other unexpected error is forwarded to the Express error handler.
 */
app.post('/query', async (req, res, next) => {
    try {
        const nodeLimit = req.body.nodeLimit;
        const query = String(req.body.query);
        let visualizationNodesAndEdges = [];

        // Support for sample files, to show a graph without a Gremlin server
        if (query.startsWith("sample:")) {
            try {
                const sample = query.split(":")[1];
                const samplesDir = path.join(__dirname, "samples");
                const sampleFile = path.resolve(samplesDir, `${sample}.json`);
                // Reject names such as "../../x" that would resolve outside the samples folder.
                if (!sampleFile.startsWith(samplesDir + path.sep)) {
                    throw new Error(`Invalid sample name: ${sample}`);
                }
                visualizationNodesAndEdges = JSON.parse(fs.readFileSync(sampleFile, 'utf8'));
            } catch (err) {
                console.error(err);
            }
        } else {
            let theQuery;
            if (query.endsWith(".out()")) {
                theQuery = makeOutQuery(query, nodeLimit);
            } else if (query.endsWith(".in()")) {
                theQuery = makeInQuery(query, nodeLimit);
            } else {
                theQuery = makeSelfQuery(query);
            }

            const result = await executeQuery(theQuery);
            if (result !== null) {
                visualizationNodesAndEdges = nodesToVisualizationStructure(result._items);
            }
        }

        const visualizationNodesAndEdgesPrettyfiedJSon = JSON.stringify(visualizationNodesAndEdges, null, 2);
        console.log(visualizationNodesAndEdgesPrettyfiedJSon);
        res.send(visualizationNodesAndEdgesPrettyfiedJSon);
    } catch (err) {
        // A rejected promise from an async handler is not caught by Express itself here,
        // so hand the error over explicitly.
        next(err);
    }
});

/**
 * GET /edgecount/:nodeId
 *
 * Responds with pretty-printed JSON `{ inEdgesCount, outEdgesCount }` for the given node.
 * Both counts are -1 when the query failed or returned no items.
 */
app.get('/edgecount/:nodeId', async (req, res, next) => {
    try {
        // The id comes straight from the URL: escape it and embed it as a single-quoted
        // string literal so that it cannot terminate the literal and inject Gremlin steps.
        const escapedNodeId = req.params.nodeId
            .replace(/\\/g, '\\\\')
            .replace(/'/g, "\\'")
            .replace(/\n/g, '\\n')
            .replace(/\r/g, '\\r');
        const query = `g.V('${escapedNodeId}').project("inEdgesCount", "outEdgesCount").by(__.inE().count()).by(__.outE().count())`;
        const result = await executeQuery(query); // result._items in format: [ { "inEdgesCount": 2, "outEdgesCount": 0 } ]
        let countInfo;
        if (result === null || result._items.length === 0) {
            countInfo = { 'inEdgesCount': -1, 'outEdgesCount': -1 }; // error - node does not exist?
        } else {
            countInfo = result._items[0];
        }
        res.send(JSON.stringify(countInfo, null, 2));
    } catch (err) {
        // Forward unexpected errors explicitly; the handler is async.
        next(err);
    }
});

// Start listening on the port from config.js and log a message once the server is listening.
app.listen(config.port, () => console.log(`Simple Gremlin proxy-server listening on port ${config.port}!`));

I also added support for sample graph visualization files, which work without a Gremlin server (and therefore without support for drilling through the graph). Create a folder named samples next to the proxy-server.js file, and add, for example, the following file:

modern.json: (patterned after the graph created with TinkerFactory.createModern() as described in the TinkerPop Reference Documentation)

[
    {
        "id": "1",
        "label": "person",
        "properties": {
            "name": "marko",
            "age": 29
        },
        "edges": [
            {
                "id": "7",
                "from": "1",
                "to": "2",
                "label": "knows",
                "properties": {
                    "weight": 0.5
                }
            },
            {
                "id": "8",
                "from": "1",
                "to": "4",
                "label": "knows",
                "properties": {
                    "weight": 1.0
                }
            },
            {
                "id": "9",
                "from": "1",
                "to": "3",
                "label": "created",
                "properties": {
                    "weight": 0.4
                }
            }
        ]
    },
    {
        "id": "2",
        "label": "person",
        "properties": {
            "name": "vadas",
            "age": 27
        }
    },
    {
        "id": "3",
        "label": "software",
        "properties": {
            "name": "lop",
            "lang": "java"
        }
    },
    {
        "id": "4",
        "label": "person",
        "properties": {
            "name": "josh",
            "age": 32
        },
        "edges": [
            {
                "id": "11",
                "from": "4",
                "to": "3",
                "label": "created",
                "properties": {
                    "weight": 0.4
                }
            },
            {
                "id": "10",
                "from": "4",
                "to": "5",
                "label": "created",
                "properties": {
                    "weight": 1.0
                }
            }
        ]
    },
    {
        "id": "5",
        "label": "software",
        "properties": {
            "name": "ripple",
            "lang": "java"
        }
    },
    {
        "id": "6",
        "label": "person",
        "properties": {
            "name": "peter",
            "age": 35
        },
        "edges": [
            {
                "id": "12",
                "from": "6",
                "to": "3",
                "label": "created",
                "properties": {
                    "weight": 0.2
                }
            }
        ]
    }
]

If the string sample:modern is specified instead of a query, the sample JSON is read and returned.

dzmitry-lahoda commented 4 years ago

Was able to run stuff. But what is the format of the JSON? It is not GraphSON.

svdoever commented 4 years ago

It is the format sent by the proxy-server.js to the front-end:-) I did some additional improvements on the proxy-server.js so other queries are handled correctly as well. I will post an updated version soon.

Did you run it against Cosmos DB, or did it work for other Gremlin servers as well?

LoungeFlyZ commented 4 years ago

@svdoever did you complete this? Is your fork available for me to play around with? I'm playing with Cosmos a bit and would like to try this out.