Closed nelsonic closed 5 years ago
our insert_org
function was:
function insert_org (data, callback) {
const fields = '(' + [ "url", "name", "description", "location",
"website", "email", "pcount", "uid"].join(',') + ')';
connect( function insert_org_after_connected () {
const { url,name,description,location,website,email,pcount,uid } = data;
// console.log(fields, name, username, company, uid, location);
const placeholders = '%L, %L, %L, %L, %L, %L, $p, $1'
let q = escape('INSERT INTO orgs %s VALUES (' + placeholders + ')',
fields, url,name,description,location,website,email);
// for some reason pg-escape does not play well with integers ...
// see: https://github.com/segmentio/pg-escape/issues/15
// so we are manually replacing the values:
q = q.replace('$1', parseInt(uid, 10));
q = q.replace('$p', parseInt(pcount, 10));
// console.log('L93: query:', q);
PG_CLIENT.query(q, function(error, result) {
utils.log_error(error, data, new Error().stack);
return insert_next_page (data, callback);
});
});
}
Simplified to:
function insert_org (data, callback) {
connect( function insert_org_after_connected () {
const { url,name,description,location,website,email,pcount,uid } = data;
const query = `INSERT INTO orgs
(url, name, description, location, website, email, pcount, uid)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`;
const values = [url,name,description,location,website,email,pcount,uid];
PG_CLIENT.query(query, values, function(error, result) {
utils.log_error(error, data, new Error().stack);
return insert_next_page (data, callback);
});
});
}
Cleaner and less "explaining" in the comments.
I'm not sure about creating the local variables based on the values in the data
Object ... 🤔
Is this better:
function insert_org (data, callback) {
connect( function insert_org_after_connected () {
const query = `INSERT INTO orgs
(url, name, description, location, website, email, pcount, uid)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`;
const values = [data.url, data.name, data.description, data.location,
data.website, data.email, data.pcount, data.uid];
PG_CLIENT.query(query, values, function(error, result) {
utils.log_error(error, data, new Error().stack);
return insert_next_page (data, callback);
});
});
}
What do you think...?
our insert_repo
function was:
function insert_repo (data, callback) {
const fields = '(' + [ "url", "description", "website", "tags", "langs",
"watchers", "stars", "forks", "commits"].join(',') + ')';
// console.log('db.js:L105: insert_repo > data', Object.keys(data).join(','));
connect( function insert_repo_after_connected () {
const { url, description, website, tags, langs, // string data
watchers, stars, forks, commits} = data; // int data
// console.log(fields, name, username, company, uid, location);
const placeholders = '%L, %L, %L, %L, %L, $w, $s, $f, $c'
let q = escape('INSERT INTO repos %s VALUES (' + placeholders + ')',
fields, url, description, website, tags, langs.join(','));
// for some reason pg-escape does not play well with integers ...
// see: https://github.com/segmentio/pg-escape/issues/15
// so we are manually replacing the values:
q = q.replace('$w', parseInt(watchers, 10))
.replace('$s', parseInt(stars, 10))
.replace('$f', parseInt(forks, 10))
.replace('$c', parseInt(commits, 10));
// console.log('L121: query:', q);
PG_CLIENT.query(q, function(error, result) {
utils.log_error(error, data, new Error().stack);
return insert_next_page (data, callback);
});
});
}
And is now much cleaner:
function insert_repo (data, callback) {
connect( function insert_repo_after_connected () {
const { url, description, website, tags, langs,
watchers, stars, forks, commits} = data;
const query = `INSERT INTO repos
(url, description, website, tags, langs, watchers, stars, forks, commits)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`;
const values = [url, description, website, tags, langs,
watchers, stars, forks, commits];
PG_CLIENT.query(query, values, function(error, result) {
utils.log_error(error, data, new Error().stack);
return insert_next_page (data, callback);
});
});
}
✅
Included in PR #51
Why?
It would be (highly) irresponsible of us to introduce people to SQL+PostgreSQL without covering SQL injections (SQLi) and how to defend/protect against them. According to the OWASP Top 10, "injection" in general and SQL injection specifically are still the number one source of security vulnerabilities in web apps.
An SQL injection can be devastating to a web application because it can result in anything from privilege escalation (where the attacker makes themself an "admin" of your app) to data theft and database or table deletion!
https://xkcd.com/327 - we can laugh at this comic because we haven't suffered an SQLi attack. 😥
What?
When inserting data (or running a parameterised query) in Postgres, we must sanitise (or "escape") the parameters even when we trust them!
(thankfully,
Ecto
sanitises all queries automatically+transparently, so when usingPhoenix
this is taken care of and we can all just get on with our building our App ... but in Node.js there's more work)How?
Historically we have used the
pg-escape
library to escape our queries. It worked (mostly) and we did not give it any more thought. But while building the example #51 we noted that "escape.literal throwsval.indexOf is not a function
fornumbers
andbooleans
" see: https://github.com/segmentio/pg-escape/issues/15 Which means that we cannot insert INT/BOOL into a Postgres db or select based on the an INT/BOOL parameter ... we've been using the following "work around":In our case the
person.id
is anINT
so we write:You'll notice that this does not actually do any escaping of the
id
parameter. The$id
variable just gets replaced in the query without any checking/sanitisation. This is obviously an undesirable situation, so it got me thinking: "How can we fix this?"The basic example above is just to illustrate the problem. The reality is much worse! e.g:
In the case of our
insert_person
function there are six INT values that have to be "replaced"... 🤦♂️As you can imagine, I'm really unhappy with this. (hence opening this issue to fix it!)
A bit of searching for "nodejs postgresql sanitize" landed me on the SO question: https://stackoverflow.com/questions/41455585/does-pg-node-postgres-automatically-sanitize-data
The accepted answer in his SO question/answer: "Does pg (node-postgres) automatically sanitize data?" is that
node-postgres
(recently renamedpg
) "offers no protection against SQL injection" ...Thankfully, @tuananh took the time to write a factually correct follow-up answer informing that the
pg
module does in fact offer SQL injection protection when using the second argument of thequery
method to supply an array of parameters.https://github.com/brianc/node-postgres/wiki/FAQ#8-does-node-postgres-handle-sql-injection
Anyway, long story short, we are able to re-write the hideous
insert_person
function which previously had 6replace
statements (see above) to become this:This is not "perfect" by any means but it's a lot better!
Todo:
insert
functions in the example #51 to usepg
built-in parameterisation.