Open jbrzusto opened 5 years ago
in (%s)
in a safeSQL query template and make sure the call has .QUOTE=FALSE)
or at least a DBI::SQL(...)
wrapping the
string parameter.batchRuns
len
, tsEnd
, done
for records in table runs
runID
for records in table hits
code fixes:
grep -F -n 'in (%s)' R/*.R | grep -v sprintf
R/dataServer.R:885: t1.projectID in (%s)
R/dataServer.R:886: and t1.tagID in (%s)
R/dataServer.R:911: t1.tagID in (%s)
R/dataServer.R:926: t1.id in (%s)
R/dataServer.R:1042: t1.projectID in (%s)
R/dataServer.R:1043: and t1.deviceID in (%s)
R/dataServer.R:1065: t1.projectID in (%s)
R/dataServer.R:1066: and t1.deviceID in (%s)
R/dataServer.R:1123: t1.ambigID in (%s)
R/deleteFromMotus.R:67: sql("update motusTX set batchID=-batchID where batchID in (%s) and not batchID in (select -batchID from motusTX where batchID < 0)", paste(bn, collapse=","))
R/deleteTestBatches.R:36: bid = MotusDB("select batchID from batches where status=-1 and batchID in (%s)", SQL(paste(batchID, collapse=",")))[[1]]
R/deleteTestBatches.R:41: MotusDB("delete from batches where batchID in (%s)", bids)
R/deleteTestBatches.R:42: MotusDB("delete from projBatch where batchID in (%s)", bids)
R/deleteTestBatches.R:43: MotusDB("delete from gps where batchID in (%s)", bids)
R/deleteTestBatches.R:45: MotusDB("create temporary table _runs_from_test_batches as select runID from runs where batchIDbegin in (%s)", bids)
R/deleteTestBatches.R:59: MotusDB("delete from runs where batchIDbegin in (%s)", bids)
R/deleteTestBatches.R:60: MotusDB("delete from batchProgs where batchID in (%s)", bids)
R/deleteTestBatches.R:61: MotusDB("delete from batchParams where batchID in (%s)", bids)
R/deleteTestBatches.R:62: MotusDB("delete from pulseCounts where batchID in (%s)", bids)
R/deleteTestBatches.R:63: MotusDB("delete from reprocessBatches where batchID in (%s)", bids)
R/deleteTestBatches.R:64: MotusDB("delete from reprocessBatches where batchID in (%s)", SQL(paste(- bid, collapse=","))) ## negated batchIDs might also be in reprocessBatches
R/dropBatchesFromTransfer.R:16: tx = MotusDB("select batchID, status from batches where batchID in (%s)", bid)
R/dropBatchesFromTransfer.R:38: runIDs = MotusDB("select distinct runID from hits where batchID in (%s) order by runID", bid)[[1]]
R/dropBatchesFromTransfer.R:41: runBatchCount = MotusDB("select runID, batchID, count(*) as n from hits where runID in (%s) group by runID, batchID order by runID, batchID",
R/dropBatchesFromTransfer.R:70: MotusDB("delete from %s where batchID in (%s)", t, bid)
R/motusAuthenticateUser.R:53: projectID1 in (%s)
R/motusAuthenticateUser.R:54: or projectID2 in (%s)
R/motusAuthenticateUser.R:55: or projectID3 in (%s)
R/motusAuthenticateUser.R:56: or projectID4 in (%s)
R/motusAuthenticateUser.R:57: or projectID5 in (%s)
R/motusAuthenticateUser.R:58: or projectID6 in (%s)
R/parseFilenames.R:57: rules = MetaDB("select * from serno_collision_rules where serno in (%s) order by id", paste0("'", rv$serno, "'", collapse=","), .QUOTE=FALSE)
R/purgeFromMotus.R:33: MotusDB("delete from %s where batchID in (%s)", t, bdrop)
R/purgeFromMotus.R:35: MotusDB("delete from runs where batchIDbegin in (%s)", bdrop)
R/purgeFromMotus.R:36: MotusDB("delete from batches where batchID in (%s)", bdrop)
sprintf
, so correct.DBI::SQL()
, so correct..QUOTE=FALSE
, so correct.QUOTE=FALSE
, so correct.DB fix files were generated by this code:
library(motusServer)
openMotusDB()
getMotusMetaDB()
hitsUpdate = file("./hits_update.txt", "w")
cat("CREATE TABLE hitFixes (hitID DOUBLE PRIMARY KEY, runID DOUBLE);\n", file=hitsUpdate)
runsUpdate = file("./runs_update.txt", "w")
cat("CREATE TABLE runFixes (runID DOUBLE PRIMARY KEY, tsEnd DOUBLE, done INTEGER, len INTEGER);\n", file=runsUpdate)
batchRunsUpdate = file("./batchRuns_update.txt", "w")
cat("CREATE TABLE batchRunFixes (batchID DOUBLE, runID DOUBLE);\n", file=batchRunsUpdate)
## get full record for these hits
h = MotusDB("select t1.*, t3.motusDeviceID from hits as t1 join hits_with_bad_runIDs as t2 on t1.hitID=t2.hitID join batches as t3 on t3.batchID=t1.batchID order by t1.hitID")
## grouping by receiver, we lookup the runIDs
devs = unique(h$motusDeviceID)
for (idev in seq(along=devs)) {
i = which(h$motusDeviceID == devs[idev])
serno = MetaDB("select distinct serno from recvDeps where deviceID=%d", h$motusDeviceID[i[1]])[[1]]
sql = safeSQL(getRecvSrc(serno))
sql("create temporary table fixhitIDs (hitID double, runID double, batchID integer)")
dbWriteTable(sql$con, "fixhitIDs", h[i, c("hitID", "runID", "batchID")], append=TRUE)
fixes = sql("select t1.batchID, t1.hitID, t2.runID + t3.offsetRunID as runID, t2.len as len, t2.tsEnd as tsEnd, t2.done as done from fixhitIDs as t1 join motusTX as t4 on t1.batchID=t4.batchID+t4.offsetBatchID join runs as t2 on t1.runID=t2.runID join motusTX as t3 on t3.batchID=t2.batchIDbegin")
## we might not have any rows, e.g. if the original batch was deleted from the receiver!
if (nrow(fixes) > 0) {
## output hits update
cat(paste0("insert into hitFixes values ", paste0("(", fixes$hitID, ",", fixes$runID, ")", collapse=","), ";\n"), file=hitsUpdate)
## output runs update
rf = fixes[!duplicated(fixes$runID),]
cat(paste0("insert into runFixes values ", paste0("(", rf$runID, ",", rf$tsEnd, ",", rf$done, ",", rf$len, ")", collapse=","), ";\n"), file=runsUpdate)
## output batchRuns update
cat(paste0("insert into batchRunFixes values ", paste0("(", rf$batchID, ",", rf$runID, ")", collapse=","), ";\n"), file=batchRunsUpdate)
}
}
## output update queries in de-activated form:
cat("-- the update query\n-- UPDATE hits AS t1 JOIN hitFixes AS t2 ON t1.hitID=t2.hitID SET t1.runID=t2.runID;", file=hitsUpdate)
cat("-- the update query\n-- UPDATE runs AS t1 JOIN runFixes as t2 on t1.runID=t2.runID SET t1.tsEnd=t2.tsEnd, t1.done=t2.done, t1.len=t2.len;", file=runsUpdate)
cat("-- the update query\n-- INSERT OR IGNORE INTO batchRuns select t1.batchID, t1.runID, t2.tagDepProjectID from batchRunFixes as t1 join runs as t2 on t1.runID=t2.runID;", file=batchRunsUpdate)
close(hitsUpdate)
close(runsUpdate)
close(batchRunsUpdate)
It turns out almost all of the problematic hits are from obsolete batches (which have generally been deleted from the receiver databases, even if they are still present in the master). These obsolete batches corresponded to running portions of the receiver files, and were effectively replaced by rerunning receiver files in toto.
We're down to 575 hits
, 31 runs
, and 31 batchRuns
records.
Here are the three update files as SQL queries. The final query in each, which updates the original database tables, is commented out.
batchRuns_update.txt
hits_update.txt
runs_update.txt
(the syntax for the batchRuns INSERT OR IGNORE
statement is for sqlite; for mysql it needs to be INSERT IGNORE
and I don't know what for SQL Server)
Do you have a suggestion about how we can safely identify obsolete (or re-run) batches?
We have been trying to do this on the basis of deviceID’s, boot numbers and timestamps, but that leaves a number of uncertain cases (changes in ts among batches for the same boot number, for instance).
I'm going ahead and applying the fix above to the sgdata master database.
Re: batch obsolescence - large topic already under a different issue #407
For runs continued from a previous batch, all but the first of them were not being recorded in the
batchRuns
table for the current batch. This was due to a mis-quoted query like:where the '%s' format specifier was being replaced by a quoted string, so that the
where
clause looked e.g. like:which unfortunately succeeds but only partially: the quoted string is coerced to a number before comparing to
runID
and this ends up comparingrunID
to only the first number.This means there are significant missing records from the batchRuns table.
This was inadvertently fixed by eb95ad57867cefcf21b28a0e65f62d1556adcd93, which uses a more streamlined query on tables already in the master DB to achieve the (correct) result.