Open AshleyWoods opened 5 years ago
Updated code for attempt:
#get the data in here
dat <- occ_download_import(key = "0009491-190621201848488")
sp_key <- name_suggest(q ="Caretta caretta", rank = "species")$key[1]
comparison <- map_fetch(taxonKey = sp_key, srs = "EPSG:3857")
#visualize it
#plot(dat)
#plot(comparison)
#prep for later
#somethings funky here, so yeah
#comparison2 = projectRaster(comparison, comparison)#crs = "+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs")
comparison3 <- setExtent(comparison, extent(-20037508, 20037508, -20037508, 20037508))
#Turns dat into a raster to match comparison
crds = data.frame(long = dat$decimalLongitude, lat = dat$decimalLatitude)
crds = subset(crds, subset= lat > -85.06)
crds = subset(crds, subset = lat < 85.06)
#crds2 = dat$data[c("decimalLongitude","decimalLatitude")]
#crds2 = subset(crds2, subset= "decimalLatitude" > -85.06)
#crds2 = subset(crds2, subset = "decimalLatitude" < 85.06)
# crds_sp = SpatialPoints(crds, proj4string = CRS("+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs")) #CRS(projection(comparison3)))#
#r_template = comparison
#r = raster(nrows=512, ncols=512, xmn=-20037.51, xmx=20002.49, ymn=-6396.115,
# ymx=6363.885, crs = CRS("+proj=cea +units=km +ellps=WGS84"))
#crds_cea = spTransform(crds_sp, CRSobj = CRS(proj4string(r)))
#ct = rasterize(crds_cea, r, fun='count')
# crds_sp_reproj = spTransform(crds_sp, CRSobj = CRS("+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs"))
crds = sp::SpatialPoints(crds, proj4string = CRS("+proj=longlat"))
crds_3857 = sp::spTransform(crds, CRSobj = CRS(projection(comparison3)))
ct = rasterize(crds_3857, comparison3, fun='count')
max_ct = maxValue(ct)
prop = calc(ct, function(x) ifelse(is.na(x), 0, x / max_ct))
#this will turn the full numbers in to percentages of the max to match the values
#from the map_fetch() functions ( values on a scale of 0 to 1)
int <- ct@data@values
#prop = projectRaster(prop, crs = "+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs")
# crds = sp::SpatialPoints(crds, proj4string = CRS("+proj=longlat"))
# crds_3857 = sp::spTransform(crds, CRSobj = CRS(projection(comparison3)))
plot(comparison3)
points(crds_3857, pch='.', col='red')
#plot the thing -- how to show the density of the points
plot(comparison3, prop,
xlab = "Density of Refrence Points (Comparison dataset)",
ylab = "Density of Species of Interest", maxpixels = 262144)
#add best fit line
abline(lm(getValues(prop) ~ getValues(comparison)))
This is very close to the desired output, but the map projection is still just slightly off. In the image below, look at the east coast of Australia for the most obvious example.
The graph this outputs looks like this:
Hmm, I think their patch is actually working. Try this:
den <- map_fetch(taxonKey = 2480498, srs = "EPSG:3857")
occ <- occ_search(taxonKey = 2480498)
crds <- occs$data[c("decimalLongitude","decimalLatitude")] ## Extract only the coordinates
crds_3857 <- rgdal::project(as.matrix(crds), projection(den))
r <- setExtent(den, extent(-20037508, 20037508, -20037508, 20037508))
pdf('test.pdf')
plot(r)
mp <- maps::map(plot = FALSE)
xy <- rgdal::project(cbind(mp$x, mp$y), projection(den))
lines(xy)
points(crds_3857, col='red', pch = '.')
dev.off()
I see that now, I did some messing around with the zoom window from RStudio and it turns out that the dimensions of the zoom window can affect how the points line up for some reason, they scale differently. (But that's a problem for RStudio to solve)
While that helps a lot, we are still losing points while turning crds into a raster. I believe it may happen during the rasterize() step.
#get the data in here
dat <- occ_download_import(key = "0009491-190621201848488")
sp_key <- name_suggest(q ="Caretta caretta", rank = "species")$key[1]
comparison <- map_fetch(taxonKey = sp_key, srs = "EPSG:3857")
comparison3 <- setExtent(comparison, extent(-20037508, 20037508, -20037508, 20037508))
#Throw out invalid crds + reproject it
crds = data.frame(long = dat$decimalLongitude, lat = dat$decimalLatitude)
crds = subset(crds, subset= lat > -85.06)
crds = subset(crds, subset = lat < 85.06)
crds_3857 <- rgdal::project(as.matrix(crds), projection(comparison))
# I don't think these are needed anymore? I'm not sure
# crds2 = sp::SpatialPoints(crds, proj4string = CRS("+proj=longlat"))
# crds_38572 = sp::spTransform(crds2, CRSobj = CRS(projection(comparison3)))
#Rasterize the crds
#I believe the Error happens here!!!!!!!!!!!!!!!!!!!!!!!
ct = rasterize(crds_3857, comparison, fun='count')
#turn the numeric values into density values
max_ct = maxValue(ct)
prop = calc(ct, function(x) ifelse(is.na(x), 0, x / max_ct))
#grab data for best fit line
#int <- ct@data@values
#visualize
plot(comparison3)
points(crds_3857, pch='.', col='red')
# If you could get this to work I think it would support my point about losing points
#points(x = prop, pch = '.', col = 'blue')
#plot the thing -- how to show the density of the points
plot(comparison3, prop,
xlab = "Density of Refrence Points (Comparison dataset)",
ylab = "Density of Species of Interest", maxpixels = 262144)
#add best fit line
abline(lm(getValues(prop) ~ getValues(comparison)))
I looked into the points dropping and I'm not seeing it on my end. Good news is that I think this is working for the most part now. I think maybe you should get a fresh update of your caretta caretta data and you might see that the two overlap.
This code now grabs the newest Caretta caretta dataset (once you replace the pound signs with your login)
#get the data in here
#dat <- occ_download_import(key = "0009491-190621201848488")
sp_key <- name_suggest(q ="Caretta caretta", rank = "species")$key[1]
res = occ_download(paste("taxonKey =", sp_key), 'hasCoordinate = TRUE',
user = '#######', pwd = '#########',
email = '#########################')
continue = TRUE
while(continue){
meta = occ_download_meta(res)
if (meta$status == "SUCCEEDED"){
continue = FALSE
dat <- occ_download_get(res[1], overwrite = TRUE) %>%
occ_download_import()
}
if (meta$status == "KILLED"){
continue = FALSE
#Returns: ERROR:Bad Request(HTTP 400)
}
#30 second delay -- Extend?
Sys.sleep(30)
}
comparison <- map_fetch(taxonKey = sp_key, srs = "EPSG:3857")
comparison3 <- setExtent(comparison, extent(-20037508, 20037508, -20037508, 20037508))
#Throw out invalid crds + reproject it
crds = data.frame(long = dat$decimalLongitude, lat = dat$decimalLatitude)
crds = subset(crds, subset= lat > -85.06)
crds = subset(crds, subset = lat < 85.06)
crds_3857 <- rgdal::project(as.matrix(crds), projection(comparison3))
# I don't think these are needed anymore? I'm not sure
# crds2 = sp::SpatialPoints(crds, proj4string = CRS("+proj=longlat"))
# crds_38572 = sp::spTransform(crds2, CRSobj = CRS(projection(comparison3)))
#Rasterize the crds
#I believe the Error happens here!!!!!!!!!!!!!!!!!!!!!!!
ct = rasterize(crds_3857, comparison3, fun='count')
#turn the numeric values into density values
max_ct = maxValue(ct)
prop = calc(ct, function(x) ifelse(is.na(x), 0, x / max_ct))
#grab data for best fit line
#int <- ct@data@values
#visualize
plot(comparison3)
points(crds_3857, pch='.', col='red')
#If you could get this to work I think it would support my point about loosing points
#points(x = prop, pch = '.', col = 'blue')
#plot the thing -- how to show the density of the points
plot(comparison3, prop,
xlab = "Density of Refrence Points (Comparison dataset)",
ylab = "Density of Species of Interest", maxpixels = 262144)
#add best fit line
abline(lm(getValues(prop) ~ getValues(comparison3)))
That gives this plot, even though it should be giving a 1:1 with the exception of a few (4) points that were removed due to proximity to the poles.
I added a section at the end of the code that shows how many coordinates the GBIF data has at the beginning of the code vs. the end after it has been rasterized. Remember to replace the '#' with your login information. When I ran the last section and checked the value of the 'errors' variable it had a value of 20765. This means that 20765 coordinate points were not transferred to the raster layer and that is why the end graph does not appear as a 1:1.
#get the data in here
#dat <- occ_download_import(key = "0009491-190621201848488")
sp_key <- name_suggest(q ="Caretta caretta", rank = "species")$key[1]
res = occ_download(paste("taxonKey =", sp_key), 'hasCoordinate = TRUE',
user = "#######", pwd = "########",
email = "##################")
continue = TRUE
while(continue){
meta = occ_download_meta(res)
if (meta$status == "SUCCEEDED"){
continue = FALSE
dat <- occ_download_get(res[1], overwrite = TRUE) %>%
occ_download_import()
}
if (meta$status == "KILLED"){
continue = FALSE
#Returns: ERROR:Bad Request(HTTP 400)
}
#30 second delay -- Extend?
Sys.sleep(30)
}
comparison <- map_fetch(taxonKey = sp_key, srs = "EPSG:3857")
comparison3 <- setExtent(comparison, extent(-20037508, 20037508, -20037508, 20037508))
#Throw out invalid crds + reproject it
crds = data.frame(long = dat$decimalLongitude, lat = dat$decimalLatitude)
crds = subset(crds, subset= lat > -85.06)
crds = subset(crds, subset = lat < 85.06)
crds_3857 <- rgdal::project(as.matrix(crds), projection(comparison3))
# I don't think these are needed anymore? I'm not sure
# crds2 = sp::SpatialPoints(crds, proj4string = CRS("+proj=longlat"))
# crds_38572 = sp::spTransform(crds2, CRSobj = CRS(projection(comparison3)))
#Rasterize the crds
#I believe the Error happens here!!!!!!!!!!!!!!!!!!!!!!!
ct = rasterize(crds_3857, comparison3, fun='count')
#turn the numeric values into density values
max_ct = maxValue(ct)
prop = calc(ct, function(x) ifelse(is.na(x), 0, x / max_ct))
test = calc(ct, function(x) ifelse(is.na(x), NA, x/max_ct))
#grab data for best fit line
#int <- ct@data@values
#visualize
plot(comparison3)
points(crds_3857, pch='.', col='red')
#If you could get this to work I think it would support my point about loosing points
#points(x = prop, pch = '.', col = 'blue')
#plot the thing -- how to show the density of the points
plot(comparison3, prop,
xlab = "Density of Refrence Points (Comparison dataset)",
ylab = "Density of Species of Interest", maxpixels = 262144, xlim = c(0,1), ylim = c(0,1))
#add best fit line
abline(lm(getValues(prop) ~ getValues(comparison3)))
# checking for problems with the coordinates and losing them when creating the raster
# these coordinates are lost from the GBIF data, not the mapfetch data
raster_count = 0
crds_count = length(crds_3857)
for (i in 1:262144) {
if (!is.na(test[i])) {
raster_count = raster_count + 1
}
}
errors = crds_count - raster_count #if this is not zero, then it is the number of coordinates that were lost
errors
Hey @AshleyWoods I don't have time right now to go through your code but an even simpler check on my end shows that rasterize is not making a mistake
sum(getValues(ct), na.rm=T)
[1] 10795
dim(crds_3857)
[1] 10795 2
So every coordinate in crds_3857
has been converted into a unit of count in ct
. Can you confirm this behavior on your end as well please
I see what you mean. I'm not sure where it is happening then, but something is going wrong if my graph still looks like that. I'll through the code again and see what I can find.
> sum(getValues(ct), na.rm=T)
[1] 10801
> dim(crds_3857)
[1] 10801 2
Here's something interesting. I plotted prop against prop and got this: But when I plotted comparison3 against comparison3, I got this:
The density values are fundamentally different. I think that some of the coordinates are recording more than one occurrence. I need to find what column that value is stored in and take that into account to see if that fixes the issue. I have two suspicions for what columns this value is stored in, but it sometimes contains a null value and sometimes contains a zero. When I figure out what column the data is stored in, do you know how I could factor this in?
That's a good call to think about duplicates. I'm not sure if the GBIF folks filtered those out before computing the densities. To identify duplicate records the easiest thing to do is filter if they have the exact same long and lat. This is easy to do in R
# remove duplicates
crds = crds[!duplicated(crds), ]
Unfortunately this doesn't solve the problem you have identified. I appears that the density values have been binned very coarsely into 4 values (at least in this dataset - this may be general not sure). As you have noted the bin classes don't match up with what we're seeing in the raw point data. I think in this case we need to file another bug report over at rgbif
with a simple reproducible example so that we can better understand how GBIF has computed these pixel colors that we're capturing with map_fetch
. What do you think?
My thought is the last time we were getting strange stuff it was because of an upstream bug and this case may the same
I wasn't talking about duplicates. I meant that in some occurrences, there are columns that allow the reporter of the occurrence to input how many individuals there were. Some people left it blank, some have it filled in with a zero, but some have around 200 occurrences. I think gbif may be counting those and this is not an error. At the very least, I think we should test it first and try the course binning to see if it eventually matches up or if there is a but somewhere.
I finally found a page on the Gbif website that told me which columns contain this and how to read them.
I added code to make sure every occurrence was counted for every individual included in the observation.
#get the data in here
#dat <- occ_download_import(key = "0009491-190621201848488")
sp_key <- name_suggest(q ="Caretta caretta", rank = "species")$key[1]
res = occ_download(paste("taxonKey =", sp_key), 'hasCoordinate = TRUE',
user = "ashwood", pwd = "Voidfish13",
email = "woodsy913@comcast.net")
continue = TRUE
while(continue){
meta = occ_download_meta(res)
if (meta$status == "SUCCEEDED"){
continue = FALSE
dat <- occ_download_get(res[1], overwrite = TRUE) %>%
occ_download_import()
}
if (meta$status == "KILLED"){
continue = FALSE
#Returns: ERROR:Bad Request(HTTP 400)
}
#30 second delay -- Extend?
Sys.sleep(30)
}
comparison <- map_fetch(taxonKey = sp_key, srs = "EPSG:3857")
comparison3 <- setExtent(comparison, extent(-20037508, 20037508, -20037508, 20037508))
#Throw out invalid crds + reproject it
crds = data.frame(long = dat$decimalLongitude, lat = dat$decimalLatitude)
crds = subset(crds, subset= lat > -85.06)
crds = subset(crds, subset = lat < 85.06)
#for the sake of having the counting cols (but still weeding out the same ones that have been taken from crds)
crds2 = data.frame(long = dat$decimalLongitude, lat = dat$decimalLatitude, count = dat$individualCount, quantity = dat$organismQuantity)
crds2 = subset(crds2, subset= lat > -85.06)
crds2 = subset(crds2, subset = lat < 85.06)
#and now you make sure EVERY occurrence is counted
for (i in 1:length(rownames(crds2))) {
if (!is.na(crds2[i,3]) && crds2[i,3] > 1) {
#it has more than 1 occurrence in the individualCount column, append it however many times to the end of crds
for (j in 1:crds2[i,3]) {
crds <- rbind(crds, c(crds2[i,1], crds2[i,2]))
}
}else if (!is.na(crds2[i,4]) && is.numeric(crds2[i,4]) && crds2[i,4] > 1) {
#it has more than 1 occurrence in the organismQuantity column, append it however many times to the end of crds
for (j in 1:crds2[i,4]) {
crds <- rbind(crds, c(crds2[i,1], crds2[i,2]))
}
}
}
crds_3857 <- rgdal::project(as.matrix(crds), projection(comparison3))
# I don't think these are needed anymore? I'm not sure
# crds2 = sp::SpatialPoints(crds, proj4string = CRS("+proj=longlat"))
# crds_38572 = sp::spTransform(crds2, CRSobj = CRS(projection(comparison3)))
#Rasterize the crds
#I believe the Error happens here!!!!!!!!!!!!!!!!!!!!!!!
ct = rasterize(crds_3857, comparison3, fun='count')
#turn the numeric values into density values
max_ct = maxValue(ct)
prop = calc(ct, function(x) ifelse(is.na(x), 0, x / max_ct))
test = calc(ct, function(x) ifelse(is.na(x), NA, x/max_ct))
#grab data for best fit line
#int <- ct@data@values
#visualize
plot(comparison3)
points(crds_3857, pch='.', col='red')
#If you could get this to work I think it would support my point about loosing points
#points(x = prop, pch = '.', col = 'blue')
#plot the thing -- how to show the density of the points
plot(comparison3, prop,
xlab = "Density of Refrence Points (Comparison dataset)",
ylab = "Density of Species of Interest", maxpixels = 262144, xlim = c(0,1), ylim = c(0,1))
#add best fit line
abline(lm(getValues(prop) ~ getValues(comparison3)))
This is the solution it gives:
It's different, but it is still not 1:1
I'm trying to get a package called rprotobuf (suggested by Tim Robertson on this thread) installed to try and work with the mvt packages. However, it gives me these errors and I can't figure out what it wants me to do. (or if I have permission to do so on Uniola)
configure: WARNING: Protobuf headers not found with default CXXFLAGS and CPPFLAGS, manually trying /usr/local/include
configure: WARNING: Unsetting ac_cv_header_google_protobuf_stubs_common_h
and
configure: error: ERROR: ProtoBuf headers required; use '-Iincludedir' in CXXFLAGS for unusual locations.
ERROR: configuration failed for package ‘RProtoBuf’
* removing ‘/home/woodsae/R/x86_64-pc-linux-gnu-library/3.5/RProtoBuf’
Thoughts?
You would need to install the protobuf
library.
https://github.com/eddelbuettel/rprotobuf#installation has instructions for Debian/Ubuntu, "with similar commands on other operating systems or distributions". It's the kind of thing that typically requires permissions.
Issues with making the raster layers match for plotting the density of the two. Code: