Closed dstillman closed 5 years ago
Most BibTeX styles don't support `url`, but this seems valid; BibTeX will treat the value as verbatim, and in verbatim text `%` is allowed. This MWE renders as expected for me:
% Minimal working example: embeds a one-entry bibliography whose url field
% contains literal percent signs, then cites that entry.
\documentclass{article}
\usepackage{filecontents}
\begin{filecontents}{\jobname.bib}
@misc{Bar2019,
author = {Bar, Foo},
url = {http://example.com/Foo%20-%20Bar.pdf}
}
\end{filecontents}
\begin{document}
From \cite{Bar2019} we see \ldots
% The bibliography is read from the .bib file named after this job.
\bibliographystyle{alpha}
\bibliography{\jobname}
\end{document}
Both BBT and stock import this just fine for me. Does the detectImport error out, or does it return false?
No errors are logged.
I see `detectImport` treating `%` as a comment character (which it shouldn't, technically), but running it locally gives me a positive detect:
// Sample input: a multi-line BibTeX entry whose url field contains
// URL-encoded spaces (literal % characters).
let data = `
@misc{Bar2019,
author = {Bar, Foo},
url = {http://example.com/Foo%20-%20Bar.pdf}
}
`;

/**
 * Minimal stand-in for the two Zotero sandbox functions that detectImport()
 * uses. Declared with `const` so the script also runs in strict mode and in
 * ES modules, where assigning to an undeclared name throws a ReferenceError.
 */
const Zotero = {
  /**
   * Consume and return up to `n` characters from the front of `data`.
   * @param {number} n - Maximum number of characters to return.
   * @returns {string} The next chunk; "" once `data` is exhausted.
   */
  read(n) {
    const part = data.substring(0, n);
    data = data.substring(n);
    return part;
  },

  /**
   * Print a debug message to the console.
   * @param {string} msg - Message to print.
   */
  debug(msg) {
    console.log(msg);
  },
};
/**
 * Decide whether the input available through Zotero.read() looks like BibTeX.
 *
 * The input is scanned in 4096-character chunks, up to roughly 1MB. Each line
 * is collected with spaces and tabs removed and tested for an entry header
 * such as `@misc{` or `@article(`.
 *
 * A `%` starts a comment that runs to the end of the line, but whatever was
 * collected before it is tested first. This keeps a `%` inside a field value
 * (for example a URL-encoded `%20`) from hiding an entry that started earlier
 * on the same line.
 *
 * @returns {boolean} true as soon as an entry header is found, otherwise false.
 */
function detectImport() {
  const maxChars = 1048576; // 1MB
  const entryStart = /^\s*@[a-zA-Z]+[\(\{]/;
  const looksLikeEntry = (text) => text !== "" && entryStart.test(text);

  let inComment = false;
  let block = "";
  let charsRead = 0;
  let buffer;

  while ((buffer = Zotero.read(4096)) && charsRead < maxChars) {
    Zotero.debug("Scanning " + buffer.length + " characters for BibTeX");
    charsRead += buffer.length;

    for (let i = 0; i < buffer.length; i++) {
      const chr = buffer[i];
      const isNewline = chr === "\n" || chr === "\r";

      if (inComment) {
        // Skip everything up to the newline that ends the comment.
        if (!isNewline) {
          continue;
        }
        inComment = false;
      }

      if (chr === "%" || isNewline) {
        // Test the text collected so far before discarding it; a mid-line %
        // must not throw away an entry header that precedes it.
        if (looksLikeEntry(block)) {
          return true;
        }
        block = "";
        inComment = chr === "%";
        continue;
      }

      if (chr !== " " && chr !== "\t") {
        block += chr;
      }

      // Allow one-line entries: test at the end of every chunk instead of
      // waiting for a newline. The block is kept on a miss so that a header
      // split across two chunks can still match once the rest arrives.
      if (i === buffer.length - 1 && looksLikeEntry(block)) {
        return true;
      }
    }
  }

  return false;
}
// Run the detector on the multi-line sample and print the result.
console.log(detectImport())
If I add this as a testcase though, I get an internal server error running the tests.
{
"type": "import",
"input": "@misc{Bar2019,\n author = {Bar, Foo}, url = {http://example.com/Foo%20-%20Bar.pdf} }",
"items": [
{
"itemType": "book",
"creators": [
{
"firstName": "Foo",
"lastName": "Bar",
"creatorType": "author"
}
],
"url": "http://example.com/Foo%20-%20Bar.pdf",
"attachments": [],
"notes": [],
"seeAlso": []
}
Oh that's because detectImport said false
No wait, that's a different problem: it stops scanning at the first newline, not before, and my test above has no newlines. With newlines (updated above), this passes for me.
Confirmed: importing
@misc{Bar2019,author = {Bar, Foo}, url = {http://example.com/Foo%20-%20Bar.pdf} }
(single line) also fails in the desktop client with BibTeX.js. The script below also yields `undefined`, so the "No suitable translators found" error would be expected:
// Sample input: the same entry written on a single line, with no newlines.
let data = '@misc{Bar2019, author = {Bar, Foo}, url = {http://example.com/Foo%20-%20Bar.pdf} }';

/**
 * Minimal stand-in for the two Zotero sandbox functions that detectImport()
 * uses. Declared with `const` so the script also runs in strict mode and in
 * ES modules, where assigning to an undeclared name throws a ReferenceError.
 */
const Zotero = {
  /**
   * Consume and return up to `n` characters from the front of `data`.
   * @param {number} n - Maximum number of characters to return.
   * @returns {string} The next chunk; "" once `data` is exhausted.
   */
  read(n) {
    const part = data.substring(0, n);
    data = data.substring(n);
    return part;
  },

  /**
   * Print a debug message to the console.
   * @param {string} msg - Message to print.
   */
  debug(msg) {
    console.log(msg);
  },
};
/**
 * Scan the input exposed by Zotero.read() for something that looks like the
 * start of a BibTeX entry (`@type{` or `@type(`).
 *
 * Known limitation, which this reproduction demonstrates: a `%` anywhere on
 * a line discards the text collected so far and skips the rest of that line,
 * so a one-line entry containing `%` is never tested.
 *
 * @returns {true|undefined} true when an entry header is found; otherwise
 *   the function ends without returning a value.
 */
function detectImport() {
  const CHAR_LIMIT = 1048576; // 1MB
  const entryPattern = /^\s*@[a-zA-Z]+[\(\{]/;
  const isLineBreak = (c) => c === "\n" || c === "\r";
  const isBlank = (c) => c === " " || c === "\t" || isLineBreak(c);

  let skippingComment = false;
  let pending = "";
  let consumed = 0;

  for (;;) {
    // Read first, then check the limit, in that order.
    const chunk = Zotero.read(4096);
    if (!chunk || consumed >= CHAR_LIMIT) {
      return undefined;
    }

    Zotero.debug("Scanning " + chunk.length + " characters for BibTeX");
    consumed += chunk.length;

    const lastPos = chunk.length - 1;
    for (let pos = 0; pos <= lastPos; pos++) {
      const c = chunk[pos];
      const lineBreak = isLineBreak(c);

      if (skippingComment) {
        // Inside a comment: ignore everything up to the line break.
        if (!lineBreak) {
          continue;
        }
        skippingComment = false;
      }

      if (c === "%") {
        // Comment start: drop what was collected and skip to the next line.
        pending = "";
        skippingComment = true;
        continue;
      }

      // A line break, or the last character of the chunk (to allow one-line
      // entries), triggers the check when something has been collected.
      if (pending && (lineBreak || pos === lastPos)) {
        if (entryPattern.test(pending)) {
          return true;
        }
        pending = "";
        continue;
      }

      if (!isBlank(c)) {
        pending += c;
      }
    }
  }
}
// Run the detector on the single-line sample and print the result.
console.log(detectImport())
Sorry, not sure what you're saying here. The sample with newlines I give above works in the client and fails in translation-server. What's causing that?
It looks like newlines are being stripped out of `ctx.request.body`, leaving only tabs:
(3)(+0000000): @misc{Bar2019, author = {Bar, Foo}, url = {http://example.com/Foo-Bar.pdf}}
(3)(+0000000): @
(3)(+0000000): ===>64<=== (number)
(3)(+0000001): m
(3)(+0000000): ===>109<=== (number)
(3)(+0000000): i
(3)(+0000000): ===>105<=== (number)
(3)(+0000000): s
(3)(+0000000): ===>115<=== (number)
(3)(+0000000): c
(3)(+0000000): ===>99<=== (number)
(3)(+0000000): {
(3)(+0000000): ===>123<=== (number)
(3)(+0000000): B
(3)(+0000000): ===>66<=== (number)
(3)(+0000000): a
(3)(+0000000): ===>97<=== (number)
(3)(+0000000): r
(3)(+0000000): ===>114<=== (number)
(3)(+0000000): 2
(3)(+0000000): ===>50<=== (number)
(3)(+0000000): 0
(3)(+0000000): ===>48<=== (number)
(3)(+0000000): 1
(3)(+0000000): ===>49<=== (number)
(3)(+0000000): 9
(3)(+0000000): ===>57<=== (number)
(3)(+0000000): ,
(3)(+0000000): ===>44<=== (number)
(3)(+0000000):
(3)(+0000000): ===>9<=== (number)
`ctx.request.rawBody` works.
Sorry, no it doesn't — I was testing the wrong thing.
Ugh, OK, sorry. `curl -d @test.bib` fails because it strips newlines. `curl --data-binary @test.bib` works fine.
https://ec.haxx.se/http-post.html#posting-binary
When reading from a file, -d will strip out carriage return and newlines.
No idea why it does that, but that was the problem.
For some reason a file that imports successfully into Zotero fails here with "No suitable translators found".
I don't know if this is valid BibTeX, but it's extracted from a test file I have around and works in the client. The `%` characters in the URL seem to stop this from being detected as BibTeX in translation-server. If I remove those, it works.
@retorquere, could you have a look?