Open nathan-chappell opened 2 months ago
Just a thought, exposing the parsing logic would help us to test our prompts to make sure they are correctly formatted for Helicone. Here is something off the top of my head...
// @ts-check
/**
* @typedef {Object} HeliconeToken
* @prop {'str'|'static'|'input'} tokenType
* @prop {string} value
* @prop {number} position
* @prop {string} [key]
*/
/**
 * Splits a prompt string into a flat list of tokens: plain text ("str"),
 * `<helicone-prompt-static>` sections ("static") and
 * `<helicone-prompt-input key="...">` sections ("input").
 *
 * For "str" tokens `position` is the index in `s` where the text starts.
 * For "static" and "input" tokens `value` is the text between the opening and
 * closing tags and `position` is the index of that inner text (just after the
 * opening tag). "input" tokens carry the tag's `key`; "static" tokens have
 * `key` set to `undefined`.
 *
 * Tags are matched with regular expressions, not a real markup parser, so
 * nested tags of the same kind are not supported: the first closing tag after
 * an opening tag ends the section.
 *
 * @param {string} s The prompt text to tokenize.
 * @returns {HeliconeToken[]} Tokens in the order they appear in `s`.
 * @throws {Error} If an opening tag has no matching closing tag.
 */
function parseHeliconeTags(s) {
  const inputOpenRegex = /<\s*helicone-prompt-input\s+key="(?<keyname>[^"]*)"\s*>/;
  // Closing tags only allow optional whitespace before ">". A greedy ".*>"
  // would swallow everything up to the last ">" on the same line.
  const inputCloseRegex = /<\/\s*helicone-prompt-input\s*>/;
  const staticOpenRegex = /<\s*helicone-prompt-static\s*>/;
  const staticCloseRegex = /<\/\s*helicone-prompt-static\s*>/;

  /** @type {HeliconeToken[]} */
  const result = [];
  let position = 0;

  // Every iteration either consumes a complete tag section (advancing
  // `position` by at least the length of its tags) or breaks, so the loop
  // always terminates without an artificial iteration cap.
  while (position < s.length) {
    const rest = s.substring(position);
    const inputOpen = inputOpenRegex.exec(rest);
    const staticOpen = staticOpenRegex.exec(rest);

    /** @type {RegExpExecArray} */
    let openMatch;
    /** @type {RegExp} */
    let closeRegex;
    /** @type {'static'|'input'} */
    let tokenType;
    if (inputOpen !== null && (staticOpen === null || inputOpen.index < staticOpen.index)) {
      openMatch = inputOpen;
      closeRegex = inputCloseRegex;
      tokenType = "input";
    } else if (staticOpen !== null) {
      openMatch = staticOpen;
      closeRegex = staticCloseRegex;
      tokenType = "static";
    } else {
      // No further tags: the remainder is plain text.
      result.push({ tokenType: "str", value: rest, position: position });
      break;
    }

    // Plain text sitting between the current position and the opening tag.
    if (openMatch.index > 0) {
      result.push({ tokenType: "str", value: rest.substring(0, openMatch.index), position: position });
    }

    // Look for the closing tag only after the opening tag, so that text inside
    // the opening tag (e.g. the key attribute) can never be taken for it.
    const contentStart = position + openMatch.index + openMatch[0].length;
    const closeMatch = closeRegex.exec(s.substring(contentStart));
    if (closeMatch === null) {
      throw new Error(`unclosed ${tokenType} tag`);
    }
    const contentEnd = contentStart + closeMatch.index;

    // `groups.keyname` only participates in the input regex; it is undefined
    // for static tags.
    const key = (openMatch.groups || {}).keyname;
    result.push({
      tokenType: tokenType,
      value: s.substring(contentStart, contentEnd),
      position: contentStart,
      key: key,
    });
    position = contentEnd + closeMatch[0].length;
  }
  return result;
}
/**
 * Renders a token list back into a prompt string.
 *
 * - "str" tokens are emitted verbatim.
 * - "input" tokens are emitted as `<helicone-prompt-input key="...">` tags:
 *   when `selectedValues` has an entry for the token's key, that entry becomes
 *   the tag content; otherwise a self-closing tag is emitted. The token's own
 *   `value` is never emitted.
 * - "static" tokens are emitted as `<helicone-prompt-static>` tags whose
 *   content is cut to the first `staticPreviewLength` characters.
 *
 * @param {HeliconeToken[]} tokens Tokens to render, in output order.
 * @param {Record<string, string>} [selectedValues] Replacement content per input key.
 * @param {number} [staticPreviewLength] Maximum number of characters kept from each
 *   static section (default 60). Pass `Infinity` to keep static sections whole.
 * @returns {string} The rendered prompt.
 * @throws {Error} If an input token has no (or an empty) key.
 */
function formatHeliconeTags(tokens, selectedValues = {}, staticPreviewLength = 60) {
  const selectedKeys = new Set(Object.keys(selectedValues));
  /** @type {string[]} */
  const parts = [];
  for (const token of tokens) {
    switch (token.tokenType) {
      case "str":
        parts.push(token.value);
        break;
      case "input":
        if (!token.key) {
          throw new Error("input token is missing a key");
        }
        // The key attribute must be closed with a quote in both forms,
        // otherwise the emitted tag is malformed.
        if (selectedKeys.has(token.key)) {
          parts.push(`<helicone-prompt-input key="${token.key}">${selectedValues[token.key]}</helicone-prompt-input>`);
        } else {
          parts.push(`<helicone-prompt-input key="${token.key}" />`);
        }
        break;
      case "static":
        parts.push(`<helicone-prompt-static>${token.value.substring(0, staticPreviewLength)}</helicone-prompt-static>`);
        break;
    }
  }
  return parts.join("");
}
// Sample prompt that mixes static and input sections; it is fed to the parser
// and formatter below as a quick manual check.
const text = `
<helicone-prompt-static>
Your task is to extract information from a transcript.
</helicone-prompt-static>
<helicone-prompt-input key="current-values" >
Some values have already been found. If they are correct, please use them in your output.
CURRENT VALUES:
\`\`\`
null
\`\`\`
</helicone-prompt-input>
Here is the transcript that has been recorded so far:
TRANSCRIPT:
'''
<helicone-prompt-input key="transcript" >
Johannes Kepler (/ˈkɛplər/;[2] German: [joˈhanəs ˈkɛplɐ, -nɛs -] ⓘ;[3][4] 27 December 1571 – 15 November 1630) was a German astronomer, mathematician, astrologer, natural philosopher and writer on music.[5] He is a key figure in the 17th-century Scientific Revolution, best known for his laws of planetary motion, and his books Astronomia nova, Harmonice Mundi, and Epitome Astronomiae Copernicanae, influencing among others Isaac Newton, providing one of the foundations for his theory of universal gravitation.[6] The variety and impact of his work made Kepler one of the founders and fathers of modern astronomy, the scientific method, natural and modern science.[7][8][9] He has been described as the "father of science fiction" for his novel Somnium.[10][11]
</helicone-prompt-input>'''
<helicone-prompt-static>
Please output valid JSON using the keys from the schema above.
If any information is missing, impute it with \`null\`.
Attempt to infer correct results if possible.
JSON:
</helicone-prompt-static>
`;

// Tokenize the sample and dump the raw token list.
const parsedTokens = parseHeliconeTags(text);
console.log(parsedTokens);

// Re-render the prompt with no selected values for any input key.
console.log(formatHeliconeTags(parsedTokens, {}));
with output:
<helicone-prompt-static>
Your task is to extract information from a transcript.
</helicone-prompt-static>
<helicone-prompt-input key="current-values />
Here is the transcript that has been recorded so far:
TRANSCRIPT:
'''
<helicone-prompt-input key="transcript />'''
<helicone-prompt-static>
Please output valid JSON using the keys from the schema</helicone-prompt-static>
FWIW, after some testing, it seems like the issue is being caused by having the `input` and `static` tags in the same message.
What happened?
I'm interested in using the tags for prompts described in the documentation. The inputs are not being represented properly in the prompt view, but it seems like they are being stripped out before being forwarded to OpenAI. This also causes the version to be bumped every time a request is sent, so I can't use the feature as intended. Am I doing something wrong? The precise meta-language isn't described in the docs, so it's hard to tell why it's not working.
First, here's the text:
Here is a full request. Important to note:
- the `json_schema` response format
- `copy-of-transcript` has the `<helicone-prompt-*>` tags removed, indicating they were stripped before being forwarded

Full Request
Relevant log output
No response
Twitter / LinkedIn details
No response