Open psychemedia opened 5 days ago
Folium map:
"outputs": [{
"data": {
"text/plain": [
"<folium.folium.Map at 0xffff65285780>"
]
}
}]
How about:
_
(i.e. the object type returned from the last code line in the cell).Claude.ai suggests:
import nbformat
from IPython import get_ipython
def add_output_types_to_metadata(notebook_path, output_path=None):
# Load the notebook
with open(notebook_path, 'r') as f:
nb = nbformat.read(f, as_version=4)
# Get the IPython interpreter
ip = get_ipython()
for cell in nb.cells:
if cell.cell_type == 'code':
# Execute the cell
ip.run_cell(cell.source)
# Get the type of the last output
output_type = type(ip.user_ns['_'])
# Add the type to cell metadata
if 'metadata' not in cell:
cell['metadata'] = {}
cell['metadata']['output_type'] = str(output_type)
# Save the modified notebook
if output_path is None:
output_path = notebook_path
with open(output_path, 'w') as f:
nbformat.write(nb, f)
print(f"Notebook saved with output types in metadata to: {output_path}")
# Example usage
notebook_path = 'path/to/your/notebook.ipynb'
output_path = 'path/to/your/output_notebook.ipynb'
add_output_types_to_metadata(notebook_path, output_path)
(I then further prompted it to add the metadata)
and notes:
This script needs to be run in an IPython environment (like a Jupyter notebook) to work correctly.
I then wondered whether a JupyterLab extension might do it?
Claude suggested as a starting point:
import {
JupyterFrontEnd,
JupyterFrontEndPlugin
} from '@jupyterlab/application';
import {
INotebookTracker
} from '@jupyterlab/notebook';
import {
Cell
} from '@jupyterlab/cells';
const plugin: JupyterFrontEndPlugin<void> = {
id: 'output-type-metadata:plugin',
autoStart: true,
requires: [INotebookTracker],
activate: (app: JupyterFrontEnd, notebookTracker: INotebookTracker) => {
console.log('JupyterLab extension output-type-metadata is activated!');
notebookTracker.widgetAdded.connect((sender, notebook) => {
notebook.content.executed.connect((sender, args) => {
const cell = args.cell as Cell;
const outputArea = cell.outputArea;
if (outputArea.model.length > 0) {
const lastOutput = outputArea.model.get(outputArea.model.length - 1);
let outputType = 'Unknown';
if (lastOutput.type === 'execute_result') {
// Determine output type based on the data
if (lastOutput.data['text/plain']) {
// You might need more sophisticated logic here
outputType = 'text/plain';
} else if (lastOutput.data['application/vnd.jupyter.widget-view+json']) {
outputType = 'widget';
}
// Add more conditions as needed
}
// Update cell metadata
cell.model.metadata.set('output_type', outputType);
}
});
});
}
};
export default plugin;
Then I asked if the extension could run the _
(though this would break _
for other cells unless we handle that somehow, by saving the object, typing it, then re-returning it?)
import {
JupyterFrontEnd,
JupyterFrontEndPlugin
} from '@jupyterlab/application';
import {
INotebookTracker
} from '@jupyterlab/notebook';
import {
Cell
} from '@jupyterlab/cells';
import {
KernelMessage
} from '@jupyterlab/services';
const plugin: JupyterFrontEndPlugin<void> = {
id: 'output-type-metadata:plugin',
autoStart: true,
requires: [INotebookTracker],
activate: (app: JupyterFrontEnd, notebookTracker: INotebookTracker) => {
console.log('JupyterLab extension output-type-metadata is activated!');
notebookTracker.widgetAdded.connect((sender, notebook) => {
notebook.content.executed.connect((sender, args) => {
const cell = args.cell as Cell;
const kernel = notebook.sessionContext.session?.kernel;
if (kernel) {
// Execute type(_) to get the type of the last output
kernel.requestExecute({
code: 'type(_).__name__'
}).onIOPub = (msg: KernelMessage.IIOPubMessage) => {
if (msg.header.msg_type === 'execute_result') {
const content = msg.content as KernelMessage.IExecuteResultMsg['content'];
const outputType = content.data['text/plain'] as string;
// Update cell metadata with the output type
cell.model.metadata.set('output_type', outputType.replace(/['"]+/g, '')); // Remove quotes
}
};
}
});
});
}
};
export default plugin;
When I explained my _
concerns it suggested:
import {
JupyterFrontEnd,
JupyterFrontEndPlugin
} from '@jupyterlab/application';
import {
INotebookTracker
} from '@jupyterlab/notebook';
import {
Cell
} from '@jupyterlab/cells';
import {
KernelMessage
} from '@jupyterlab/services';
const plugin: JupyterFrontEndPlugin<void> = {
id: 'output-type-metadata:plugin',
autoStart: true,
requires: [INotebookTracker],
activate: (app: JupyterFrontEnd, notebookTracker: INotebookTracker) => {
console.log('JupyterLab extension output-type-metadata is activated!');
notebookTracker.widgetAdded.connect((sender, notebook) => {
notebook.content.executed.connect((sender, args) => {
const cell = args.cell as Cell;
const kernel = notebook.sessionContext.session?.kernel;
if (kernel) {
// Execute code to get type without modifying _
kernel.requestExecute({
code: `
import sys
_temp = _
_type = type(_).__name__
sys.stdout.write(_type)
del _temp
`
}).onIOPub = (msg: KernelMessage.IIOPubMessage) => {
if (msg.header.msg_type === 'stream' && msg.content.name === 'stdout') {
const content = msg.content as KernelMessage.IStreamMsg['content'];
const outputType = content.text;
// Update cell metadata with the output type
cell.model.metadata.set('output_type', outputType);
}
};
}
});
});
}
};
export default plugin;
I also wonder if I could just riff on cell execution status extension?
I then thought it might make more sense as a menu command, incl with an option to ignore errors. Claude again:
import {
JupyterFrontEnd,
JupyterFrontEndPlugin
} from '@jupyterlab/application';
import {
INotebookTracker,
NotebookActions
} from '@jupyterlab/notebook';
import {
IMainMenu
} from '@jupyterlab/mainmenu';
import {
Cell
} from '@jupyterlab/cells';
import {
KernelMessage
} from '@jupyterlab/services';
const plugin: JupyterFrontEndPlugin<void> = {
id: 'output-type-metadata:plugin',
autoStart: true,
requires: [INotebookTracker, IMainMenu],
activate: (app: JupyterFrontEnd, notebookTracker: INotebookTracker, mainMenu: IMainMenu) => {
console.log('JupyterLab extension output-type-metadata is activated!');
function enrichCell(cell: Cell, kernel: any) {
return new Promise((resolve, reject) => {
kernel.requestExecute({
code: `
import sys
_temp = _
_type = type(_).__name__
if _type == 'DataFrame':
import pandas as pd
if isinstance(_, pd.DataFrame):
_type = f"DataFrame(shape={_.shape}, columns={list(_.columns)})"
sys.stdout.write(_type)
del _temp
`
}).onIOPub = (msg: KernelMessage.IIOPubMessage) => {
if (msg.header.msg_type === 'stream' && msg.content.name === 'stdout') {
const content = msg.content as KernelMessage.IStreamMsg['content'];
const outputType = content.text;
cell.model.metadata.set('output_type', outputType);
resolve(outputType);
}
};
});
}
async function runAllCellsEnriched(notebook: any, ignoreErrors: boolean = false) {
const cells = notebook.content.model.cells;
const kernel = notebook.sessionContext.session?.kernel;
if (!kernel) {
console.error('No kernel available');
return;
}
for (let i = 0; i < cells.length; i++) {
const cell = notebook.content.widgets[i];
if (cell.model.type === 'code') {
try {
await NotebookActions.runCell(notebook.content, cell);
await enrichCell(cell, kernel);
} catch (error) {
console.error(`Error in cell ${i}:`, error);
if (!ignoreErrors) {
break;
}
}
}
}
}
// Add menu items
mainMenu.runMenu.addGroup([
{
command: 'notebook:run-all-cells-enriched'
},
{
command: 'notebook:run-all-cells-enriched-ignore-errors'
}
]);
// Add commands
app.commands.addCommand('notebook:run-all-cells-enriched', {
label: 'Run All Cells (Enriched)',
execute: () => {
const notebook = notebookTracker.currentWidget;
if (notebook) {
runAllCellsEnriched(notebook);
}
}
});
app.commands.addCommand('notebook:run-all-cells-enriched-ignore-errors', {
label: 'Run All Cells (Enriched, Ignore Errors)',
execute: () => {
const notebook = notebookTracker.currentWidget;
if (notebook) {
runAllCellsEnriched(notebook, true);
}
}
});
}
};
export default plugin;
The _temp
thing is wrong, we need to return _temp
to reset the _
value. And then it'll hang around. Use a var name with a likely unique name?
After a bit of hassling, claude suggested;
const uniqueId = `_output_type_metadata_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;`
used as e.g. ${uniqueId}_temp = _
I suggested a uuid
solution and it gave:
__output_type_uuid = str(uuid.uuid4()).replace('-', '_')
Create a simple CLI tool that will autotag cells in a notebook based on cell output characteristics.
So if a cell looks like its a
folium
map, tag it as such etc.This could be run as a pre-testing step on an untagged notebook:
If you know cell outputs are correct in a notebook (including any error cells) then many of the tags can be derived from that.