Open westurner opened 7 years ago
[ ] As a notebook user, I can add metadata about the whole JupyterNotebook to be displayed at the top
[ ] As a notebook user, I can add metadata by creating a notebook cell which produces and displays RDFa HTML and/or JSONLD
yield (objmeta, obj)
display(obj._repr_html_() if hasattr(obj, '_repr_html_') else obj)
when rendering each notebook cell output_repr_html_
=> RDFa and/or JSONLD
# Example data: X is 0..9 and y is derived from X element-wise.
X = range(10)
y = [(x * 110) + 1 for x in X]

# Mapping of column name -> values, in insertion order.
obj = collections.OrderedDict()
# Alternative container (DefaultOrderedDict is not defined in this snippet):
# obj = DefaultOrderedDict()  # type: MappingType
obj['X'] = X
obj['y'] = y
Meta(obj, meta=dict(name=, url=, dateCreated=, dateModified=))
class Meta(object):
    """Pair a data object with a metadata mapping for notebook display.

    Attributes:
        obj: the wrapped data object, stored as given.
        meta: a dict built from ``meta`` and then updated with ``**kwargs``.
    """

    def __init__(self, object, meta=None, **kwargs):
        """Store *object* and merge *meta* with any extra keyword metadata.

        Args:
            object: the data object to describe (name kept from the
                original sketch; it shadows the builtin only inside this
                method).
            meta: optional mapping of metadata. It is copied, so the
                caller's mapping is never mutated.
            **kwargs: additional metadata entries; these override ``meta``.
        """
        self.obj = object
        # Copy so that update() below neither fails on None nor writes
        # into the mapping owned by the caller.
        self.meta = dict(meta) if meta is not None else {}
        self.meta.update(kwargs)

    def _repr_html_(self):
        """Return the ``'html'`` entry of ``to_html5_rdfa(self.meta)``."""
        ctx = to_html5_rdfa(self.meta)
        return ctx['html']

    def to_triples(self):
        """Return a list of ``[subject, predicate, object]`` triples.

        The list holds the type triple first, then the ``X`` and ``y``
        triples read from ``self.obj``, then whatever
        ``jsonld_to_triples`` returns for the JSON-LD context.

        NOTE(review): ``_`` (used as the subject), ``types`` and
        ``jsonld_to_triples`` are not defined in the visible snippet;
        they must be provided by the surrounding module -- TODO confirm.
        """
        # subject URI, predicate URI, object value
        triples_xy = [
            [_, 'X', self.obj['X']],
            [_, 'y', self.obj['y']],
        ]
        triples_type = [
            [_, 'a', types[0]],
            # [_, 'rdf:type', types[0]],  # Curie, QName to be expanded
            # [_, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', types[0]],
            # [_, '@type', types[0]]  # JSONLD
        ]
        # "@context" is not a valid keyword-argument name, so the mapping is
        # built from key/value pairs.
        triples_context = OrderedDict([
            ("@context", OrderedDict(schema="http://schema.org/")),
        ])

        triples = []
        triples.extend(triples_type)
        # yield from triples_type
        triples.extend(triples_xy)
        # yield from triples_xy
        triples.extend(jsonld_to_triples(triples_context))
        # triples.extend(jsonld_to_triples(namespaces.context(format='jsonld')))
        return triples
## - [ ] rdflib namespace manager to jsonldcontext()
def to_html5_rdfa(metaobj):  # _repr_html_
    """Generate an HTML5/RDFa rendering context for *metaobj*.

    Returns:
        dict with three keys: ``'jsonld'`` (result of ``to_jsonld``),
        ``'markdown'`` (result of ``to_markdown``) and ``'html'`` (the
        rendered document element).

    NOTE(review): ``div`` and ``script`` are used as context-manager tag
    builders with a ``.render()`` method, which looks like the ``dominate``
    API; ``cls=`` is that library's spelling of the ``class`` attribute
    (``class`` itself is a Python keyword) -- TODO confirm the library.
    ``to_jsonld`` and ``to_markdown`` are defined outside this snippet.
    """
    ctx = {}
    jsonld = to_jsonld(metaobj)
    ctx['jsonld'] = jsonld
    ctx['markdown'] = to_markdown(metaobj)

    # Handles to the individual panels; the original sketch assigned into
    # this mapping without ever creating it.
    panels = {}

    doc = div(id="dataobj/01")
    with doc:
        # NOTE(review): this reuses the id of the enclosing div, and passes
        # the JSON-LD via content= as in the original -- confirm both.
        script(
            id="dataobj/01",
            type="application/ld+json",
            content=jsonld)
        with div(cls="panels"):
            panels['html'] = div(id="dataobj/01.html", cls="panel-html")
            panels['jsonld'] = div(id="dataobj/01.jsonld", cls="panel-jsonld")
            panels['markdown'] = div(id="dataobj/01.markdown", cls="panel-markdown")

    ctx['html'] = doc.render()
    return ctx
##### References
- https://wrdrd.com/docs/tools/#jupyter-notebook
- https://wrdrd.com/docs/tools/#nbformat
- https://wrdrd.com/docs/consulting/education-technology#jupyter-and-reproducibility
- https://westurner.org/wiki/ideas#jupyter-notebook-meta-object
- https://wrdrd.com/docs/consulting/linkedreproducibility
#### Metadata **about** the JupyterNotebook
- "Add JSONLD @context to the top level .ipynb node "
https://github.com/jupyter/nbformat/issues/44
- Example JupyterNotebook metadata as JSONLD:
```json
{"@context": {
"schema": "http://schema.org/",
"jupyter": "https://jupyter.org/ns/v4/#",
"_base": "http://localhost:8000/ns/v1#" },
"@type": [
"schema:JupyterNotebook",
"schema:ScholarlyArticle",
"schema:DataCatalog", ],
"@id": "http://westurner.github.io/notebooks/notebooks/nbname"
"name": "Notebook Name",
"author": [{
"@type": "schema:Person",
"givenName": "Wesley",
"familyName": "Turner",
"url": "https://westurner.org/"
}],
"dateCreated": "2016-09-20",
"about": [
{"url": ["https://en.wikipedia.org/wiki/JSONLD"] },
{"url": "https://pypi.org/project/pipfile/", "name": "Pipfile and Pipfile.lock"}
]
}
{"@context": {
"schema": "http://schema.org/",
"prov": "http://www.w3.org/ns/prov#"},
...,
"hasPart": [{
"@type": "schema:Dataset",
"name": "Months of the Year",
"distribution": {
"@type": "schema:DataDownload",
"contentUrl": "http://...",
"encodingFormat": "CSV", # URI?
}
}]
https://github.com/jupyter/nbformat/issues/44#issuecomment-275954832
https://github.com/pypa/pipfile#pipfile-1
# Pipfile (TOML) + meta.pkgname
# https://github.com/pypa/pipfile#pipfile-1
[[source]]
url = 'https://pypi.python.org/simple'
verify_ssl = true
[requires]
python_version = '2.7'
[packages]
requests = { extras = ['socks'] }
Django = '>1.10'
pinax = { git = 'git://github.com/pinax/pinax.git', ref = '1.4', editable = true }
appname =
[dev-packages]
nose = '*'
## Pipfile .meta (Pipfile.meta .toml)
# SoftwareApplication +
# funder, sponsor < CreativeWork
# homepage -> url ?
# wikipedia
# src = URL || List[URL] || List[Union[RepositoryThing,Thing]]
# pypi = "
# docs = "
# irc = "
# {twitter, }
# https://schema.org/about domain: {URL, Thing}
# about= {type="schema__:Twitter__", url="https://twitter.com/[@]username"}
# [ ] Union[URL, Thing]
# [ ] Union[URL, Thing, RepositoryThing(git=, ref=)]
#[meta.requests]
[meta.Django]
homepage = "https://djangoproject.com/" #
wikipedia = "https://en.wikipedia.org/wiki/Django_(web_framework)"
src = [ {git="https://github.com/django/django", ref="stable/1.10.x" } ]
src_stable = [ {git="https://github.com/django/django", ref="stable/1.10.x" } ]
src_master = [ {git="https://github.com/django/django", ref="master" } ]
pypi = "https://pypi.python.org/pypi/django"
#pypi = "https://pypi.org/project/django" # Warehouse
docs = [
{ name="docs (latest)", url="https://docs.djangoproject.com/en/"},
{ name="docs (master) src", url="https://github.com/django/django/tree/master/docs", ref="master", },
{ name="docs (v1.10)", url="https://docs.djangoproject.com/en/1.10/", ref="stable/1.10.x"},
{ name="docs (v1.10) src", url="https://github.com/django/django/tree/stable/1.10.x/docs", ref="stable/1.10.x"},
]
irc="irc://irc.freenode.net/django"
twitter= ["https://twitter.com/@djangoproject" ]
awesome = [ "https://github.com/rosarior/awesome-django" ]
# schema.org
funder = [
{"@type"="schema:Organization",
name="DSF: Django Software Foundation", url="https://www.djangoproject.com/foundation/",
twitter="https://twitter.com/@djangoproject" }]
sponsor = [{}]
[meta.pinax]
homepage = "http://pinaxproject.com/"
src = [ {git="https://github.com/pinax/pinax"} ]
#
twitter = [ "https://twitter.com/@pinaxproject", ]
# [meta.nose]
[meta."/app/wheels/appname"]
homepage =
src =
irc =
twitter =
Jupyter notebook JSONLD \@context
From https://github.com/westurner/pypfi/issues/7#issuecomment-68125966 :
From https://www.reddit.com/r/IPython/comments/2qae4s/ipython_repr_method_examples/cn5ko02 :+1:
So, from (https://github.com/ipython/ipython/blob/master/IPython/utils/capture.py and https://github.com/ipython/ipython/blob/master/IPython/utils/tests/test_capture.py#L27) and (https://github.com/ipython/ipython/blob/master/IPython/core/display.py and https://github.com/ipython/ipython/blob/master/IPython/core/tests/test_display.py) :
# Maps each IPython ``_repr_*_`` method name to the MIME type it produces.
_mime_map = {
    "_repr_png_": "image/png",
    "_repr_jpeg_": "image/jpeg",
    "_repr_svg_": "image/svg+xml",
    "_repr_html_": "text/html",
    "_repr_json_": "application/json",
    "_repr_javascript_": "application/javascript",
}
# _repr_latex_ = "text/latex"
# _repr_retina_ = "image/png"
And from https://westurner.org/redditlog/#comment/cn5kqjf :
Additional ideas for IPython _repr_<type>_ methods:
_repr_rdfa_ = "text/html" (_repr_html_) + [xmlns: namespaces]
_repr_jsonld_ = "application/json" (_repr_json_) + (@context = {})
@context
@base
@vocab -- http://www.w3.org/TR/json-ld/#default-vocabulary
See: https://github.com/westurner/strypes
(The JSONLDstr -type things might be more appropriately factored out to strypes, with nbmeta as a minimal solution for adding JSONLD and RDFa metadata to the page (and strypes or another package for widgets displaying various forms of JSONLD and RDFa for teaching/development))
Tablib support would be great:
Jupyter table display:
from https://github.com/westurner/nbmeta/blob/master/README.rst :
########### nbmeta ########### | Src: https://github.com/westurner/nbmeta/
.. contents::
Initial API:
https://github.com/westurner/nbmeta/blob/develop/nb/nbmeta-00-01__exploration.py
https://github.com/westurner/nbmeta/blob/develop/nb/nbmeta-00-01__exploration.ipynb
https://nbviewer.jupyter.org/github/westurner/nbmeta/blob/develop/nb/nbmeta-00-01__exploration.ipynb
@type=Union[URI, List[URI]]
# JSONLDData can just be returned from an initial notebook cell for now?
title / schema.org/name: https://github.com/jupyter/nbformat/issues/45
re: tablib (and maybe qgrid) @kennethreitz https://github.com/kennethreitz/tablib/issues/305
@westurner I'd like to. It may be good to get to 1.0 w/ #nbmeta (maybe retool w/ #attrs) result objects w/ repr_html (repr_rdfa_and_jsonld) be4
Is this a similar problem to the RDFa in _repr_html_()
and JSON-LD in _repr_json_()
and _repr_mimebundle_()
problem?
I've started writing solutions to this problem a number of times ("nbmeta" *); here's another take literally in the comment box here this time:
import itertools
class nbmeta:
# node_id_seq = itertools.count() # or e.g. uuid.uuid4()
class Node:
"""
>>> Node()
>>> Node(id="123")
>>> Node(obj, id="123")
>>> Node(obj='abc'.split(), id="123")
"""
def __init__(self, obj=None, **kwargs):
self.attrs = self.validate_kwargs(kwargs, obj=self.obj)
# assert 'id' in kwargs
# assert 'obj' in kwargs
if obj is not None:
if 'obj' not in kwargs:
self.obj = obj
def validate_kwargs(self, kwargs, obj=None):
"""validate [and transform] a kwargs dict to HTML [& RDFa/Microdata] attrs"""
_validators = getattr(self, 'kwargs_validators', None)
if _validators is None:
_validators = [getattr(self, attr) for attr in dir(self) if attr.startswith('validate_kwargs__')]
_kwargs = kwargs.copy() #
for validator_transform_func in _validators:
_kwargs = validator_transform_func(_kwargs)
return _kwargs
def get_attrs(self, include_values=False, asdict=False):
attrs = [*self.attrs.keys(), 'obj']
if include_values or asdict:
if asdict:
return {attr:getattr(self, attr) for attr in attrs)}
else:
return
else:
return attrs
def __repr__():
return str(self)
def __str__(self):
return f'<{self.__class__.name} id={htmlescape(self.id)} ' + " ".join(f'{htmlescape(attr)}={htmlescape(value)}' for attr, value in self.attrs.items()) + f'obj={htmlescape(obj)} />')
def _repr_mimebundle_():
output = {}
output['text/plain'] = str(self)
output['text/html'] = self._repr_html_() # or _repr_html_rdfa_() first if it exists
output['application/json'] = self._repr_json_()
# output['application/json+ld'] = self._repr_json_ld_()
return output
def _repr_html_():
# yield IPython.display.HTML(self.attrs)
yield IPython.display.HTML(str(self))
yield IPython.display.HTML(obj)
def _repr_json_():
# TODO
return {attr:json_cast(value)) for value in self.get_attrs(include_values)}
HTML_ATTRS = ['alt', 'aria-*', 'data-*', 'name', 'id']
def html_attrs(self):
_dir_self = dir(self)
return [(attr, getattr(self, attr)) for attr in ['alt', 'aria-*', 'data-*'] if attr in _dir_self)]
class Img(Node):
def validate_kwargs__check_alt_attr(kwargs):
logging.debug("Note: a11y guidelines strongly suggest alt= tags for <img> elements")
# class Heading(Node):
# class H1(Heading):
# class H2(Heading):
# class H3(Heading):
# class Figure(Node):
import nbmeta
def test_nbmeta_img():
    """An Img node keeps its keyword attributes and its (possibly None) obj."""
    obj = None
    img = nbmeta.Img(obj, alt="Alt text")
    # Node stores validated keyword attributes in ``attrs``; it has no
    # ``data`` attribute.
    assert 'alt' in img.attrs
    assert hasattr(img, 'obj')
    assert img.obj == obj
There are already many good, well-tested templating systems and there are already SQL, NoSQL, and RDF / JSON-LD ORMs, and there are already e.g. jsonschema and shacl form validation libraries; but what should IPython.display. or another package minimally require?
Dataset meta object https://schema.org/Dataset Okfn data pkg JSON JSONLD / RDFa Columns CSVW
_repr_html_
DefaultOrderedDict