Proposed Autobahn OO C Style

ericchapman commented 8 years ago

Given the complexities we will face with creating a WAMP stack in C, I would like to propose an OO style of coding to use that gives us as much OO capabilities as possible so we can cleverly use inheritance throughout the stack. The goal is to support as many OO features as we can without making the code difficult to develop/maintain. Here I will use an example of a "base" class and then create a "shape" and a "circle" using inheritance (ignore minor syntax issues and coding style)

base.h

#ifndef __BASE_H__
#define __BASE_H__

#include <stdlib.h>

typedef int bool;
#define true 1
#define false 0

// ** "void *" is used to eliminate struct conversion errors when using polymorphism of structure **
// ** methods are pointers in the structure so the pointer can be modified when subclassing **
// ** "super" included so subclassing instance can have access to super methods **
#define BASE_CLASS \
    char *_type; \
    void *_super; \
    void (*init)(void *); \
    void (*destroy)(void *); \
    char *(*type)(void *); \
    void *(*super)(void *); \
    bool (*is_kind_of)(void *, char *); \
    bool (*is_instance_of)(void *, char *);

typedef struct base_s {
    BASE_CLASS
} base_t;

// Constructor(s)
base_t *base_();

#endif

base.c

#include "base.h"
#include <string.h>

// ** methods static to disallow linking from outside this file **
static void base_init(void *);
static void base_destroy(void *);
static char *base_type(void *);
static void *base_super(void *);
static bool base_is_kind_of(void *, char *);
static bool base_is_instance_of(void *, char *);

base_t *base_() {
    base_t *this = (base_t *)malloc(sizeof(base_t));
    base_init(this);
    return this;
}

static void base_init(void *self) {
    // ** cast back to correct type **
    base_t *this = self;

    // ** Init variables **
    this->_super = NULL;
    this->_type = "base";

    // ** init methods **
    this->init = &base_init;
    this->destroy = &base_destroy;
    this->type = &base_type;
    this->super = &base_super;
    this->is_kind_of = &base_is_kind_of;
    this->is_instance_of = &base_is_instance_of;
}

static char *base_type(void *self) {
    base_t *this = self;
    return this->_type;
}

static void *base_super(void *self) {
    base_t *this = self;
    return this->_super;
}

static void base_destroy(void *self) {
    // ** void type is fine since free only needs address to free, doesn't care about type **
    free(self);
}

static bool base_is_kind_of(void *self, char *type) {
    base_t *this = self;
    if (strcmp(this->_type, type) == 0) {
        return true;
    }
    else if (this->_super != NULL) {
        // ** Example of calling the super of the instance **
        return (((base_t *)this->_super)->is_kind_of)(this->_super, type);
    }
    return false;
}

static bool base_is_instance_of(void *self, char *type) {
    base_t *this = self;
    return (strcmp(this->_type, type)) == 0?true:false;
}

shape.h

#ifndef __SHAPE_H__
#define __SHAPE_H__

#include "base.h"

typedef struct point_s {
    double x;
    double y;
} point_t;

// ** BASE_CLASS will inherit base attributes and methods **
#define SHAPE_CLASS \
    BASE_CLASS \
    int _id; \
    int (*id)(void *); \
    double (*area)(void *); \
    double (*circumference)(void *);

typedef struct shape_s {
    SHAPE_CLASS
} shape_t;

// Constructor(s)
shape_t *shape_(int);

#endif

shape.c

#include "shape.h"

// ** dummy class to allow access to super methods **
static base_t *super = NULL;

// ** methods static to disallow linking from outside this file **
static void shape_init(void *);
static int shape_id(void *);

shape_t *shape_(int id) {
    shape_t *this = (shape_t *)malloc(sizeof(shape_t));
    shape_init(this);
    this->_id = id;
    return this;
}

static void shape_init(void *self) {
    // ** cast back to correct type **
    shape_t *this = self;

    // Call the super initialization
    if (super == NULL) {
        super = base_();
    }
    (*super->init)(this);

    // ** Init variables **
    this->_super = super;
    this->_type = "shape";
    this->_id = -1;

    // ** init methods **
    this->init = &shape_init;  // ** overload super "init" method **
    this->id = &shape_id;
    this->area = NULL;  // ** virtual **
    this->circumference = NULL;  // ** virtual **
}

static int shape_id(void *self) {
    shape_t *this = self;
    return this->_id;
}

circle.h

#ifndef __CIRCLE_H__
#define __CIRCLE_H__

#include "shape.h"

// ** Inherits from SHAPE_CLASS **
#define CIRCLE_CLASS \
    SHAPE_CLASS \
    point_t _center; \
    double _radius;

typedef struct circle_s {
    CIRCLE_CLASS
} circle_t;

// Constructor(s)
circle_t *circle_(int, point_t, double);

#endif

circle.c

#include "circle.h"
#include "math.h"

// ** dummy class to allow access to super methods **
static shape_t *super = NULL;

static void circle_init(void *);
static double circle_area(void *);
static double circle_circumference(void *);

circle_t *circle_(int id, point_t point, double radius) {
    circle_t *this = (circle_t *)malloc(sizeof(circle_t));
    circle_init(this);

    this->_id = id;
    this->_center.x = point.x;
    this->_center.y = point.y;
    this->_radius = radius;

    return this;
}

static void circle_init(void *self) {
    circle_t *this = self;

    // ** Call the super initialization **
    if (super == NULL) {
        super = shape_(-1);
    }
    (*super->init)(this);

    // Init variables
    this->_super = super;
    this->_type = "circle";
    this->_center.x = -1;
    this->_center.y = -1;
    this->_radius = -1;

    // ** overload methods **
    this->init = &circle_init;
    this->area = &circle_area;
    this->circumference = &circle_circumference;
}

static double circle_area(void *self) {
    circle_t *this = self;
    return M_PI*this->_radius*this->_radius;
}

static double circle_circumference(void *self) {
    circle_t *this = self;
    return 2*M_PI*this->_radius;
}

main.c

#include "circle.h"
#include <stdio.h>

int main() {
    circle_t *circle;
    point_t center = {1,2};

    // ...

    circle = circle_(1234, center, 2.5);
    printf("circle id: %d\n", (*circle->id)(circle));  // ** calls shape "id" method **
    printf("circle area: %f\n", (*circle->area)(circle));  // ** calls circle overloaded "area" method **
    printf("circle circumference: %f\n", (*circle->circumference)(circle));  // ** calls circle overloaded "circumference" method **

    // ...

    printf("circle type: %s\n", (*circle->type)(circle));
    printf("circle is kind of base: %d\n", (*circle->is_kind_of)(circle, "base"));  // ** true **
    printf("circle is kind of circle: %d\n",(*circle->is_kind_of)(circle, "circle"));  // ** true **
    printf("circle is instance of base: %d\n",(*circle->is_instance_of)(circle, "base"));  // ** false **
    printf("circle is instance of circle: %d\n",(*circle->is_instance_of)(circle, "circle"));  // ** true **

    // ...

    (*circle->destroy)(circle);  // ** calls base "destroy" method **
    circle = NULL;

    return 0;
}

output

circle id: 1234
circle area: 19.634954
circle circumference: 15.707963
circle type: circle
circle is kind of base: 1
circle is kind of circle: 1
circle is instance of base: 0
circle is instance of circle: 1

Using this technique will provide inheritance, polymorphism, overloading, and access to "super" methods. It does not provide encapsulation (which is not possible in C). It also provides ways to add "is_kind_of" and "is_instance_of" functionality to allow more advanced coding patterns (as shown above).

The only downsides are

Method calls are a little ugly ("(circle->area)(circle);") but this can be cleaned up using macros if desired ("#define area(a) ((a)->area)((a))" -> "area(circle);") but this may introduce naming conflicts
Each class has a static instance of it's "super" which could be viewed as a waste of memory but this can be ommited if desired.

oberstet commented 8 years ago

Woah;) Quite elaborate. I can see this is a well thought out design that tries to bring as much OO to C as possible.

Going down that road ultimately leads to stuff like gobject https://en.wikipedia.org/wiki/GObject

I think this is overkill. My biggest issues:

classic OO in as using inheritance is often wrong in the first place in my experience. Composition is much more flexible. Or generics (or general C++ templates .. but that's out of scope).
AutobahnC must be able to work without malloc at all

And I have the feeling that going C OO without malloc doesn't really make sense. But maybe that's wrong.

"no malloc" is a must if we want to go to tiny devices. Everything lives on the stack, and yes, that has deep and far reaching consequences (eg hard, precompiled limits/buffers etc).

Probably we should first discuss the "no malloc" thing ..

ericchapman commented 8 years ago

Yeah, this design can pretty much give the developer every piece of OO that they need in C. I was curious where your thoughts were on how elaborate we should get.

Actually, the best thing to do here is probably to define what "tiny" is just so we have a baseline. I have implemented an entire TDMA RF protocol on a PIC16 with 2KB ROM and 200B of RAM and I didn't use malloc there for the reason you described above. No way that was fitting in 200B of data.

What device were you thinking? I saw in something I read "64KB" but wasn't sure if that was data or program and data.

ericchapman commented 8 years ago

@oberstet Only place I see where "malloc" would be useful would be for the linked lists used to implement "arrays" and "hashes" for the user code to create "args" and "kwargs". Alternatively we can make a "pool" of links that are "reserved" and then "returned" as needed and make the size of the pool a define so the user code can increase the number as their application needs it.

ericchapman commented 8 years ago

@oberstet I thought about it more and I think we can get away with static buffers and accomplish what we need to in order to keep this as simple as possible. I created the following sample header file to give us something to throw darts at

https://github.com/ericchapman/autobahn-c/blob/dev/lib/session.h

Do you still want the entire top layer of this thing to be like "autobahn.h" or was that more of a conceptual brain dump? What would you like the top layer to be? As you can see in "session.h" I think we can make this similar to the other libraries from an API perspective which I believe is desirable.

oberstet commented 8 years ago

What device were you thinking? I saw in something I read "64KB" but wasn't sure if that was data or program and data.

Not set in stone, just a rough number. Cortex M4 devices seem to start at 48KB RAM.

oberstet commented 8 years ago

Do you still want the entire top layer of this thing to be like "autobahn.h"

This, and the code in SPEC.md where just braindumps, yes. I often like starting something new by writing different flavors of an API "on paper" first (no working code yet).

I do think it would be nice if the user facing API would just be a single header (autobahn.h).

Regarding https://github.com/ericchapman/autobahn-c/blob/dev/lib/session.h - yes, I agree. If we can make the "look & feel" similar to the other Autobahn's, users would immediately feel at home (at least, they can directly translate concepts).

I think one crucial aspect is args and kwargs eg https://github.com/ericchapman/autobahn-c/blob/dev/lib/session.h#L57

WAMP is dynamically types, and C is statically typed. How do we approach that? How does that interplay with CBOR (the types that RIOT has for that)?

ericchapman commented 8 years ago

@oberstet Quick clarification on device. Is that 48 KB of RAM for "program and data" or just data? Most embedded devices I work with are, for example 128 KB of ROM (Flash) for program and then 32 KB of RAM for data. Just an important clarification when making tradeoff decisions.

As far as the top level header, I have seen some clever "namespace" like C implementation where you can actually expose your library at the top level using a "struct". This would allow us to combine the exposed methods into one single included header file. See here http://stackoverflow.com/a/28535585/462398

The "args" and "kwargs" aspect is interesting. What I was going to do was use a "void " for the object and then include a "char type" which would allow the receiving code to do something like this

typedef struct link_s {
    struct link_t *prev;
    struct link_t *next;
    void *object;
    char *type;
} link_t;

...

link_t *link = // some link
if (strcmp(link->type, "dict") == 0) {
    dict_t *dict = link->object;
    // Do Something
}
...

And then both my "list" and "dict" implementation would use those links. Let me look at CBOR and get back to you.

The other thing that keeps popping up in my head is the "no malloc". I understand the need for that requirement but I am having trouble figuring out how a JSON payload can be parsed from raw bytes and turned into "args" and "kwargs" without malloc? One thought I had was to map into the actual raw payload but for strings, there is no "null" terminator so that wouldn't work. You have to copy it somewhere to add the null terminator. The only other option is to have a pool of buffers that you could use, but that very quickly becomes your own custom "heap" with a "malloc" implementation. I looked at "Jansson" and that is using "malloc" for the exact problem I just described. I wrote a "link.h" and "link.c" here that is doing the static pool of links but it starts to feel more and more like I am re-creating "malloc". I pretty much made my own "heap" that can be resized at compile time. https://github.com/ericchapman/autobahn-c/blob/dev/lib/link.h https://github.com/ericchapman/autobahn-c/blob/dev/lib/link.c

oberstet commented 8 years ago

@ericchapman I'll answer in multiple parts (there are some tricky questions here).

rgd memory

for example, this device is using this MCU.

    512 KB Flash and 64 KB RAM or
    256 KB Flash and 32 KB RAM with:
        64K FlexNVM (MKW21D256 only) and
        4k bytes of enhanced EEPROM/FlexRAM

I'm not sure how to interpret it. Can it run program code directly from Flash?

Here is the MCU fact sheet.

And here is the reference manual for that piece.

Memory is discussed on pp73 in the latter. But I still don't understand which memory can be used for running program code.

My assumption would be: program code does not have to be run from the 32/64kB SRAM necessarily, and if so, we could use the SRAM completely for data.

A proposal: lets make 32kB RAM data memory the minimal target for AutobahnC

oberstet commented 8 years ago

The malloc, JSON, CBOR, args and kwargs questions are deeply interconnected.

I now think this is the most tricky one: the user facing API of the core library. In particular, how to represent args/kwargs to user code. It's harder than the system facing API.

And (if you agree about hardness), then I think we should first address this question. The hardest one first. Because depending on answers, it will have ramifications onto everything else inside the core library.

but I am having trouble figuring out how a JSON payload can be parsed from raw bytes and turned into "args" and "kwargs" without malloc

WAMP is dynamically typed, and if we want to uphold that for AutobahnC (no precompiled, statically types for user args/kwargs payloads, which would be possible too in principle), then AutobahnC must be ready to receive arbitrary args/kwargs.

Eg it must be able to receive an args = [1, 2, 3, .., N] in an event handler, where N is not known at build time (if N would be fixed for an app, then we could do the precompiled statically types user payloads thing I hinted above .. but I think we should avoid that).

If the user callback for the event handler is supposed to be given the complete args array and N isn't fixed, then I can't see how to do that without a malloc like thing.

However, there is another approach: a streaming API for the args/kwargs user payloads. That is, the user event handler callback has additional user callbacks attached for reading arrays/dicts in a streaming like fashion. Similar to a SAX based XML API vs a DOM based XML API.

If we could make that fly, and more so, in a way that is "nice" for the user, then AutobahnC could in principle receive args/kwargs of arbitrary size.

I just stumbled across this: http://riot-os.org/api/group__sys__ubjson.html

Here:

oberstet@corei7ub1310:~/scm/3rdparty/RIOT$ find . -name "*.h" | grep cbor
./sys/include/cbor.h
oberstet@corei7ub1310:~/scm/3rdparty/RIOT$ find . -name "*.c" | grep cbor
./tests/unittests/tests-cbor/tests-cbor.c
./sys/cbor/cbor.c
oberstet@corei7ub1310:~/scm/3rdparty/RIOT$ find . -name "*.h" | grep json
./tests/unittests/tests-ubjson/tests-ubjson.h
./sys/include/ubjson.h
./sys/ubjson/ubjson-internal.h
oberstet@corei7ub1310:~/scm/3rdparty/RIOT$ find . -name "*.c" | grep json
./tests/unittests/tests-ubjson/tests-ubjson.c
./tests/unittests/tests-ubjson/test-ubjson-empty-object.c
./tests/unittests/tests-ubjson/test-ubjson-empty-array.c
./sys/ubjson/ubjson-write.c
./sys/ubjson/ubjson-read.c

Sidenote: adding UBJSON serialization to AutobahnPython (and hence Crossbar.io) would be trivial. Could be done in few lines of code, given a proper UBJSON Python library.

From looking at ./sys/ubjson/ubjson-read.c: this does not use malloc. It's an event driven, SAX like API.

Comparing this to http://riot-os.org/api/cbor_8h.html, I don't get how the latter does it. Eg while there is cbor_deserialize_array and cbor_deserialize_array_indefinite, the former seems to require that you know the number and types of array elements before hand (which conflicts with the dynamic typing of WAMP), and the latter I don't get how to use it. Does this RIOT CBOR implementation support dynamically types payloads anyway?

From my view, JSON wire level support isn't that attractive for AutobahnC. CBOR is better. UBJSON might be fine too. And there is no reason to support multiple serialization formats in AutobahnC.

IMO, making AutobahnC XXX-only (where XXX is either CBOR or UBJSON) would be fine.

I'm also not sure if we really can achieve completely malloc-free implementation of AutobahnC. FWIW, this is what the RIOT docs have to say about memory.

oberstet commented 8 years ago

Regarding the user API, my current view would be that we have to decide about the following:

do we want to (A) support dynamically typed WAMP user payload OR do we (B) require all messages including user payloads to be precompiled (some IDL, then generate C headers/serializer/deserializer code for everything include user payload) ?
If we do (A), then we need to answer: to we want to expose a (C) streaming, malloc-less user API for args/kwargs or (D) expose args/kwargs as complete objects (and hence must use a malloc like thing, and also have then limits for payload length)

ericchapman commented 8 years ago

@oberstet My answer in pieces

MCU Size

The MCU parameters can be interpreted as follows

512/256 KB Flash - program only. Persists through reset/power
64/32 KB RAM - program or data. Does not persist through reset/power so usually only used for data.
64K FlexNVM - application specific. Persists through reset/power
4K EEPROM - application specific. Persists through reset/power

You are interpreting it correctly and I agree with your 32 KB for data proposal. This is considered "small" in the ARM Cortex M4 domain. Let's capture this and close that part.

malloc, JSON, CBOR, args, kwargs

I agree this is the most difficult. I keep landing back on this every time i try to think about parsing the payload because as you pointed out, it bubbles up through the entire library.

Let's make some assumptions about this system and the answer will hopefully be clear assuming we both agree with the assumptions

With a 32 KB RAM limit this means our library shouldn't need more than 25% of that or 8 KB (I think we can make it smaller. Just putting some limits on it)
I think for the application we need to support any type of WAMP compliant args and kwargs and not use statically generated message structures (use option (A))

Conclusion - We need to use a SAX like parser (option (A) (C)) and place the burden on the user code to store the things that it needs where it can use malloc if it desires. I think this is the only way we are going to fit the library into the footprint and support any message type coming in. SAX style parsers are a pain BUT this seems to make more sense vs. creating a DOM like parser in such a small memory footprint when the message payload is unknown. I feel it is too limiting to try and force the user to connect to a statically specified system and make them recompile if they introduce a new message type.

I would say we use UBJSON if it is trivial router side

CBOR note

It looks like the array length is actually a return from the method. http://riot-os.org/api/cbor_8h.html#aed22e8f18d3fa7409326f2c71892054a

size_t cbor_deserialize_array   (   const cbor_stream_t *   stream,
    size_t  offset,
    size_t *    array_length 
)   

size_t offset = cbor_deserialize_array(&stream, 0, &array_length);

array_length is an output since they are passing a pointer to it.

ericchapman commented 8 years ago

After thinking about it a little more, I think I came to a conclusion that might help us close all of this. The overall goal for this library should be lightweight given it is in raw C. Any system that has enough memory to start doing DOM parsing most likely supports embedded C++ which would be much more friendly for creating true "args" and "kwargs".

I would suggest we do the following from a milestone perspective

get autobahn C working with SAX parsing and WAMP basic profile
port it into the proposed device you listed earlier
update Crossbar.io to support the connection type
add advanced profile as needed (at least WAMPCRA)

Once we complete this, I think 1 of 2 things will happen

we enhance the embedded C library to have a compile option that returns a DOM object rather than SAX for args and kwargs using malloc/free, OR
we recode in embedded C++ and offer that as a library if you are looking for DOM in an embedded environment

My gut is telling me there won't be a need for DOM in raw C given the reasoning for using raw C.

oberstet commented 8 years ago

You are interpreting it correctly and I agree with your 32 KB for data proposal. This is considered "small" in the ARM Cortex M4 domain. Let's capture this and close that part.

Agreed.

So we target devices with at least 32KB RAM for data, and we target to use only 25% = 8KB (which makes sense) of that for AutobahnC.

I would say we use UBJSON if it is trivial router side

Agreed.

I will add UBJSON support to AutobahnPython and Crossbar.io.

oberstet commented 8 years ago

Any system that has enough memory to start doing DOM parsing most likely supports embedded C++ which would be much more friendly for creating true "args" and "kwargs".

Agreed. That's also my view. The whole point of AutobahnC is to go where C++ and Linux can't. And no-malloc, no-TCP are 2 aspects in this.

My gut is telling me there won't be a need for DOM in raw C given the reasoning for using raw C.

Yep. Agreed.

I also agree to the milestone plan you've outlined. Makes sense.

I'll add the UBJSON to AB/CB, and try to come up with a "UDP fuzzing bridge" (randomly dropping/reordering UDP datagrams).

This is exciting;) Cool to see how our discussing is progressing ..

ericchapman commented 8 years ago

@oberstet Agreed on the exciting part and how the discussion is progressing :). I am always excited to get my embedded stuff out (I have a scope/logic analyzer and signal generator) and I have been wanting WAMP on those devices since I started using Crossbar a year or so ago.

I think the only thing really left other than code reviews and things that come up is the interface when making calls to the session and what that interface looks like given we are using SAX serialization/deserialization. Give me a day or two to play around but I think I can come up with something that makes sense and feels right.

oberstet commented 8 years ago

@ericchapman alright, UBJSON added to Autobahn (https://github.com/crossbario/autobahn-python/pull/639). Will do Crossbar.io later ..

oberstet commented 8 years ago

@ericchapman I think this might touch on the discussion here https://github.com/crossbario/autobahn-c/issues/8

ericchapman commented 8 years ago

@oberstet About SAX, so I realized with going with a SAX like deserializer that this also means we will most likely want a SAX like serializer. This means that "args", "kwargs", and "options" creation are going to need to be callbacks so that the user code can generate the payload. Do you agree?

oberstet commented 8 years ago

@ericchapman I have thought about that (serializer) also. A callback based serializer would be necessary - even only sending 1 string requires that, if the underlying stack has fixed, limited buffers. This, combined with the fact that the underlying network protocol (UDP, even with full 64k) is limited, will make things really complex IMO. Because if we'd allow a single WAMP message to cross multiple UDP datagrams, the full unreliable/unorder UDP aspect kick in - even for a single WAMP message. I think this is .. a research project on it's own.

Coping with retransmission/reordering of WAMP messages (but whole WAMP messages) is already stretching things. But adding frag/reas to that .. it's way too ambitious I'd say.

So right now I am leaning towards: on such WAMP transports, there is a (possibly negotiated) maximum size for WAMP messages. Eg 1000 octets.

Note that we have progressive call results at the WAMP level, and can extend that to progressive calls. So that would compensate for above limitations in a way ..

oberstet commented 8 years ago

Linking: https://github.com/RIOT-OS/RIOT/issues/5371

oberstet commented 8 years ago

@ericchapman UBJSON support has landed on AutobahnPython and Crossbar.io trunk:

HTTP/1.1 101 Switching Protocols
Server: Crossbar/0.13.2
X-Powered-By: AutobahnPython/0.13.1
Upgrade: WebSocket
Connection: Upgrade
Sec-WebSocket-Protocol: wamp.2.ubjson.batched
Sec-WebSocket-Accept: m3vvAYqSx9sxQyjf1EV+7q53r5o=

Example: https://github.com/crossbario/crossbarexamples/tree/master/serializers

crossbario / autobahn-c