WeaselGames / godot_luaAPI

Godot LuaAPI
https://luaapi.weaselgames.info
Other
347 stars 27 forks source link

Running Lua with sandboxed resources #190

Closed jbromberg closed 6 months ago

jbromberg commented 6 months ago

I'm looking to add user scripting with Lua to my project. Since users can write scripts, they can't be trusted and scripts can potentially be malicious. Is there any way to run the Lua scripts in separate threads with sandboxed resources so that if a user writes an infinite loop or is using too many resources we could simply kill that Lua script process and prevent the main Godot thread from being blocked?

I've experimented with this a bit using Godot threads but I'm still having trouble since I ultimately need to be able to sync data between Godot and Lua (imagine a user script in Lua that is updating the position of a node - this has to be reconciled and updated on the main thread).

Trey2k commented 6 months ago

Preventing infinite loops is a little bit more tricky. While running it on a separate thread would work, as you mentioned its not always ideal. This can be done synchronously using a LuaHook, Documentation for how to use them with this addon can be found here.

The basic idea is you can have a hook execute under different scenarios. IE Every X line. Here is an example of forceing a co routine to yield every frame

extends Node2D

var lua: LuaAPI
var coroutine: LuaCoroutine

var checkups: int = 0

func _checkup_hook(lua: LuaAPI, event: int, line: int):
    checkups += 1
    if checkups >= 15:
        print("Thread has been running for too long. Forceing yield", event)
        # force a 1 secound yield.
        var coroutine: LuaCoroutine = lua.get_running_coroutine()
        coroutine.yield_state([1])
        # force exit with an error
        # return LuaError.new_error("Frame execution time exceeded")

func _ready():
    lua = LuaAPI.new()
    coroutine = lua.new_coroutine()
    # Set the hook to trigger every 15 lines.
    coroutine.set_hook(_checkup_hook, lua.HOOK_MASK_LINE, 15)
    coroutine.load_string("""
    while true do
        -- Being a bad resource and never yielding
        print("Looping")
    end
    """)

var yieldTime = 0
var timeSince = 0
func _process(delta):
    timeSince += delta
    # If the coroutine has finished executing or if not enough time has passed, do not resume the coroutine.
    if coroutine.is_done() || timeSince <= yieldTime:
        return

    checkups = 0
    var ret = coroutine.resume([])
    if ret is LuaError:
        print("ERROR %d: " % ret.type + ret.message)
        return
    if coroutine.is_done():
        return

    print("Yielding for %f"% ret[0])
    yieldTime = ret[0]
    timeSince = 0
Trey2k commented 6 months ago

Keep in mind there is a major performance drawback when using hooks like this

jbromberg commented 6 months ago

Got it. This definitely seems like the simplest approach for now.

Your snippet works as expected but when I test with a more complicated one that uses .call_function() I get an error: attempt to yield across a C-call boundary

var lua: LuaAPI
var coroutine: LuaCoroutine

var checkups: int = 0

func _checkup_hook(lua: LuaAPI, event: int, line: int):
    checkups += 1
    if checkups >= 15:
        print("Thread has been running for too long. Forceing yield", event)
        # force a 1 secound yield.
        var coroutine: LuaCoroutine = lua.get_running_coroutine()
        coroutine.yield_state([1])
        # force exit with an error
        # return LuaError.new_error("Frame execution time exceeded")

func _ready():
    lua = LuaAPI.new()
    coroutine = lua.new_coroutine()
    # Set the hook to trigger every 15 lines.
    coroutine.set_hook(_checkup_hook, lua.HOOK_MASK_LINE, 15)
    coroutine.load_string("""
    function start()
        while true do
            -- Being a bad resource and never yielding
            print("Looping")
        end
    end
    """)

    coroutine.resume([])

    if coroutine.function_exists("start"):
        var ret = coroutine.call_function("start", []) # Error here

        if ret is LuaError:
            print(ret.message)

var yieldTime = 0
var timeSince = 0
func _process(delta):
    timeSince += delta
    # If the coroutine has finished executing or if not enough time has passed, do not resume the coroutine.
    if coroutine.is_done() || timeSince <= yieldTime:
        return

    checkups = 0
    var ret = coroutine.resume([])
    coroutine.call_function("start", [])
    print("resume")
    if ret is LuaError:
        print("ERROR %d: " % ret.type + ret.message)
        return
    if coroutine.is_done():
        return

    print("Yielding for %f"% ret[0])
    yieldTime = ret[0]
    timeSince = 0

Also, is there a way you recommend that I can measure the performance impact of using hooks?

Trey2k commented 6 months ago

Got it. This definitely seems like the simplest approach for now.

Your snippet works as expected but when I test with a more complicated one that uses .call_function() I get an error: attempt to yield across a C-call boundary

The issue here is when you call lua functions the hook will still trigger, however the coroutine is not executing so the functions are effectively running in the parent state, not within the coroutine. A simple work around is to have a toggle like this

extends Node2D

var lua: LuaAPI
var coroutine: LuaCoroutine

var checkups: int = 0
var exception: bool = false

func _checkup_hook(lua: LuaAPI, event: int, line: int):
    if exception:
        return

    checkups += 1
    if checkups >= 15:
        print("Thread has been running for too long. Forceing yield", event)
        # force a 1 secound yield.
        var coroutine: LuaCoroutine = lua.get_running_coroutine()
        coroutine.yield_state([1])
        # force exit with an error
        # return LuaError.new_error("Frame execution time exceeded")

func _ready():
    lua = LuaAPI.new()
    coroutine = lua.new_coroutine()
    # Set the hook to trigger every 15 lines.
    coroutine.set_hook(_checkup_hook, lua.HOOK_MASK_LINE, 15)
    coroutine.load_string("""
    function start()
        while true do
            -- Being a bad resource and never yielding
            print("Looping")
        end
    end
    """)

    coroutine.resume([])
    exception = true
    if coroutine.function_exists("start"):
        var ret = coroutine.call_function("start", []) # Error here

        if ret is LuaError:
            print(ret.message)
    exception = false

var yieldTime = 0
var timeSince = 0
func _process(delta):
    timeSince += delta
    # If the coroutine has finished executing or if not enough time has passed, do not resume the coroutine.
    if coroutine.is_done() || timeSince <= yieldTime:
        return

    checkups = 0
    var ret = coroutine.resume([])
    coroutine.call_function("start", [])
    print("resume")
    if ret is LuaError:
        print("ERROR %d: " % ret.type + ret.message)
        return
    if coroutine.is_done():
        return

    print("Yielding for %f"% ret[0])
    yieldTime = ret[0]
    timeSince = 0

Also, is there a way you recommend that I can measure the performance impact of using hooks?

I would just start working on what you need, then run with and without the hook set and measure the difference

Trey2k commented 6 months ago

With the excretion like above the function call its self could still run a infinite loop, so a better approach would be to have a toggle and error out the parent state instead when you are invoking a method.

Trey2k commented 6 months ago

For some clarification as its a bit confusing, the coroutine.call/push/pull methods just alias the methods on the base lua state. Coroutines do not have unique memory, they all share memory with their parent state and these methods just access the memory directly. The coroutine versions of those methods really only exist for convince.

jbromberg commented 6 months ago

With the excretion like above the function call its self could still run a infinite loop, so a better approach would be to have a toggle and error out the parent state instead when you are invoking a method.

Would you be able to provide an example of what you mean here?

For some clarification as its a bit confusing, the coroutine.call/push/pull methods just alias the methods on the base lua state. Coroutines do not have unique memory, they all share memory with their parent state and these methods just access the memory directly. The coroutine versions of those methods really only exist for convince.

Got it. So it's just running them based on the last coroutine to have .resume() called?

Trey2k commented 6 months ago

Would you be able to provide an example of what you mean here?

extends Node2D

var lua: LuaAPI
var coroutine: LuaCoroutine

var checkups: int = 0
var is_coroutine: bool = false

func _checkup_hook(lua: LuaAPI, event: int, line: int): 
    checkups += 1
    if checkups >= 15:
        print("Thread has been running for too long. Forceing yield")
        # force a 1 secound yield.
        if is_coroutine:
            var coroutine: LuaCoroutine = lua.get_running_coroutine()
            coroutine.yield_state([1])
        else:
        # force exit with an error
            return LuaError.new_error("Frame execution time exceeded")

func _ready():
    lua = LuaAPI.new()
    coroutine = lua.new_coroutine()
    # Set the hook to trigger every 15 lines.
    coroutine.set_hook(_checkup_hook, lua.HOOK_MASK_LINE, 15)
    coroutine.load_string("""
    function start()
        while true do
            -- Being a bad resource and never yielding
            print("Looping")
        end
    end
    """)
    is_coroutine = true
    coroutine.resume([])
    is_coroutine = false

    if coroutine.function_exists("start"):
        var ret = coroutine.call_function("start", []) # Error here

        if ret is LuaError:
            print(ret.message)

var yieldTime = 0
var timeSince = 0
func _process(delta):
    timeSince += delta
    # If the coroutine has finished executing or if not enough time has passed, do not resume the coroutine.
    if coroutine.is_done() || timeSince <= yieldTime:
        return

    checkups = 0
    var ret = coroutine.resume([])
    coroutine.call_function("start", [])
    print("resume")
    if ret is LuaError:
        print("ERROR %d: " % ret.type + ret.message)
        return
    if coroutine.is_done():
        return

    print("Yielding for %f"% ret[0])
    yieldTime = ret[0]
    timeSince = 0

Got it. So it's just running them based on the last coroutine to have .resume() called?

No, when you make a coroutine you use LuaAPI.new_coroutine, the LuaAPI object its self is the only actual lua state with memory. The coroutines are like childeren threads of the state. Even though when you call courtine.resume its a different process its sharing the same memory as all other coroutines made from the LuaAPI object, as well as the object its self.

There is no way to invoke a method within a coroutine really from the C api side. So they are actually running in the base LuaAPI object.

jbromberg commented 6 months ago

Thanks for your help with this. I've managed to get it working nicely for my use case. Sharing my implementation below in case anyone stumbles upon this in the future.

ManagedCoroutine class

class_name ManagedCoroutine
extends LuaCoroutine

var _check_count: int = 0
var _check_limit: int

func _init(lua: LuaAPI, call_limit: int, checks: int = 1):
    _check_limit = checks
    bind(lua)
    set_hook(_hook, lua.HOOK_MASK_COUNT, call_limit / checks)

func _hook(lua: LuaAPI, event: int, line: int):
    _check_count += 1

    if _check_count >= _check_limit:
        _check_count = 0
        return LuaError.new_error("Reached execution limit")

# Allows a continuous call to happen without reaching the execution limit (like the update function). This only works if the _check_limit and call_limit are properly calibrated.
func managed_call(name: String, args: Array) -> Variant:
    _check_count = 0
    return call_function(name, args)

Usage

var lua_script: String = "-- Runs on start
function start()
    -- Your code here
end

-- Runs every frame. Delta is the amount of time passed since the last frame in seconds.
function update(delta)
    -- Your code here
end"

var lua: LuaAPI
var update_coroutine: ManagedCoroutine

var _has_start := false
var _has_update := false

func _ready():
    lua = LuaAPI.new()

    var start_coroutine := ManagedCoroutine.new(lua, 10000)
    start_coroutine.load_string(lua_script)
    start_coroutine.resume([])
    _has_start = start_coroutine.function_exists("start")
    _has_update = start_coroutine.function_exists("update")

    if _has_start:
        var start_return = start_coroutine.call_function("start", [])

        if start_return is LuaError:
            print(start_return.message)

func _process(delta):
    if !lua: return

    if !update_coroutine:
        # By creating this with a check limit >1 it allows us to keep calling this continuously without reaching our execution limit
        update_coroutine = ManagedCoroutine.new(lua, 10000, 2)
        update_coroutine.resume([])

    if _has_update:
        var update_return = update_coroutine.managed_call("update", [delta])

        if update_return is LuaError:
            print(update_return.message)