sibson / vncdotool

A command line VNC client and python library
Other
451 stars 120 forks source link

Capture Command Hangs #267

Open squat opened 1 year ago

squat commented 1 year ago

Please include the following information:

vncdotool version 1.2.0

VNC server and version WayVNC 0.6.2

Steps to reproduce:

vncdotool -i -v -v -v -s 192.168.0.1::5900 capture out.png

Output:

INFO:root:connecting to 192.168.0.1:5900
INFO:twisted:Starting factory <vncdotool.command.VNCDoCLIFactory object at 0x7fb290c80750>
INFO:twisted:Using protocol version 3.8
INFO:twisted:Offered <AuthTypes.NONE: 1>
INFO:twisted:Native PixelFormat(bpp=32, depth=24, bigendian=False, truecolor=True, redmax=255, greenmax=255, bluemax=255, redshift=16, greenshift=8, blueshift=0) bytes=4
INFO:twisted:Offering <Encoding.RAW: 0>
INFO:twisted:Offering <Encoding.PSEUDO_DESKTOP_SIZE: -223>
INFO:twisted:Offering <Encoding.PSEUDO_LAST_RECT: -224>
INFO:twisted:Offering <Encoding.PSEUDO_QEMU_EXTENDED_KEY_EVENT: -258>
INFO:root:connected to b'WayVNC'
DEBUG:vncdotool.client:captureScreen out.png
INFO:twisted:x=0 y=0 w=0 h=0 <Encoding.PSEUDO_QEMU_EXTENDED_KEY_EVENT: -258>

Expected result: I expected the command to complete and produce a file.

Which erroneous result did you get instead: The command never completes. I have to interrupt it.

Additional information: Other vncdotool commands, such as type, work just fine.

pmhahn commented 1 year ago
josch commented 4 months ago

Same issue with vncdotool 1.2.0 and wayvnc 0.8.0. Any updates?

josch commented 4 months ago

Since vncdotool is not working for me, I re-implemented the needed functionality in a short Python script. Maybe it helps others:

#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-3.0
#
# Copyright 2024 Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
#
# inspired by https://github.com/barneygale/pytest-vnc
# Copyright 2022 - 2023, Barney Gale <barney.gale@gmail.com>

import socket
from collections import namedtuple
from dataclasses import field
from PIL import Image
import numpy
import time
import sys
from keysymdef import keysymdef

# Lightweight records for a screen coordinate and a rectangular screen region.
Point = namedtuple("Point", "x y")
Rect = namedtuple("Rect", "x y width height")

# 16-byte RFB PIXEL_FORMAT structures, keyed by a short name.
pixel_formats = {
    # bits-per-pixel=32, depth=24, big-endian=0, true-colour=1,
    # red/green/blue max=255, red-shift=0, green-shift=8, blue-shift=16,
    # then 3 padding bytes: each pixel arrives as the bytes R, G, B, unused.
    "rgba": b"\x20\x18\x00\x01\x00\xff\x00\xff\x00\xff\x00\x08\x10\x00\x00\x00",
}

# Encodings advertised to the server in the SetEncodings message.
# capture() only decodes raw rectangles, so raw is the only encoding offered;
# advertising zlib (6) would allow a server to reply with data that
# capture() rejects.
encodings = {
    0,  # raw
}

# Maps both keysym names and their printable characters to keysym codes.
# Each keysymdef entry unpacks as (name, code, char); entries whose char is
# falsy contribute only their name.
key_codes = {}
key_codes.update((name, code) for name, code, char in keysymdef)
key_codes.update((chr(char), code) for name, code, char in keysymdef if char)

def write_mouse(sock, buttons, position):
    """Send one RFB PointerEvent and pause briefly.

    The message is the type byte 5, the one-byte button mask *buttons*, and
    the big-endian 16-bit ``position.x`` and ``position.y``.
    """
    fields = (
        b"\x05",
        buttons.to_bytes(1, "big"),
        position.x.to_bytes(2, "big"),
        position.y.to_bytes(2, "big"),
    )
    sock.sendall(b"".join(fields))
    # Short pause after every pointer message.
    time.sleep(0.05)

def slice_rect(rect, *channels):
    """Build an index tuple selecting *rect* in a (row, column, ...) array.

    Returns ``(row_slice, column_slice)`` followed by any extra *channels*
    indices, so the result can be used directly as an array subscript.
    """
    rows = slice(rect.y, rect.y + rect.height)
    columns = slice(rect.x, rect.x + rect.width)
    return (rows, columns, *channels)

def capture(sock, rect: Rect):
    """Request a full framebuffer update and return the pixels of *rect*.

    Sends a non-incremental FramebufferUpdateRequest for *rect*, then reads
    FramebufferUpdate messages until every pixel inside *rect* has been
    received. Only raw-encoded rectangles are accepted. The fourth channel
    of the working array doubles as a "received" marker: it starts at 0 and
    is set to 255 for every area the server delivers.

    Returns a ``(height, width, 4)`` unsigned-byte array.
    """
    geometry = (rect.x, rect.y, rect.width, rect.height)
    request = b"\x03\x00" + b"".join(value.to_bytes(2, "big") for value in geometry)
    sock.sendall(request)

    frame = numpy.zeros((rect.height, rect.width, 4), "B")
    while True:
        message_type = read_int(sock, 1)
        assert message_type == 0  # video
        read(sock, 1)  # padding
        rectangle_count = read_int(sock, 2)
        for _ in range(rectangle_count):
            # Header fields arrive in the order x, y, width, height.
            tile_rect = Rect(*(read_int(sock, 2) for _ in range(4)))
            tile_encoding = read_int(sock, 4)
            assert tile_encoding == 0  # raw not compressed
            payload = read(sock, tile_rect.height * tile_rect.width * 4)
            tile = numpy.ndarray((tile_rect.height, tile_rect.width, 4), "B", payload)
            frame[slice_rect(tile_rect)] = tile
            # Mark the area as received by making it fully opaque.
            frame[slice_rect(tile_rect, 3)] = 255
        fully_received = frame[slice_rect(rect, 3)].all()
        if fully_received:
            return frame[slice_rect(rect)]

def read(sock: socket.socket, length: int) -> bytes:
    """Read exactly *length* bytes from *sock*.

    Keeps calling ``recv`` until the requested number of bytes has arrived.

    Raises:
        ConnectionError: if the peer closes the connection before *length*
            bytes were received.
    """
    chunks = []
    remaining = length
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            # recv() returns b"" once the peer has closed the connection;
            # without this check the loop would never terminate.
            raise ConnectionError(
                f"connection closed with {remaining} of {length} bytes unread"
            )
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)

def read_int(sock: socket.socket, length: int) -> int:
    """Read *length* bytes from *sock* and decode them as a big-endian unsigned integer."""
    return int.from_bytes(read(sock, length), "big")

def move(sock, point):
    """Move the pointer to *point* with no buttons pressed, then wait one second."""
    no_buttons = 0
    write_mouse(sock, no_buttons, point)
    time.sleep(1)

def click(sock, point):
    """Press and release button mask 1 at *point*, waiting one second after each step."""
    # Button mask 1 presses; mask 0 releases.
    for button_mask in (1, 0):
        write_mouse(sock, button_mask, point)
        time.sleep(1)

def keypress(sock, key):
    """Send a key-down then a key-up KeyEvent for *key*.

    *key* is looked up in ``key_codes``; an unknown key raises ``KeyError``.
    The function waits 0.1 s between press and release and 1 s afterwards.
    """
    keysym = key_codes[key].to_bytes(4, "big")
    # Each header is: message type 4, down-flag, two padding bytes.
    steps = (
        (b"\x04\x01\x00\x00", 0.1),
        (b"\x04\x00\x00\x00", 1),
    )
    for header, pause in steps:
        sock.sendall(header + keysym)
        time.sleep(pause)

def screenshot(sock, rect, filename):
    """Capture *rect* from the server and save it as an image at *filename*."""
    Image.fromarray(capture(sock, rect)).save(filename)

class RecvNullExc(Exception):
    """Signals that the server's 12-byte greeting was not received in full."""

def connect():
    """Connect to the VNC server on 127.0.0.1:5910 and complete the RFB handshake.

    Makes up to 100 connection attempts, sleeping two seconds after each
    refused connection or short greeting. After the handshake it sends the
    "rgba" pixel format and the advertised encodings.

    Returns:
        A ``(sock, rect)`` tuple: the connected socket and a ``Rect``
        covering the whole framebuffer.

    Raises:
        ConnectionError: if no attempt produced a complete server greeting.
        AssertionError: if the server deviates from what this script
            supports (greeting not starting with "RFB ", anything other
            than exactly one security type "None", a failed security
            result, or a framebuffer that is not 1920x1080).
    """
    sock = None
    for _ in range(100):
        try:
            sock = socket.create_connection(("127.0.0.1", 5910), timeout=5)
            print("connected", file=sys.stderr)
            intro = sock.recv(12)
            if len(intro) != 12:
                raise RecvNullExc
            print("received intro", file=sys.stderr)
        except ConnectionRefusedError:
            print("trying to connect...", file=sys.stderr)
            time.sleep(2)
        except RecvNullExc:
            print("cannot read, retrying...", file=sys.stderr)
            # Do not leak the half-open connection before retrying.
            sock.close()
            sock = None
            time.sleep(2)
        except BaseException:
            # Any other failure (e.g. a timeout) aborts; release the socket first.
            if sock is not None:
                sock.close()
            raise
        else:
            print("leaving loop", file=sys.stderr)
            break
    else:
        # Every attempt failed; without this the code below would hit
        # unbound local variables.
        raise ConnectionError("could not connect to VNC server at 127.0.0.1:5910")

    try:
        # Protocol version exchange.
        assert intro[:4] == b"RFB "
        sock.sendall(b"RFB 003.008\n")

        # Security negotiation: exactly one offered type, which must be 1 (None).
        num_auth_types = read_int(sock, 1)
        assert num_auth_types == 1, num_auth_types
        # Explicit big-endian decoding: int.from_bytes() only gained a
        # default byteorder in Python 3.11.
        auth_type = read_int(sock, num_auth_types)
        assert auth_type == 1, auth_type

        sock.sendall(b"\x01")  # select security type 1

        auth_result = read_int(sock, 4)
        assert auth_result == 0

        sock.sendall(b"\x01")  # ClientInit with the shared flag set
        width, height = read_int(sock, 2), read_int(sock, 2)
        # NOTE(review): the resolution is hard-coded; presumably matches the
        # author's test setup — confirm before using with other servers.
        assert (width, height) == (1920, 1080)
        rect = Rect(0, 0, width, height)
        read(sock, 16)  # server pixel format, ignored
        read(sock, read_int(sock, 4))  # length-prefixed desktop name, ignored
        sock.sendall(
            b"\x00\x00\x00\x00"  # SetPixelFormat + 3 padding bytes
            + pixel_formats["rgba"]
            + b"\x02\x00"  # SetEncodings + 1 padding byte
            + len(encodings).to_bytes(2, "big")
            + b"".join(encoding.to_bytes(4, "big") for encoding in encodings)
        )
    except BaseException:
        # The caller never sees the socket if the handshake fails, so close it here.
        sock.close()
        raise

    return sock, rect

def main():
    sock = None
    rect = None
    mousepos = Point(0, 0)
    for line in sys.stdin:
        if line.startswith("#"):
            continue
        line = line.strip()
        tokens = line.split()
        if len(tokens) < 1:
            raise Exception("no tokens")
        match tokens[0]:
            case "connect":
                sock, rect = connect()
            case "sleep":
                if len(tokens) != 2:
                    raise Exception(f"missing argument {line}")
                time.sleep(float(tokens[1]))
            case "capture":
                if len(tokens) != 2:
                    raise Exception(f"missing argument {line}")
                if sock is None or rect is None:
                    raise Exception("must connect first")
                screenshot(sock, rect, tokens[1])
            case "key":
                if len(tokens) != 2:
                    raise Exception(f"missing argument {line}")
                keypress(sock, tokens[1])
            case "mousemove":
                if len(tokens) != 3:
                    raise Exception(f"missing argument {line}")
                mousepos = Point(int(tokens[1]), int(tokens[2]))
                move(sock, mousepos)
            case "click":
                click(sock, mousepos)
            case _:
                raise Exception(f"unknown command: {tokens[0]}")

    if sock is not None:
        sock.close()

if __name__ == "__main__":
    main()

It can be used like this:

cat << END | python3 vnc.py
connect
sleep 20
mousemove 1050 625
capture test.png
click
key p
sleep 1
key Tab
END

The keysymdef module is from here: https://github.com/barneygale/keysymdef/blob/main/keysymdef.py

Have fun! :)

dlinh2011 commented 4 months ago

Hello, I have encountered the same problem. I'm using Windows 10, vncdotool 1.2.0, and the VNC server of a Sharp MFP. I really need the screen capture function for GUI testing; can someone give me a hint about what's happening? Thank you.

> vncdotool -i -v -v -v -s 10.0.0.43::5900 capture out.png
INFO:root:connecting to 10.0.0.43:5900
INFO:twisted:Starting factory <vncdotool.command.VNCDoCLIFactory object at 0x000001E745720C80>
INFO:twisted:Using protocol version 3.8
INFO:twisted:Offered <AuthTypes.VNC_AUTHENTICATION: 2>
VNC password:
INFO:twisted:Native PixelFormat(bpp=32, depth=24, bigendian=False, truecolor=True, redmax=255, greenmax=255, bluemax=255, redshift=16, greenshift=8, blueshift=0) bytes=4
INFO:twisted:Offering <Encoding.RAW: 0>
INFO:twisted:Offering <Encoding.PSEUDO_DESKTOP_SIZE: -223>
INFO:twisted:Offering <Encoding.PSEUDO_LAST_RECT: -224>
INFO:twisted:Offering <Encoding.PSEUDO_QEMU_EXTENDED_KEY_EVENT: -258>
INFO:root:connected to b'VNC'
DEBUG:vncdotool.client:captureScreen out.png
INFO:twisted:x=12 y=14 w=1024 h=600 <Encoding.RAW: 0>
gailrosen commented 4 months ago

great solution -- how to send username and password over the socket?

Since vncdotool is not working for me, I re-implemented the needed functionality in a short Python script. Maybe it helps others:

#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-3.0
#
# Copyright 2024 Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
#
# inspired by https://github.com/barneygale/pytest-vnc
# Copyright 2022 - 2023, Barney Gale <barney.gale@gmail.com>

import socket
from collections import namedtuple
from dataclasses import field
from PIL import Image
import numpy
import time
import sys
from keysymdef import keysymdef

# Lightweight records for a screen coordinate and a rectangular screen region.
Point = namedtuple("Point", "x y")
Rect = namedtuple("Rect", "x y width height")

# 16-byte RFB PIXEL_FORMAT structures, keyed by a short name.
pixel_formats = {
    # bits-per-pixel=32, depth=24, big-endian=0, true-colour=1,
    # red/green/blue max=255, red-shift=0, green-shift=8, blue-shift=16,
    # then 3 padding bytes: each pixel arrives as the bytes R, G, B, unused.
    "rgba": b"\x20\x18\x00\x01\x00\xff\x00\xff\x00\xff\x00\x08\x10\x00\x00\x00",
}

# Encodings advertised to the server in the SetEncodings message.
# capture() only decodes raw rectangles, so raw is the only encoding offered;
# advertising zlib (6) would allow a server to reply with data that
# capture() rejects.
encodings = {
    0,  # raw
}

# Maps both keysym names and their printable characters to keysym codes.
# Each keysymdef entry unpacks as (name, code, char); entries whose char is
# falsy contribute only their name.
key_codes = {}
key_codes.update((name, code) for name, code, char in keysymdef)
key_codes.update((chr(char), code) for name, code, char in keysymdef if char)

def write_mouse(sock, buttons, position):
    """Send one RFB PointerEvent and pause briefly.

    The message is the type byte 5, the one-byte button mask *buttons*, and
    the big-endian 16-bit ``position.x`` and ``position.y``.
    """
    fields = (
        b"\x05",
        buttons.to_bytes(1, "big"),
        position.x.to_bytes(2, "big"),
        position.y.to_bytes(2, "big"),
    )
    sock.sendall(b"".join(fields))
    # Short pause after every pointer message.
    time.sleep(0.05)

def slice_rect(rect, *channels):
    """Build an index tuple selecting *rect* in a (row, column, ...) array.

    Returns ``(row_slice, column_slice)`` followed by any extra *channels*
    indices, so the result can be used directly as an array subscript.
    """
    rows = slice(rect.y, rect.y + rect.height)
    columns = slice(rect.x, rect.x + rect.width)
    return (rows, columns, *channels)

def capture(sock, rect: Rect):
    """Request a full framebuffer update and return the pixels of *rect*.

    Sends a non-incremental FramebufferUpdateRequest for *rect*, then reads
    FramebufferUpdate messages until every pixel inside *rect* has been
    received. Only raw-encoded rectangles are accepted. The fourth channel
    of the working array doubles as a "received" marker: it starts at 0 and
    is set to 255 for every area the server delivers.

    Returns a ``(height, width, 4)`` unsigned-byte array.
    """
    geometry = (rect.x, rect.y, rect.width, rect.height)
    request = b"\x03\x00" + b"".join(value.to_bytes(2, "big") for value in geometry)
    sock.sendall(request)

    frame = numpy.zeros((rect.height, rect.width, 4), "B")
    while True:
        message_type = read_int(sock, 1)
        assert message_type == 0  # video
        read(sock, 1)  # padding
        rectangle_count = read_int(sock, 2)
        for _ in range(rectangle_count):
            # Header fields arrive in the order x, y, width, height.
            tile_rect = Rect(*(read_int(sock, 2) for _ in range(4)))
            tile_encoding = read_int(sock, 4)
            assert tile_encoding == 0  # raw not compressed
            payload = read(sock, tile_rect.height * tile_rect.width * 4)
            tile = numpy.ndarray((tile_rect.height, tile_rect.width, 4), "B", payload)
            frame[slice_rect(tile_rect)] = tile
            # Mark the area as received by making it fully opaque.
            frame[slice_rect(tile_rect, 3)] = 255
        fully_received = frame[slice_rect(rect, 3)].all()
        if fully_received:
            return frame[slice_rect(rect)]

def read(sock: socket.socket, length: int) -> bytes:
    """Read exactly *length* bytes from *sock*.

    Keeps calling ``recv`` until the requested number of bytes has arrived.

    Raises:
        ConnectionError: if the peer closes the connection before *length*
            bytes were received.
    """
    chunks = []
    remaining = length
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            # recv() returns b"" once the peer has closed the connection;
            # without this check the loop would never terminate.
            raise ConnectionError(
                f"connection closed with {remaining} of {length} bytes unread"
            )
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)

def read_int(sock: socket.socket, length: int) -> int:
    """Read *length* bytes from *sock* and decode them as a big-endian unsigned integer."""
    return int.from_bytes(read(sock, length), "big")

def move(sock, point):
    """Move the pointer to *point* with no buttons pressed, then wait one second."""
    no_buttons = 0
    write_mouse(sock, no_buttons, point)
    time.sleep(1)

def click(sock, point):
    """Press and release button mask 1 at *point*, waiting one second after each step."""
    # Button mask 1 presses; mask 0 releases.
    for button_mask in (1, 0):
        write_mouse(sock, button_mask, point)
        time.sleep(1)

def keypress(sock, key):
    """Send a key-down then a key-up KeyEvent for *key*.

    *key* is looked up in ``key_codes``; an unknown key raises ``KeyError``.
    The function waits 0.1 s between press and release and 1 s afterwards.
    """
    keysym = key_codes[key].to_bytes(4, "big")
    # Each header is: message type 4, down-flag, two padding bytes.
    steps = (
        (b"\x04\x01\x00\x00", 0.1),
        (b"\x04\x00\x00\x00", 1),
    )
    for header, pause in steps:
        sock.sendall(header + keysym)
        time.sleep(pause)

def screenshot(sock, rect, filename):
    """Capture *rect* from the server and save it as an image at *filename*."""
    Image.fromarray(capture(sock, rect)).save(filename)

class RecvNullExc(Exception):
    """Signals that the server's 12-byte greeting was not received in full."""

def connect():
    """Connect to the VNC server on 127.0.0.1:5910 and complete the RFB handshake.

    Makes up to 100 connection attempts, sleeping two seconds after each
    refused connection or short greeting. After the handshake it sends the
    "rgba" pixel format and the advertised encodings.

    Returns:
        A ``(sock, rect)`` tuple: the connected socket and a ``Rect``
        covering the whole framebuffer.

    Raises:
        ConnectionError: if no attempt produced a complete server greeting.
        AssertionError: if the server deviates from what this script
            supports (greeting not starting with "RFB ", anything other
            than exactly one security type "None", a failed security
            result, or a framebuffer that is not 1920x1080).
    """
    sock = None
    for _ in range(100):
        try:
            sock = socket.create_connection(("127.0.0.1", 5910), timeout=5)
            print("connected", file=sys.stderr)
            intro = sock.recv(12)
            if len(intro) != 12:
                raise RecvNullExc
            print("received intro", file=sys.stderr)
        except ConnectionRefusedError:
            print("trying to connect...", file=sys.stderr)
            time.sleep(2)
        except RecvNullExc:
            print("cannot read, retrying...", file=sys.stderr)
            # Do not leak the half-open connection before retrying.
            sock.close()
            sock = None
            time.sleep(2)
        except BaseException:
            # Any other failure (e.g. a timeout) aborts; release the socket first.
            if sock is not None:
                sock.close()
            raise
        else:
            print("leaving loop", file=sys.stderr)
            break
    else:
        # Every attempt failed; without this the code below would hit
        # unbound local variables.
        raise ConnectionError("could not connect to VNC server at 127.0.0.1:5910")

    try:
        # Protocol version exchange.
        assert intro[:4] == b"RFB "
        sock.sendall(b"RFB 003.008\n")

        # Security negotiation: exactly one offered type, which must be 1 (None).
        num_auth_types = read_int(sock, 1)
        assert num_auth_types == 1, num_auth_types
        # Explicit big-endian decoding: int.from_bytes() only gained a
        # default byteorder in Python 3.11.
        auth_type = read_int(sock, num_auth_types)
        assert auth_type == 1, auth_type

        sock.sendall(b"\x01")  # select security type 1

        auth_result = read_int(sock, 4)
        assert auth_result == 0

        sock.sendall(b"\x01")  # ClientInit with the shared flag set
        width, height = read_int(sock, 2), read_int(sock, 2)
        # NOTE(review): the resolution is hard-coded; presumably matches the
        # author's test setup — confirm before using with other servers.
        assert (width, height) == (1920, 1080)
        rect = Rect(0, 0, width, height)
        read(sock, 16)  # server pixel format, ignored
        read(sock, read_int(sock, 4))  # length-prefixed desktop name, ignored
        sock.sendall(
            b"\x00\x00\x00\x00"  # SetPixelFormat + 3 padding bytes
            + pixel_formats["rgba"]
            + b"\x02\x00"  # SetEncodings + 1 padding byte
            + len(encodings).to_bytes(2, "big")
            + b"".join(encoding.to_bytes(4, "big") for encoding in encodings)
        )
    except BaseException:
        # The caller never sees the socket if the handshake fails, so close it here.
        sock.close()
        raise

    return sock, rect

def main():
    sock = None
    rect = None
    mousepos = Point(0, 0)
    for line in sys.stdin:
        if line.startswith("#"):
            continue
        line = line.strip()
        tokens = line.split()
        if len(tokens) < 1:
            raise Exception("no tokens")
        match tokens[0]:
            case "connect":
                sock, rect = connect()
            case "sleep":
                if len(tokens) != 2:
                    raise Exception(f"missing argument {line}")
                time.sleep(float(tokens[1]))
            case "capture":
                if len(tokens) != 2:
                    raise Exception(f"missing argument {line}")
                if sock is None or rect is None:
                    raise Exception("must connect first")
                screenshot(sock, rect, tokens[1])
            case "key":
                if len(tokens) != 2:
                    raise Exception(f"missing argument {line}")
                keypress(sock, tokens[1])
            case "mousemove":
                if len(tokens) != 3:
                    raise Exception(f"missing argument {line}")
                mousepos = Point(int(tokens[1]), int(tokens[2]))
                move(sock, mousepos)
            case "click":
                click(sock, mousepos)
            case _:
                raise Exception(f"unknown command: {tokens[0]}")

    if sock is not None:
        sock.close()

if __name__ == "__main__":
    main()

It can be used like this:

cat << END | python3 vnc.py
connect
sleep 20
mousemove 1050 625
capture test.png
click
key p
sleep 1
key Tab
END

The keysymdef module is from here: https://github.com/barneygale/keysymdef/blob/main/keysymdef.py

Have fun! :)

dlinh2011 commented 4 months ago

@gailrosen The script is inspired by https://github.com/barneygale/pytest-vnc, so you can find the missing implementations there!