xmrig / xmrig-cuda

NVIDIA CUDA plugin for XMRig miner
GNU General Public License v3.0
358 stars 155 forks source link

Error: Invalid configuration argument #129

Closed quydau35 closed 2 years ago

quydau35 commented 2 years ago

I get the "Invalid configuration argument" error on all of my Kepler-based cards with compute capability 3.0 [image]. nvidia-smi output:

+-----------------------------------------------------------------------------+
| NVIDIA-SMI 426.78       Driver Version: 426.78       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Quadro K1100M      WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   53C    P8    N/A /  N/A |    653MiB /  2048MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|    0     10584      C   ...ypt\Monero\xmrig\xmrig-6.15.1\xmrig.exe N/A      |
+-----------------------------------------------------------------------------+

I never get this error when I run on Maxwell cards (compute capability >= 5.0) such as the Quadro K2200. I use xmrig 6.15.1 and xmrig-cuda 6.15.1, built with CUDA toolkit 10.1. Does anyone have any idea why this happens and how to fix it? I looked at namespace AstroBWT_Dero in src/AstroBWT/dero/AstroBWT.cu but didn't see any abnormal configuration. Here is my configuration file, with the wallet address removed:

{
    "api": {
        "id": null,
        "worker-id": null
    },
    "http": {
        "enabled": false,
        "host": "127.0.0.1",
        "port": 0,
        "access-token": null,
        "restricted": true
    },
    "autosave": true,
    "background": false,
    "colors": true,
    "title": true,
    "randomx": {
        "init": -1,
        "init-avx2": -1,
        "mode": "auto",
        "1gb-pages": false,
        "rdmsr": true,
        "wrmsr": true,
        "cache_qos": false,
        "numa": true,
        "scratchpad_prefetch_mode": 1
    },
    "cpu": {
        "enabled": true,
        "huge-pages": true,
        "huge-pages-jit": true,
        "hw-aes": null,
        "priority": 2,
        "memory-pool": false,
        "yield": false,
        "asm": true,
        "argon2-impl": null,
        "astrobwt-max-size": 550,
        "astrobwt-avx2": false,
        "argon2": [0, 2, 4, 6, 5, 7],
        "astrobwt": [0, 1, 2, 3, 4, 5, 6, 7],
        "cn": [
            [1, 0],
            [1, 2],
            [1, 4]
        ],
        "cn-heavy": [
            [1, 0],
            [1, 2]
        ],
        "cn-lite": [
            [1, 0],
            [1, 2],
            [1, 4],
            [1, 6],
            [1, 5],
            [1, 7]
        ],
        "cn-pico": [
            [2, 0],
            [2, 1],
            [2, 2],
            [2, 3],
            [2, 4],
            [2, 5],
            [2, 6],
            [2, 7]
        ],
        "cn/upx2": [
            [2, 0],
            [2, 1],
            [2, 2],
            [2, 3],
            [2, 4],
            [2, 5],
            [2, 6],
            [2, 7]
        ],
        "rx": [0, 2, 4],
        "rx/arq": [0, 1, 2, 3, 4, 5, 6, 7],
        "rx/wow": [0, 2, 4, 6, 5, 7],
        "cn-lite/0": false,
        "cn/0": false,
        "rx/keva": "rx/wow"
    },
    "opencl": {
        "enabled": false,
        "cache": true,
        "loader": null,
        "platform": "AMD",
        "adl": true
    },
    "cuda": {
        "enabled": true,
        "loader": "xmrig-cuda.dll",
        "nvml": true,
        "astrobwt": [
            {
                "index": 0,
                "threads": 32,
                "blocks": 2,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "cn": [
            {
                "index": 0,
                "threads": 128,
                "blocks": 6,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "cn-heavy": [
            {
                "index": 0,
                "threads": 64,
                "blocks": 6,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "cn-lite": [
            {
                "index": 0,
                "threads": 128,
                "blocks": 6,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "cn-pico": [
            {
                "index": 0,
                "threads": 4,
                "blocks": 16,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "cn/2": [
            {
                "index": 0,
                "threads": 4,
                "blocks": 16,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "cn/upx2": [
            {
                "index": 0,
                "threads": 4,
                "blocks": 16,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "kawpow": [
            {
                "index": 0,
                "threads": 256,
                "blocks": 4096,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],
        "rx": [
            {
                "index": 0,
                "threads": 32,
                "blocks": 4,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1,
                "dataset_host": true
            }
        ],
        "cn-lite/0": false,
        "cn/0": false
    },
    "log-file": null,
    "donate-level": 1,
    "donate-over-proxy": 1,
    "pools": [
        {
            "algo": null,
            "coin": "DERO",
            "url": "sg.dero.herominers.com:1117",
            "user": "my_wallet_address",
            "pass": "myzbook15",
            "rig-id": null,
            "nicehash": false,
            "keepalive": true,
            "enabled": true,
            "tls": true,
            "tls-fingerprint": null,
            "daemon": false,
            "socks5": null,
            "self-select": null,
            "submit-to-origin": false
        }
    ],
    "retries": 5,
    "retry-pause": 5,
    "print-time": 60,
    "health-print-time": 60,
    "dmi": true,
    "syslog": false,
    "tls": {
        "enabled": false,
        "protocols": null,
        "cert": null,
        "cert_key": null,
        "ciphers": null,
        "ciphersuites": null,
        "dhparam": null
    },
    "dns": {
        "ipv6": false,
        "ttl": 30
    },
    "user-agent": null,
    "verbose": 0,
    "watch": true,
    "pause-on-battery": false,
    "pause-on-active": false
}
Spudz76 commented 2 years ago

Confirmed on my K1100M

Can't seem to make it work. It didn't fail launch when using:

        "astrobwt": [
            {
                "index": 0,
                "threads": 32,
                "blocks": 3,
                "bfactor": 8,
                "bsleep": 25,
                "affinity": -1
            }
        ],

But it never got a hashrate either, and it more or less hung my system (Linux doesn't actually use bsleep). I had to ssh in and kill it after a while.

AstroBWT already didn't compile at all with CUDA 8 or for Fermi; perhaps there are other issues with Kepler for some reason, even though it compiles (but won't launch).

So it seems there are some combinations of thread/block that will launch but most will hit invalid config arg.

I had some "normal" Desktop Kepler 35's (GTX770) but they are not currently online or testable. It could just be the Mobile Kepler 30 is deficient somehow compared to higher 3x archs.

In general I quit mining the GPU on this Lenovo W540 laptop because the shared cooling solution is already undersized with just the CPU making heat. CPU at 90C makes the GPU 90C real fast and then everything dies (poweroff for thermal protect). So perhaps even if it worked it would just overheat (cup half full?)

Spudz76 commented 2 years ago

Okay I merged the newest src/3rdparty/cub and now it works with 32/3

See also #130 if you can build from that and test with 32/3 (or other combos) on your own K1100M to verify it at least works for you.

 * CUDA         10.2/11.2/6.15.2-dev
 * NVML         11.460.91.03/460.91.03 press e for health report
 * CUDA GPU     #0 01:00.0 Quadro K1100M 705/1400 MHz smx:2 arch:30 mem:1275/1999 MB
[2021-10-04 10:23:41.590]  net      use pool gulf.moneroocean.stream:20004 TLSv1.3 54.188.223.206
[2021-10-04 10:23:41.590]  net      new job from gulf.moneroocean.stream:20004 diff 454 algo astrobwt height 6214889
[2021-10-04 10:23:41.590]  nvidia   use profile  astrobwt  (1 thread) scratchpad 20480 KB
|  # | GPU |  BUS ID | INTENSITY | THREADS | BLOCKS | BF |  BS | MEMORY | NAME
|  0 |   0 | 01:00.0 |        96 |      32 |      3 |  8 |   0 |    892 | Quadro K1100M
[2021-10-04 10:23:41.678]  nvidia   READY threads 1/1 (87 ms)
[2021-10-04 10:23:45.914]  nvidia   accepted (1/0) diff 454 (515 ms)
[2021-10-04 10:23:50.924]  nvidia   accepted (2/0) diff 454 (516 ms)
[2021-10-04 10:23:52.152]  nvidia   accepted (3/0) diff 454 (519 ms)
[2021-10-04 10:23:55.917]  nvidia   accepted (4/0) diff 454 (516 ms)
[2021-10-04 10:23:58.392]  nvidia   accepted (5/0) diff 454 (517 ms)
[2021-10-04 10:24:01.068]  net      new job from gulf.moneroocean.stream:20004 diff 3510 algo astrobwt height 6214889
[2021-10-04 10:24:05.744]  net      new job from gulf.moneroocean.stream:20004 diff 3510 algo astrobwt height 6214890
[2021-10-04 10:24:09.978]  net      new job from gulf.moneroocean.stream:20004 diff 3510 algo astrobwt height 6214891
[2021-10-04 10:24:17.153]  nvidia   accepted (6/0) diff 3510 (516 ms)
[2021-10-04 10:24:45.826]  nvidia   #0 01:00.0   0W 63C 705/1400 MHz
[2021-10-04 10:24:45.826]  miner    speed 10s/60s/15m 78.83 n/a n/a H/s max 84.27 H/s
[2021-10-04 10:24:58.264]  nvidia   #0 01:00.0   0W 63C 705/1400 MHz
|   CUDA # | AFFINITY | 10s  H/s | 60s  H/s | 15m  H/s |
|        0 |       -1 |    84.82 |    80.40 |      n/a | #0 01:00.0 Quadro K1100M
|        - |        - |    78.62 |    79.36 |      n/a |
[2021-10-04 10:25:00.141]  miner    speed 10s/60s/15m 78.62 79.36 n/a H/s max 84.27 H/s
[2021-10-04 10:25:07.349]  signal   Ctrl+C received, exiting
[2021-10-04 10:25:07.637]  nvidia   stopped (287 ms)
Spudz76 commented 2 years ago

Also my nvidia-smi for reference:

+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Quadro K1100M       Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   57C    P0    N/A /  N/A |    777MiB /  1999MiB |     16%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

I used toolkit 10.2 against a somewhat recent driver on Linux. I did not test toolkit 10.1 or a driver revision similar to the one you are running on Windows, but I expect it to still work.

quydau35 commented 2 years ago

Hi, I tried building #130 and got a few errors due to the missing agent/agent_radix_sort_histogram.cuh. This is easily solved by providing the original cub library, correcting the path, and recompiling.

And running with 32 threads / 3 blocks solves the whole issue! Thanks for your help! I built with CUDA 10.1 and tested with the K1100M and also a K600, and it runs like a charm!

You're correct that running on a mobile GPU is not a good idea: its temperature rises to the critical level really fast.

For reference, my K600 gets around 50 H/s and my K1100M gets around 78 H/s.

Looking forward to this pull request being merged into master so that everybody can enjoy mining with old Kepler cards.

Okay I merged the newest src/3rdparty/cub and now it works with 32/3

See also #130 if you can build from that and test with 32/3 (or other combos) on your own K1100M to verify it at least works for you.

 * CUDA         10.2/11.2/6.15.2-dev
 * NVML         11.460.91.03/460.91.03 press e for health report
 * CUDA GPU     #0 01:00.0 Quadro K1100M 705/1400 MHz smx:2 arch:30 mem:1275/1999 MB
[2021-10-04 10:23:41.590]  net      use pool gulf.moneroocean.stream:20004 TLSv1.3 54.188.223.206
[2021-10-04 10:23:41.590]  net      new job from gulf.moneroocean.stream:20004 diff 454 algo astrobwt height 6214889
[2021-10-04 10:23:41.590]  nvidia   use profile  astrobwt  (1 thread) scratchpad 20480 KB
|  # | GPU |  BUS ID | INTENSITY | THREADS | BLOCKS | BF |  BS | MEMORY | NAME
|  0 |   0 | 01:00.0 |        96 |      32 |      3 |  8 |   0 |    892 | Quadro K1100M
[2021-10-04 10:23:41.678]  nvidia   READY threads 1/1 (87 ms)
[2021-10-04 10:23:45.914]  nvidia   accepted (1/0) diff 454 (515 ms)
[2021-10-04 10:23:50.924]  nvidia   accepted (2/0) diff 454 (516 ms)
[2021-10-04 10:23:52.152]  nvidia   accepted (3/0) diff 454 (519 ms)
[2021-10-04 10:23:55.917]  nvidia   accepted (4/0) diff 454 (516 ms)
[2021-10-04 10:23:58.392]  nvidia   accepted (5/0) diff 454 (517 ms)
[2021-10-04 10:24:01.068]  net      new job from gulf.moneroocean.stream:20004 diff 3510 algo astrobwt height 6214889
[2021-10-04 10:24:05.744]  net      new job from gulf.moneroocean.stream:20004 diff 3510 algo astrobwt height 6214890
[2021-10-04 10:24:09.978]  net      new job from gulf.moneroocean.stream:20004 diff 3510 algo astrobwt height 6214891
[2021-10-04 10:24:17.153]  nvidia   accepted (6/0) diff 3510 (516 ms)
[2021-10-04 10:24:45.826]  nvidia   #0 01:00.0   0W 63C 705/1400 MHz
[2021-10-04 10:24:45.826]  miner    speed 10s/60s/15m 78.83 n/a n/a H/s max 84.27 H/s
[2021-10-04 10:24:58.264]  nvidia   #0 01:00.0   0W 63C 705/1400 MHz
|   CUDA # | AFFINITY | 10s  H/s | 60s  H/s | 15m  H/s |
|        0 |       -1 |    84.82 |    80.40 |      n/a | #0 01:00.0 Quadro K1100M
|        - |        - |    78.62 |    79.36 |      n/a |
[2021-10-04 10:25:00.141]  miner    speed 10s/60s/15m 78.62 79.36 n/a H/s max 84.27 H/s
[2021-10-04 10:25:07.349]  signal   Ctrl+C received, exiting
[2021-10-04 10:25:07.637]  nvidia   stopped (287 ms)
Spudz76 commented 2 years ago

Fixed the missing files; I forgot to add the previously nonexistent ones to the commit.