mbevand / silentarmy

Zcash miner optimized for AMD & Nvidia GPUs
341 stars 188 forks source link

Android running, how to reduce the use of gpu. #112

Open kuangzhongwen opened 6 years ago

kuangzhongwen commented 6 years ago

I have ported it to Android, where it runs at an average of 1-4 sols/s. But now there is a problem: GPU usage on the mobile device reaches 99% while mining, so other features of the phone cannot be used. Is there a good optimization scheme? I tuned param.h and regenerated the _kernel.h file, but the effect is not good. Any help from the experts would be appreciated.

#define PARAM_N 200

#define PARAM_K 9

#define PREFIX (PARAM_N / (PARAM_K + 1))

#define NR_INPUTS (1 << PREFIX)

// Approximate log base 2 of number of elements in hash tables

#define APX_NR_ELMS_LOG (PREFIX + 1)

// Number of rows and slots is affected by this; 20 offers the best performance

#define NR_ROWS_LOG 18

// Setting this to 1 might make SILENTARMY faster, see TROUBLESHOOTING.md

#define OPTIM_SIMPLIFY_ROUND 1

// Number of collision items to track, per thread

#define COLL_DATA_SIZE_PER_TH (NR_SLOTS * 5)

// Ratio of time of sleeping before rechecking if task is done (0-1)

#define SLEEP_RECHECK_RATIO 0.60

// Ratio of time to busy wait for the solution (0-1) // The higher value the higher CPU usage with Nvidia

#define SLEEP_SKIP_RATIO 0.005

// Make hash tables OVERHEAD times larger than necessary to store the average // number of elements per row. The ideal value is as small as possible to // reduce memory usage, but not too small or else elements are dropped from the // hash tables. // // The actual number of elements per row is closer to the theoretical average // (less variance) when NR_ROWS_LOG is small. So accordingly OVERHEAD can be // smaller. // // Even (as opposed to odd) values of OVERHEAD sometimes significantly decrease // performance as they cause VRAM channel conflicts.

#if NR_ROWS_LOG == 16

#error "NR_ROWS_LOG = 16 is currently broken - do not use"

#define OVERHEAD 3

#elif NR_ROWS_LOG == 18

#define OVERHEAD 3

#elif NR_ROWS_LOG == 19

#define OVERHEAD 5

#elif NR_ROWS_LOG == 20 && OPTIM_SIMPLIFY_ROUND

#define OVERHEAD 6

#elif NR_ROWS_LOG == 20

#define OVERHEAD 9

#endif

#define NR_ROWS (1 << NR_ROWS_LOG)

#define NR_SLOTS ((1 << (APX_NR_ELMS_LOG - NR_ROWS_LOG)) * OVERHEAD)

// Length of 1 element (slot) in bytes

#define SLOT_LEN 32

// Total size of hash table

#define HT_SIZE (NR_ROWS * NR_SLOTS * SLOT_LEN)

// Length of Zcash block header, nonce (part of header)

#define ZCASH_BLOCK_HEADER_LEN 140

// Offset of nTime in header

#define ZCASH_BLOCK_OFFSET_NTIME (4 + 3 * 32)

// Length of nonce

#define ZCASH_NONCE_LEN 32

// Length of encoded representation of solution size

#define ZCASH_SOLSIZE_LEN 3

// Solution size (1344 = 0x540) represented as a compact integer, in hex

#define ZCASH_SOLSIZE_HEX "fd4005"

// Length of encoded solution (512 * 21 bits / 8 = 1344 bytes)

#define ZCASH_SOL_LEN ((1 << PARAM_K) * (PREFIX + 1) / 8)

// Last N_ZERO_BYTES of nonce must be zero due to my BLAKE2B optimization

#define N_ZERO_BYTES 12

// Number of bytes Zcash needs out of Blake

#define ZCASH_HASH_LEN 50

// Number of wavefronts per SIMD for the Blake kernel. // Blake is ALU-bound (beside the atomic counter being incremented) so we need // at least 2 wavefronts per SIMD to hide the 2-clock latency of integer // instructions. 10 is the max supported by the hw.

#define BLAKE_WPS 10

// Maximum number of solutions reported by kernel to host

#define MAX_SOLS 10

// Length of SHA256 target

#define SHA256_TARGET_LEN (256 / 8)

#if (NR_SLOTS < 16)

#define BITS_PER_ROW 4

#define ROWS_PER_UINT 8

#define ROW_MASK 0x0F

#else

#define BITS_PER_ROW 8

#define ROWS_PER_UINT 4

#define ROW_MASK 0xFF

#endif

// Optional features

#undef ENABLE_DEBUG

/* Return the offset of Xi in bytes from the beginning of the slot. */

#define xi_offset_for_round(round) (8 + ((round) / 2) * 4)

// An (uncompressed) solution stores (1 << PARAM_K) 32-bit values

#define SOL_SIZE ((1 << PARAM_K) * 4)

typedef struct sols_s { uint nr; uint likely_invalids; uchar valid[MAX_SOLS]; uint values[MAX_SOLS][(1 << PARAM_K)]; } sols_t;

Saikatsaha1996 commented 2 years ago

I have ported it to Android, where it runs at an average of 1-4 sols/s. But now there is a problem: GPU usage on the mobile device reaches 99% while mining, so other features of the phone cannot be used. Is there a good optimization scheme? I tuned param.h and regenerated the _kernel.h file, but the effect is not good. Any help from the experts would be appreciated.

#define PARAM_N 200 #define PARAM_K 9 #define PREFIX (PARAM_N / (PARAM_K + 1)) #define NR_INPUTS (1 << PREFIX) // Approximate log base 2 of number of elements in hash tables #define APX_NR_ELMS_LOG (PREFIX + 1) // Number of rows and slots is affected by this; 20 offers the best performance #define NR_ROWS_LOG 18

// Setting this to 1 might make SILENTARMY faster, see TROUBLESHOOTING.md #define OPTIM_SIMPLIFY_ROUND 1

// Number of collision items to track, per thread #define COLL_DATA_SIZE_PER_TH (NR_SLOTS * 5)

// Ratio of time of sleeping before rechecking if task is done (0-1) #define SLEEP_RECHECK_RATIO 0.60 // Ratio of time to busy wait for the solution (0-1) // The higher value the higher CPU usage with Nvidia #define SLEEP_SKIP_RATIO 0.005

// Make hash tables OVERHEAD times larger than necessary to store the average // number of elements per row. The ideal value is as small as possible to // reduce memory usage, but not too small or else elements are dropped from the // hash tables. // // The actual number of elements per row is closer to the theoretical average // (less variance) when NR_ROWS_LOG is small. So accordingly OVERHEAD can be // smaller. // // Even (as opposed to odd) values of OVERHEAD sometimes significantly decrease // performance as they cause VRAM channel conflicts. #if NR_ROWS_LOG == 16 #error "NR_ROWS_LOG = 16 is currently broken - do not use" #define OVERHEAD 3 #elif NR_ROWS_LOG == 18 #define OVERHEAD 3 #elif NR_ROWS_LOG == 19 #define OVERHEAD 5 #elif NR_ROWS_LOG == 20 && OPTIM_SIMPLIFY_ROUND #define OVERHEAD 6 #elif NR_ROWS_LOG == 20 #define OVERHEAD 9 #endif

#define NR_ROWS (1 << NR_ROWS_LOG) #define NR_SLOTS ((1 << (APX_NR_ELMS_LOG - NR_ROWS_LOG)) * OVERHEAD) // Length of 1 element (slot) in bytes #define SLOT_LEN 32 // Total size of hash table #define HT_SIZE (NR_ROWS * NR_SLOTS * SLOT_LEN) // Length of Zcash block header, nonce (part of header) #define ZCASH_BLOCK_HEADER_LEN 140 // Offset of nTime in header #define ZCASH_BLOCK_OFFSET_NTIME (4 + 3 * 32) // Length of nonce #define ZCASH_NONCE_LEN 32 // Length of encoded representation of solution size #define ZCASH_SOLSIZE_LEN 3 // Solution size (1344 = 0x540) represented as a compact integer, in hex #define ZCASH_SOLSIZE_HEX "fd4005" // Length of encoded solution (512 * 21 bits / 8 = 1344 bytes) #define ZCASH_SOL_LEN ((1 << PARAM_K) * (PREFIX + 1) / 8) // Last N_ZERO_BYTES of nonce must be zero due to my BLAKE2B optimization #define N_ZERO_BYTES 12 // Number of bytes Zcash needs out of Blake #define ZCASH_HASH_LEN 50 // Number of wavefronts per SIMD for the Blake kernel. // Blake is ALU-bound (beside the atomic counter being incremented) so we need // at least 2 wavefronts per SIMD to hide the 2-clock latency of integer // instructions. 10 is the max supported by the hw. #define BLAKE_WPS 10 // Maximum number of solutions reported by kernel to host #define MAX_SOLS 10 // Length of SHA256 target #define SHA256_TARGET_LEN (256 / 8)

#if (NR_SLOTS < 16) #define BITS_PER_ROW 4 #define ROWS_PER_UINT 8 #define ROW_MASK 0x0F #else #define BITS_PER_ROW 8 #define ROWS_PER_UINT 4 #define ROW_MASK 0xFF #endif

// Optional features #undef ENABLE_DEBUG

/* Return the offset of Xi in bytes from the beginning of the slot. */ #define xi_offset_for_round(round) (8 + ((round) / 2) * 4)

// An (uncompressed) solution stores (1 << PARAM_K) 32-bit values #define SOL_SIZE ((1 << PARAM_K) * 4) typedef struct sols_s { uint nr; uint likely_invalids; uchar valid[MAX_SOLS]; uint values[MAX_SOLS][(1 << PARAM_K)]; } sols_t;

How do I compile it? :/ I am getting an error; can you help me?

Saikatsaha1996 commented 2 years ago

Screenshot_2022-05-21-21-51-21-959_com termux