Closed Yang507 closed 11 months ago
原网络模型结构如下
7767517
215 232
Input 0 0 1 0
Convolution 338 1 1 0 338 0=32 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=0 6=864
BatchNorm 339 1 1 338 339 0=32
Clip 340 1 1 339 340 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 341 1 1 340 341 0=32 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=288 7=32
BatchNorm 342 1 1 341 342 0=32
Clip 343 1 1 342 343 0=0.000000e+00 1=6.000000e+00
Convolution 344 1 1 343 344 0=16 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=512
BatchNorm 345 1 1 344 345 0=16
Convolution 346 1 1 345 346 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=1536
BatchNorm 347 1 1 346 347 0=96
Clip 348 1 1 347 348 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 349 1 1 348 349 0=96 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=0 6=864 7=96
BatchNorm 350 1 1 349 350 0=96
Clip 351 1 1 350 351 0=0.000000e+00 1=6.000000e+00
Convolution 352 1 1 351 352 0=24 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=2304
BatchNorm 353 1 1 352 353 0=24
Split splitncnn_0 1 2 353 353_splitncnn_0 353_splitncnn_1
Convolution 354 1 1 353_splitncnn_1 354 0=144 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=3456
BatchNorm 355 1 1 354 355 0=144
Clip 356 1 1 355 356 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 357 1 1 356 357 0=144 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=1296 7=144
BatchNorm 358 1 1 357 358 0=144
Clip 359 1 1 358 359 0=0.000000e+00 1=6.000000e+00
Convolution 360 1 1 359 360 0=24 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=3456
BatchNorm 361 1 1 360 361 0=24
BinaryOp 362 2 1 353_splitncnn_0 361 362 0=0
Convolution 363 1 1 362 363 0=144 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=3456
BatchNorm 364 1 1 363 364 0=144
Clip 365 1 1 364 365 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 366 1 1 365 366 0=144 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=0 6=1296 7=144
BatchNorm 367 1 1 366 367 0=144
Clip 368 1 1 367 368 0=0.000000e+00 1=6.000000e+00
Convolution 369 1 1 368 369 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=4608
BatchNorm 370 1 1 369 370 0=32
Split splitncnn_1 1 2 370 370_splitncnn_0 370_splitncnn_1
Convolution 371 1 1 370_splitncnn_1 371 0=192 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=6144
BatchNorm 372 1 1 371 372 0=192
Clip 373 1 1 372 373 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 374 1 1 373 374 0=192 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=1728 7=192
BatchNorm 375 1 1 374 375 0=192
Clip 376 1 1 375 376 0=0.000000e+00 1=6.000000e+00
Convolution 377 1 1 376 377 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=6144
BatchNorm 378 1 1 377 378 0=32
BinaryOp 379 2 1 370_splitncnn_0 378 379 0=0
Split splitncnn_2 1 2 379 379_splitncnn_0 379_splitncnn_1
Convolution 380 1 1 379_splitncnn_1 380 0=192 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=6144
BatchNorm 381 1 1 380 381 0=192
Clip 382 1 1 381 382 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 383 1 1 382 383 0=192 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=1728 7=192
BatchNorm 384 1 1 383 384 0=192
Clip 385 1 1 384 385 0=0.000000e+00 1=6.000000e+00
Convolution 386 1 1 385 386 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=6144
BatchNorm 387 1 1 386 387 0=32
BinaryOp 388 2 1 379_splitncnn_0 387 388 0=0
Split splitncnn_3 1 2 388 388_splitncnn_0 388_splitncnn_1
Convolution 389 1 1 388_splitncnn_1 389 0=192 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=6144
BatchNorm 390 1 1 389 390 0=192
Clip 391 1 1 390 391 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 392 1 1 391 392 0=192 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=0 6=1728 7=192
BatchNorm 393 1 1 392 393 0=192
Clip 394 1 1 393 394 0=0.000000e+00 1=6.000000e+00
Convolution 395 1 1 394 395 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=12288
BatchNorm 396 1 1 395 396 0=64
Split splitncnn_4 1 2 396 396_splitncnn_0 396_splitncnn_1
Convolution 397 1 1 396_splitncnn_1 397 0=384 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=24576
BatchNorm 398 1 1 397 398 0=384
Clip 399 1 1 398 399 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 400 1 1 399 400 0=384 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=3456 7=384
BatchNorm 401 1 1 400 401 0=384
Clip 402 1 1 401 402 0=0.000000e+00 1=6.000000e+00
Convolution 403 1 1 402 403 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=24576
BatchNorm 404 1 1 403 404 0=64
BinaryOp 405 2 1 396_splitncnn_0 404 405 0=0
Split splitncnn_5 1 2 405 405_splitncnn_0 405_splitncnn_1
Convolution 406 1 1 405_splitncnn_1 406 0=384 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=24576
BatchNorm 407 1 1 406 407 0=384
Clip 408 1 1 407 408 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 409 1 1 408 409 0=384 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=3456 7=384
BatchNorm 410 1 1 409 410 0=384
Clip 411 1 1 410 411 0=0.000000e+00 1=6.000000e+00
Convolution 412 1 1 411 412 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=24576
BatchNorm 413 1 1 412 413 0=64
BinaryOp 414 2 1 405_splitncnn_0 413 414 0=0
Split splitncnn_6 1 2 414 414_splitncnn_0 414_splitncnn_1
Convolution 415 1 1 414_splitncnn_1 415 0=384 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=24576
BatchNorm 416 1 1 415 416 0=384
Clip 417 1 1 416 417 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 418 1 1 417 418 0=384 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=3456 7=384
BatchNorm 419 1 1 418 419 0=384
Clip 420 1 1 419 420 0=0.000000e+00 1=6.000000e+00
Convolution 421 1 1 420 421 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=24576
BatchNorm 422 1 1 421 422 0=64
BinaryOp 423 2 1 414_splitncnn_0 422 423 0=0
Convolution 424 1 1 423 424 0=384 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=24576
BatchNorm 425 1 1 424 425 0=384
Clip 426 1 1 425 426 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 427 1 1 426 427 0=384 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=3456 7=384
BatchNorm 428 1 1 427 428 0=384
Clip 429 1 1 428 429 0=0.000000e+00 1=6.000000e+00
Convolution 430 1 1 429 430 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=36864
BatchNorm 431 1 1 430 431 0=96
Split splitncnn_7 1 2 431 431_splitncnn_0 431_splitncnn_1
Convolution 432 1 1 431_splitncnn_1 432 0=576 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=55296
BatchNorm 433 1 1 432 433 0=576
Clip 434 1 1 433 434 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 435 1 1 434 435 0=576 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=5184 7=576
BatchNorm 436 1 1 435 436 0=576
Clip 437 1 1 436 437 0=0.000000e+00 1=6.000000e+00
Convolution 438 1 1 437 438 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=55296
BatchNorm 439 1 1 438 439 0=96
BinaryOp 440 2 1 431_splitncnn_0 439 440 0=0
Split splitncnn_8 1 2 440 440_splitncnn_0 440_splitncnn_1
Convolution 441 1 1 440_splitncnn_1 441 0=576 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=55296
BatchNorm 442 1 1 441 442 0=576
Clip 443 1 1 442 443 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 444 1 1 443 444 0=576 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=5184 7=576
BatchNorm 445 1 1 444 445 0=576
Clip 446 1 1 445 446 0=0.000000e+00 1=6.000000e+00
Convolution 447 1 1 446 447 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=55296
BatchNorm 448 1 1 447 448 0=96
BinaryOp 449 2 1 440_splitncnn_0 448 449 0=0
Split splitncnn_9 1 2 449 449_splitncnn_0 449_splitncnn_1
Convolution 450 1 1 449_splitncnn_1 450 0=576 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=55296
BatchNorm 451 1 1 450 451 0=576
Clip 452 1 1 451 452 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 453 1 1 452 453 0=576 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=0 6=5184 7=576
BatchNorm 454 1 1 453 454 0=576
Clip 455 1 1 454 455 0=0.000000e+00 1=6.000000e+00
Convolution 456 1 1 455 456 0=160 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=92160
BatchNorm 457 1 1 456 457 0=160
Split splitncnn_10 1 2 457 457_splitncnn_0 457_splitncnn_1
Convolution 458 1 1 457_splitncnn_1 458 0=960 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=153600
BatchNorm 459 1 1 458 459 0=960
Clip 460 1 1 459 460 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 461 1 1 460 461 0=960 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=8640 7=960
BatchNorm 462 1 1 461 462 0=960
Clip 463 1 1 462 463 0=0.000000e+00 1=6.000000e+00
Convolution 464 1 1 463 464 0=160 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=153600
BatchNorm 465 1 1 464 465 0=160
BinaryOp 466 2 1 457_splitncnn_0 465 466 0=0
Split splitncnn_11 1 2 466 466_splitncnn_0 466_splitncnn_1
Convolution 467 1 1 466_splitncnn_1 467 0=960 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=153600
BatchNorm 468 1 1 467 468 0=960
Clip 469 1 1 468 469 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 470 1 1 469 470 0=960 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=8640 7=960
BatchNorm 471 1 1 470 471 0=960
Clip 472 1 1 471 472 0=0.000000e+00 1=6.000000e+00
Convolution 473 1 1 472 473 0=160 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=153600
BatchNorm 474 1 1 473 474 0=160
BinaryOp 475 2 1 466_splitncnn_0 474 475 0=0
Convolution 476 1 1 475 476 0=960 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=153600
BatchNorm 477 1 1 476 477 0=960
Clip 478 1 1 477 478 0=0.000000e+00 1=6.000000e+00
ConvolutionDepthWise 479 1 1 478 479 0=960 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=0 6=8640 7=960
BatchNorm 480 1 1 479 480 0=960
Clip 481 1 1 480 481 0=0.000000e+00 1=6.000000e+00
Convolution 482 1 1 481 482 0=320 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=307200
BatchNorm 483 1 1 482 483 0=320
Convolution 484 1 1 483 484 0=512 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=163840
BatchNorm 485 1 1 484 485 0=512
Clip 486 1 1 485 486 0=0.000000e+00 1=6.000000e+00
Convolution 487 1 1 486 487 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=32768
Split splitncnn_12 1 2 487 487_splitncnn_0 487_splitncnn_1
Convolution 488 1 1 487_splitncnn_1 488 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
Deconvolution 489 1 1 487_splitncnn_0 489 0=64 1=2 11=2 2=1 12=1 3=2 13=2 4=0 14=0 15=0 16=0 5=1 6=16384
Convolution 490 1 1 449_splitncnn_0 490 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=6144
BinaryOp 491 2 1 490 489 491 0=0
Split splitncnn_13 1 2 491 491_splitncnn_0 491_splitncnn_1
Convolution 492 1 1 491_splitncnn_1 492 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
Deconvolution 493 1 1 491_splitncnn_0 493 0=64 1=2 11=2 2=1 12=1 3=2 13=2 4=0 14=0 15=0 16=0 5=1 6=16384
Convolution 494 1 1 388_splitncnn_0 494 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=2048
BinaryOp 495 2 1 494 493 495 0=0
Convolution 496 1 1 495 496 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
Convolution 497 1 1 496 497 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
ReLU 498 1 1 497 498
Convolution 499 1 1 498 499 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
ReLU 500 1 1 499 500
Split splitncnn_14 1 2 500 500_splitncnn_0 500_splitncnn_1
Convolution 501 1 1 500_splitncnn_1 501 0=12 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=6912
Convolution 502 1 1 500_splitncnn_0 502 0=48 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=27648
Convolution 503 1 1 492 503 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
ReLU 504 1 1 503 504
Convolution 505 1 1 504 505 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
ReLU 506 1 1 505 506
Split splitncnn_15 1 2 506 506_splitncnn_0 506_splitncnn_1
Convolution 507 1 1 506_splitncnn_1 507 0=12 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=6912
Convolution 508 1 1 506_splitncnn_0 508 0=48 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=27648
Convolution 509 1 1 488 509 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
ReLU 510 1 1 509 510
Convolution 511 1 1 510 511 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
ReLU 512 1 1 511 512
Split splitncnn_16 1 2 512 512_splitncnn_0 512_splitncnn_1
Convolution 513 1 1 512_splitncnn_1 513 0=12 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=6912
Convolution 514 1 1 512_splitncnn_0 514 0=48 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=27648
Permute 515 1 1 501 515 0=3
Reshape 517 1 1 515 517 0=12 1=-1
Reshape 519 1 1 517 519 0=1 1=-1
Permute 520 1 1 502 520 0=3
Reshape 522 1 1 520 522 0=48 1=-1
Reshape 524 1 1 522 524 0=4 1=-1
Permute 525 1 1 507 525 0=3
Reshape 527 1 1 525 527 0=12 1=-1
Reshape 529 1 1 527 529 0=1 1=-1
Permute 530 1 1 508 530 0=3
Reshape 532 1 1 530 532 0=48 1=-1
Reshape 534 1 1 532 534 0=4 1=-1
Permute 535 1 1 513 535 0=3
Reshape 537 1 1 535 537 0=12 1=-1
Reshape 539 1 1 537 539 0=1 1=-1
Permute 540 1 1 514 540 0=3
Reshape 542 1 1 540 542 0=48 1=-1
Reshape 544 1 1 542 544 0=4 1=-1
Concat 545 3 1 519 529 539 545 0=0
Concat 546 3 1 524 534 544 546 0=0
是 armv7 吗?硬件和系统分别是什么?
跑mobilenet和yolov4-tiny,量化后速度都有提升,我在想是不是该模型的问题,测试时单线程运行
Processor : AArch64 Processor rev 4 (aarch64)
processor : 0
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
processor : 1
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
processor : 2
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
processor : 3
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
processor : 4
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
processor : 5
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
processor : 6
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
processor : 7
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4
Hardware : Qualcomm Technologies, Inc SDM450
交叉编译:
# Cross-compilation settings for a 32-bit ARM (armeabi-v7a) Android build that
# uses the clang toolchain installed under /opt.
# NOTE(review): presumably loaded as a CMake toolchain file - confirm.

# --- Target system -----------------------------------------------------------
set(CMAKE_SYSTEM_NAME Android)
set(CMAKE_SYSTEM_PROCESSOR armv7-a)
# NOTE(review): for CMake's built-in Android support CMAKE_SYSTEM_VERSION is the
# API level, yet the ANDROID_* variables below request level 24 - confirm which
# level is intended and make them agree.
set(CMAKE_SYSTEM_VERSION 28)
set(ANDROID_NATIVE_API_LEVEL 24)
set(ANDROID_PLATFORM android-24)
set(CMAKE_FIND_ROOT_PATH /opt/)

# --- Compilers ---------------------------------------------------------------
set(PLATEFORM_COMPILER_TOOL "/opt/toolchains/llvm/prebuilt/linux-x86_64/bin/")
set(CMAKE_C_COMPILER "${PLATEFORM_COMPILER_TOOL}clang")
set(CMAKE_CXX_COMPILER "${PLATEFORM_COMPILER_TOOL}clang++")

# --- Linker search path for the libc++ runtime -------------------------------
# CMAKE_LD_FLAGS is not a variable CMake reads, so on its own this -L path never
# reaches the linker. It is kept in case project code reads it, and its value is
# also used to seed the linker-flag variables CMake actually honours.
set(CMAKE_LD_FLAGS "-L/opt/sources/cxx-stl/llvm-libc++/libs/armeabi-v7a")
set(CMAKE_EXE_LINKER_FLAGS_INIT "${CMAKE_LD_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS_INIT "${CMAKE_LD_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS_INIT "${CMAKE_LD_FLAGS}")

# --- Android ABI / NDK options -----------------------------------------------
set(CMAKE_ANDROID_ARCH_ABI armeabi-v7a)
set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang)
set(CMAKE_ANDROID_NDK /opt)
set(CMAKE_ANDROID_STL_TYPE c++_shared) # c++_shared or c++_static
set(CMAKE_ANDROID_ARM_MODE arm)
set(CMAKE_ANDROID_ARM_NEON ON)

# --- Project-specific target hints -------------------------------------------
# NOTE(review): TARGET_ARCH says arm64 while every other setting in this block
# targets 32-bit armv7-a - confirm what reads these and whether arm64 is meant.
set(TARGET_ARCH arm64)
set(TARGET_ARCH_VARIANT armv7-a-neon)
set(TARGET_CPU_VARIANT cortex-a53)
交叉编译:
# Cross-compilation settings for a 32-bit ARM (armeabi-v7a) Android build.
# One command per line: when collapsed onto a single line, the first '#'
# turns every following set() into a comment.
set(CMAKE_SYSTEM_NAME Android)
set(CMAKE_SYSTEM_PROCESSOR armv7-a)
# NOTE(review): API level 28 here vs. 24 in the ANDROID_* variables - confirm.
set(CMAKE_SYSTEM_VERSION 28)
set(ANDROID_NATIVE_API_LEVEL 24)
set(ANDROID_PLATFORM android-24)
set(CMAKE_FIND_ROOT_PATH /opt/)
set(PLATEFORM_COMPILER_TOOL "/opt/toolchains/llvm/prebuilt/linux-x86_64/bin/")
set(CMAKE_C_COMPILER "${PLATEFORM_COMPILER_TOOL}clang")
set(CMAKE_CXX_COMPILER "${PLATEFORM_COMPILER_TOOL}clang++")
# CMAKE_LD_FLAGS is not read by CMake; keep it for any project code that uses
# it and forward its value to the linker-flag variables CMake does honour.
set(CMAKE_LD_FLAGS "-L/opt/sources/cxx-stl/llvm-libc++/libs/armeabi-v7a")
set(CMAKE_EXE_LINKER_FLAGS_INIT "${CMAKE_LD_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS_INIT "${CMAKE_LD_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS_INIT "${CMAKE_LD_FLAGS}")
# android
set(CMAKE_ANDROID_ARCH_ABI armeabi-v7a)
set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang)
set(CMAKE_ANDROID_NDK /opt)
set(CMAKE_ANDROID_STL_TYPE c++_shared) # c++_shared or c++_static
set(CMAKE_ANDROID_ARM_MODE arm)
set(CMAKE_ANDROID_ARM_NEON ON)
# NOTE(review): arm64 contradicts the armv7-a settings above - confirm.
set(TARGET_ARCH arm64)
set(TARGET_ARCH_VARIANT armv7-a-neon)
set(TARGET_CPU_VARIANT cortex-a53)
明明是 arm64 的 cpu,为何编译 armv7 架构?
嗯,目前这边都是用的32位,所以编译armv7架构
arm64 cpu 使用 arm64 编译运行,会大幅度快于 armv7 编译的程序。armv7 上目前缺少非 relu 激活的 requantize 优化,这个模型中用了 relu6/clip 激活,建议改成 relu 激活的模型,量化后速度会快。
非常感谢nihui酱
arm64 cpu 使用 arm64 编译运行,会大幅度快于 armv7 编译的程序。armv7 上目前缺少非 relu 激活的 requantize 优化,这个模型中用了 relu6/clip 激活,建议改成 relu 激活的模型,量化后速度会快。
那么请问一下,在 armv7 中 leaky relu 进行 requantize 优化了吗?也会比 relu 更慢一些么?
armv7 上对 mv3 和 ghostnet 做完 int8 量化后,推理速度全都变慢了,有遇到同样问题的吗?
mv3 int8 量化: min = 70.41 max = 88.00 avg = 81.15
mv3 float32: min = 62.04 max = 77.53 avg = 70.97
ghost int8 量化: min = 95.24 max = 117.95 avg = 107.40
ghost float32: min = 84.34 max = 101.64 avg = 91.60
@redshzh 解决了吗?
本来均值为0,量化中修改norm={1.0,1.0,1.0};能成功转换为int8格式,推理得到正确结果