최적화를 위해 : Custom OP
성능을 떨어뜨리는 연산들은 직접 고쳐 보자.
1. Custom 연산자 구현하기
12 exSDPAttention FLOAT16 NPU (6,64,1,1531),(6,64,1,1531),... (6,64,1,1531) 198835/0/198835 46461 \ 100.0%/0.0%/0.0% 3444 exSDPAttention:/blocks.0/attn/MatMul_1_exsdpa
232 Resize FLOAT16 CPU (1,64,68,90),(1),(1),(4) (1,64,136,180) 0/0/0 34261 \ 0.0%/0.0%/0.0% 765 Resize:/depth_head/refinenet2/Resize

import numpy as np
from rknn.api import RKNN
from rknn.api.custom_op import get_node_attr
class exSDPAttention:
    """Custom RKNN op: scaled-dot-product-style attention evaluated on CPU.

    NOTE(review): matches the original implementation — no 1/sqrt(d) scaling
    and no attention mask. Confirm against the exported ONNX node before
    adding either.
    """

    # Must match the op_type of the node in the ONNX graph.
    op_type = 'exSDPAttention'

    def shape_infer(self, node, in_shapes, in_dtypes):
        """Outputs mirror the input shapes/dtypes (attention preserves the query shape)."""
        out_shapes = in_shapes.copy()
        out_dtypes = in_dtypes.copy()
        return out_shapes, out_dtypes

    def compute(self, node, inputs):
        """Compute softmax(Q @ K^T) @ V from the first three inputs.

        Fixes vs. the original:
        - ``np.softmax`` does not exist in NumPy; softmax is implemented
          explicitly, stabilized by subtracting the per-row max before exp.
        - ``key.transpose(-2, -1)`` is only valid for 2-D arrays;
          ``np.swapaxes`` swaps the last two axes for any rank (the profiled
          tensors are 4-D, e.g. (6, 64, 1, 1531)).
        """
        query, key, value = inputs[:3]
        # Q @ K^T over the last two axes.
        attention_scores = np.matmul(query, np.swapaxes(key, -1, -2))
        # Numerically stable softmax along the last axis.
        shifted = attention_scores - np.max(attention_scores, axis=-1, keepdims=True)
        exp_scores = np.exp(shifted)
        attention_probs = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
        context_layer = np.matmul(attention_probs, value)
        return [context_layer]
if __name__ == '__main__':
custom_model_path = 'depth_anything_vits_19.onnx'
# Create RKNN object
rknn = RKNN(verbose=True)
# Pre-process config
print('--> Config model')
rknn.config(target_platform='rk3588')
print('done')
# Register cstSigmoid op
print('--> Register exSDPAttention op')
ret = rknn.reg_custom_op(exSDPAttention())
if ret != 0:
print('Register exSDPAttention op failed!')
exit(ret)
print('done')
# Load model
print('--> Loading model')
ret = rknn.load_onnx(model=custom_model_path)
if ret != 0:
print('Load model failed!')
exit(ret)
print('done')
# Build model
print('--> Building model')
ret = rknn.build(do_quantization=False)
if ret != 0:
print('Build model failed!')
exit(ret)
print('done')
# Export rknn model
print('--> Export rknn model')
ret = rknn.export_rknn('depth_anything_OP19_custom_op_ver_1.rknn')
if ret != 0:
print('Export rknn model failed!')
exit(ret)
print('done')
rknn.release()2. 성능 평가하기

Last updated