Android NDK之 使用 arm-v7a 汇编实现两数之和

2 阅读1分钟

关键词: NDK armv7a WebRTC arm汇编 CMake

最近适配对讲程序,在webrtc的库编译的过程中,发现其为arm的平台定制了汇编程序以优化平方根倒数算法速度,上次写汇编还是8086的,借此机会初步尝试下android上arm汇编

具体jni工程建立就不介绍了,Android Studio直接可以从模板创建

工程目录如下

kryo@WSL1:/mnt/k/Android/NDK-Project/XXX/src/main$ tree 
.
├── AndroidManifest.xml
├── cpp
│   ├── asm
│   │   ├── CMakeLists.txt
│   │   ├── asm_defines.h
│   │   ├── asm_jni.cpp
│   │   ├── asm_jni.h
│   │   ├── tow_sum_armv7a.S
│   │   └── tow_sum_cpp.cpp
└── java
    └── com
        └── kryo
            ├── asm
            │   └── TowSumAsm.java
            └── ...

1、C++接口编写

asm_jni.h

#ifndef TOW_SUM_AMS_TEST_H
#define TOW_SUM_AMS_TEST_H

#include <jni.h>

#ifdef USE_ASM
    extern "C" int32_t
    tow_sum_asm(int32_t *data_in, int32_t *data_out, int32_t data_len, int32_t ret_len, int32_t target);
#else
    extern "C" int32_t
    tow_sum_cpp(int32_t *data_in, int32_t *data_out, int32_t data_len, int32_t target);
#endif

#endif //TOW_SUM_AMS_TEST_H

这里分别使用asm和c代码各自实现一个暴搜版本的两数之和接口。关于asm传递5个参数是有用意的,涉及到函数调用约定,armv7a前4个参数用寄存器传参,超过4个的用栈传递

2、汇编实现

写汇编时我习惯先参考C代码去推导

tow_sum_cpp.cpp

#include "asm_jni.h"

extern "C" int32_t tow_sum_cpp(int32_t *data_in, int32_t *data_out, int32_t data_len,int32_t target) {
    for (int i = 0; i < data_len; ++i) {
        for (int j = i + 1; j < data_len; ++j) {
            if (data_in[i] + data_in[j] == target) {
                data_out[0] = i;
                data_out[1] = j;
                return 0;
            }
        }
    }
    data_out[0] = 0;
    data_out[1] = 0;
    return -1;
}

以下是具体汇编代码的实现,基本每行都给出了注释

tow_sum_armv7a.S

@ Input:(
@        int32_t* data_in, -> r0 &data_in
@        int32_t* data_out,-> r1 &data_out
@        int32_t  data_len, -> r2
@        int32_t  ret_len, -> r3
@        int32_t target -> [sp])
@ Output: r0 32 bit unsigned integer
@
@ r4: i-index
@ r5: j-index
@ r6: target
@ r7: num1-buff
@ r8: num2-buff
@ r9: sum cache


#include "asm_defines.h"

GLOBAL_FUNCTION tow_sum
.align  4
DEFINE_FUNCTION tow_sum
    push {r4-r11} 		@ 保存现场

    ldr r6, [sp, #32] 	@ 保存了8个寄存器,偏移8*4bytes取得第5个参数

    mov r4, #0 			@ 初始化第一个数的索引 i
    mov r5, #0 			@ 初始化第二个数的索引 j


LOOP_1:
    sub r9, r2, #1 		@ 数组长度-1
    cmp r4, r9			@ 判断i是否数组最后一个
    beq FAL				@ 是就查找失败
    mov r5, r4  		@ j = i

LOOP_2:
    add r5, r5, #1 		@ j ++
    lsl r9, r4, #2		@ 把索引 i 乘4得到地址偏移量
    ldr r7, [r0, r9]	@ r7 = data_in[i],寄存器相对寻址, r0为 data_in的地址,加上偏移量取的数组元素
    lsl r9, r5, #2
    ldr r8, [r0, r9]	@ 同上得到 r8 = data_in[j]
    add r9, r8, r7		@ 两数之和
    cmp r9, r6			@ 与目标做比较
    beq SUC				@ 成功
    add r9, r5, #1		@ 没有成功
    cmp r9, r2			@ if j < data_len
    bne LOOP_2			@ then:下一轮j的查找
    add r4, r4, #1		@ else: j没找到,把i++
    b LOOP_1			@ 下一轮 i的查找

SUC:
    str r4, [r1]		@ data_out[0] = i
    str r5, [r1, #4]	@ data_out[1] = j
    mov r0, #0			@ return 0
    b END

FAL:
    mov r4, #0
    mov r5, #0
    mov r0, #-1			@ return -1
    b SUC

END:
    pop {r4-r11}		@ 还原现场
    bx  lr

3、JNI实现

asm_jni.cpp

#include "asm_jni.h"
#include <android/log.h>

#define TAG "ASM_TEST"

#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)

#ifdef __cplusplus
extern "C" {
#endif
JNIEXPORT jintArray JNICALL
    Java_com_kryo_asm_TowSumAsm_towsum(JNIEnv *env, jobject thiz, jintArray data, jint target) {

        jintArray r_array = env->NewIntArray(2);
        jint *elements_out = env->GetIntArrayElements(r_array, NULL);

        jsize length = env->GetArrayLength(data);
        jint *elements_in = env->GetIntArrayElements(data, NULL);

    #ifdef USE_ASM
        LOGD("call tow_sum_asm !\n");
        tow_sum_asm(elements_in, elements_out, (size_t) length, 2, (size_t) target);
    #else
        LOGD("call tow_sum_cpp !\n");
        tow_sum_cpp(elements_in, elements_out, (size_t) length, (size_t) target);
    #endif

        env->ReleaseIntArrayElements(data, elements_in, 0);
        env->ReleaseIntArrayElements(r_array, elements_out, 0);

        return r_array;
    }
#ifdef __cplusplus
}
#endif

TowSumAsm.java

public class TowSumAsm {
    static {
        System.loadLibrary("asm");
    }
    public native int[] towsum(int[] data, int target);
}

最后贴一下从webrtc中copy来的asm_defines.h

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef KRYO_INCLUDE_ASM_DEFINES_H_
#define KRYO_INCLUDE_ASM_DEFINES_H_

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

// Define the macros used in ARM assembly code, so that for Mac or iOS builds
// we add leading underscores for the function names.
#ifdef __APPLE__
.macro GLOBAL_FUNCTION name
.global _\name
.private_extern _\name
.endm
.macro DEFINE_FUNCTION name
_\name:
.endm
.macro CALL_FUNCTION name
bl _\name
.endm
.macro GLOBAL_LABEL name
.global _\name
.private_extern _\name
.endm
#else
.macro GLOBAL_FUNCTION name
.global \name
.hidden \name
.endm
.macro DEFINE_FUNCTION name
#if defined(__linux__) && defined(__ELF__)
.type \name,%function
#endif
\name:
.endm
.macro CALL_FUNCTION name
bl \name
.endm
.macro GLOBAL_LABEL name
.global \name
.hidden \name
.endm
#endif

// With Apple's clang compiler, for instructions ldrb, strh, etc.,
// the condition code is after the width specifier. Here we define
// only the ones that are actually used in the assembly files.
#if (defined __llvm__) && (defined __APPLE__)
.macro streqh reg1, reg2, num
strheq \reg1, \reg2, \num
.endm
#endif
.text
#endif  // KRYO_INCLUDE_ASM_DEFINES_H_

4、CMakeLists.txt编写生成libasm.so

CMakeLists.txt

cmake_minimum_required(VERSION 3.10.2)

project("asm")

ENABLE_LANGUAGE(ASM) #启用汇编支持

if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
    add_library(asm SHARED
            asm_jni.cpp
            tow_sum_armv7a.S)
    add_definitions(-DUSE_ASM)
elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
    add_library(asm SHARED
            asm_jni.cpp
            tow_sum_cpp.cpp)
else()
    message(FATAL_ERROR "Unsupported ABI: ${ANDROID_ABI}")
endif()

target_link_libraries(asm
        log)

5、运行测试

TowSumAsm towSumAsm = new TowSumAsm();
int[] result = towSumAsm.towsum(new int[]{1, 3, 5, 7, 9}, 12);
Log.d(TAG, "result " + result[0] + " " + result[1]);
2024-04-05 10:24:29.269 19863-19863 ASM_TEST                com.kryo.demo                        D  call tow_sum_asm !
2024-04-05 10:24:29.269 19863-19863 JNI_Activity            com.kryo.demo                        D  result 1 4

Reference