NeuralNetworks

Summary

Enumerations

Anonymous Enum 53{
  ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128
}
enum
For ANeuralNetworksModel_setOperandValue, values with a length smaller than or equal to this will be immediately copied into the model.
Anonymous Enum 54{
  ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN = 32
}
enum
For ANeuralNetworksCompilation_setCaching, specify the size of the cache token required from the application.
DeviceTypeCode{
  ANEURALNETWORKS_DEVICE_UNKNOWN = 0,
  ANEURALNETWORKS_DEVICE_OTHER = 1,
  ANEURALNETWORKS_DEVICE_CPU = 2,
  ANEURALNETWORKS_DEVICE_GPU = 3,
  ANEURALNETWORKS_DEVICE_ACCELERATOR = 4
}
enum
Device types.
DurationCode{
  ANEURALNETWORKS_DURATION_ON_HARDWARE = 0,
  ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
  ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
  ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3
}
enum
Different duration measurements.
FeatureLevelCode{
  ANEURALNETWORKS_FEATURE_LEVEL_1 = 27,
  ANEURALNETWORKS_FEATURE_LEVEL_2 = 28,
  ANEURALNETWORKS_FEATURE_LEVEL_3 = 29,
  ANEURALNETWORKS_FEATURE_LEVEL_4 = 30,
  ANEURALNETWORKS_FEATURE_LEVEL_5 = 31,
  ANEURALNETWORKS_FEATURE_LEVEL_6 = 1000006,
  ANEURALNETWORKS_FEATURE_LEVEL_7 = 1000007,
  ANEURALNETWORKS_FEATURE_LEVEL_8 = 1000008
}
enum
NNAPI feature levels.
FuseCode{
  ANEURALNETWORKS_FUSED_NONE = 0,
  ANEURALNETWORKS_FUSED_RELU = 1,
  ANEURALNETWORKS_FUSED_RELU1 = 2,
  ANEURALNETWORKS_FUSED_RELU6 = 3
}
enum
Fused activation function types.
OperandCode{
  ANEURALNETWORKS_FLOAT32 = 0,
  ANEURALNETWORKS_INT32 = 1,
  ANEURALNETWORKS_UINT32 = 2,
  ANEURALNETWORKS_TENSOR_FLOAT32 = 3,
  ANEURALNETWORKS_TENSOR_INT32 = 4,
  ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 5,
  ANEURALNETWORKS_BOOL = 6,
  ANEURALNETWORKS_TENSOR_QUANT16_SYMM = 7,
  ANEURALNETWORKS_TENSOR_FLOAT16 = 8,
  ANEURALNETWORKS_TENSOR_BOOL8 = 9,
  ANEURALNETWORKS_FLOAT16 = 10,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
  ANEURALNETWORKS_TENSOR_QUANT16_ASYMM = 12,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
  ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
  ANEURALNETWORKS_MODEL = 15
}
enum
Operand types.
OperationCode{
  ANEURALNETWORKS_ADD = 0,
  ANEURALNETWORKS_AVERAGE_POOL_2D = 1,
  ANEURALNETWORKS_CONCATENATION = 2,
  ANEURALNETWORKS_CONV_2D = 3,
  ANEURALNETWORKS_DEPTHWISE_CONV_2D = 4,
  ANEURALNETWORKS_DEPTH_TO_SPACE = 5,
  ANEURALNETWORKS_DEQUANTIZE = 6,
  ANEURALNETWORKS_EMBEDDING_LOOKUP = 7,
  ANEURALNETWORKS_FLOOR = 8,
  ANEURALNETWORKS_FULLY_CONNECTED = 9,
  ANEURALNETWORKS_HASHTABLE_LOOKUP = 10,
  ANEURALNETWORKS_L2_NORMALIZATION = 11,
  ANEURALNETWORKS_L2_POOL_2D = 12,
  ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION = 13,
  ANEURALNETWORKS_LOGISTIC = 14,
  ANEURALNETWORKS_LSH_PROJECTION = 15,
  ANEURALNETWORKS_LSTM = 16,
  ANEURALNETWORKS_MAX_POOL_2D = 17,
  ANEURALNETWORKS_MUL = 18,
  ANEURALNETWORKS_RELU = 19,
  ANEURALNETWORKS_RELU1 = 20,
  ANEURALNETWORKS_RELU6 = 21,
  ANEURALNETWORKS_RESHAPE = 22,
  ANEURALNETWORKS_RESIZE_BILINEAR = 23,
  ANEURALNETWORKS_RNN = 24,
  ANEURALNETWORKS_SOFTMAX = 25,
  ANEURALNETWORKS_SPACE_TO_DEPTH = 26,
  ANEURALNETWORKS_SVDF = 27,
  ANEURALNETWORKS_TANH = 28,
  ANEURALNETWORKS_BATCH_TO_SPACE_ND = 29,
  ANEURALNETWORKS_DIV = 30,
  ANEURALNETWORKS_MEAN = 31,
  ANEURALNETWORKS_PAD = 32,
  ANEURALNETWORKS_SPACE_TO_BATCH_ND = 33,
  ANEURALNETWORKS_SQUEEZE = 34,
  ANEURALNETWORKS_STRIDED_SLICE = 35,
  ANEURALNETWORKS_SUB = 36,
  ANEURALNETWORKS_TRANSPOSE = 37,
  ANEURALNETWORKS_ABS = 38,
  ANEURALNETWORKS_ARGMAX = 39,
  ANEURALNETWORKS_ARGMIN = 40,
  ANEURALNETWORKS_AXIS_ALIGNED_BBOX_TRANSFORM = 41,
  ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM = 42,
  ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_RNN = 43,
  ANEURALNETWORKS_BOX_WITH_NMS_LIMIT = 44,
  ANEURALNETWORKS_CAST = 45,
  ANEURALNETWORKS_CHANNEL_SHUFFLE = 46,
  ANEURALNETWORKS_DETECTION_POSTPROCESSING = 47,
  ANEURALNETWORKS_EQUAL = 48,
  ANEURALNETWORKS_EXP = 49,
  ANEURALNETWORKS_EXPAND_DIMS = 50,
  ANEURALNETWORKS_GATHER = 51,
  ANEURALNETWORKS_GENERATE_PROPOSALS = 52,
  ANEURALNETWORKS_GREATER = 53,
  ANEURALNETWORKS_GREATER_EQUAL = 54,
  ANEURALNETWORKS_GROUPED_CONV_2D = 55,
  ANEURALNETWORKS_HEATMAP_MAX_KEYPOINT = 56,
  ANEURALNETWORKS_INSTANCE_NORMALIZATION = 57,
  ANEURALNETWORKS_LESS = 58,
  ANEURALNETWORKS_LESS_EQUAL = 59,
  ANEURALNETWORKS_LOG = 60,
  ANEURALNETWORKS_LOGICAL_AND = 61,
  ANEURALNETWORKS_LOGICAL_NOT = 62,
  ANEURALNETWORKS_LOGICAL_OR = 63,
  ANEURALNETWORKS_LOG_SOFTMAX = 64,
  ANEURALNETWORKS_MAXIMUM = 65,
  ANEURALNETWORKS_MINIMUM = 66,
  ANEURALNETWORKS_NEG = 67,
  ANEURALNETWORKS_NOT_EQUAL = 68,
  ANEURALNETWORKS_PAD_V2 = 69,
  ANEURALNETWORKS_POW = 70,
  ANEURALNETWORKS_PRELU = 71,
  ANEURALNETWORKS_QUANTIZE = 72,
  ANEURALNETWORKS_QUANTIZED_16BIT_LSTM = 73,
  ANEURALNETWORKS_RANDOM_MULTINOMIAL = 74,
  ANEURALNETWORKS_REDUCE_ALL = 75,
  ANEURALNETWORKS_REDUCE_ANY = 76,
  ANEURALNETWORKS_REDUCE_MAX = 77,
  ANEURALNETWORKS_REDUCE_MIN = 78,
  ANEURALNETWORKS_REDUCE_PROD = 79,
  ANEURALNETWORKS_REDUCE_SUM = 80,
  ANEURALNETWORKS_ROI_ALIGN = 81,
  ANEURALNETWORKS_ROI_POOLING = 82,
  ANEURALNETWORKS_RSQRT = 83,
  ANEURALNETWORKS_SELECT = 84,
  ANEURALNETWORKS_SIN = 85,
  ANEURALNETWORKS_SLICE = 86,
  ANEURALNETWORKS_SPLIT = 87,
  ANEURALNETWORKS_SQRT = 88,
  ANEURALNETWORKS_TILE = 89,
  ANEURALNETWORKS_TOPK_V2 = 90,
  ANEURALNETWORKS_TRANSPOSE_CONV_2D = 91,
  ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM = 92,
  ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN = 93,
  ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR = 94,
  ANEURALNETWORKS_QUANTIZED_LSTM = 95,
  ANEURALNETWORKS_IF = 96,
  ANEURALNETWORKS_WHILE = 97,
  ANEURALNETWORKS_ELU = 98,
  ANEURALNETWORKS_HARD_SWISH = 99,
  ANEURALNETWORKS_FILL = 100,
  ANEURALNETWORKS_RANK = 101,
  ANEURALNETWORKS_BATCH_MATMUL = 102,
  ANEURALNETWORKS_PACK = 103,
  ANEURALNETWORKS_MIRROR_PAD = 104,
  ANEURALNETWORKS_REVERSE = 105
}
enum
Operation types.
PaddingCode{
  ANEURALNETWORKS_PADDING_SAME = 1,
  ANEURALNETWORKS_PADDING_VALID = 2
}
enum
Implicit padding algorithms.
PreferenceCode{
  ANEURALNETWORKS_PREFER_LOW_POWER = 0,
  ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER = 1,
  ANEURALNETWORKS_PREFER_SUSTAINED_SPEED = 2
}
enum
Execution preferences.
PriorityCode{
  ANEURALNETWORKS_PRIORITY_LOW = 90,
  ANEURALNETWORKS_PRIORITY_MEDIUM = 100,
  ANEURALNETWORKS_PRIORITY_HIGH = 110,
  ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM
}
enum
Relative execution priority.
ResultCode{
  ANEURALNETWORKS_NO_ERROR = 0,
  ANEURALNETWORKS_OUT_OF_MEMORY = 1,
  ANEURALNETWORKS_INCOMPLETE = 2,
  ANEURALNETWORKS_UNEXPECTED_NULL = 3,
  ANEURALNETWORKS_BAD_DATA = 4,
  ANEURALNETWORKS_OP_FAILED = 5,
  ANEURALNETWORKS_BAD_STATE = 6,
  ANEURALNETWORKS_UNMAPPABLE = 7,
  ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE = 8,
  ANEURALNETWORKS_UNAVAILABLE_DEVICE = 9,
  ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT = 10,
  ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT = 11,
  ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT = 12,
  ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT = 13,
  ANEURALNETWORKS_DEAD_OBJECT = 14
}
enum
Result codes.

Typedefs

ANeuralNetworksBurst typedef
ANeuralNetworksBurst is an opaque type that can be used to reduce the latency of a rapid sequence of executions.
ANeuralNetworksCompilation typedef
ANeuralNetworksCompilation is an opaque type that can be used to compile a machine learning model.
ANeuralNetworksDevice typedef
ANeuralNetworksDevice is an opaque type that represents a device.
ANeuralNetworksEvent typedef
ANeuralNetworksEvent is an opaque type that represents an event that will be signaled once an execution completes.
ANeuralNetworksExecution typedef
ANeuralNetworksExecution is an opaque type that can be used to apply a machine learning model to a set of inputs.
ANeuralNetworksMemory typedef
ANeuralNetworksMemory is an opaque type that represents memory.
ANeuralNetworksMemoryDesc typedef
ANeuralNetworksMemoryDesc is an opaque type that represents a memory descriptor.
ANeuralNetworksModel typedef
ANeuralNetworksModel is an opaque type that contains a description of the mathematical operations that constitute the model.
ANeuralNetworksOperandType typedef
ANeuralNetworksOperandType describes the type of an operand.
ANeuralNetworksOperationType typedef
int32_t
Aliasing to OperationCode, used in function ANeuralNetworksModel_addOperation.
ANeuralNetworksSymmPerChannelQuantParams typedef
Parameters for ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL operand.

Functions

ANeuralNetworksBurst_create(ANeuralNetworksCompilation *compilation, ANeuralNetworksBurst **burst)
int
Create an ANeuralNetworksBurst to apply the given compilation.
ANeuralNetworksBurst_free(ANeuralNetworksBurst *burst)
void
Destroys the burst object.
ANeuralNetworksCompilation_create(ANeuralNetworksModel *model, ANeuralNetworksCompilation **compilation)
int
Create an ANeuralNetworksCompilation to compile the given model.
ANeuralNetworksCompilation_createForDevices(ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices, ANeuralNetworksCompilation **compilation)
int
Create an ANeuralNetworksCompilation to compile the given model for a specified set of devices.
ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
int
Indicate that we have finished modifying a compilation.
ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
void
Destroy a compilation.
ANeuralNetworksCompilation_getPreferredMemoryAlignmentForInput(const ANeuralNetworksCompilation *compilation, uint32_t index, uint32_t *alignment)
int
Get the preferred buffer and memory alignment of an input to an execution created from a particular compilation.
ANeuralNetworksCompilation_getPreferredMemoryAlignmentForOutput(const ANeuralNetworksCompilation *compilation, uint32_t index, uint32_t *alignment)
int
Get the preferred buffer and memory alignment of an output to an execution created from a particular compilation.
ANeuralNetworksCompilation_getPreferredMemoryPaddingForInput(const ANeuralNetworksCompilation *compilation, uint32_t index, uint32_t *padding)
int
Get the preferred buffer and memory end padding of an input to an execution created from a particular compilation.
ANeuralNetworksCompilation_getPreferredMemoryPaddingForOutput(const ANeuralNetworksCompilation *compilation, uint32_t index, uint32_t *padding)
int
Get the preferred memory end padding of an output to an execution created from a particular compilation.
ANeuralNetworksCompilation_setCaching(ANeuralNetworksCompilation *compilation, const char *cacheDir, const uint8_t *token)
int
Sets the compilation caching signature and the cache directory.
ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation, int32_t preference)
int
Sets the execution preference.
ANeuralNetworksCompilation_setPriority(ANeuralNetworksCompilation *compilation, int priority)
int
Set the execution priority.
ANeuralNetworksCompilation_setTimeout(ANeuralNetworksCompilation *compilation, uint64_t duration)
int
Set the maximum expected duration for compiling the model.
ANeuralNetworksDevice_getFeatureLevel(const ANeuralNetworksDevice *device, int64_t *featureLevel)
int
Get the NNAPI feature level of the specified NNAPI device.
ANeuralNetworksDevice_getName(const ANeuralNetworksDevice *device, const char **name)
int
Get the name of the specified device.
ANeuralNetworksDevice_getType(const ANeuralNetworksDevice *device, int32_t *type)
int
Get the type of a given device.
ANeuralNetworksDevice_getVersion(const ANeuralNetworksDevice *device, const char **version)
int
Get the version of the driver implementation of the specified device.
ANeuralNetworksDevice_wait(const ANeuralNetworksDevice *device)
int
Wait until the device is in a live state.
ANeuralNetworksEvent_createFromSyncFenceFd(int sync_fence_fd, ANeuralNetworksEvent **event)
int
Create an ANeuralNetworksEvent from a sync_fence file descriptor.
ANeuralNetworksEvent_free(ANeuralNetworksEvent *event)
void
Destroys the event.
ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent *event, int *sync_fence_fd)
int
Get sync_fence file descriptor from the event.
ANeuralNetworksEvent_wait(ANeuralNetworksEvent *event)
int
Waits until the execution completes.
ANeuralNetworksExecution_burstCompute(ANeuralNetworksExecution *execution, ANeuralNetworksBurst *burst)
int
Schedule synchronous evaluation of the execution on a burst object.
ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution)
int
Schedule synchronous evaluation of the execution.
ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation, ANeuralNetworksExecution **execution)
int
Create an ANeuralNetworksExecution to apply the given compilation.
ANeuralNetworksExecution_enableInputAndOutputPadding(ANeuralNetworksExecution *execution, bool enable)
int
Specifies whether the ANeuralNetworksExecution is able to accept padded input and output buffers and memory objects.
ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution)
void
Destroy an execution.
ANeuralNetworksExecution_getDuration(const ANeuralNetworksExecution *execution, int32_t durationCode, uint64_t *duration)
int
Get the time spent in the latest computation evaluated on the specified ANeuralNetworksExecution, in nanoseconds.
ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions)
int
Get the dimensional information of the specified output operand of the model of the latest computation evaluated on ANeuralNetworksExecution.
ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution, int32_t index, uint32_t *rank)
int
Get the rank of the specified output operand of the model of the latest computation evaluated on ANeuralNetworksExecution.
ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, const void *buffer, size_t length)
int
Associate a user buffer with an input of the model of the ANeuralNetworksExecution.
ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, const ANeuralNetworksMemory *memory, size_t offset, size_t length)
int
Associate a region of a memory object with an input of the model of the ANeuralNetworksExecution.
ANeuralNetworksExecution_setLoopTimeout(ANeuralNetworksExecution *execution, uint64_t duration)
int
Set the maximum duration of WHILE loops in the specified execution.
ANeuralNetworksExecution_setMeasureTiming(ANeuralNetworksExecution *execution, bool measure)
int
Specifies whether duration of the ANeuralNetworksExecution is to be measured.
ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, void *buffer, size_t length)
int
Associate a user buffer with an output of the model of the ANeuralNetworksExecution.
ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, const ANeuralNetworksMemory *memory, size_t offset, size_t length)
int
Associate a region of a memory object with an output of the model of the ANeuralNetworksExecution.
ANeuralNetworksExecution_setReusable(ANeuralNetworksExecution *execution, bool reusable)
int
Specifies whether the ANeuralNetworksExecution can be reused for multiple computations.
ANeuralNetworksExecution_setTimeout(ANeuralNetworksExecution *execution, uint64_t duration)
int
Set the maximum expected duration of the specified execution.
ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution, ANeuralNetworksEvent **event)
int
Schedule asynchronous evaluation of the execution.
ANeuralNetworksExecution_startComputeWithDependencies(ANeuralNetworksExecution *execution, const ANeuralNetworksEvent *const *dependencies, uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent **event)
int
Schedule asynchronous evaluation of the execution with dependencies.
ANeuralNetworksMemoryDesc_addInputRole(ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index, float frequency)
int
Specify that a memory object will be playing the role of an input to an execution created from a particular compilation.
ANeuralNetworksMemoryDesc_addOutputRole(ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index, float frequency)
int
Specify that a memory object will be playing the role of an output to an execution created from a particular compilation.
ANeuralNetworksMemoryDesc_create(ANeuralNetworksMemoryDesc **desc)
int
Create an ANeuralNetworksMemoryDesc with no properties.
ANeuralNetworksMemoryDesc_finish(ANeuralNetworksMemoryDesc *desc)
int
Indicate that we have finished modifying a memory descriptor.
ANeuralNetworksMemoryDesc_free(ANeuralNetworksMemoryDesc *desc)
void
Destroy a memory descriptor.
ANeuralNetworksMemoryDesc_setDimensions(ANeuralNetworksMemoryDesc *desc, uint32_t rank, const uint32_t *dimensions)
int
Set the dimensional information of the memory descriptor.
ANeuralNetworksMemory_copy(const ANeuralNetworksMemory *src, const ANeuralNetworksMemory *dst)
int
Copies data from one memory object to another.
ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer *ahwb, ANeuralNetworksMemory **memory)
int
Creates a shared memory object from an AHardwareBuffer handle.
ANeuralNetworksMemory_createFromDesc(const ANeuralNetworksMemoryDesc *desc, ANeuralNetworksMemory **memory)
int
Creates a memory object from a memory descriptor.
ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset, ANeuralNetworksMemory **memory)
int
Creates a shared memory object from a file descriptor.
ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory)
void
Delete a memory object.
ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model, const ANeuralNetworksOperandType *type)
int
Add an operand to a model.
ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model, ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs)
int
Add an operation to a model.
ANeuralNetworksModel_create(ANeuralNetworksModel **model)
int
Create an empty ANeuralNetworksModel.
ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
int
Indicate that we have finished modifying a model.
ANeuralNetworksModel_free(ANeuralNetworksModel *model)
void
Destroy a model.
ANeuralNetworksModel_getSupportedOperationsForDevices(const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices, bool *supportedOps)
int
Get the supported operations for a specified set of devices.
ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model, uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs)
int
Specifies which operands will be the model's inputs and outputs.
ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel *model, bool allow)
int
Specifies whether ANEURALNETWORKS_TENSOR_FLOAT32 is allowed to be calculated with range and/or precision as low as that of the IEEE 754 16-bit floating-point format.
ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksSymmPerChannelQuantParams *channelQuant)
int
Sets an operand's per channel quantization parameters.
ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index, const void *buffer, size_t length)
int
Sets an operand to a constant value.
ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset, size_t length)
int
Sets an operand to a value stored in a memory object.
ANeuralNetworksModel_setOperandValueFromModel(ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksModel *value)
int
Sets an operand to a value that is a reference to another NNAPI model.
ANeuralNetworks_getDefaultLoopTimeout()
uint64_t
Get the default timeout value for WHILE loops.
ANeuralNetworks_getDevice(uint32_t devIndex, ANeuralNetworksDevice **device)
int
Get the representation of the specified device.
ANeuralNetworks_getDeviceCount(uint32_t *numDevices)
int
Get the number of available devices.
ANeuralNetworks_getMaximumLoopTimeout()
uint64_t
Get the maximum timeout value for WHILE loops.
ANeuralNetworks_getRuntimeFeatureLevel()
int64_t
Get the NNAPI runtime feature level.

Structs

ANeuralNetworksOperandType

ANeuralNetworksOperandType describes the type of an operand.

ANeuralNetworksSymmPerChannelQuantParams

Parameters for ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL operand.

Enumerations

Anonymous Enum 53

Declared in android/NeuralNetworksTypes.h
 Anonymous Enum 53

For ANeuralNetworksModel_setOperandValue, values with a length smaller than or equal to this will be immediately copied into the model.

The size is in bytes.

Available since NNAPI feature level 1.

Properties
ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES

Anonymous Enum 54

Declared in android/NeuralNetworksTypes.h
 Anonymous Enum 54

For ANeuralNetworksCompilation_setCaching, specify the size of the cache token required from the application.

The size is in bytes.

Available since NNAPI feature level 3.

Properties
ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN

DeviceTypeCode

Declared in android/NeuralNetworksTypes.h
 DeviceTypeCode

Device types.

The type of NNAPI device.

Properties
ANEURALNETWORKS_DEVICE_ACCELERATOR

Dedicated accelerator for Machine Learning workloads.

ANEURALNETWORKS_DEVICE_CPU

The device runs NNAPI models on single or multi-core CPU.

ANEURALNETWORKS_DEVICE_GPU

The device can run NNAPI models and also accelerate graphics APIs such as OpenGL ES and Vulkan.

ANEURALNETWORKS_DEVICE_OTHER

The device does not fall into any of the other categories (CPU, GPU, or accelerator).

ANEURALNETWORKS_DEVICE_UNKNOWN

The device type cannot be provided.

DurationCode

Declared in android/NeuralNetworksTypes.h
 DurationCode

Different duration measurements.

Durations are measured in nanoseconds.

Available since NNAPI feature level 3.

Properties
ANEURALNETWORKS_DURATION_IN_DRIVER
ANEURALNETWORKS_DURATION_ON_HARDWARE
ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER
ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE

FeatureLevelCode

Declared in android/NeuralNetworksTypes.h
 FeatureLevelCode

NNAPI feature levels.

Each update of the NNAPI specification yields a new NNAPI feature level enum value. An NNAPI feature level corresponds to an NNAPI specification version that a driver and/or the NNAPI runtime can implement.

A feature level up to and including "FEATURE_LEVEL_5" maps directly to the Android API level that introduced the corresponding update of the NNAPI specification. Feature levels after Android API level 31 have no association with API level because the NNAPI specification can be updated between Android API releases. Outputs of ANeuralNetworksDevice_getFeatureLevel and ANeuralNetworks_getRuntimeFeatureLevel must be compared against these enum values instead of the Android API level.

Properties
ANEURALNETWORKS_FEATURE_LEVEL_1

NNAPI specification available in Android O-MR1, Android NNAPI feature level 1.

ANEURALNETWORKS_FEATURE_LEVEL_2

NNAPI specification available in Android P, Android NNAPI feature level 2.

ANEURALNETWORKS_FEATURE_LEVEL_3

NNAPI specification available in Android Q, Android NNAPI feature level 3.

ANEURALNETWORKS_FEATURE_LEVEL_4

NNAPI specification available in Android R, Android NNAPI feature level 4.

ANEURALNETWORKS_FEATURE_LEVEL_5

NNAPI specification available in Android S, Android NNAPI feature level 5.

After Android S, the NNAPI specification can be updated between Android API releases.

ANEURALNETWORKS_FEATURE_LEVEL_6

Android NNAPI feature level 6.

ANEURALNETWORKS_FEATURE_LEVEL_7

Android NNAPI feature level 7.

ANEURALNETWORKS_FEATURE_LEVEL_8

Android NNAPI feature level 8.

FuseCode

Declared in android/NeuralNetworksTypes.h
 FuseCode

Fused activation function types.

Available since NNAPI feature level 1.

Properties
ANEURALNETWORKS_FUSED_NONE

No fused activation function.

ANEURALNETWORKS_FUSED_RELU

Fused ReLU activation function.

ANEURALNETWORKS_FUSED_RELU1

Fused ReLU1 activation function.

ANEURALNETWORKS_FUSED_RELU6

Fused ReLU6 activation function.

OperandCode

Declared in android/NeuralNetworksTypes.h
 OperandCode

Operand types.

The type of an operand in a model.

Types prefaced with ANEURALNETWORKS_TENSOR_* must be used for tensor data (i.e., tensors with at least one dimension). Types not prefaced by ANEURALNETWORKS_TENSOR_* represent scalar values and must have no dimensions.

Although we define many types, most operators accept just a few types. Most used are ANEURALNETWORKS_TENSOR_FLOAT32, ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, and ANEURALNETWORKS_INT32.

Available since NNAPI feature level 1.

Properties
ANEURALNETWORKS_BOOL

An 8 bit boolean scalar value.

Values of this operand type are either true or false. A zero value represents false; any other value represents true.

Available since NNAPI feature level 3.

ANEURALNETWORKS_FLOAT16

An IEEE 754 16 bit floating point scalar value.

Available since NNAPI feature level 3.

ANEURALNETWORKS_FLOAT32

A 32 bit floating point scalar value.

ANEURALNETWORKS_INT32

A signed 32 bit integer scalar value.

ANEURALNETWORKS_MODEL

A reference to a model.

ANeuralNetworksModel_setOperandValueFromModel must be used to set the value for an Operand of this type.

Available since NNAPI feature level 4.

ANEURALNETWORKS_TENSOR_BOOL8

A tensor of 8 bit boolean values.

Values of this operand type are either true or false. A zero value represents false; any other value represents true.

Available since NNAPI feature level 3.

ANEURALNETWORKS_TENSOR_FLOAT16

A tensor of IEEE 754 16 bit floating point values.

Available since NNAPI feature level 3.

ANEURALNETWORKS_TENSOR_FLOAT32

A tensor of 32 bit floating point values.

ANEURALNETWORKS_TENSOR_INT32

A tensor of 32 bit integer values.

ANEURALNETWORKS_TENSOR_QUANT16_ASYMM

A tensor of 16 bit unsigned integers that represent real numbers.

Attached to this tensor are two numbers that can be used to convert the 16 bit integer to the real value and vice versa. These two numbers are:

  • scale: a 32 bit floating point value greater than zero.
  • zeroPoint: a 32 bit integer, in range [0, 65535].

The formula is: real_value = (integer_value - zeroPoint) * scale.

Available since NNAPI feature level 3.

ANEURALNETWORKS_TENSOR_QUANT16_SYMM

A tensor of 16 bit signed integers that represent real numbers.

Attached to this tensor is a number representing real value scale that is used to convert the 16 bit number to a real value in the following way: realValue = integerValue * scale.

scale is a 32 bit floating point with value greater than zero.

Available since NNAPI feature level 3.

ANEURALNETWORKS_TENSOR_QUANT8_ASYMM

A tensor of 8 bit unsigned integers that represent real numbers.

Attached to this tensor are two numbers that can be used to convert the 8 bit integer to the real value and vice versa. These two numbers are:

  • scale: a 32 bit floating point value greater than zero.
  • zeroPoint: a 32 bit integer, in range [0, 255].

The formula is: real_value = (integer_value - zeroPoint) * scale.

ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED

A tensor of 8 bit signed integers that represent real numbers.

Attached to this tensor are two numbers that can be used to convert the 8 bit integer to the real value and vice versa. These two numbers are:

  • scale: a 32 bit floating point value greater than zero.
  • zeroPoint: a 32 bit integer, in range [-128, 127].

The formula is: real_value = (integer_value - zeroPoint) * scale.

Available since NNAPI feature level 4.

ANEURALNETWORKS_TENSOR_QUANT8_SYMM

A tensor of 8 bit signed integers that represent real numbers.

Attached to this tensor is a number representing real value scale that is used to convert the 8 bit number to a real value in the following way: realValue = integerValue * scale.

scale is a 32 bit floating point with value greater than zero.

Available since NNAPI feature level 3.

ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL

A tensor of 8 bit signed integers that represent real numbers.

This tensor is associated with additional fields that can be used to convert the 8 bit signed integer to the real value and vice versa. These fields are:

  • channelDim: a 32 bit unsigned integer indicating channel dimension.
  • scales: an array of positive 32 bit floating point values. The size of the scales array must be equal to dimensions[channelDim].

ANeuralNetworksModel_setOperandSymmPerChannelQuantParams must be used to set the parameters for an Operand of this type.

The channel dimension of this tensor must not be unknown (dimensions[channelDim] != 0).

The formula is: realValue[..., C, ...] = integerValue[..., C, ...] * scales[C] where C is an index in the Channel dimension.

Available since NNAPI feature level 3.

ANEURALNETWORKS_UINT32

An unsigned 32 bit integer scalar value.

OperationCode

Declared in android/NeuralNetworksTypes.h
 OperationCode

Operation types.

The type of an operation in a model.

Available since NNAPI feature level 1.

Properties
ANEURALNETWORKS_ABS

Computes the absolute value of a tensor, element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_ADD

Adds two tensors, element-wise.

Takes two input tensors of identical OperandCode and compatible dimensions. The output is the sum of both input tensors, optionally modified by an activation function.

Two dimensions are compatible when:

  1. they are equal, or
  2. one of them is 1

The size of the output is the maximum size along each dimension of the input operands. It starts with the trailing dimensions, and works its way forward.

Example:

input1.dimension = {4, 1, 2}
input2.dimension = {5, 4, 3, 1}
output.dimension = {5, 4, 3, 2}

Since NNAPI feature level 3, generic zero-sized input tensor is supported. Zero dimension is only compatible with 0 or 1. The size of the output dimension is zero if either of the corresponding input dimensions is zero.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_ARGMAX

Returns the index of the largest element along an axis.

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: An n-D tensor specifying the input. Must be non-empty.
  • 1: An ANEURALNETWORKS_INT32 scalar specifying the axis to reduce across. Negative index is used to specify axis from the end (e.g. -1 for the last axis). Must be in the range [-n, n).

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_ARGMIN

Returns the index of the smallest element along an axis.

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: An n-D tensor specifying the input. Must be non-empty.
  • 1: An ANEURALNETWORKS_INT32 scalar specifying the axis to reduce across. Negative index is used to specify axis from the end (e.g. -1 for the last axis). Must be in the range [-n, n).

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_AVERAGE_POOL_2D

Performs a 2-D average pooling operation.

The output dimensions are functions of the filter dimensions, stride, and padding.

The values in the output tensor are computed as:

output[b, i, j, channel] =
    sum_{di, dj}(
        input[b, strides[1] * i + di, strides[2] * j + dj, channel]
    ) / sum(1)

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, in which case the data is stored in the order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Both explicit padding and implicit padding are supported.

Inputs (explicit padding):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Since NNAPI feature level 3, zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the left, in the ‘width’ dimension.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the right, in the ‘width’ dimension.
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the top, in the ‘height’ dimension.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the bottom, in the ‘height’ dimension.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘width’ dimension.
  • 6: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘height’ dimension.
  • 7: An ANEURALNETWORKS_INT32 scalar, specifying the filter width.
  • 8: An ANEURALNETWORKS_INT32 scalar, specifying the filter height.
  • 9: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.
  • 10: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Inputs (implicit padding):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Since NNAPI feature level 3, zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the implicit padding scheme, has to be one of the PaddingCode values.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘width’ dimension.
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘height’ dimension.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the filter width.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the filter height.
  • 6: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.
  • 7: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_AXIS_ALIGNED_BBOX_TRANSFORM

Transform axis-aligned bounding box proposals using bounding box deltas.

Given the positions of bounding box proposals and the corresponding bounding box deltas for each class, return the refined bounding box regions. The resulting bounding boxes are clipped against the edges of the image.

Supported tensor OperandCode:

Inputs:

  • 0: A 2-D Tensor of shape [num_rois, 4], specifying the locations of the bounding box proposals, each line with format [x1, y1, x2, y2]. For tensor of type ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, the zeroPoint must be 0 and the scale must be 0.125. Zero num_rois is supported for this tensor.
  • 1: A 2-D Tensor of shape [num_rois, num_classes * 4], specifying the bounding box delta for each region of interest and each class. The bounding box deltas are organized in the following order [dx, dy, dw, dh], where dx and dy are the relative correction factors for the center position of the bounding box with respect to the width and height, and dw and dh are the log-scale relative correction factors for the width and height. For input0 of type ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, this tensor should be of ANEURALNETWORKS_TENSOR_QUANT8_ASYMM or ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED. Zero num_rois is supported for this tensor.
  • 2: A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor, of shape [num_rois], specifying the batch index of each box. Boxes with the same batch index are grouped together. Zero num_rois is supported for this tensor.
  • 3: A 2-D Tensor of shape [batches, 2], specifying the information of each image in the batch, each line with format [image_height, image_width].

Outputs:

  • 0: A tensor of the same OperandCode as input0, with shape [num_rois, num_classes * 4], specifying the coordinates of each output bounding box for each class, with format [x1, y1, x2, y2]. For type of ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, the scale must be 0.125 and the zero point must be 0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_BATCH_MATMUL

Performs multiplication of two tensors in batches.

Multiplies all slices of two input tensors and arranges the individual results in a single output tensor of the same batch size. Each pair of slices in the same batch have identical OperandCode. Each slice can optionally be adjointed (transpose and conjugate) before multiplication.

The two input tensors and the output tensor must be 2-D or higher and have the same batch size.

Supported tensor OperandCode:

Supported tensor rank: at least 2 and up to 4

Inputs:

  • 0: A tensor with 2-D or higher shape [..., r_x, c_x].
  • 1: A tensor with 2-D or higher shape [..., r_y, c_y]. It has the same OperandCode and batch size as input0.
  • 2: An optional ANEURALNETWORKS_BOOL scalar adj_x, default to false. Set to true to adjoint the slices of input0.
  • 3: An optional ANEURALNETWORKS_BOOL scalar adj_y, default to false. Set to true to adjoint the slices of input1.

Outputs:

  • 0: A tensor with 2-D or higher shape [..., r_o, c_o], where r_o = c_x if adj_x else r_x, and c_o = r_y if adj_y else c_y.

Available since NNAPI feature level 6.

ANEURALNETWORKS_BATCH_TO_SPACE_ND

BatchToSpace for N-dimensional tensors.

This operation reshapes the batch dimension (dimension 0) into M + 1 dimensions of shape block_shape + [batch], interleaves these blocks back into the grid defined by the spatial dimensions [1, ..., M], to obtain a result with the same rank as the input.

This is the reverse of SpaceToBatch.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Inputs:

  • 0: An n-D tensor, specifying the tensor to be reshaped.
  • 1: A 1-D Tensor of ANEURALNETWORKS_TENSOR_INT32, the block sizes for each spatial dimension of the input tensor. All values must be >= 1.
  • 2: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM

A recurrent neural network layer that applies an LSTM cell to a sequence of inputs in forward and backward directions.

The op supports cross-linking via an auxiliary input. Regular cell feeds one input into the two RNN cells in the following way:

   INPUT  (INPUT_REVERSED)
     |         |
---------------------
| FW_LSTM   BW_LSTM |
---------------------
     |         |
  FW_OUT     BW_OUT

An op with cross-linking takes two inputs and feeds them into the RNN cells in the following way:

   AUX_INPUT   (AUX_INPUT_REVERSED)
       |             |
 INPUT | (INPUT_R'D.)|
   |   |       |     |
-----------------------
|  \  /        \    / |
| FW_LSTM     BW_LSTM |
-----------------------
     |           |
  FW_OUT      BW_OUT

The cross-linking mode is enabled iff auxiliary input and auxiliary weights are present. While stacking this op on top of itself, this allows to connect both forward and backward outputs from previous cell to the next cell's input.

Since NNAPI feature level 4 parallel linking mode is supported. The mode is enabled if auxiliary input is present but auxiliary weights are omitted. In this case, the cell feeds inputs into the RNN in the following way:

   INPUT (AUX_INPUT_REVERSED)
     |         |
---------------------
| FW_LSTM   BW_LSTM |
---------------------
     |         |
  FW_OUT     BW_OUT

While stacking this op on top of itself, this allows to connect both forward and backward outputs from previous cell to the next cell's corresponding inputs.

Supported tensor OperandCode:

Supported tensor rank: 3, either time-major or batch-major.

All input and output tensors must be of the same type.

Inputs:

  • 0: The input. A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major: [batch_size, max_time, input_size] where "max_time" is the number of timesteps (sequence length), "batch_size" corresponds to the batching dimension, and "input_size" is the size of the input.
  • 1: The forward input-to-input weights. Optional. A 2-D tensor of shape [fw_num_units, input_size], where “fw_num_units” corresponds to the number of forward cell units.
  • 2: The forward input-to-forget weights. A 2-D tensor of shape [fw_num_units, input_size].
  • 3: The forward input-to-cell weights. A 2-D tensor of shape [fw_num_units, input_size].
  • 4: The forward input-to-output weights. A 2-D tensor of shape [fw_num_units, input_size].
  • 5: The forward recurrent-to-input weights. Optional. A 2-D tensor of shape [fw_num_units, fw_output_size], where “fw_output_size” corresponds to either the number of cell units (i.e., fw_num_units), or the second dimension of the “fw_projection_weights”, if defined.
  • 6: The forward recurrent-to-forget weights. A 2-D tensor of shape [fw_num_units, fw_output_size].
  • 7: The forward recurrent-to-cell weights. A 2-D tensor of shape [fw_num_units, fw_output_size].
  • 8: The forward recurrent-to-output weights. A 2-D tensor of shape [fw_num_units, fw_output_size].
  • 9: The forward cell-to-input weights. Optional. A 1-D tensor of shape [fw_num_units].
  • 10: The forward cell-to-forget weights. Optional. A 1-D tensor of shape [fw_num_units].
  • 11: The forward cell-to-output weights. Optional. A 1-D tensor of shape [fw_num_units].
  • 12: The forward input gate bias. Optional. A 1-D tensor of shape [fw_num_units].
  • 13: The forward forget gate bias. A 1-D tensor of shape [fw_num_units].
  • 14: The forward cell gate bias. A 1-D tensor of shape [fw_num_units].
  • 15: The forward output gate bias. A 1-D tensor of shape [fw_num_units].
  • 16: The forward projection weights. Optional. A 2-D tensor of shape [fw_output_size, fw_num_units].
  • 17: The forward projection bias. Optional. A 1-D tensor of shape [fw_output_size].
  • 18: The backward input-to-input weights. Optional. A 2-D tensor of shape [bw_num_units, input_size], where “bw_num_units” corresponds to the number of backward cell units.
  • 19: The backward input-to-forget weights. A 2-D tensor of shape [bw_num_units, input_size].
  • 20: The backward input-to-cell weights. A 2-D tensor of shape [bw_num_units, input_size].
  • 21: The backward input-to-output weights. A 2-D tensor of shape [bw_num_units, input_size].
  • 22: The backward recurrent-to-input weights. Optional. A 2-D tensor of shape [bw_num_units, bw_output_size], where “bw_output_size” corresponds to either the number of cell units (i.e., “bw_num_units”), or the second dimension of the “bw_projection_weights”, if defined.
  • 23: The backward recurrent-to-forget weights. A 2-D tensor of shape [bw_num_units, bw_output_size].
  • 24: The backward recurrent-to-cell weights. A 2-D tensor of shape [bw_num_units, bw_output_size].
  • 25: The backward recurrent-to-output weights. A 2-D tensor of shape [bw_num_units, bw_output_size].
  • 26: The backward cell-to-input weights. Optional. A 1-D tensor of shape [bw_num_units].
  • 27: The backward cell-to-forget weights. Optional. A 1-D tensor of shape [bw_num_units].
  • 28: The backward cell-to-output weights. Optional. A 1-D tensor of shape [bw_num_units].
  • 29: The backward input gate bias. Optional. A 1-D tensor of shape [bw_num_units].
  • 30: The backward forget gate bias. A 1-D tensor of shape [bw_num_units].
  • 31: The backward cell gate bias. A 1-D tensor of shape [bw_num_units].
  • 32: The backward output gate bias. A 1-D tensor of shape [bw_num_units].
  • 33: The backward projection weights. Optional. A 2-D tensor of shape [bw_output_size, bw_num_units].
  • 34: The backward projection bias. Optional. A 1-D tensor of shape [bw_output_size].
  • 35: The forward input activation state. A 2-D tensor of shape [batch_size, fw_output_size].
  • 36: The forward input cell state. A 2-D tensor of shape [batch_size, fw_num_units].
  • 37: The backward input activation state. A 2-D tensor of shape [batch_size, bw_output_size].
  • 38: The backward input cell state. A 2-D tensor of shape [batch_size, bw_num_units].
  • 39: The auxiliary input. Optional. A 3-D tensor of shape [max_time, batch_size, aux_input_size], where “batch_size” corresponds to the batching dimension, and “aux_input_size” is the size of the auxiliary input. See the docs above for the usage modes explanation.
  • 40: The forward auxiliary input-to-input weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [fw_num_units, aux_input_size].
  • 41: The forward auxiliary input-to-forget weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [fw_num_units, aux_input_size].
  • 42: The forward auxiliary input-to-cell weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [fw_num_units, aux_input_size].
  • 43: The forward auxiliary input-to-output weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [fw_num_units, aux_input_size].
  • 44: The backward auxiliary input-to-input weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [bw_num_units, aux_input_size].
  • 45: The backward auxiliary input-to-forget weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [bw_num_units, aux_input_size].
  • 46: The backward auxiliary input-to-cell weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [bw_num_units, aux_input_size].
  • 47: The backward auxiliary input-to-output weights. Optional. See the docs above for the usage modes explanation. A 2-D tensor of shape [bw_num_units, aux_input_size].
  • 48: The activation function. A value indicating the activation function:
    • 0: None;
    • 1: Relu;
    • 3: Relu6;
    • 4: Tanh;
    • 6: Sigmoid.
  • 49: The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled. If all the input tensors have type ANEURALNETWORKS_TENSOR_FLOAT32, this scalar must be of the type ANEURALNETWORKS_FLOAT32, otherwise if all the input tensors have the type ANEURALNETWORKS_TENSOR_FLOAT16, this scalar must be of type ANEURALNETWORKS_FLOAT16.
  • 50: The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. If all the input tensors have type ANEURALNETWORKS_TENSOR_FLOAT32, this scalar must be of the type ANEURALNETWORKS_FLOAT32, otherwise if all the input tensors have the type ANEURALNETWORKS_TENSOR_FLOAT16, this scalar must be of type ANEURALNETWORKS_FLOAT16.
  • 51: merge_outputs An ANEURALNETWORKS_BOOL scalar specifying if the outputs from forward and backward cells should be merged.
  • 52: time_major An ANEURALNETWORKS_BOOL scalar specifying the shape format of input and output tensors.
  • 53: The forward input layer normalization weights. Optional. A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs to activation at input gate.
  • 54: The forward forget layer normalization weights. Optional. A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs to activation at forget gate.
  • 55: The forward cell layer normalization weights. Optional. A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs to activation at cell gate.
  • 56: The forward output layer normalization weights. Optional. A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs to activation at output gate.
  • 57: The backward input layer normalization weights. Optional. A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs to activation at input gate.
  • 58: The backward forget layer normalization weights. Optional. A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs to activation at forget gate.
  • 59: The backward cell layer normalization weights. Optional. A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs to activation at cell gate.
  • 60: The backward output layer normalization weights. Optional. A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs to activation at output gate.

Outputs:

  • 0: The forward output. A 3-D tensor of shape: If time-major and not merge_outputs: [max_time, batch_size, fw_output_size] If time-major and merge_outputs: [max_time, batch_size, fw_output_size + bw_output_size] If batch-major and not merge_outputs: [batch_size, max_time, fw_output_size] If batch-major and merge_outputs: [batch_size, max_time, fw_output_size + bw_output_size]
  • 1: The backward output. Unused if merge_outputs is true. A 3-D tensor of shape: If time-major: [max_time, batch_size, bw_output_size] If batch-major: [batch_size, max_time, bw_output_size]
  • 2: The forward activation state output. A 2-D tensor of shape [batch_size, fw_output_size] containing an activation state from the last time step in the sequence. This output is optional and can be omitted. If this output is present then outputs 3-5 must be present as well. Available since NNAPI feature level 4.
  • 3: The forward cell state output. A tensor of shape [batch_size, fw_cell_size] containing a cell state from the last time step in the sequence. This output is optional and can be omitted. If this output is present then outputs 2, 4, 5 must be present as well. Available since NNAPI feature level 4.
  • 4: The backward activation state output. A 2-D tensor of shape [batch_size, bw_output_size] containing an activation state from the last time step in the sequence. This output is optional and can be omitted. If this output is present then outputs 2, 3, 5 must be present as well. Available since NNAPI feature level 4.
  • 5: The backward cell state output. A tensor of shape [batch_size, bw_cell_size] containing a cell state from the last time step in the sequence. This output is optional and can be omitted. If this output is present then outputs 2-4 must be present as well. Available since NNAPI feature level 4.

Available since NNAPI feature level 3.

Important: As of NNAPI feature level 3, there is no way to get the output state tensors out and NNAPI does not maintain internal states. This operator does not support the usage pattern in which multiple cells are chained and state tensors are propagated.

ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_RNN

A recurrent neural network layer that applies a basic RNN cell to a sequence of inputs in forward and backward directions.

This Op unrolls the input along the sequence dimension, and implements the following operation for each element in the sequence s = 1...sequence_length:

  fw_outputs[s] = fw_state = activation(inputs[s] * fw_input_weights’ + fw_state * fw_recurrent_weights’ + fw_bias)

And for each element in the sequence t = sequence_length...1:

  bw_outputs[t] = bw_state = activation(inputs[t] * bw_input_weights’ + bw_state * bw_recurrent_weights’ + bw_bias)

Where:

  • “{fw,bw}_input_weights” is a weight matrix that multiplies the inputs;
  • “{fw,bw}_recurrent_weights” is a weight matrix that multiplies the current “state” which itself is the output from the previous time step computation;
  • “{fw,bw}_bias” is a bias vector (added to each output vector in the batch);
  • “activation” is the function passed as the “fused_activation_function” argument (if not “NONE”).

The op supports cross-linking via an auxiliary input. Regular cell feeds one input into the two RNN cells in the following way:

   INPUT  (INPUT_REVERSED)
     |         |
---------------------
| FW_RNN     BW_RNN |
---------------------
     |         |
  FW_OUT     BW_OUT

An op with cross-linking takes two inputs and feeds them into the RNN cells in the following way:

   AUX_INPUT   (AUX_INPUT_REVERSED)
       |             |
 INPUT | (INPUT_R'D.)|
   |   |       |     |
-----------------------
|  \  /        \    / |
| FW_RNN       BW_RNN |
-----------------------
     |           |
  FW_OUT      BW_OUT

The cross-linking mode is enabled iff auxiliary input and auxiliary weights are present. While stacking this op on top of itself, this allows to connect both forward and backward outputs from previous cell to the next cell's input.

Since NNAPI feature level 4 parallel linking mode is supported. The mode is enabled if auxiliary input is present but auxiliary weights are omitted. In this case, the cell feeds inputs into the RNN in the following way:

   INPUT (AUX_INPUT_REVERSED)
     |         |
---------------------
| FW_RNN     BW_RNN |
---------------------
     |         |
  FW_OUT     BW_OUT

While stacking this op on top of itself, this allows to connect both forward and backward outputs from previous cell to the next cell's corresponding inputs.

Supported tensor OperandCode:

The input tensors must all be the same type.

Inputs:

  • 0: input. A 3-D tensor. The shape is defined by the input 13 (timeMajor). If it is set to true, then the input has a shape [maxTime, batchSize, inputSize], otherwise the input has a shape [batchSize, maxTime, inputSize].
  • 1: fwWeights. A 2-D tensor of shape [fwNumUnits, inputSize].
  • 2: fwRecurrentWeights. A 2-D tensor of shape [fwNumUnits, fwNumUnits].
  • 3: fwBias. A 1-D tensor of shape [fwNumUnits].
  • 4: fwHiddenState. A 2-D tensor of shape [batchSize, fwNumUnits]. Specifies a hidden state input for the first time step of the computation.
  • 5: bwWeights. A 2-D tensor of shape [bwNumUnits, inputSize].
  • 6: bwRecurrentWeights. A 2-D tensor of shape [bwNumUnits, bwNumUnits].
  • 7: bwBias. A 1-D tensor of shape [bwNumUnits].
  • 8: bwHiddenState. A 2-D tensor of shape [batchSize, bwNumUnits]. Specifies a hidden state input for the first time step of the computation.
  • 9: auxInput. A 3-D tensor. The shape is defined by the input 13 (timeMajor). If it is set to true, then the input has a shape [maxTime, batchSize, auxInputSize], otherwise the input has a shape [batchSize, maxTime, auxInputSize]. Can be omitted. See the docs above for the usage modes explanation.
  • 10: fwAuxWeights. A 2-D tensor of shape [fwNumUnits, auxInputSize]. Can be omitted. See the docs above for the usage modes explanation.
  • 11: bwAuxWeights. A 2-D tensor of shape [bwNumUnits, auxInputSize]. Can be omitted. See the docs above for the usage modes explanation.
  • 12: fusedActivationFunction. A FuseCode value indicating the activation function. If “NONE” is specified then it results in a linear activation.
  • 13: timeMajor. An ANEURALNETWORKS_BOOL scalar specifying the shape format of input and output tensors.
  • 14: mergeOutputs. An ANEURALNETWORKS_BOOL scalar specifying if the outputs from forward and backward cells are separate (if set to false) or concatenated (if set to true).

Outputs:

  • 0: fwOutput. A 3-D tensor. The first two dimensions of the shape are defined by the input 13 (timeMajor) and the third dimension is defined by the input 14 (mergeOutputs). If timeMajor is set to true, then the first two dimensions are [maxTime, batchSize], otherwise they are set to [batchSize, maxTime]. If mergeOutputs is set to true, then the third dimension is equal to (fwNumUnits + bwNumUnits), otherwise it is set to fwNumUnits.
  • 1: bwOutput. A 3-D tensor. If the input 14 (mergeOutputs) is set to true, then this tensor is not produced. The shape is defined by the input 13 (timeMajor). If it is set to true, then the shape is set to [maxTime, batchSize, bwNumUnits], otherwise the shape is set to [batchSize, maxTime, bwNumUnits].
  • 2: The forward hidden state output. A 2-D tensor of shape [batchSize, fwNumUnits] containing a hidden state from the last time step in the sequence. This output is optional and can be omitted. If this output is present then output 3 must be present as well. Available since NNAPI feature level 4.
  • 3: The backward hidden state output. A 2-D tensor of shape [batchSize, bwNumUnits] containing a hidden state from the last time step in the sequence. This output is optional and can be omitted. If this output is present then output 2 must be present as well. Available since NNAPI feature level 4.

Available since NNAPI feature level 3.

Important: As of NNAPI feature level 3, there is no way to get the output state tensors out and NNAPI does not maintain internal states. This operator does not support the usage pattern in which multiple cells are chained and state tensors are propagated.

ANEURALNETWORKS_BOX_WITH_NMS_LIMIT

Greedily selects a subset of bounding boxes in descending order of score.

This op applies NMS algorithm to each class. In each loop of execution, the box with maximum score gets selected and removed from the pending set. The scores of the rest of boxes are lowered according to the intersection-over-union (IOU) overlapping with the previously selected boxes and a specified NMS kernel method. Any boxes with score less than a threshold are removed from the pending set.

Three NMS kernels are supported:

  • Hard: score_new = score_old * (1 if IoU < threshold else 0)
  • Linear: score_new = score_old * (1 if IoU < threshold else 1 - IoU)
  • Gaussian: score_new = score_old * exp(- IoU^2 / sigma)

Axis-aligned bounding boxes are represented by their upper-left corner coordinate (x1,y1) and lower-right corner coordinate (x2,y2). A valid bounding box should satisfy x1 <= x2 and y1 <= y2.

Supported tensor OperandCode:

Inputs:

  • 0: A 2-D Tensor of shape [num_rois, num_classes], specifying the score of each bounding box proposal. The boxes are grouped by batches in the first dimension. Zero num_rois is supported for this tensor.
  • 1: A 2-D Tensor specifying the bounding boxes of shape [num_rois, num_classes * 4], organized in the order [x1, y1, x2, y2]. The boxes are grouped by batches in the first dimension. The sequential order of the boxes corresponds with input0. For input0 of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, this tensor should be of ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, with zeroPoint of 0 and scale of 0.125. For input0 of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, this tensor should be of ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, with zeroPoint of -128 and scale of 0.125. Zero num_rois is supported for this tensor.
  • 2: A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor, of shape [num_rois], specifying the batch index of each box. Boxes with the same batch index are grouped together.
  • 3: An ANEURALNETWORKS_FLOAT32 scalar, score_threshold. Boxes with scores lower than the threshold are filtered before sending to the NMS algorithm.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the maximum number of selected bounding boxes for each image. Set to a negative value for unlimited number of output bounding boxes.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the NMS kernel method, options are 0:hard, 1:linear, 2:gaussian.
  • 6: An ANEURALNETWORKS_FLOAT32 scalar, specifying the IoU threshold in hard and linear NMS kernel. This field is ignored if gaussian kernel is selected.
  • 7: An ANEURALNETWORKS_FLOAT32 scalar, specifying the sigma in gaussian NMS kernel. This field is ignored if gaussian kernel is not selected.
  • 8: An ANEURALNETWORKS_FLOAT32 scalar, nms_score_threshold. Boxes with scores lower than the threshold are dropped during the score updating phase in soft NMS.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_CAST

Casts a tensor to a type.

This operation ignores the scale and zeroPoint of quantized tensors, e.g. it treats an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM input as a tensor of uint8 values.

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: A tensor.

Outputs:

  • 0: A tensor with the same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_CHANNEL_SHUFFLE

Shuffle the channels of the input tensor.

Given an input tensor and an integer value of num_groups, CHANNEL_SHUFFLE divides the channel dimension into num_groups groups, and reorganizes the channels by grouping channels with the same index in each group.

Along the channel dimension, the output is calculated using this formula:

output_channel[k * num_groups + g] = input_channel[g * group_size + k]

where group_size = num_channels / num_groups

The number of channels must be divisible by num_groups.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: An n-D tensor, specifying the tensor to be shuffled.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the number of groups.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the dimension channel shuffle would be performed on. Negative index is used to specify axis from the end (e.g. -1 for the last axis). Must be in the range [-n, n).

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_CONCATENATION

Concatenates the input tensors along the given dimension.

The input tensors must have identical OperandCode and the same dimensions except the dimension along the concatenation axis.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

  • 0: The output, a tensor of the same OperandCode as the input tensors. The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm]. Since NNAPI feature level 3, for an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM tensor, the scale and zeroPoint values can be different from input tensors. Before NNAPI feature level 3 they have to be the same as for the input tensors.

Available since NNAPI feature level 1.

ANEURALNETWORKS_CONV_2D

Performs a 2-D convolution operation.

The CONV_2D op sweeps a 2-D filter that can mix channels together over a batch of images, applying the filter to each window of each image of the appropriate size.

The output dimensions are functions of the filter dimensions, stride, and padding.

The values in the output tensor are computed as:

output[b, i, j, channel] =
    sum_{di, dj, k} (
        input[b, strides[1] * i + di, strides[2] * j + dj, k] *
        filter[channel, di, dj, k]
    ) + bias[channel]

Supported tensor OperandCode configurations:

Available since NNAPI feature level 3:

Available since NNAPI feature level 4:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Both explicit padding and implicit padding are supported.

Inputs (explicit padding):

Inputs (implicit padding):

Outputs:

  • 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. Before NNAPI feature level 3, for output tensor of ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, the following condition must be satisfied: output_scale > input_scale * filter_scale

Available since NNAPI feature level 1.

ANEURALNETWORKS_DEPTHWISE_CONV_2D

Performs a depthwise 2-D convolution operation.

Given an input tensor of shape [batches, height, width, depth_in] and a filter tensor of shape [1, filter_height, filter_width, depth_out] containing depth_out convolutional filters of depth 1, DEPTHWISE_CONV applies a different filter to each input channel (expanding from 1 channel to channel_multiplier channels for each), then concatenates the results together.

The output has depth_out = depth_in * channel_multiplier channels. The output dimensions are functions of the filter dimensions, stride, and padding.

The values in the output tensor are computed as:

output[b, i, j, k * channel_multiplier + q] =
    sum_{di, dj} (
        input[b, strides[1] * i + di, strides[2] * j + dj, k] *
        filter[1, di, dj, k * channel_multiplier + q]
    ) + bias[k * channel_multiplier + q]

Supported tensor OperandCode configurations:

Available since NNAPI feature level 3:

Available since NNAPI feature level 4:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Both explicit padding and implicit padding are supported.

Inputs (explicit padding):

Inputs (implicit padding):

  • 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
  • 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out], specifying the filter.
  • 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input tensor of type ANEURALNETWORKS_TENSOR_FLOAT32 or ANEURALNETWORKS_TENSOR_FLOAT16 the bias must be of the same type. For filter tensor of ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, the bias should be of ANEURALNETWORKS_TENSOR_INT32, with zeroPoint of 0 and bias_scale == input_scale * filter_scale. For filter tensor of ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL, the bias should be of ANEURALNETWORKS_TENSOR_INT32, with zeroPoint of 0 and bias_scale of 0. The actual scale of each value 'i' is equal to bias_scale[i] = input_scale * filter_scale[i].
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the implicit padding scheme, has to be one of the PaddingCode values.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘width’ dimension.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘height’ dimension.
  • 6: An ANEURALNETWORKS_INT32 scalar, specifying the depthwise multiplier.
  • 7: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.
  • 8: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.
  • 9: An optional ANEURALNETWORKS_INT32 scalar, specifying the dilation factor for width. Defaults to 1. If set to k > 1, there will be k-1 skipped cells between each filter element on width dimension. If this input is set, input 10 (dilation factor for height) must be specified as well. Available since NNAPI feature level 3.
  • 10: An optional ANEURALNETWORKS_INT32 scalar, specifying the dilation factor for height. Defaults to 1. If set to k > 1, there will be k-1 skipped cells between each filter element on height dimension. If this input is set, input 9 (dilation factor for width) must be specified as well. Available since NNAPI feature level 3.

Outputs:

  • 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. Before NNAPI feature level 3, for output tensor of ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, the following condition must be satisfied: output_scale > input_scale * filter_scale

Available since NNAPI feature level 1.

ANEURALNETWORKS_DEPTH_TO_SPACE

Rearranges data from depth into blocks of spatial data.

More specifically, this op outputs a copy of the input tensor where values from the depth dimension are moved in spatial blocks to the height and width dimensions. The value block_size indicates the input block size and how the data is moved.

Chunks of data of size block_size * block_size from depth are rearranged into non-overlapping blocks of size block_size x block_size.

The width of the output tensor is input_width * block_size, whereas the height is input_height * block_size. The depth of the input tensor must be divisible by block_size * block_size.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Inputs:

  • 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the block_size. block_size must be >=1 and block_size * block_size must be a divisor of the input depth.
  • 2: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_DEQUANTIZE

Dequantizes the input tensor.

The formula is:

output = (input - zeroPoint) * scale.

Supported input tensor OperandCode:

Supported output tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: A tensor. Since NNAPI feature level 3, this tensor may be zero-sized.

Outputs:

  • 0: A tensor with the same shape as input0.

Available since NNAPI feature level 1.

ANEURALNETWORKS_DETECTION_POSTPROCESSING

Apply postprocessing steps to bounding box detections.

Bounding box detections are generated by applying transformation on a set of predefined anchors with the bounding box deltas from bounding box regression. A final step of hard NMS is applied to limit the number of returned boxes.

Supported tensor OperandCode:

Inputs:

  • 0: A 3-D Tensor of shape [batches, num_anchors, num_classes], specifying the score of each anchor with each class. Class 0 for each [batches, num_anchors, 0] is background and will be ignored.
  • 1: A 3-D Tensor of shape [batches, num_anchors, length_box_encoding], with the first four values in length_box_encoding specifying the bounding box deltas. The box deltas are encoded in the order of [dy, dx, dh, dw], where dy and dx are the linear-scale relative correction factors for the center position of the bounding box with respect to the width and height, and dh and dw are the log-scale relative correction factors for the width and height. All the entries in length_box_encoding beyond the first four values are ignored in this operation.
  • 2: A 2-D Tensor of shape [num_anchors, 4], specifying the shape of each predefined anchor, with format [ctr_y, ctr_x, h, w], where ctr_y and ctr_x are the center position of the box, and h and w are the height and the width.
  • 3: An ANEURALNETWORKS_FLOAT32 scalar, specifying the scaling factor for dy in bounding box deltas.
  • 4: An ANEURALNETWORKS_FLOAT32 scalar, specifying the scaling factor for dx in bounding box deltas.
  • 5: An ANEURALNETWORKS_FLOAT32 scalar, specifying the scaling factor for dh in bounding box deltas.
  • 6: An ANEURALNETWORKS_FLOAT32 scalar, specifying the scaling factor for dw in bounding box deltas.
  • 7: An ANEURALNETWORKS_BOOL scalar, set to true to use the regular multi-class NMS algorithm that does NMS separately for each class, set to false for a faster algorithm that only does one single NMS using the highest class score.
  • 8: An ANEURALNETWORKS_INT32 scalar, max_num_detections, specifying the maximum number of boxes for the output. Boxes with the lowest scores are discarded to meet the limit.
  • 9: An ANEURALNETWORKS_INT32 scalar, only used when input7 is set to false, specifying the maximum number of classes per detection.
  • 10: An ANEURALNETWORKS_INT32 scalar, only used when input7 is set to true, specifying the maximum number of detections when applying NMS algorithm for each single class.
  • 11: A scalar, score_threshold. Boxes with scores lower than the threshold are filtered before sending to the NMS algorithm. The scalar must be of ANEURALNETWORKS_FLOAT16 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT16 and of ANEURALNETWORKS_FLOAT32 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT32.
  • 12: A scalar, specifying the IoU threshold for hard NMS. The scalar must be of ANEURALNETWORKS_FLOAT16 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT16 and of ANEURALNETWORKS_FLOAT32 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT32.
  • 13: An ANEURALNETWORKS_BOOL scalar, set to true to include background class in the list of label map for the output, set to false to not include the background. When the background class is included, it has label 0 and the output classes start at 1 in the label map, otherwise, the output classes start at 0.

Outputs:

  • 0: A 2-D tensor of the same OperandCode as input0, with shape [batches, max_num_detections], specifying the score of each output detection.
  • 1: A 3-D tensor of shape [batches, max_num_detections, 4], specifying the coordinates of each output bounding box, with format [y1, x1, y2, x2].
  • 2: A 2-D ANEURALNETWORKS_TENSOR_INT32 tensor, of shape [batches, max_num_detections], specifying the class label for each output detection.
  • 3: A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor, of shape [batches], specifying the number of valid output detections for each batch.

Available since NNAPI feature level 3.

ANEURALNETWORKS_DIV

Element-wise division of two tensors.

Takes two input tensors of identical OperandCode and compatible dimensions. The output is the result of dividing the first input tensor by the second, optionally modified by an activation function.

For inputs of ANEURALNETWORKS_TENSOR_INT32, performs "floor division" ("//" in Python). For example, 5 // 2 = 2 and -5 // 2 = -3.

Two dimensions are compatible when:

  1. they are equal, or
  2. one of them is 1

The size of the output is the maximum size along each dimension of the input operands. It starts with the trailing dimensions, and works its way forward.

Example: input1.dimension = {4, 1, 2} input2.dimension = {5, 4, 3, 1} output.dimension = {5, 4, 3, 2}

Since NNAPI feature level 3, generic zero-sized input tensor is supported. Zero dimension is only compatible with 0 or 1. The size of the output dimension is zero if either of the corresponding input dimensions is zero.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_ELU

Computes exponential linear activation on the input tensor element-wise.

The output is calculated using the following formula:

ELU(x) = max(0, x) + min(0, alpha * (exp(x) - 1))

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

Outputs:

  • 0: The output tensor of same shape and type as input0.

Available since NNAPI feature level 4.

ANEURALNETWORKS_EMBEDDING_LOOKUP

Looks up sub-tensors in the input tensor.

This operator takes for input a tensor of values (Values) and a one-dimensional tensor of selection indices (Lookups). The output tensor is the concatenation of sub-tensors of Values as selected by Lookups.

Think of Values as being sliced along its first dimension: The entries in Lookups select which slices are concatenated together to create the output tensor.

For example, if Values has shape of [40, 200, 300] and Lookups has shape of [3], all three values found in Lookups are expected to be between 0 and 39. The resulting tensor must have shape of [3, 200, 300].

If a value in Lookups is out of bounds, the operation must fail and an error must be reported.

Supported value tensor OperandCode:

Supported value tensor rank: from 2

Inputs:

  • 0: Lookups. A 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The values are indices into the first dimension of Values.
  • 1: Values. An n-D tensor, where n >= 2, from which sub-tensors are extracted.

Output:

Available since NNAPI feature level 1.

ANEURALNETWORKS_EQUAL

For input tensors x and y, computes x == y elementwise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

  • 0: A tensor.
  • 1: A tensor of the same OperandCode and dimensions compatible with input0.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_EXP

Computes exponential of x element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_EXPAND_DIMS

Inserts a dimension of 1 into a tensor's shape.

Given a tensor input, this operation inserts a dimension of 1 at the given dimension index of input's shape. The dimension index starts at zero; if you specify a negative dimension index, it is counted backward from the end.

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: An n-D tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar specifying the dimension index to expand. Must be in the range [-(n + 1), (n + 1)).

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_FILL

Creates a tensor filled with a scalar value.

Supported output tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

Outputs:

  • 0: The output tensor.

Available since NNAPI feature level 4.

ANEURALNETWORKS_FLOOR

Computes element-wise floor() on the input tensor.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor, of the same OperandCode and dimensions as the input tensor.

Available since NNAPI feature level 1.

ANEURALNETWORKS_FULLY_CONNECTED

Denotes a fully (densely) connected layer, which connects all elements in the input tensor with each element in the output tensor.

This layer implements the operation:

outputs = activation(inputs * weights’ + bias)

Supported tensor OperandCode:

Supported tensor rank: up to 4.

Inputs:

  • 0: A tensor of at least rank 2, specifying the input. If rank is greater than 2, then it gets flattened to a 2-D Tensor. The (flattened) 2-D Tensor is reshaped (if necessary) to [batch_size, input_size], where "input_size" corresponds to the number of inputs to the layer, matching the second dimension of weights, and "batch_size" is calculated by dividing the number of elements by "input_size". Since NNAPI feature level 3, zero batch_size is supported for this tensor.
  • 1: A 2-D tensor, specifying the weights, of shape [num_units, input_size], where "num_units" corresponds to the number of output nodes.
  • 2: A 1-D tensor, of shape [num_units], specifying the bias. For input tensor of ANEURALNETWORKS_TENSOR_FLOAT32, the bias should also be of ANEURALNETWORKS_TENSOR_FLOAT32. For input tensor of ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, the bias should be of ANEURALNETWORKS_TENSOR_INT32, with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
  • 3: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.

Outputs:

  • 0: The output tensor, of shape [batch_size, num_units]. Before NNAPI feature level 3, for output tensor of ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, the following condition must be satisfied: output_scale > input_scale * filter_scale.

Available since NNAPI feature level 1.

ANEURALNETWORKS_GATHER

Gathers values along an axis.

Produces an output tensor with shape input0.dimension[:axis] + indices.dimension + input0.dimension[axis + 1:] where:

Vector indices (output is rank(input0)).

output[a_0, ..., a_n, i, b_0, ..., b_n] = input0[a_0, ..., a_n, indices[i], b_0, ..., b_n]

Higher rank indices (output is rank(input0) + rank(indices) - 1).

output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] = input0[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n]

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: An n-D tensor from which to gather values.
  • 1: An ANEURALNETWORKS_INT32 scalar specifying the axis. Negative index is used to specify axis from the end (e.g. -1 for the last axis). Must be in the range [-n, n).
  • 2: A k-D tensor ANEURALNETWORKS_TENSOR_INT32 of indices. The values must be in the bounds of the corresponding dimensions of input0.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_GENERATE_PROPOSALS

Generate axis-aligned bounding box proposals.

Bounding box proposals are generated by applying transformation on a set of predefined anchors with the bounding box deltas from bounding box regression. A final step of hard NMS is applied to limit the number of returned boxes.

An axis-aligned bounding box is represented by its upper-left corner coordinate (x1,y1) and lower-right corner coordinate (x2,y2). A valid bounding box should satisfy x1 <= x2 and y1 <= y2.

Supported tensor OperandCode:

Inputs:

  • 0: A 4-D Tensor specifying the score of each anchor at each location. With "NHWC" data layout, the tensor shape is [batches, height, width, num_anchors]. With "NCHW" data layout, the tensor shape is [batches, num_anchors, height, width].
  • 1: A 4-D Tensor specifying the bounding box deltas. With "NHWC" data layout, the tensor shape is [batches, height, width, num_anchors * 4]. With "NCHW" data layout, the tensor shape is [batches, num_anchors * 4, height, width]. The box deltas are encoded in the order of [dx, dy, dw, dh], where dx and dy are the linear-scale relative correction factors for the center position of the bounding box with respect to the width and height, and dw and dh are the log-scale relative correction factors for the width and height. The last dimension is the channel dimension.
  • 2: A 2-D Tensor of shape [num_anchors, 4], specifying the shape of each predefined anchor, with format [x1, y1, x2, y2]. For input0 of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM or ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, this tensor should be of ANEURALNETWORKS_TENSOR_QUANT16_SYMM, with scale of 0.125.
  • 3: A 2-D Tensor of shape [batches, 2], specifying the size of each image in the batch, with format [image_height, image_width]. For input0 of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM or ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, this tensor should be of ANEURALNETWORKS_TENSOR_QUANT16_SYMM, with scale of 0.125.
  • 4: An ANEURALNETWORKS_FLOAT32 scalar, specifying the ratio from the height of original image to the height of feature map.
  • 5: An ANEURALNETWORKS_FLOAT32 scalar, specifying the ratio from the width of original image to the width of feature map.
  • 6: An ANEURALNETWORKS_INT32 scalar, specifying the maximum number of boxes before going into the hard NMS algorithm. Boxes with the lowest scores are discarded to meet the limit. Set to a non-positive value for unlimited number.
  • 7: An ANEURALNETWORKS_INT32 scalar, specifying the maximum number of boxes returning from the hard NMS algorithm. Boxes with the lowest scores are discarded to meet the limit. Set to a non-positive value for unlimited number.
  • 8: An ANEURALNETWORKS_FLOAT32 scalar, specifying the IoU threshold for hard NMS.
  • 9: An ANEURALNETWORKS_FLOAT32 scalar, min_size. Boxes with height or width lower than the absolute threshold are filtered out.
  • 10: An ANEURALNETWORKS_BOOL scalar, set to true to specify NCHW data layout for input0 and input1. Set to false for NHWC.

Outputs:

  • 0: A tensor of the same OperandCode as input0, of shape [num_output_rois], specifying the score of each output box. The boxes are grouped by batches, but the sequential order in each batch is not guaranteed. For type of ANEURALNETWORKS_TENSOR_QUANT8_ASYMM or ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, the scale and zero point must be the same as input0.
  • 1: A tensor of the same OperandCode as input3, of shape [num_output_rois, 4], specifying the coordinates of each output bounding box for each class, with format [x1, y1, x2, y2]. The sequential order of the boxes corresponds with output0. For type of ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, the scale must be 0.125 and the zero point must be 0.
  • 2: A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor, of shape [num_output_rois], specifying the batch index of each box. Boxes with the same batch index are grouped together.

Available since NNAPI feature level 3.

ANEURALNETWORKS_GREATER

For input tensors x and y, computes x > y elementwise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

  • 0: A tensor.
  • 1: A tensor of the same OperandCode and dimensions compatible with input0.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_GREATER_EQUAL

For input tensors x and y, computes x >= y elementwise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

  • 0: A tensor.
  • 1: A tensor of the same OperandCode and dimensions compatible with input0.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_GROUPED_CONV_2D

Performs a grouped 2-D convolution operation.

Given an input tensor of shape [batches, height, width, depth_in] and a filter tensor of shape [depth_out, filter_height, filter_width, depth_group] containing depth_out convolutional filters of depth depth_group, GROUPED_CONV applies a group of different filters to each input channel group, then concatenates the results together.

Specifically, the input channels are divided into num_groups groups, each with depth depth_group, i.e. depth_in = num_groups * depth_group. The convolutional filters are also divided into num_groups groups, i.e. depth_out is divisible by num_groups. GROUPED_CONV applies each group of filters to the corresponding input channel group, and the results are concatenated together.

The output dimensions are functions of the filter dimensions, stride, and padding.

The values in the output tensor are computed as:

output[b, i, j, g * channel_multiplier + q] =
    sum_{di, dj, dk} (
        input[b, strides[1] * i + di, strides[2] * j + dj,
              g * depth_group + dk] *
        filter[g * channel_multiplier + q, di, dj, dk]
    ) + bias[g * channel_multiplier + q]

where channel_multiplier = depth_out / num_groups

Supported tensor OperandCode configurations:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width].

Both explicit padding and implicit padding are supported.

Inputs (explicit padding):

Inputs (implicit padding):

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_HARD_SWISH

Computes hard-swish activation on the input tensor element-wise.

Hard swish activation is introduced in https://arxiv.org/pdf/1905.02244.pdf

The output is calculated using the following formula:

h-swish(x) = x * max(0, min(6, (x + 3))) / 6

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor, specifying the input. May be zero-sized.

Outputs:

  • 0: The output tensor of same shape and type as input0. Scale and zero point of this tensor may be different from the input tensor's parameters.

Available since NNAPI feature level 4.

ANEURALNETWORKS_HASHTABLE_LOOKUP

Looks up sub-tensors in the input tensor using a key-value map.

This operator takes for input a tensor of values (Values), a one-dimensional tensor of selection values (Lookups) and a one-dimensional tensor (Keys) that maps these values to Values indexes. The output tensor is the concatenation of sub-tensors of Values as selected by Lookups via Keys.

Think of Values as being sliced along its outer-most dimension. The output is a concatenation of selected slices, with one slice for each entry of Lookups. The slice selected is the one at the same index as the Keys entry that matches the value in Lookups.

For a hit, the corresponding sub-tensor of Values is included in the Output tensor. For a miss, the corresponding sub-tensor in Output must have zero values.

For example, if Values has shape of [40, 200, 300], Keys should have a shape of [40]. If Lookups tensor has shape of [3], three slices are being concatenated, so the resulting tensor must have the shape of [3, 200, 300]. If the first entry in Lookups has the value 123456, that value must be located in Keys tensor. If the sixth entry of Keys contains 123456, the sixth slice of Values must be selected. If no entry in Keys has 123456, a slice of zeroes must be concatenated.

Supported value tensor OperandCode:

Supported value tensor rank: from 2

Inputs:

  • 0: Lookups. A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor with shape [ k ].
  • 1: Keys. A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor with shape [ n ]; Keys and Values pair represent a map, i.e., the ith element in Keys (Keys[i]) is the key to select the ith sub-tensor in Values (Values[i]), where 0 <= i <= n-1. Keys tensor MUST be sorted in ascending order.
  • 2: Values. A tensor with shape of [ n, … ]; i.e., the first dimension must be n.

Outputs:

  • 0: Output. A tensor with shape [ k …]. For an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM tensor, the scale and zeroPoint must be the same as input2.
  • 1: Hits. A boolean tensor with shape [ k ] indicates whether the lookup hits (True) or not (False). Stored as ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with offset 0 and scale 1.0f. A non-zero byte represents True, a hit. A zero indicates otherwise.

Available since NNAPI feature level 1.

ANEURALNETWORKS_HEATMAP_MAX_KEYPOINT

Localize the maximum keypoints from heatmaps.

This operation approximates the accurate maximum keypoint scores and indices after bicubic upscaling by using Taylor expansion up to the quadratic term.

The bounding box is represented by its upper-left corner coordinate (x1,y1) and lower-right corner coordinate (x2,y2) in the original image. A valid bounding box should satisfy x1 <= x2 and y1 <= y2.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width].

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_IF

Executes one of the two referenced models as determined by a boolean value.

The inputs and outputs of the two referenced models must agree with the signature of this operation. That is, if the operation has (3 + n) inputs and m outputs, both models must have n inputs and m outputs with the same types, ranks (if specified), dimensions (if specified), scales, zeroPoints, and other operand parameters as the corresponding operation inputs and outputs.

Inputs:

  • 0: A value of type ANEURALNETWORKS_TENSOR_BOOL8 and shape [1] that determines which of the two referenced models to execute. The operand must have fully specified dimensions.
  • 1: An ANEURALNETWORKS_MODEL reference to the model to be executed if the condition is true.
  • 2: An ANEURALNETWORKS_MODEL reference to the model to be executed if the condition is false.
  • 3 ~ (n + 2): Inputs to be passed to the model selected for execution.

Outputs:

  • 0 ~ (m - 1): Outputs produced by the selected model.

Available since NNAPI feature level 4.

ANEURALNETWORKS_INSTANCE_NORMALIZATION

Applies instance normalization to the input tensor.

The values in the output tensor are computed as:

output[b, h, w, c] =
    (input[b, h, w, c] - mean[b, c]) * gamma /
    sqrt(var[b, c] + epsilon) + beta

Where the mean and variance are computed across the spatial dimensions:

mean[b, c] =
    sum_{h, w}(input[b, h, w, c]) / sum(1)

var[b, c] =
    sum_{h, w}(pow(input[b, h, w, c] - mean[b, c], 2)) / sum(1)

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width].

Inputs:

Outputs:

  • 0: A tensor of the same OperandCode and same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_L2_NORMALIZATION

Applies L2 normalization along the axis dimension.

The values in the output tensor are computed as:

output[batch, row, col, channel] =
    input[batch, row, col, channel] /
    sqrt(sum_{c} pow(input[batch, row, col, c], 2))

By default the axis dimension is the last dimension of the input tensor.

Supported tensor OperandCode:

Supported tensor rank: up to 4. Tensors with rank less than 4 are only supported since NNAPI feature level 3.

Inputs:

  • 0: An n-D tensor, specifying the tensor to be normalized.
  • 1: An optional ANEURALNETWORKS_INT32 scalar, default to -1, specifying the dimension normalization would be performed on. Negative index is used to specify axis from the end (e.g. -1 for the last axis). Must be in the range [-n, n). Available since NNAPI feature level 3.

Outputs:

  • 0: A tensor of the same OperandCode and same shape as input0. For ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, the scale must be 1.f / 128 and the zeroPoint must be 128. For ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, the scale must be 1.f / 128 and the zeroPoint must be 0. NOTE: Before NNAPI feature level 4, if the elements along an axis are all zeros, the result is undefined. Since NNAPI feature level 4, if the elements along an axis are all zeros, the result is logical zero.

Available since NNAPI feature level 1.

ANEURALNETWORKS_L2_POOL_2D

Performs a 2-D L2 pooling operation.

The output dimensions are functions of the filter dimensions, stride, and padding.

The values in the output tensor are computed as:

output[b, i, j, c] =
    sqrt(sum_{di, dj} pow(input[b, strides[1] * i + di, strides[2] * j + dj, c], 2) /
         sum(1))

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Both explicit padding and implicit padding are supported.

Inputs (explicit padding):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Since NNAPI feature level 3, zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the left, in the ‘width’ dimension.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the right, in the ‘width’ dimension.
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the top, in the ‘height’ dimension.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the bottom, in the ‘height’ dimension.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘width’ dimension.
  • 6: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘height’ dimension.
  • 7: An ANEURALNETWORKS_INT32 scalar, specifying the filter width.
  • 8: An ANEURALNETWORKS_INT32 scalar, specifying the filter height.
  • 9: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.
  • 10: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Inputs (implicit padding):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Since NNAPI feature level 3, zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the implicit padding scheme, has to be one of the PaddingCode values.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘width’ dimension.
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘height’ dimension.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the filter width.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the filter height.
  • 6: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.
  • 7: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Outputs:

  • 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].

Available since NNAPI feature level 1.

ANEURALNETWORKS_LESS

For input tensors x and y, computes x < y elementwise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

  • 0: A tensor.
  • 1: A tensor of the same OperandCode and dimensions compatible with input0.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_LESS_EQUAL

For input tensors x and y, computes x <= y elementwise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

  • 0: A tensor.
  • 1: A tensor of the same OperandCode and dimensions compatible with input0.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION

Applies Local Response Normalization along the depth dimension.

The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the last dimension), and each vector is normalized independently. Within a given vector, each component is divided by the weighted, squared sum of inputs within depth_radius.

The output is calculated using this formula:

sqr_sum[a, b, c, d] = sum(
    pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2))
output = input / pow((bias + alpha * sqr_sum), beta)

For an input tensor with rank less than 4, the operation independently normalizes each 1-D slice along the specified dimension.

Supported tensor OperandCode:

Supported tensor rank: up to 4. Tensors with rank less than 4 are only supported since NNAPI feature level 3.

Inputs:

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 1.

ANEURALNETWORKS_LOG

Computes natural logarithm of x element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_LOGICAL_AND

Returns the truth value of x AND y element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_LOGICAL_NOT

Computes the truth value of NOT x element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_LOGICAL_OR

Returns the truth value of x OR y element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_LOGISTIC

Computes sigmoid activation on the input tensor element-wise.

The output is calculated using this formula:

output = 1 / (1 + exp(-input))

Supported tensor OperandCode:

Supported tensor rank: up to 4.

Inputs:

  • 0: A tensor, specifying the input. Since NNAPI feature level 3, this tensor may be zero-sized.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_LOG_SOFTMAX

Computes the log softmax activations given logits.

The output is calculated using this formula:

output = logits * beta - log(reduce_sum(exp(logits * beta), axis))

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

Outputs:

  • 0: The output tensor of the same OperandCode and shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_LSH_PROJECTION

Projects an input to a bit vector via locality-sensitive hashing.

Supported input tensor OperandCode:

Supported input tensor rank: from 1

Inputs:

  • 0: Hash functions. Dim.size == 2, DataType: Float. Tensor[0].Dim[0]: Number of hash functions. Tensor[0].Dim[1]: Number of projected output bits generated by each hash function. If the projection type is Sparse: Tensor[0].Dim[1] + ceil(log2(Tensor[0].Dim[0])) <= 32
  • 1: Input. Dim.size >= 1, no restriction on DataType.
  • 2: Weight. Optional. Dim.size == 1, DataType: Float. If not set, each input element is considered to have the same weight of 1.0. Tensor[1].Dim[0] == Tensor[2].Dim[0]
  • 3: Type: Sparse: Value LSHProjectionType_SPARSE(=3) (since NNAPI feature level 3). Computed bit vector is considered to be sparse. Each output element is an int32 made up of multiple bits computed from hash functions. NOTE: To avoid collisions across hash functions, an offset value of k * (1 << Tensor[0].Dim[1]) will be added to each signature, where k is the index of the hash function. Value LSHProjectionType_SPARSE_DEPRECATED(=1). Legacy behavior that does not include the offset value. Dense: Value LSHProjectionType_DENSE(=2). Computed bit vector is considered to be dense. Each output element represents a bit and can take the value of either 0 or 1.

Outputs:

  • 0: If the projection type is Sparse: Output.Dim == { Tensor[0].Dim[0] } A tensor of int32 that represents hash signatures. If the projection type is Dense: Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] } A flattened tensor that represents projected bit vectors.

Available since NNAPI feature level 1. The offset value for sparse projections was added in NNAPI feature level 3.

ANEURALNETWORKS_LSTM

Performs a single time step in a Long Short-Term Memory (LSTM) layer.

The LSTM operation is described by the following equations.

\begin{eqnarray*}
i_t &=& \sigma(W_{xi}x_t+W_{hi}h_{t-1}+W_{ci}C_{t-1}+b_i) \\
f_t &=& \sigma(W_{xf}x_t+W_{hf}h_{t-1}+W_{cf}C_{t-1}+b_f) \\
C_t &=& clip(f_t \odot C_{t-1} + i_t \odot g(W_{xc}x_t+W_{hc}h_{t-1}+b_c),\ t_{cell}) \\
o_t &=& \sigma(W_{xo}x_t+W_{ho}h_{t-1}+W_{co}C_t+b_o) \\
h_t &=& \begin{cases} clip(W_{proj}(o_t \odot g(C_t))+b_{proj},\ t_{proj}) & \text{if there is a projection;} \\ o_t \odot g(C_t) & \text{otherwise.} \end{cases}
\end{eqnarray*}

Where:

  • $x_t$ is the input,
  • $i_t$ is the input gate,
  • $f_t$ is the forget gate,
  • $C_t$ is the cell state,
  • $o_t$ is the output,
  • $h_t$ is the output state,
  • $\sigma$ is the logistic sigmoid function,
  • $g$ is the cell input and cell output activation function, usually $tanh$,
  • $W_{xi}$ is the input-to-input weight matrix,
  • $W_{hi}$ is the recurrent-to-input weight matrix,
  • $W_{ci}$ is the cell-to-input weight matrix,
  • $b_i$ is the input gate bias,
  • $W_{xf}$ is the input-to-forget weight matrix,
  • $W_{hf}$ is the recurrent-to-forget weight matrix,
  • $W_{cf}$ is the cell-to-forget weight matrix,
  • $b_f$ is the forget gate bias,
  • $W_{xc}$ is the input-to-cell weight matrix,
  • $W_{hc}$ is the recurrent-to-cell weight matrix,
  • $b_c$ is the cell bias,
  • $W_{xo}$ is the input-to-output weight matrix,
  • $W_{ho}$ is the recurrent-to-output weight matrix,
  • $W_{co}$ is the cell-to-output weight matrix,
  • $b_o$ is the output gate bias,
  • $W_{proj}$ is the projection weight matrix,
  • $b_{proj}$ is the projection bias,
  • $t_{cell}$ is the threshold for clipping the cell state, and
  • $t_{proj}$ is the threshold for clipping the projected output.
  • $\odot$ is the Hadamard product that takes two matrices and produces another matrix, each element of which is the product of the corresponding elements of the input matrices.

Since NNAPI feature level 3, LSTM supports layer normalization. In case layer normalization is used, the inputs to internal activation functions (sigmoid and $g$) are normalized, rescaled and recentered following an approach from section 3.1 from https://arxiv.org/pdf/1607.06450.pdf

The operation has the following independently optional inputs:

  • The cell-to-input weights ( $W_{ci}$), cell-to-forget weights ( $W_{cf}$) and cell-to-output weights ( $W_{co}$) either all have values or none of them have values (i.e., all set to null). If they have values, the peephole optimization is used.
  • The input-to-input weights ( $W_{xi}$), recurrent-to-input weights ( $W_{hi}$) and input gate bias ( $b_i$) either all have values, or none of them have values. If they have no values, coupling of input and forget gates (CIFG) is used, in which case the input gate ( $i_t$) is calculated using the following equation instead: \begin{eqnarray*} i_t = 1 - f_t \end{eqnarray*} In case peephole optimization is used and CIFG is not used, the cell-to-input ( $W_{ci}$) weights must be present. Otherwise, the cell-to-input weights must have no value.
  • The projection weights ( $W_{proj}$) are required only for the recurrent projection layer, and should otherwise have no value.
  • The projection bias ( $b_{proj}$) may (but is not required to) have a value if the recurrent projection layer exists, and should otherwise have no value.
  • (NNAPI feature level 3 or later) The four layer normalization weights either all have values or none of them have values. Additionally, if CIFG is used, input layer normalization weights tensor is omitted and the other layer normalization weights either all have values or none of them have values. Layer normalization is used when the values of all the layer normalization weights are present.

References:

The default non-peephole non-CIFG implementation is based on: http://www.bioinf.jku.at/publications/older/2604.pdf S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.

The peephole implementation and projection layer is based on: https://research.google.com/pubs/archive/43905.pdf Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory recurrent neural network architectures for large scale acoustic modeling." INTERSPEECH, 2014. (However, the concept of peephole optimization was introduced in work prior to this paper.)

The coupling of input and forget gate (CIFG) is based on: http://arxiv.org/pdf/1503.04069.pdf Greff et al. "LSTM: A Search Space Odyssey"

The layer normalization is based on: https://arxiv.org/pdf/1607.06450.pdf Jimmy Ba et al. "Layer Normalization"

Supported tensor OperandCode:

All input and output tensors must be of the same type.

Inputs:

  • 0: The input ( $x_t$). A 2-D tensor of shape [batch_size, input_size], where “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
  • 1: The input-to-input weights ( $W_{xi}$). Optional. A 2-D tensor of shape [num_units, input_size], where “num_units” corresponds to the number of cell units.
  • 2: The input-to-forget weights ( $W_{xf}$). A 2-D tensor of shape [num_units, input_size].
  • 3: The input-to-cell weights ( $W_{xc}$). A 2-D tensor of shape [num_units, input_size].
  • 4: The input-to-output weights ( $W_{xo}$). A 2-D tensor of shape [num_units, input_size].
  • 5: The recurrent-to-input weights ( $W_{hi}$). Optional. A 2-D tensor of shape [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., “num_units”), or the second dimension of the “projection_weights”, if defined.
  • 6: The recurrent-to-forget weights ( $W_{hf}$). A 2-D tensor of shape [num_units, output_size].
  • 7: The recurrent-to-cell weights ( $W_{hc}$). A 2-D tensor of shape [num_units, output_size].
  • 8: The recurrent-to-output weights ( $W_{ho}$). A 2-D tensor of shape [num_units, output_size].
  • 9: The cell-to-input weights ( $W_{ci}$). Optional. A 1-D tensor of shape [num_units].
  • 10: The cell-to-forget weights ( $W_{cf}$). Optional. A 1-D tensor of shape [num_units].
  • 11: The cell-to-output weights ( $W_{co}$). Optional. A 1-D tensor of shape [num_units].
  • 12: The input gate bias ( $b_i$). Optional. A 1-D tensor of shape [num_units].
  • 13: The forget gate bias ( $b_f$). A 1-D tensor of shape [num_units].
  • 14: The cell bias ( $b_c$). A 1-D tensor of shape [num_units].
  • 15: The output gate bias ( $b_o$). A 1-D tensor of shape [num_units].
  • 16: The projection weights ( $W_{proj}$). Optional. A 2-D tensor of shape [output_size, num_units].
  • 17: The projection bias ( $b_{proj}$). Optional. A 1-D tensor of shape [output_size].
  • 18: The output state (in) ( $h_{t-1}$). A 2-D tensor of shape [batch_size, output_size].
  • 19: The cell state (in) ( $C_{t-1}$). A 2-D tensor of shape [batch_size, num_units].
  • 20: The activation function ( $g$). A value indicating the activation function:
    • 0: None;
    • 1: Relu;
    • 3: Relu6;
    • 4: Tanh;
    • 6: Sigmoid.
  • 21: The clipping threshold ( $t_{cell}$) for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled. Until NNAPI feature level 3 this scalar must be of type ANEURALNETWORKS_FLOAT32. Since NNAPI feature level 3, if all the input tensors have type ANEURALNETWORKS_TENSOR_FLOAT32, this scalar must be of the type ANEURALNETWORKS_FLOAT32, otherwise if all the input tensors have the type ANEURALNETWORKS_TENSOR_FLOAT16, this scalar must be of type ANEURALNETWORKS_FLOAT16.
  • 22: The clipping threshold ( $t_{proj}$) for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. Until NNAPI feature level 3 this scalar must be of type ANEURALNETWORKS_FLOAT32. Since NNAPI feature level 3, if all the input tensors have type ANEURALNETWORKS_TENSOR_FLOAT32, this scalar must be of the type ANEURALNETWORKS_FLOAT32, otherwise if all the input tensors have the type ANEURALNETWORKS_TENSOR_FLOAT16, this scalar must be of type ANEURALNETWORKS_FLOAT16.

Since NNAPI feature level 3 there are additional inputs to this op:

  • 23: The input layer normalization weights. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at input gate.
  • 24: The forget layer normalization weights. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at forget gate.
  • 25: The cell layer normalization weights. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at cell gate.
  • 26: The output layer normalization weights. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at output gate.

Outputs:

  • 0: The scratch buffer. A 2-D tensor of shape [batch_size, num_units * 3] with CIFG, or [batch_size, num_units * 4] without CIFG.
  • 1: The output state (out) ( $h_t$). A 2-D tensor of shape [batch_size, output_size].
  • 2: The cell state (out) ( $C_t$). A 2-D tensor of shape [batch_size, num_units].
  • 3: The output ( $o_t$). A 2-D tensor of shape [batch_size, output_size]. This is effectively the same as the current “output state (out)” value.

Available since NNAPI feature level 1.

ANEURALNETWORKS_MAXIMUM

Returns the element-wise maximum of two tensors.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_MAX_POOL_2D

Performs a 2-D max pooling operation.

The output dimensions are functions of the filter dimensions, stride, and padding.

The values in the output tensor are computed as:

output[b, i, j, channel] =
    max_{di, dj} (
        input[b, strides[1] * i + di, strides[2] * j + dj, channel]
    )

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Both explicit padding and implicit padding are supported.

Inputs (explicit padding):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Since NNAPI feature level 3, zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the left, in the ‘width’ dimension.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the right, in the ‘width’ dimension.
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the top, in the ‘height’ dimension.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the padding on the bottom, in the ‘height’ dimension.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘width’ dimension.
  • 6: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘height’ dimension.
  • 7: An ANEURALNETWORKS_INT32 scalar, specifying the filter width.
  • 8: An ANEURALNETWORKS_INT32 scalar, specifying the filter height.
  • 9: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.
  • 10: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Inputs (implicit padding):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Since NNAPI feature level 3, zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the implicit padding scheme, has to be one of the PaddingCode values.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘width’ dimension.
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the stride when walking through input in the ‘height’ dimension.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the filter width.
  • 5: An ANEURALNETWORKS_INT32 scalar, specifying the filter height.
  • 6: An ANEURALNETWORKS_INT32 scalar, and has to be one of the FuseCode values. Specifies the activation to invoke on the result.
  • 7: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_MEAN

Computes the mean of elements across dimensions of a tensor.

Reduces the input tensor along the given dimensions to reduce. Unless keep_dims is true, the rank of the tensor is reduced by 1 for each entry in axis. If keep_dims is true, the reduced dimensions are retained with length 1.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: A tensor, specifying the input.
  • 1: A 1-D Tensor of ANEURALNETWORKS_TENSOR_INT32. The dimensions to reduce. Must be in the range [-rank(input_tensor), rank(input_tensor)). NOTE: When the operation was introduced, the documentation incorrectly stated that if dimensions were empty, the operation would reduce across all dimensions. This behavior was never implemented.
  • 2: An ANEURALNETWORKS_INT32 scalar, keep_dims. If positive, retains reduced dimensions with length 1.

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_MINIMUM

Returns the element-wise minimum of two tensors.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_MIRROR_PAD

Pads a tensor with mirrored values.

This operator specifies one of two padding modes: REFLECT or SYMMETRIC. In the case of REFLECT mode, the mirroring excludes the border element on the padding side. In the case of SYMMETRIC mode, the mirroring includes the border element on the padding side.

For example, if the input is the 1-D tensor [1, 2, 3] and the padding is [0, 2] (i.e., pad no elements before the first (and only) dimension, and two elements after the first (and only) dimension), then:

  • REFLECT mode produces the output [1, 2, 3, 2, 1]
  • SYMMETRIC mode produces the output [1, 2, 3, 3, 2]

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: An n-D tensor, specifying the tensor to be padded.
  • 1: A 2-D tensor of ANEURALNETWORKS_TENSOR_INT32, the paddings for each spatial dimension of the input tensor. The shape of the tensor must be {rank(input0), 2}. padding[i, 0] specifies the number of elements to be padded in the front of dimension i. padding[i, 1] specifies the number of elements to be padded after the end of dimension i. Each padding value must be nonnegative. In the case of REFLECT mode, each padding value must be less than the corresponding dimension. In the case of SYMMETRIC mode, each padding value must be less than or equal to the corresponding dimension.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the mode. Options are 0:REFLECT and 1:SYMMETRIC.

Outputs:

  • 0: A tensor of the same OperandCode as input0. The output tensor has the same rank as input0, and each dimension of the output tensor has the same size as the corresponding dimension of the input tensor plus the size of the padding: output0.dimension[i] = padding[i, 0] + input0.dimension[i] + padding[i, 1]. For an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED tensor, the scale and zeroPoint must be the same as input0.

Available since NNAPI feature level 7.

ANEURALNETWORKS_MUL

Multiplies two tensors, element-wise.

Takes two input tensors of identical OperandCode and compatible dimensions. The output is the product of both input tensors, optionally modified by an activation function.

Two dimensions are compatible when:

  1. they are equal, or
  2. one of them is 1

The size of the resulting output is the maximum size along each dimension of the input operands. It starts with the trailing dimensions, and works its way forward.

Since NNAPI feature level 3, generic zero-sized input tensor is supported. Zero dimension is only compatible with 0 or 1. The size of the output dimension is zero if either of corresponding input dimension is zero.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_NEG

Computes numerical negative value element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_NOT_EQUAL

For input tensors x and y, computes x != y elementwise.

Supported tensor OperandCode:

Supported tensor rank: from 1

This operation supports broadcasting.

Inputs:

  • 0: A tensor.
  • 1: A tensor of the same OperandCode and dimensions compatible with input0.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_PACK

Packs N input tensors (N >= 1) of rank R into one output tensor of rank R+1.

The tensors are packed along a given axis.

The input tensors must have identical OperandCode and dimensions.

For example, suppose there are N input tensors of shape (A, B, C). If axis is 0, the output tensor will have shape (N, A, B, C). If axis is 1, the output tensor will have shape (A, N, B, C).

All dimensions through the axis dimension determine the output tile count; the remaining dimensions determine the tile shape.

Return to the example of N input tensors of shape (A, B, C). If axis is 0, there are N tiles in the output, each of shape (A, B, C). If axis is 1, there are A*N tiles in the output, each of shape (B, C).

The coordinates of a tile within the output tensor are (t[0],...,t[axis]). The coordinates of a tile within an input tensor are (t[0],...,t[axis-1]). (If axis is 0, an input tensor consists of a single tile.) If we index input tensors starting with 0 (rather than by operand number), then output_tile[t[0],...,t[axis]] = input_tile[t[axis]][t[0],...,t[axis-1]]. That is, all output tile coordinates except for the axis coordinate select the corresponding location within some input tensor; and the axis coordinate selects the input tensor.

Supported tensor OperandCode:

Supported input tensor rank: from 1

Inputs:

Outputs:

  • 0: The packed tensor.

Available since NNAPI feature level 6.

ANEURALNETWORKS_PAD

Pads a tensor.

This operation pads a tensor according to the specified paddings.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: An n-D tensor, specifying the tensor to be padded.
  • 1: A 2-D Tensor of ANEURALNETWORKS_TENSOR_INT32, the paddings for each spatial dimension of the input tensor. The shape of the tensor must be {rank(input0), 2}. padding[i, 0] specifies the number of elements to be padded in the front of dimension i. padding[i, 1] specifies the number of elements to be padded after the end of dimension i.

Outputs:

  • 0: A tensor of the same OperandCode as input0. The output tensor has the same rank as input0, and each dimension of the output tensor has the same size as the corresponding dimension of the input tensor plus the size of the padding: output0.dimension[i] = padding[i, 0] + input0.dimension[i] + padding[i, 1]. For an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED tensor, the scale and zeroPoint must be the same as input0. NOTE: Before NNAPI feature level 3, the pad value for ANEURALNETWORKS_TENSOR_QUANT8_ASYMM is undefined. Since NNAPI feature level 3, the pad value is always the logical zero.

Available since NNAPI feature level 2.

ANEURALNETWORKS_PAD_V2

Pads a tensor with the given constant value according to the specified paddings.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

  • 0: A tensor of the same OperandCode as input0. The output tensor has the same rank as input0, and each dimension of the output tensor has the same size as the corresponding dimension of the input tensor plus the size of the padding: output0.dimension[i] = padding[i, 0] + input0.dimension[i] + padding[i, 1]. For an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED tensor, the scale and zeroPoint must be the same as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_POW

Computes the power of one value to another.

Given a tensor base and a tensor exponent, this operation computes base^exponent elementwise.

This operation supports broadcasting. The size of the output is the maximum size along each dimension of the input operands. It starts with the trailing dimensions, and works its way forward.

For example: base.dimension = {4, 1, 2} exponent.dimension = {5, 4, 3, 1} output.dimension = {5, 4, 3, 2}

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: A tensor specifying the base.
  • 1: A tensor specifying the exponent.

Outputs:

  • 0: An output tensor.

Available since NNAPI feature level 3.

ANEURALNETWORKS_PRELU

Parametric Rectified Linear Unit.

It follows: f(x) = alpha * x for x < 0, f(x) = x for x >= 0, where alpha is a learned array with the same OperandCode and compatible dimensions as input x.

Two dimensions are compatible when:

  1. they are equal, or
  2. one of them is 1

The size of the output is the maximum size along each dimension of the input operands. It starts with the trailing dimensions, and works its way forward.

Example: input.dimension = {4, 1, 2} alpha.dimension = {5, 4, 3, 1} output.dimension = {5, 4, 3, 2}

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: A tensor, specifying the input.
  • 1: A tensor of the same OperandCode, and compatible dimensions as input0, specifying the alpha.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_QUANTIZE

Quantizes the input tensor.

The formula for ANEURALNETWORKS_TENSOR_QUANT8_ASYMM output tensor is:

output = max(0, min(255, round(input / scale) + zeroPoint))

The formula for ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED output tensor is:

output = max(-128, min(127, round(input / scale) + zeroPoint))

Supported input tensor OperandCode:

Supported output tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: A tensor, may be zero-sized.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_QUANTIZED_16BIT_LSTM

A version of quantized LSTM, using 16 bit quantization for internal state.

There is no projection layer, so cell state size is equal to the output size.

Inputs:

  • 0: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBatches, inputSize] specifying the input to the LSTM cell. Tensor is quantized with a fixed quantization range of -1, 127/128.
  • 1: The input-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, inputSize] specifying input-to-input part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 2: The input-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, inputSize] specifying input-to-forget part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 3: The input-to-cell weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, inputSize] specifying input-to-cell part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 4: The input-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, inputSize] specifying input-to-output part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 5: The recurrent-to-input weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, outputSize] specifying recurrent-to-input part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 6: The recurrent-to-forget weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, outputSize] specifying recurrent-to-forget part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 7: The recurrent-to-cell weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, outputSize] specifying recurrent-to-cell part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 8: The recurrent-to-output weights. A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [outputSize, outputSize] specifying recurrent-to-output part of weights for fully-connected layer inside the LSTM cell. Quantization zero point and scale must be the same across all the weights.
  • 9: The input gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product of input and weights scales and zeroPoint equal to 0.
  • 10: The forget gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product of input and weights scales and zeroPoint equal to 0.
  • 11: The cell bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product of input and weights scales and zeroPoint equal to 0.
  • 12: The output gate bias. A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [outputSize] specifying the bias for the fully-connected layer inside the LSTM cell. Bias is quantized with scale being a product of input and weights scales and zeroPoint equal to 0.
  • 13: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT16_SYMM and shape [numBatches, outputSize] specifying the cell state from the previous time step of the LSTM cell. It is quantized using a quantization range of -2^4, 2^4 * 32767/32768.
  • 14: A 2-D tensor of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM and shape [numBatches, outputSize] specifying the output of the LSTM cell from previous time-step. Tensor is quantized with a fixed quantization range of -1, 127/128.

Outputs:

ANEURALNETWORKS_QUANTIZED_LSTM

Quantized version of ANEURALNETWORKS_LSTM.

The input and the output use asymmetric quantized types, while the rest use symmetric ones.

Inputs:

Outputs:

Available since NNAPI feature level 4.

ANEURALNETWORKS_RANDOM_MULTINOMIAL

Draws samples from a multinomial distribution.

Supported tensor OperandCode:

Inputs:

  • 0: A 2-D tensor with shape [batches, classes], specifying the unnormalized log-probabilities for all classes.
  • 1: A scalar ANEURALNETWORKS_INT32, specifying the number of independent samples to draw for each row slice.
  • 2: A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor with shape [2], specifying seeds used to initialize the random distribution. If both provided seeds are 0, both will be randomly generated.

Outputs:

  • 0: A 2-D ANEURALNETWORKS_TENSOR_INT32 tensor with shape [batches, samples], containing the drawn samples.

Available since NNAPI feature level 3.

ANEURALNETWORKS_RANK

Returns the rank of a tensor.

The rank of a tensor is the number of dimensions in it. Also known as "order", "degree", "ndims".

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: The input tensor.

Outputs:

Available since NNAPI feature level 4.

ANEURALNETWORKS_REDUCE_ALL

Reduces a tensor by computing the "logical and" of elements along given dimensions.

If keep_dims is true, the reduced dimensions are retained with length 1. Otherwise, the rank of the tensor is reduced by 1 for each entry in dimensions.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

  • 0: A tensor of the same OperandCode as input0. If all dimensions are reduced and keep_dims is false, the output shape is [1].

Available since NNAPI feature level 3.

ANEURALNETWORKS_REDUCE_ANY

Reduces a tensor by computing the "logical or" of elements along given dimensions.

If keep_dims is true, the reduced dimensions are retained with length 1. Otherwise, the rank of the tensor is reduced by 1 for each entry in dimensions.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

  • 0: A tensor of the same OperandCode as input0. If all dimensions are reduced and keep_dims is false, the output shape is [1].

Available since NNAPI feature level 3.

ANEURALNETWORKS_REDUCE_MAX

Reduces a tensor by computing the maximum of elements along given dimensions.

If keep_dims is true, the reduced dimensions are retained with length 1. Otherwise, the rank of the tensor is reduced by 1 for each entry in dimensions.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_REDUCE_MIN

Reduces a tensor by computing the minimum of elements along given dimensions.

If keep_dims is true, the reduced dimensions are retained with length 1. Otherwise, the rank of the tensor is reduced by 1 for each entry in dimensions.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_REDUCE_PROD

Reduces a tensor by multiplying elements along given dimensions.

If keep_dims is true, the reduced dimensions are retained with length 1. Otherwise, the rank of the tensor is reduced by 1 for each entry in dimensions.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

  • 0: A tensor of the same OperandCode as input0. If all dimensions are reduced and keep_dims is false, the output shape is [1].

Available since NNAPI feature level 3.

ANEURALNETWORKS_REDUCE_SUM

Reduces a tensor by summing elements along given dimensions.

If keep_dims is true, the reduced dimensions are retained with length 1. Otherwise, the rank of the tensor is reduced by 1 for each entry in dimensions.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

  • 0: A tensor of the same OperandCode as input0. If all dimensions are reduced and keep_dims is false, the output shape is [1].

Available since NNAPI feature level 3.

ANEURALNETWORKS_RELU

Computes rectified linear activation on the input tensor element-wise.

The output is calculated using this formula:

output = max(0, input)

Supported tensor OperandCode:

Supported tensor rank: up to 4.

Inputs:

  • 0: A tensor, specifying the input. Since NNAPI feature level 3, this tensor may be zero-sized.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_RELU1

Computes rectified linear 1 activation on the input tensor element-wise.

The output is calculated using this formula:

output = min(1.f, max(-1.f, input))

Supported tensor OperandCode:

Supported tensor rank: up to 4.

Inputs:

  • 0: A tensor, specifying the input. Since NNAPI feature level 3, this tensor may be zero-sized.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_RELU6

Computes rectified linear 6 activation on the input tensor element-wise.

The output is calculated using this formula:

output = min(6, max(0, input))

Supported tensor OperandCode:

Supported tensor rank: up to 4.

Inputs:

  • 0: A tensor, specifying the input. Since NNAPI feature level 3, this tensor may be zero-sized.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_RESHAPE

Reshapes a tensor.

Given tensor, this operation returns a tensor that has the same values as tensor, but with a newly specified shape.

Supported tensor OperandCode:

Supported tensor rank: up to 4.

Inputs:

  • 0: A tensor, specifying the tensor to be reshaped.
  • 1: A 1-D tensor of ANEURALNETWORKS_TENSOR_INT32, defining the shape of the output tensor. The number of elements implied by shape must be the same as the number of elements in the input tensor. If one component of shape is the special value -1, the size of that dimension is computed so that the total size remains constant. In particular, a shape of [-1] flattens into 1-D. At most one component of shape can be -1.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_RESIZE_BILINEAR

Resizes images to given size using bilinear interpolation.

Resized images must be distorted if their output aspect ratio is not the same as input aspect ratio. The corner pixels of output may not be the same as corner pixels of input.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Both resizing by shape and resizing by scale are supported.

Inputs (resizing by shape):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Since NNAPI feature level 3, zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the output width of the output tensor.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the output height of the output tensor.
  • 3: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.
  • 4: Align corners. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. Available since NNAPI feature level 4.
  • 5: Half pixel centers. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the pixel centers are assumed to be at (0.5, 0.5). This is the default behavior of image.resize in TF 2.0. If this parameter is True, then align_corners parameter must be False. Available since NNAPI feature level 4.

Inputs (resizing by scale, since NNAPI feature level 3):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Zero batches is supported for this tensor.
  • 1: A scalar, specifying width_scale, the scaling factor of the width dimension from the input tensor to the output tensor. The output width is calculated as new_width = floor(width * width_scale). The scalar must be of ANEURALNETWORKS_FLOAT16 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT16 and of ANEURALNETWORKS_FLOAT32 otherwise.
  • 2: A scalar, specifying height_scale, the scaling factor of the height dimension from the input tensor to the output tensor. The output height is calculated as new_height = floor(height * height_scale). The scalar must be of ANEURALNETWORKS_FLOAT16 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT16 and of ANEURALNETWORKS_FLOAT32 otherwise.
  • 3: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0.
  • 4: Align corners. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. Available since NNAPI feature level 4.
  • 5: Half pixel centers. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the pixel centers are assumed to be at (0.5, 0.5). This is the default behavior of image.resize in TF 2.0. If this parameter is True, then align_corners parameter must be False. Available since NNAPI feature level 4.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR

Resizes images to given size using nearest neighbor interpolation.

Resized images must be distorted if their output aspect ratio is not the same as input aspect ratio. The corner pixels of output may not be the same as corner pixels of input.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width].

Both resizing by shape and resizing by scale are supported.

Inputs (resizing by shape):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Zero batches is supported for this tensor.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the output width of the output tensor.
  • 2: An ANEURALNETWORKS_INT32 scalar, specifying the output height of the output tensor.
  • 3: An ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0.
  • 4: Align corners. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. Available since NNAPI feature level 4.
  • 5: Half pixel centers. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the pixel centers are assumed to be at (0.5, 0.5). This is the default behavior of image.resize in TF 2.0. If this parameter is True, then align_corners parameter must be False. Available since NNAPI feature level 4.

Inputs (resizing by scale):

  • 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. Zero batches is supported for this tensor.
  • 1: A scalar, specifying width_scale, the scaling factor of the width dimension from the input tensor to the output tensor. The output width is calculated as new_width = floor(width * width_scale). The scalar must be of ANEURALNETWORKS_FLOAT16 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT16 and of ANEURALNETWORKS_FLOAT32 otherwise.
  • 2: A scalar, specifying height_scale, the scaling factor of the height dimension from the input tensor to the output tensor. The output height is calculated as new_height = floor(height * height_scale). The scalar must be of ANEURALNETWORKS_FLOAT16 if input0 is of ANEURALNETWORKS_TENSOR_FLOAT16 and of ANEURALNETWORKS_FLOAT32 otherwise.
  • 3: An ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0.
  • 4: Align corners. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the centers of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. Available since NNAPI feature level 4.
  • 5: Half pixel centers. An optional ANEURALNETWORKS_BOOL scalar, default to false. If True, the pixel centers are assumed to be at (0.5, 0.5). This is the default behavior of image.resize in TF 2.0. If this parameter is True, then align_corners parameter must be False. Available since NNAPI feature level 4.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_REVERSE

Reverses a specified dimension of a tensor.

Supported tensor OperandCode:

Supported tensor rank: up to 8.

Inputs:

  • 0: Input tensor of rank n.
  • 1: Axis tensor of type ANEURALNETWORKS_TENSOR_INT32 and shape [1], specifying which dimension of the input tensor is to be reversed. The dimension must be in the range [0, n).

Outputs:

Available since NNAPI feature level 7.

ANEURALNETWORKS_RNN

A basic recurrent neural network layer.

This layer implements the operation: outputs = state = activation(inputs * input_weights + state * recurrent_weights + bias)

Where:

  • “input_weights” is a weight matrix that multiplies the inputs;
  • “recurrent_weights” is a weight matrix that multiplies the current “state” which itself is the output from the previous time step computation;
  • “bias” is a bias vector (added to each output vector in the batch);
  • “activation” is the function passed as the “fused_activation_function” argument (if not “NONE”).

Supported tensor OperandCode:

The input tensors must all be the same type.

Inputs:

  • 0: input. A 2-D tensor of shape [batch_size, input_size], where “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
  • 1: weights. A 2-D tensor of shape [num_units, input_size], where “num_units” corresponds to the number of units.
  • 2: recurrent_weights. A 2-D tensor of shape [num_units, num_units], with columns corresponding to the weights from each unit.
  • 3: bias. A 1-D tensor of shape [num_units].
  • 4: hidden state (in). A 2-D tensor of shape [batch_size, num_units].
  • 5: fused_activation_function. An optional FuseCode value indicating the activation function. If “NONE” is specified then it results in a linear activation.

Outputs:

  • 0: hidden state (out). A 2-D tensor of shape [batch_size, num_units].
  • 1: output. A 2-D tensor of shape [batch_size, num_units]. This is effectively the same as the current state value.

Available since NNAPI feature level 1.

ANEURALNETWORKS_ROI_ALIGN

Select and scale the feature map of each region of interest to a unified output size by average pooling sampling points from bilinear interpolation.

The region of interest is represented by its upper-left corner coordinate (x1,y1) and lower-right corner coordinate (x2,y2) in the original image. A spatial scaling factor is applied to map into feature map coordinate. A valid region of interest should satisfy x1 <= x2 and y1 <= y2.

No rounding is applied in this operation. The sampling points are uniformly distributed in the pooling bin and their values are calculated by bilinear interpolation.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width].

Inputs:

  • 0: A 4-D tensor, specifying the feature map.
  • 1: A 2-D Tensor of shape [num_rois, 4], specifying the locations of the regions of interest, each line with format [x1, y1, x2, y2]. For input0 of type ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, this tensor should be of ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, with zeroPoint of 0 and scale of 0.125. Zero num_rois is supported for this tensor.
  • 2: A 1-D ANEURALNETWORKS_TENSOR_INT32 tensor, of shape [num_rois], specifying the batch index of each box. Boxes with the same batch index are grouped together. Zero num_rois is supported for this tensor.
  • 3: An ANEURALNETWORKS_INT32 scalar, specifying the output height of the output tensor.
  • 4: An ANEURALNETWORKS_INT32 scalar, specifying the output width of the output tensor.
  • 5: An ANEURALNETWORKS_FLOAT32 scalar, specifying the ratio from the height of original image to the height of feature map.
  • 6: An ANEURALNETWORKS_FLOAT32 scalar, specifying the ratio from the width of original image to the width of feature map.
  • 7: An ANEURALNETWORKS_INT32 scalar, specifying the number of sampling points in height dimension used to compute the output. Set to 0 for adaptive value of ceil(roi_height/out_height).
  • 8: An ANEURALNETWORKS_INT32 scalar, specifying the number of sampling points in width dimension used to compute the output. Set to 0 for adaptive value of ceil(roi_width/out_width).
  • 9: An ANEURALNETWORKS_BOOL scalar, set to true to specify NCHW data layout for input0 and output0. Set to false for NHWC.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_ROI_POOLING

Select and scale the feature map of each region of interest to a unified output size by max-pooling.

The region of interest is represented by its upper-left corner coordinate (x1,y1) and lower-right corner coordinate (x2,y2) in the original image. A spatial scaling factor is applied to map into feature map coordinate. A valid region of interest should satisfy x1 <= x2 and y1 <= y2.

Rounding is applied in this operation to ensure integer boundary for regions of interest and pooling bins.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width].

Inputs:

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_RSQRT

Computes reciprocal of square root of x element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_SELECT

Using a tensor of booleans c and input tensors x and y select values elementwise from both input tensors:

O[i] = c[i] ? x[i] : y[i].

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

Outputs:

  • 0: A tensor of the same type and shape as input1 and input2. For an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM tensor, the scale and zeroPoint can be different from inputs' scale and zeroPoint.

Available since NNAPI feature level 3.

ANEURALNETWORKS_SIN

Computes sin of x element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_SLICE

Extracts a slice of specified size from the input tensor starting at a specified location.

The starting location is specified as a 1-D tensor containing offsets for each dimension. The size is specified as a 1-D tensor containing either size of a slice along corresponding dimension or -1. In the latter case, all the remaining elements in dimension are included in the slice.

A sum of begin offset and a size of a slice must not exceed size of a corresponding dimension.

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: An n-D tensor to take slice from, may be zero-sized.
  • 1: A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 specifying the beginning indices of the slice in each dimension.
  • 2: A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32 specifying the size of the slice in each dimension.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_SOFTMAX

Computes the softmax activation on the input tensor element-wise, per batch, by normalizing the input vector so the maximum coefficient is zero.

The output is calculated using this formula:

output[batch, i] =
    exp((input[batch, i] - max(input[batch, :])) * beta) /
    sum_{k}{exp((input[batch, k] - max(input[batch, :])) * beta)}

For input tensor with rank other than 2, the activation will be applied independently on each 1-D slice along specified dimension.

Supported tensor OperandCode:

Supported tensor rank: up to 4. Tensors with rank other than 2 or 4 are only supported since NNAPI feature level 3.

Inputs:

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_SPACE_TO_BATCH_ND

SpaceToBatch for N-Dimensional tensors.

This operation divides "spatial" dimensions [1, ..., M] of the input into a grid of blocks of shape block_shape, and interleaves these blocks with the "batch" dimension (0) such that in the output, the spatial dimensions [1, ..., M] correspond to the position within the grid, and the batch dimension combines both the position within a spatial block and the original batch position. Prior to division into blocks, the spatial dimensions of the input are optionally zero padded according to paddings.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Inputs:

  • 0: An n-D tensor, specifying the input.
  • 1: A 1-D Tensor of ANEURALNETWORKS_TENSOR_INT32, the block sizes for each spatial dimension of the input tensor. All values must be >= 1.
  • 2: A 2-D Tensor of ANEURALNETWORKS_TENSOR_INT32, the paddings for each spatial dimension of the input tensor. All values must be >= 0. The shape of the tensor must be {M, 2}, where M is the number of spatial dimensions. padding[i, 0] specifies the number of elements to be padded in the front of dimension i. padding[i, 1] specifies the number of elements to be padded after the end of dimension i.
  • 3: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_SPACE_TO_DEPTH

Rearranges blocks of spatial data, into depth.

More specifically, this op outputs a copy of the input tensor where values from the height and width dimensions are moved to the depth dimension. The value block_size indicates the input block size and how the data is moved.

Chunks of data of size block_size * block_size from depth are rearranged into non-overlapping blocks of size block_size x block_size.

The depth of the output tensor is input_depth * block_size * block_size. The input tensor's height and width must be divisible by block_size.

Supported tensor OperandCode:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width]. NCHW is supported since NNAPI feature level 3.

Inputs:

  • 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
  • 1: An ANEURALNETWORKS_INT32 scalar, specifying the block_size. block_size must be >=1 and block_size must be a divisor of both the input height and width.
  • 2: An optional ANEURALNETWORKS_BOOL scalar, default to false. Set to true to specify NCHW data layout for input0 and output0. Available since NNAPI feature level 3.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_SPLIT

Splits a tensor along a given axis into num_splits subtensors.

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: An n-D tensor to split.
  • 1: An ANEURALNETWORKS_INT32 scalar specifying the axis along which to split.
  • 2: An ANEURALNETWORKS_INT32 scalar indicating the number of splits along given axis. Must evenly divide axis size.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_SQRT

Computes square root of x element-wise.

Supported tensor OperandCode:

Supported tensor rank: from 1.

Inputs:

  • 0: A tensor.

Outputs:

  • 0: The output tensor of same shape as input0.

Available since NNAPI feature level 3.

ANEURALNETWORKS_SQUEEZE

Removes dimensions of size 1 from the shape of a tensor.

Given a tensor input, this operation returns a tensor of the same OperandCode with all dimensions of size 1 removed. If you don't want to remove all size 1 dimensions, you can remove specific size 1 dimensions by specifying the axes (input1).

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: An n-D tensor, the tensor to be squeezed.
  • 1: An optional 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The dimensions to squeeze. If specified only squeezes the dimensions listed. Otherwise, squeezes all dimensions. The dimension index starts at 0. An error must be reported if squeezing a dimension that is not 1.

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_STRIDED_SLICE

Extracts a strided slice of a tensor.

Roughly speaking, this op extracts a slice of size (end - begin) / stride from the given input tensor. Starting at the location specified by begin the slice continues by adding stride to the index until all dimensions are not less than end. Note that a stride can be negative, which causes a reverse slice.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: An n-D tensor, specifying the tensor to be sliced.
  • 1: begin, a 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input0).
  • 2: end, a 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input0).
  • 3: strides, a 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input0). The entries must be non-zero.
  • 4: begin_mask, an ANEURALNETWORKS_INT32 scalar. If the ith bit of begin_mask is set, begin[i] is ignored and the fullest possible range in that dimension is used instead.
  • 5: end_mask, an ANEURALNETWORKS_INT32 scalar. If the ith bit of end_mask is set, end[i] is ignored and the fullest possible range in that dimension is used instead.
  • 6: shrink_axis_mask, an ANEURALNETWORKS_INT32 scalar. If the ith bit of shrink_axis_mask is set, the ith dimension specification shrinks the dimensionality by 1, taking on the value at index begin[i]. In this case, the ith specification must define a slice of size 1, e.g. begin[i] = x, end[i] = x + 1.

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_SUB

Element-wise subtraction of two tensors.

Takes two input tensors of identical OperandCode and compatible dimensions. The output is the result of subtracting the second input tensor from the first one, optionally modified by an activation function.

Two dimensions are compatible when:

  1. they are equal, or
  2. one of them is 1

The size of the output is the maximum size along each dimension of the input operands. It starts with the trailing dimensions, and works its way forward.

Example: input1.dimension = {4, 1, 2}, input2.dimension = {5, 4, 3, 1}, output.dimension = {5, 4, 3, 2}

Since NNAPI feature level 3, generic zero-sized input tensor is supported. Zero dimension is only compatible with 0 or 1. The size of the output dimension is zero if either of corresponding input dimension is zero.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_SVDF

SVDF op is a kind of stateful layer derived from the notion that a densely connected layer that's processing a sequence of input frames can be approximated by using a singular value decomposition of each of its nodes.

The implementation is based on:

https://research.google.com/pubs/archive/43813.pdf

P. Nakkiran, R. Alvarez, R. Prabhavalkar, C. Parada. “Compressing Deep Neural Networks using a Rank-Constrained Topology”. INTERSPEECH, 2015.

It processes the incoming input using a 2-stage filtering mechanism:

  • stage 1 performs filtering on the "features" dimension, whose outputs get pushed into a memory of fixed-size memory_size.
  • stage 2 performs filtering on the "time" dimension of the memory_size memoized outputs of stage 1.

Specifically, for rank 1, this layer implements the operation:

memory = push(conv1d(inputs, weights_feature, feature_dim,
                     "ANEURALNETWORKS_PADDING_VALID"));
outputs = activation(memory * weights_time + bias);

Where:

  • “weights_feature” is a weights matrix that processes the inputs (by convolving the input with every “feature filter”), and whose outputs get pushed, stacked in order, into the fixed-size “memory” (the oldest entry gets dropped);
  • “weights_time” is a weights matrix that processes the “memory” (by a batched matrix multiplication on the num_units);
  • “bias” is an optional bias vector (added to each output vector in the batch); and
  • “activation” is the function passed as the “fused_activation_function” argument (if not “NONE”).

Each rank adds a dimension to the weights matrices by means of stacking the filters.

Supported tensor OperandCode:

All input tensors must be the same type.

Inputs:

  • 0: input. A 2-D tensor of shape [batch_size, input_size], where “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
  • 1: weights_feature. A 2-D tensor of shape [num_units, input_size], where “num_units” corresponds to the number of units.
  • 2: weights_time. A 2-D tensor of shape [num_units, memory_size], where “memory_size” corresponds to the fixed-size of the memory.
  • 3: bias. An optional 1-D tensor of shape [num_units].
  • 4: state (in). A 2-D tensor of shape [batch_size, (memory_size - 1) * num_units * rank].
  • 5: rank. The rank of the SVD approximation.
  • 6: fused_activation_function. An optional FuseCode value indicating the activation function. If “NONE” is specified then it results in a linear activation.

Outputs:

  • 0: state (out). A 2-D tensor of the same OperandCode as the inputs, with shape [batch_size, (memory_size - 1) * num_units * rank].
  • 1: output. A 2-D tensor of the same OperandCode as the inputs, with shape [batch_size, num_units].

Available since NNAPI feature level 1.

ANEURALNETWORKS_TANH

Computes hyperbolic tangent of input tensor element-wise.

The output is calculated using this formula:

output = tanh(input)

Supported tensor OperandCode:

Supported tensor rank: up to 4.

Inputs:

  • 0: A tensor, specifying the input. Since NNAPI feature level 3, this tensor may be zero-sized.

Outputs:

Available since NNAPI feature level 1.

ANEURALNETWORKS_TILE

Constructs a tensor by tiling a given tensor.

This operation creates a new tensor by replicating input multiples times. The output tensor's i-th dimension has input.dims(i) * multiples[i] elements, and the values of input are replicated multiples[i] times along the i-th dimension. For example, tiling [a b c d] by [2] produces [a b c d a b c d].

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: input, an n-D tensor specifying the input.
  • 1: multiples, a 1-D tensor of ANEURALNETWORKS_TENSOR_INT32. The length of multiples must be n.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_TOPK_V2

Finds values and indices of the k largest entries for the last dimension.

Resulting values in each dimension are sorted in descending order. If two values are equal, the one with larger index appears first.

Supported tensor OperandCode:

Supported tensor rank: from 1

Inputs:

  • 0: input, an n-D tensor specifying the input.
  • 1: k, an ANEURALNETWORKS_INT32 scalar, specifying the number of top elements to look for along the last dimension.

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_TRANSPOSE

Transposes the input tensor, permuting the dimensions according to the perm tensor.

The returned tensor's dimension i corresponds to the input dimension perm[i]. If perm is not given, it is set to (n-1...0), where n is the rank of the input tensor. Hence by default, this operation performs a regular matrix transpose on 2-D input Tensors.

Supported tensor OperandCode:

Supported tensor rank: up to 4

Inputs:

  • 0: An n-D tensor, specifying the tensor to be transposed. Since NNAPI feature level 3, this tensor may be zero-sized.
  • 1: An optional 1-D Tensor of ANEURALNETWORKS_TENSOR_INT32, the permutation of the dimensions of the input tensor.

Outputs:

Available since NNAPI feature level 2.

ANEURALNETWORKS_TRANSPOSE_CONV_2D

Performs the transpose of 2-D convolution operation.

This operation is sometimes called "deconvolution" after Deconvolutional Networks, but is actually the transpose (gradient) of ANEURALNETWORKS_CONV_2D rather than an actual deconvolution.

The output dimensions are functions of the filter dimensions, stride, and padding.

Supported tensor OperandCode configurations:

Available since NNAPI feature level 4:

Supported tensor rank: 4, with "NHWC" or "NCHW" data layout. With the default data layout NHWC, the data is stored in the order of: [batch, height, width, channels]. Alternatively, the data layout could be NCHW, the data storage order of: [batch, channels, height, width].

Both explicit padding and implicit padding are supported.

Inputs (explicit padding):

Inputs (implicit padding):

Outputs:

Available since NNAPI feature level 3.

ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM

A recurrent neural network specified by an LSTM cell.

Performs (fully) dynamic unrolling of input.

This Op unrolls the input along the time dimension, and implements the following operation for each element in the sequence s = 1...sequence_length: outputs[s] = projection(state = activation(LSTMOp(inputs[s])))

Where LSTMOp is the LSTM op as in ANEURALNETWORKS_LSTM, the "projection" is an optional projection layer from state and output and the “activation” is the function passed as the “fused_activation_function” argument (if not “NONE”).

Supported tensor OperandCode:

Supported tensor rank: 3, either time-major or batch-major.

All input and output tensors must be of the same type.

Inputs:

  • 0: The input ( $x_t$). A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major: [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length), “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
  • 1: The input-to-input weights ( $W_{xi}$). Optional. A 2-D tensor of shape [num_units, input_size], where “num_units” corresponds to the number of cell units.
  • 2: The input-to-forget weights ( $W_{xf}$). A 2-D tensor of shape [num_units, input_size].
  • 3: The input-to-cell weights ( $W_{xc}$). A 2-D tensor of shape [num_units, input_size].
  • 4: The input-to-output weights ( $W_{xo}$). A 2-D tensor of shape [num_units, input_size].
  • 5: The recurrent-to-input weights ( $W_{hi}$). Optional. A 2-D tensor of shape [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., “num_units”), or the second dimension of the “projection_weights”, if defined.
  • 6: The recurrent-to-forget weights ( $W_{hf}$). A 2-D tensor of shape [num_units, output_size].
  • 7: The recurrent-to-cell weights ( $W_{hc}$). A 2-D tensor of shape [num_units, output_size].
  • 8: The recurrent-to-output weights ( $W_{ho}$). A 2-D tensor of shape [num_units, output_size].
  • 9: The cell-to-input weights ( $W_{ci}$). Optional. A 1-D tensor of shape [num_units].
  • 10: The cell-to-forget weights ( $W_{cf}$). Optional. A 1-D tensor of shape [num_units].
  • 11: The cell-to-output weights ( $W_{co}$). Optional. A 1-D tensor of shape [num_units].
  • 12: The input gate bias ( $b_i$). Optional. A 1-D tensor of shape [num_units].
  • 13: The forget gate bias ( $b_f$). A 1-D tensor of shape [num_units].
  • 14: The cell bias ( $b_c$). A 1-D tensor of shape [num_units].
  • 15: The output gate bias ( $b_o$). A 1-D tensor of shape [num_units].
  • 16: The projection weights ( $W_{proj}$). Optional. A 2-D tensor of shape [output_size, num_units].
  • 17: The projection bias ( $b_{proj}$). Optional. A 1-D tensor of shape [output_size].
  • 18: The output state (in) ( $h_{t-1}$). A 2-D tensor of shape [batch_size, output_size].
  • 19: The cell state (in) ( $C_{t-1}$). A 2-D tensor of shape [batch_size, num_units].
  • 20: The activation function ( $g$). A value indicating the activation function:
    • 0: None;
    • 1: Relu;
    • 3: Relu6;
    • 4: Tanh;
    • 6: Sigmoid.
  • 21: The clipping threshold ( $t_{cell}$) for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled.
  • 22: The clipping threshold ( $t_{proj}$) for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
  • 23: Time-major if true, batch-major if false.
  • 24: The input layer normalization weights. Optional. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at input gate.
  • 25: The forget layer normalization weights. Optional. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at forget gate.
  • 26: The cell layer normalization weights. Optional. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at cell gate.
  • 27: The output layer normalization weights. Optional. A 1-D tensor of shape [num_units]. Used to rescale normalized inputs to activation at output gate.

Outputs:

  • 0: The output ( $o_t$). A 3-D tensor of shape: If time-major: [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
  • 1: A tensor of shape [batch_size, output_size] containing a hidden state from the last time step in the sequence. This output is optional and can be omitted. If this output is present then output #2 must be present as well. Available since NNAPI feature level 4.
  • 2: A tensor of shape [batch_size, cell_size] containing a cell state from the last time step in the sequence. This output is optional and can be omitted. Available since NNAPI feature level 4.

Available since NNAPI feature level 3.

Important: As of NNAPI feature level 3, there is no way to get the output state tensors out and NNAPI does not maintain internal states. This operator does not support the usage pattern in which multiple cells are chained and state tensors are propagated.

ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN

A recurrent neural network layer that applies a basic RNN cell to a sequence of inputs.

This layer unrolls the input along the sequence dimension, and implements the following operation for each element in the sequence s = 1...sequence_length: outputs[s] = state = activation(inputs[s] * input_weights’ + state * recurrent_weights’ + bias)

Where:

  • “input_weights” is a weight matrix that multiplies the inputs;
  • “recurrent_weights” is a weight matrix that multiplies the current “state” which itself is the output from the previous time step computation;
  • “bias” is a bias vector (added to each output vector in the batch);
  • “activation” is the function passed as the “fused_activation_function” argument (if not “NONE”).

Supported tensor OperandCode:

The input tensors must all be the same type.

Inputs:

  • 0: input. A 3-D tensor. The shape is defined by the input 6 (timeMajor). If it is set to 1, then the input has a shape [maxTime, batchSize, inputSize], otherwise the input has a shape [batchSize, maxTime, inputSize].
  • 1: weights. A 2-D tensor of shape [numUnits, inputSize].
  • 2: recurrent_weights. A 2-D tensor of shape [numUnits, numUnits].
  • 3: bias. A 1-D tensor of shape [numUnits].
  • 4: hidden state. A 2-D tensor of shape [batchSize, numUnits]. Specifies a hidden state input for the first time step of the computation.
  • 5: fusedActivationFunction. A FuseCode value indicating the activation function. If “NONE” is specified then it results in a linear activation.
  • 6: timeMajor. An ANEURALNETWORKS_INT32 scalar specifying the shape format of input and output tensors. Must be set to either 0 or 1.

Outputs:

  • 0: output. A 3-D tensor. The shape is defined by the input 6 (timeMajor). If it is set to 1, then the output has a shape [maxTime, batchSize, numUnits], otherwise the output has a shape [batchSize, maxTime, numUnits].
  • 1: A tensor of shape [batchSize, numUnits] containing hidden state from the last time step in the sequence. This output is optional and can be omitted. Available since NNAPI feature level 4.

Available since NNAPI feature level 3.

Important: As of NNAPI feature level 3, there is no way to get the output state tensors out and NNAPI does not maintain internal states. This operator does not support the usage pattern in which multiple cells are chained and state tensors are propagated.

ANEURALNETWORKS_WHILE

Executes the body model until the condition model outputs false.

The inputs to this operation are the condition model, the body model, and operand values for the first iteration of the loop. The values are implicitly split into three groups of input-output, state-only, and input-only values, as described below.

The outputs of this operation are the final values of input-output operands.

Both the condition and body model receive (m + k + n) inputs.

  • The first m (m >= 1) inputs are input-output operands. For the first iteration, these are initialized from the corresponding inputs of the WHILE operation. In subsequent iterations, their values come from the corresponding outputs of the body model produced during the previous iteration.
  • The next k (k >= 0) inputs are state-only operands. They are similar to the input-output operands, except that their values are no longer available after the loop terminates.
  • The last n (n >= 0) inputs are input-only operands. Their values come from the corresponding inputs of the WHILE operation.

The body model produces (m + k) outputs.

  • The first m outputs are input-output operands. They become the outputs of the WHILE operation when a termination condition is reached.
  • The last k outputs are state-only operands. Their values are no longer available after the loop terminates.

The numbers m, k, and n are inferred by the runtime as follows:

m = (WHILE operation output count)
k = (body model output count) - m
n = (body model input count) - m - k

The pseudo-code below illustrates the flow of a WHILE operation with inputs condition, body, initial_input_output, initial_state, input_only (m = 1, k = 1, n = 1):

input_output = initial_input_output
state = initial_state
while condition(input_output, state, input_only):
    input_output, state = body(input_output, state, input_only)
return input_output

To prevent infinite loops, there is an implicit execution timeout associated with each loop ("loop timeout duration"). See ANeuralNetworksExecution_setLoopTimeout.

Inputs:

  • 0: An ANEURALNETWORKS_MODEL reference to the condition model. The model must have (m + k + n) inputs with the same types, ranks (if specified), dimensions (if specified), scales, zeroPoints, and other operand parameters as the corresponding inputs of the WHILE operation and exactly one output of ANEURALNETWORKS_TENSOR_BOOL8 and shape [1]. The output operand must have fully specified dimensions.
  • 1: An ANEURALNETWORKS_MODEL reference to the body model. The model must have (m + k + n) inputs and (m + k) outputs with the same types, ranks (if specified), dimensions (if specified), scales, zeroPoints, and other operand parameters as the corresponding inputs and outputs of the WHILE operation.
  • (m inputs): Initial values for input-output operands.
  • (k inputs): Initial values for state-only operands.
  • (n inputs): Values for input-only operands.

Outputs:

  • 0 ~ (m - 1): Outputs produced by the loop.

Available since NNAPI feature level 4.

PaddingCode

Declared in android/NeuralNetworksTypes.h
 PaddingCode

Implicit padding algorithms.

Available since NNAPI feature level 1.

Properties
ANEURALNETWORKS_PADDING_SAME

SAME padding.

Padding on both ends is the "same": padding_to_beginning = total_padding / 2 and padding_to_end = (total_padding + 1) / 2. That is, for an even amount of padding, the padding at both ends is exactly the same; for an odd amount of padding, the padding at the end is bigger than the padding at the beginning by 1.

total_padding is a function of input size, stride, dilation and filter size. It can be computed as follows:

out_size = (input_size + stride - 1) / stride
effective_filter_size = (filter_size - 1) * dilation + 1
needed_input = (out_size - 1) * stride + effective_filter_size
total_padding = max(0, needed_input - input_size)

The computation is the same for the horizontal and vertical directions.

ANEURALNETWORKS_PADDING_VALID

VALID padding.

No padding. When the input size is not evenly divisible by the filter size, the input at the end that could not fill the whole filter tile will simply be ignored.

PreferenceCode

Declared in android/NeuralNetworksTypes.h
 PreferenceCode

Execution preferences.

Available since NNAPI feature level 1.

Properties
ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER

Prefer returning a single answer as fast as possible, even if this causes more power consumption.

ANEURALNETWORKS_PREFER_LOW_POWER

Prefer executing in a way that minimizes battery drain.

This is desirable for compilations that will be executed often.

ANEURALNETWORKS_PREFER_SUSTAINED_SPEED

Prefer maximizing the throughput of successive frames, for example when processing successive frames coming from the camera.

PriorityCode

Declared in android/NeuralNetworksTypes.h
 PriorityCode

Relative execution priority.

Available since NNAPI feature level 4.

Properties
ANEURALNETWORKS_PRIORITY_DEFAULT
ANEURALNETWORKS_PRIORITY_HIGH
ANEURALNETWORKS_PRIORITY_LOW
ANEURALNETWORKS_PRIORITY_MEDIUM

ResultCode

Declared in android/NeuralNetworksTypes.h
 ResultCode

Result codes.

Any NNAPI function can return any result code, including result codes not currently documented. Any value other than ANEURALNETWORKS_NO_ERROR indicates a failure of some kind.

Additional information about the nature of a failure can be obtained from the device log after enabling NNAPI debugging by setting the debug.nn.vlog property to 1, e.g., by calling "adb shell setprop debug.nn.vlog 1".

Available since NNAPI feature level 1.

Properties
ANEURALNETWORKS_BAD_DATA

Failure caused by invalid function arguments, invalid model definition, invalid execution definition or invalid data at execution time.

ANEURALNETWORKS_BAD_STATE

Failure caused by object being in the wrong state.

ANEURALNETWORKS_DEAD_OBJECT

Failure indicating an object is in a dead state.

Available since NNAPI feature level 4.

ANEURALNETWORKS_INCOMPLETE
ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT

Failure because a deadline could not be met for a task, and future deadlines will likely also not be met for the same task even after a short delay.

Available since NNAPI feature level 4.

ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT

Failure because a deadline could not be met for a task, but future deadlines may still be met for the same task after a short delay.

Available since NNAPI feature level 4.

ANEURALNETWORKS_NO_ERROR

Operation was successful.

ANEURALNETWORKS_OP_FAILED

Failure caused by failed model execution.

ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE

Failure caused by insufficient buffer size provided to a model output.

ANEURALNETWORKS_OUT_OF_MEMORY

Failure caused by not enough available memory.

ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT

Failure because of a resource limitation within the driver, and future calls for the same task will likely also fail even after a short delay.

Available since NNAPI feature level 4.

ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT

Failure because of a resource limitation within the driver, but future calls for the same task may still succeed after a short delay.

Available since NNAPI feature level 4.

ANEURALNETWORKS_UNAVAILABLE_DEVICE

Failure caused by a device not being available.

ANEURALNETWORKS_UNEXPECTED_NULL

Failure caused by unexpected null argument.

ANEURALNETWORKS_UNMAPPABLE

Failure caused by not being able to map a file into memory.

This may be caused by a file descriptor not being mappable, or an AHardwareBuffer not supported by the device. Mitigate by reading its content into memory.

Typedefs

ANeuralNetworksBurst

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksBurst ANeuralNetworksBurst

ANeuralNetworksBurst is an opaque type that can be used to reduce the latency of a rapid sequence of executions.

It will likely cause overhead if only used for a single execution.

ANeuralNetworksBurst serves as a context object for any number of inferences using ANeuralNetworksExecution objects. An ANeuralNetworksBurst object and the ANeuralNetworksExecution objects used with it must all have been created from the same ANeuralNetworksCompilation object.

This object is also used as a hint to drivers, providing insight to the lifetime of a rapid sequence of executions. For example, a driver may choose to increase the clock frequency of its accelerator for the lifetime of a burst object.

To use:

Available since NNAPI feature level 3.

ANeuralNetworksCompilation

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksCompilation ANeuralNetworksCompilation

ANeuralNetworksCompilation is an opaque type that can be used to compile a machine learning model.

To use:

A compilation is completed by calling ANeuralNetworksCompilation_finish. A compilation is destroyed by calling ANeuralNetworksCompilation_free.

A compilation cannot be modified once ANeuralNetworksCompilation_finish has been called on it.

It is the application's responsibility to make sure that only one thread modifies a compilation at a given time. It is however safe for more than one thread to use the compilation once ANeuralNetworksCompilation_finish has returned.

It is also the application's responsibility to ensure that there are no other uses of the compilation after calling ANeuralNetworksCompilation_free. This includes any execution object or burst object created using the compilation, or any memory descriptor with the compilation as part of one of the roles specified by ANeuralNetworksMemoryDesc_addInputRole or ANeuralNetworksMemoryDesc_addOutputRole.

Available since NNAPI feature level 1.

ANeuralNetworksDevice

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksDevice ANeuralNetworksDevice

ANeuralNetworksDevice is an opaque type that represents a device.

This type is used to query basic properties and supported operations of the corresponding device, and control which device(s) a model is to be run on.

Available since NNAPI feature level 3.

ANeuralNetworksEvent

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksEvent ANeuralNetworksEvent

ANeuralNetworksEvent is an opaque type that represents an event that will be signaled once an execution completes.

Available since NNAPI feature level 1.

ANeuralNetworksExecution

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksExecution ANeuralNetworksExecution

ANeuralNetworksExecution is an opaque type that can be used to apply a machine learning model to a set of inputs.

To use:

An output buffer or memory region must not overlap with any other output buffer or memory region, with an input buffer or memory region, or with an operand value in a memory object (ANeuralNetworksModel_setOperandValueFromMemory).

An execution is in the preparation state after it is created by ANeuralNetworksExecution_create. An execution may only be modified in the preparation state. Scheduling a computation by calling ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, ANeuralNetworksExecution_startCompute, or ANeuralNetworksExecution_startComputeWithDependencies will change the state of the execution object to the computation state. When the computation completes, the state of the execution object will change from the computation state to the completed state. The computation is completed when ANeuralNetworksExecution_compute, ANeuralNetworksExecution_burstCompute, or ANeuralNetworksEvent_wait has returned.

An execution can be applied to a model with ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, ANeuralNetworksExecution_startCompute or ANeuralNetworksExecution_startComputeWithDependencies only once. Create new executions to do new evaluations of the model.

Starting at NNAPI feature level 5, the application may call ANeuralNetworksExecution_setReusable to set an execution to be reusable for multiple computations. The application may schedule and evaluate a computation again from the completed state of a reusable execution. The execution cannot be modified between computations.

It is the application's responsibility to make sure that only one thread modifies an execution at a given time. It is however safe for more than one thread to use ANeuralNetworksEvent_wait at the same time.

It is also the application's responsibility to ensure that the execution either has never been scheduled or has completed (i.e., that ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, or ANeuralNetworksEvent_wait has returned) before calling ANeuralNetworksExecution_free.

It is also the application's responsibility to ensure that there are no other uses of the execution after calling ANeuralNetworksExecution_free.

It is the application's responsibility to ensure that there are no concurrent computations scheduled and evaluated on the same execution, either by means of ANeuralNetworksExecution_compute or ANeuralNetworksExecution_burstCompute (which are synchronous) in different threads, or by means of ANeuralNetworksExecution_startCompute or ANeuralNetworksExecution_startComputeWithDependencies (which are asynchronous). It is however safe to schedule and evaluate multiple computations on different executions concurrently. (Concurrent uses of ANeuralNetworksExecution_burstCompute must be on different burst objects.) The runtime makes no guarantee on the ordering of completion of executions. If it's important to the application, the application should enforce the ordering by ensuring that one execution completes before the next is scheduled (for example, by scheduling all executions synchronously within a single thread, or by scheduling all executions asynchronously and using ANeuralNetworksEvent_wait between calls to ANeuralNetworksExecution_startCompute); or by using ANeuralNetworksExecution_startComputeWithDependencies to make the execution wait for a list of events to be signaled before starting the actual evaluation.

Available since NNAPI feature level 1.

ANeuralNetworksMemory

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksMemory ANeuralNetworksMemory

ANeuralNetworksMemory is an opaque type that represents memory.

This type is used to represent shared memory, memory mapped files, and similar memories.

By using shared memory, a program can efficiently communicate to the runtime and drivers the tensors that define a model. See ANeuralNetworksModel_setOperandValueFromMemory. An application should typically create one shared memory object that contains every constant tensor needed to define a model. ANeuralNetworksMemory_createFromFd can be used to create shared memory from a file handle. ANeuralNetworksMemory_createFromAHardwareBuffer can be used to create shared memory from an AHardwareBuffer handle.

Memory objects can also be used to specify the input and output arguments of an execution. See ANeuralNetworksExecution_setInputFromMemory and ANeuralNetworksExecution_setOutputFromMemory.

When calling ANeuralNetworksModel_setOperandValueFromMemory, ANeuralNetworksExecution_setInputFromMemory and ANeuralNetworksExecution_setOutputFromMemory, each operand in the shared memory object must be aligned on a boundary of a byte size that is a multiple of the element type byte size, e.g., a tensor with ANEURALNETWORKS_TENSOR_FLOAT32 type must be aligned on 4-byte boundary.

It is the application's responsibility to ensure that there are no uses of the memory after calling ANeuralNetworksMemory_free. This includes any model which references this memory because of a call to ANeuralNetworksModel_setOperandValueFromMemory, any compilation created using such a model, any execution object or burst object created using such a compilation, or any execution which references this memory because of a call to ANeuralNetworksExecution_setInputFromMemory or ANeuralNetworksExecution_setOutputFromMemory.

Available since NNAPI feature level 1.

Starting at NNAPI feature level 4, the application may request creation of device native memory from ANeuralNetworksMemoryDesc to avoid potential memory copying and transformation overhead between executions. See also ANeuralNetworksMemoryDesc and ANeuralNetworksMemory_createFromDesc.

ANeuralNetworksMemoryDesc

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksMemoryDesc ANeuralNetworksMemoryDesc

ANeuralNetworksMemoryDesc is an opaque type that represents a memory descriptor.

A memory descriptor describes the properties of a memory object, and is used by ANeuralNetworksMemory_createFromDesc.

To use:

A memory descriptor is completed by calling ANeuralNetworksMemoryDesc_finish. A memory descriptor is destroyed by calling ANeuralNetworksMemoryDesc_free.

A memory descriptor must not be modified once ANeuralNetworksMemoryDesc_finish has been called on it.

It is the application's responsibility to make sure that only one thread modifies a memory descriptor at a given time. It is however safe for more than one thread to use the memory descriptor once ANeuralNetworksMemoryDesc_finish has returned.

It is also the application's responsibility to ensure that there are no other uses of the memory descriptor after calling ANeuralNetworksMemoryDesc_free. It is however safe to continue using a ANeuralNetworksMemory object created from the memory descriptor.

Available since NNAPI feature level 4.

ANeuralNetworksModel

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksModel ANeuralNetworksModel

ANeuralNetworksModel is an opaque type that contains a description of the mathematical operations that constitute the model.

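Build the model by calling ANeuralNetworksModel_create, ANeuralNetworksModel_addOperand, and ANeuralNetworksModel_addOperation.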

This forms a graph in which each operation and operand is a node, a directed edge from an operand to an operation indicates that the operand is an input to the operation, and a directed edge from an operation to an operand indicates that the operand is an output from the operation. This graph must be acyclic.

A model is completed by calling ANeuralNetworksModel_finish. A model is destroyed by calling ANeuralNetworksModel_free.

A model cannot be modified once ANeuralNetworksModel_finish has been called on it.

It is the application's responsibility to make sure that only one thread modifies a model at a given time. It is however safe for more than one thread to use the model once ANeuralNetworksModel_finish has returned.

It is also the application's responsibility to ensure that there are no other uses of the model after calling ANeuralNetworksModel_free. This includes any compilation, execution object or burst object created using the model.

Available since NNAPI feature level 1.

ANeuralNetworksOperandType

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksOperandType ANeuralNetworksOperandType

ANeuralNetworksOperandType describes the type of an operand.

This structure is used to describe both scalars and tensors.

A tensor operand type with all dimensions specified is "fully specified". Whenever possible (i.e., whenever the dimensions are known at model construction time), a tensor operand type should be (but is not required to be) fully specified, in order to enable the best possible performance.

If a tensor operand's type is not fully specified, the dimensions of the operand are deduced from the operand types and values of the operation for which that operand is an output or from the corresponding ANEURALNETWORKS_IF or ANEURALNETWORKS_WHILE operation input operand type in the case of referenced model input operands.

In the following situations, a tensor operand type must be fully specified:

A tensor operand type of specified rank but some number of unspecified dimensions is represented by setting dimensionCount to the rank and each unspecified dimension to 0.

Available since NNAPI feature level 1.

Starting at NNAPI feature level 3, a tensor operand type of unspecified rank is represented by setting dimensionCount to 0 and dimensions to NULL (just as if it were a scalar operand type).

ANeuralNetworksOperationType

Declared in android/NeuralNetworksTypes.h
int32_t ANeuralNetworksOperationType

Aliasing to OperationCode, used in function ANeuralNetworksModel_addOperation.

ANeuralNetworksSymmPerChannelQuantParams

Declared in android/NeuralNetworksTypes.h
struct ANeuralNetworksSymmPerChannelQuantParams ANeuralNetworksSymmPerChannelQuantParams

Parameters for ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL operand.

Functions

ANeuralNetworksBurst_create

Declared in android/NeuralNetworks.h
int ANeuralNetworksBurst_create(
  ANeuralNetworksCompilation *compilation,
  ANeuralNetworksBurst **burst
)

Create an ANeuralNetworksBurst to apply the given compilation.

This only creates the burst object. Computation is only performed once ANeuralNetworksExecution_burstCompute is invoked with a valid ANeuralNetworksExecution and ANeuralNetworksBurst.

The provided compilation must outlive the burst object.

Available since NNAPI feature level 3.

Details
Parameters
compilation
The ANeuralNetworksCompilation to be evaluated.
burst
The newly created object or NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the compilation is invalid.

ANeuralNetworksBurst_free

Declared in android/NeuralNetworks.h
void ANeuralNetworksBurst_free(
  ANeuralNetworksBurst *burst
)

Destroys the burst object.

Available since NNAPI feature level 3.

Details
Parameters
burst
The burst object to be destroyed. Passing NULL is acceptable and results in no operation.

ANeuralNetworksCompilation_create

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_create(
  ANeuralNetworksModel *model,
  ANeuralNetworksCompilation **compilation
)

Create an ANeuralNetworksCompilation to compile the given model.

The model passed to this function is termed the "main model" of the compilation, to distinguish it from other models referred to by an Operand of type ANEURALNETWORKS_MODEL within this compilation.

This function only creates the object. Compilation is only performed once ANeuralNetworksCompilation_finish is invoked.

ANeuralNetworksCompilation_finish should be called once all desired properties have been set on the compilation.

ANeuralNetworksCompilation_free should be called once the compilation is no longer needed.

The provided model must outlive the compilation.

The model must already have been finished by a call to ANeuralNetworksModel_finish.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
model
The ANeuralNetworksModel to be compiled.
compilation
The newly created object or NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the model is invalid.

ANeuralNetworksCompilation_createForDevices

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_createForDevices(
  ANeuralNetworksModel *model,
  const ANeuralNetworksDevice *const *devices,
  uint32_t numDevices,
  ANeuralNetworksCompilation **compilation
)

Create an ANeuralNetworksCompilation to compile the given model for a specified set of devices.

If more than one device is specified, the compilation will distribute the workload automatically across the devices. The model must be fully supported by the specified set of devices. This means that ANeuralNetworksModel_getSupportedOperationsForDevices() must have returned true for every operation for that model/devices pair.

The user must handle all compilation and execution failures from the specified set of devices. This is in contrast to a use of ANeuralNetworksCompilation_create, where the runtime will attempt to recover from such failures.

The model passed to this function is termed the "main model" of the compilation, to distinguish it from other models referred to by an Operand of type ANEURALNETWORKS_MODEL within this compilation.

Available since NNAPI feature level 3.

Details
Parameters
model
The ANeuralNetworksModel to be compiled.
devices
The set of devices. Must not contain duplicates.
numDevices
The number of devices in the set.
compilation
The newly created object or NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the model is invalid.

ANeuralNetworksCompilation_finish

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_finish(
  ANeuralNetworksCompilation *compilation
)

Indicate that we have finished modifying a compilation.

Required before calling ANeuralNetworksBurst_create or ANeuralNetworksExecution_create.

An application must ensure that no other thread uses the compilation at the same time.

This function must only be called once for a given compilation.

If ANeuralNetworksCompilation_setTimeout was called on this compilation, and the compilation is not able to be finished before the timeout duration is exceeded, then compilation may be aborted, in which case ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
compilation
The compilation to be finished.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksCompilation_free

Declared in android/NeuralNetworks.h
void ANeuralNetworksCompilation_free(
  ANeuralNetworksCompilation *compilation
)

Destroy a compilation.

The compilation need not have been finished by a call to ANeuralNetworksCompilation_finish.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
compilation
The compilation to be destroyed. Passing NULL is acceptable and results in no operation.

ANeuralNetworksCompilation_getPreferredMemoryAlignmentForInput

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_getPreferredMemoryAlignmentForInput(
  const ANeuralNetworksCompilation *compilation,
  uint32_t index,
  uint32_t *alignment
)

Get the preferred buffer and memory alignment of an input to an execution created from a particular compilation.

The user may use the returned alignment value to guide the layout of the input buffer or memory pool. To achieve the best performance, make sure the address of the buffer passed in ANeuralNetworksExecution_setInput, or the offset value passed in ANeuralNetworksExecution_setInputFromMemory, is a multiple of the preferred alignment value of the same input. A driver may choose to allocate a separate buffer and do memory copying if the provided buffer or memory does not satisfy the preferred alignment.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 5.

Details
Parameters
compilation
The compilation object. It must already have been finished by calling ANeuralNetworksCompilation_finish.
index
The index of the input argument we are referencing from the compilation. It is an index into the inputs list passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
alignment
The returned preferred alignment in bytes. It will be a power of 2.
Returns
ANEURALNETWORKS_NO_ERROR if successful. ANEURALNETWORKS_UNEXPECTED_NULL if either compilation or alignment is NULL. ANEURALNETWORKS_BAD_STATE if the compilation has not been finished. ANEURALNETWORKS_BAD_DATA if the index is out of range.

ANeuralNetworksCompilation_getPreferredMemoryAlignmentForOutput

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_getPreferredMemoryAlignmentForOutput(
  const ANeuralNetworksCompilation *compilation,
  uint32_t index,
  uint32_t *alignment
)

Get the preferred buffer and memory alignment of an output to an execution created from a particular compilation.

The user may use the returned alignment value to guide the layout of the output buffer or memory pool. To achieve the best performance, make sure the address of the buffer passed in ANeuralNetworksExecution_setOutput, or the offset value passed in ANeuralNetworksExecution_setOutputFromMemory, is a multiple of the preferred alignment value of the same output. A driver may choose to allocate a separate buffer and do memory copying if the provided buffer or memory does not satisfy the preferred alignment.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 5.

Details
Parameters
compilation
The compilation object. It must already have been finished by calling ANeuralNetworksCompilation_finish.
index
The index of the output argument we are referencing from the compilation. It is an index into the outputs list passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
alignment
The returned preferred alignment in bytes. It will be a power of 2.
Returns
ANEURALNETWORKS_NO_ERROR if successful. ANEURALNETWORKS_UNEXPECTED_NULL if either compilation or alignment is NULL. ANEURALNETWORKS_BAD_STATE if the compilation has not been finished. ANEURALNETWORKS_BAD_DATA if the index is out of range.

ANeuralNetworksCompilation_getPreferredMemoryPaddingForInput

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_getPreferredMemoryPaddingForInput(
  const ANeuralNetworksCompilation *compilation,
  uint32_t index,
  uint32_t *padding
)

Get the preferred buffer and memory end padding of an input to an execution created from a particular compilation.

The user may use the returned padding value to guide the layout of the input buffer or memory pool. To achieve the best performance, make sure the length value passed in ANeuralNetworksExecution_setInput or ANeuralNetworksExecution_setInputFromMemory is greater than or equal to the raw size of the input (i.e. the size of an element multiplied by the number of elements) rounded up to a multiple of the preferred padding value of the same input. A driver may choose to allocate a separate buffer and do memory copying if the provided buffer or memory value does not satisfy the preferred padding.

See ANeuralNetworksCompilation for information on multithreaded usage. See ANeuralNetworksExecution_enableInputAndOutputPadding, ANeuralNetworksExecution_setInput, and ANeuralNetworksExecution_setInputFromMemory for information on passing input buffer or memory padding to the driver.

Available since NNAPI feature level 5.

Details
Parameters
compilation
The compilation object. It must already have been finished by calling ANeuralNetworksCompilation_finish.
index
The index of the input argument we are referencing from the compilation. It is an index into the inputs list passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
padding
The returned preferred padding in bytes. It will be a power of 2.
Returns
ANEURALNETWORKS_NO_ERROR if successful. ANEURALNETWORKS_UNEXPECTED_NULL if either compilation or padding is NULL. ANEURALNETWORKS_BAD_STATE if the compilation has not been finished. ANEURALNETWORKS_BAD_DATA if the index is out of range.

ANeuralNetworksCompilation_getPreferredMemoryPaddingForOutput

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_getPreferredMemoryPaddingForOutput(
  const ANeuralNetworksCompilation *compilation,
  uint32_t index,
  uint32_t *padding
)

Get the preferred buffer and memory end padding of an output to an execution created from a particular compilation.

The user may use the returned padding value to guide the layout of the output buffer or memory pool. To achieve the best performance, make sure the length value passed in ANeuralNetworksExecution_setOutput or ANeuralNetworksExecution_setOutputFromMemory is greater than or equal to the raw size of the output (i.e. the size of an element multiplied by the number of elements) rounded up to a multiple of the preferred padding value of the same output. A driver may choose to allocate a separate buffer and do memory copying if the provided buffer or memory value does not satisfy the preferred padding.

See ANeuralNetworksCompilation for information on multithreaded usage. See ANeuralNetworksExecution_enableInputAndOutputPadding, ANeuralNetworksExecution_setOutput, and ANeuralNetworksExecution_setOutputFromMemory for information on passing output buffer or memory padding to the driver.

Available since NNAPI feature level 5.

Details
Parameters
compilation
The compilation object. It must already have been finished by calling ANeuralNetworksCompilation_finish.
index
The index of the output argument we are referencing from the compilation. It is an index into the outputs list passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
padding
The returned preferred padding in bytes. It will be a power of 2.
Returns
ANEURALNETWORKS_NO_ERROR if successful. ANEURALNETWORKS_UNEXPECTED_NULL if either compilation or padding is NULL. ANEURALNETWORKS_BAD_STATE if the compilation has not been finished. ANEURALNETWORKS_BAD_DATA if the index is out of range.

ANeuralNetworksCompilation_setCaching

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_setCaching(
  ANeuralNetworksCompilation *compilation,
  const char *cacheDir,
  const uint8_t *token
)

Sets the compilation caching signature and the cache directory.

Provides optional caching information to the runtime for faster repeated compilation.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 3.

Details
Parameters
compilation
The compilation to be modified.
cacheDir
The cache directory for the runtime to store and retrieve caching data. It is recommended to use the code cache directory provided by the Android runtime. If not using the code cache directory, the user should choose a directory local to the application, and is responsible for managing the cache entries.
token
The token provided by the user to specify a model must be of length ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN. The user should ensure that the token is unique to a model within the application. The NNAPI runtime cannot detect token collisions; a collision will result in a failed execution or in a successful execution that produces incorrect output values.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksCompilation_setPreference

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_setPreference(
  ANeuralNetworksCompilation *compilation,
  int32_t preference
)

Sets the execution preference.

Provides guidance to the runtime when trade-offs are possible. By default the runtime uses ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
compilation
The compilation to be modified.
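preference
The execution preference to apply. Must be one of ANEURALNETWORKS_PREFER_LOW_POWER, ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, or ANEURALNETWORKS_PREFER_SUSTAINED_SPEED.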
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksCompilation_setPriority

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_setPriority(
  ANeuralNetworksCompilation *compilation,
  int priority
)

Set the execution priority.

Execution priorities are relative to other executions created by the same application (specifically same uid) for the same device. Specifically, priorities of executions from one application will not affect executions from another application. Similarly, priorities of executions on one device will not affect executions on another device.

Higher priority executions may use more compute resources than lower priority executions, and may preempt or starve lower priority executions.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
compilation
The compilation to be modified.
priority
The relative priority of the execution compared to other executions created by the application. Must be one of ANEURALNETWORKS_PRIORITY_*.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksCompilation_setTimeout

Declared in android/NeuralNetworks.h
int ANeuralNetworksCompilation_setTimeout(
  ANeuralNetworksCompilation *compilation,
  uint64_t duration
)

Set the maximum expected duration for compiling the model.

If the device is not able to complete the compilation within the specified duration, the compilation may be aborted. The timeout duration begins at the call to ANeuralNetworksCompilation_finish.

This timeout duration acts as a hint to drivers, and can be used to both free up compute resources within the driver and return control back to the application quicker than is possible without the hint. It enables drivers that are able to estimate how long a compilation will take to abort the compilation before it has even started if the driver believes the compilation cannot be completed within the timeout duration. Similarly, it enables drivers to abort an ongoing compilation if it is taking too long. However, this call does not guarantee that the compilation will complete or abort within the timeout duration.

By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called), the timeout duration for compiling the model is considered infinite.

The ANeuralNetworksCompilation must have been created with ANeuralNetworksCompilation_createForDevices with numDevices = 1, otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than ANEURALNETWORKS_FEATURE_LEVEL_4, then the timeout duration hint will be ignored.

See ANeuralNetworksCompilation for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
compilation
The compilation to be modified.
duration
The maximum amount of time in nanoseconds that is expected to be spent finishing a compilation. If this duration is exceeded, the compilation may be aborted. If set to 0, the timeout duration is considered infinite.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksDevice_getFeatureLevel

Declared in android/NeuralNetworks.h
int ANeuralNetworksDevice_getFeatureLevel(
  const ANeuralNetworksDevice *device,
  int64_t *featureLevel
)

Get the NNAPI feature level of the specified NNAPI device.

Each device has a supported feature level, which is the most advanced NNAPI specification and features this driver implements. For example, if the driver implements the features introduced in ANEURALNETWORKS_FEATURE_LEVEL_2, but does not implement the features introduced after ANEURALNETWORKS_FEATURE_LEVEL_2, the value would be ANEURALNETWORKS_FEATURE_LEVEL_2. Developers could decide whether or not the specified device should be used for a model that has certain feature requirements.

NNAPI device feature level is closely related to NNAPI runtime feature level (ANeuralNetworks_getRuntimeFeatureLevel), which indicates an NNAPI runtime feature level (the most advanced NNAPI specification and features that the runtime implements). An NNAPI device feature level is always less than or equal to the runtime feature level.

This function produces a FeatureLevelCode enum value, NOT an Android API level.

Available since NNAPI feature level 3.

Details
Parameters
device
The representation of the specified device.
featureLevel
FeatureLevelCode of the most advanced feature this driver implements.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksDevice_getName

Declared in android/NeuralNetworks.h
int ANeuralNetworksDevice_getName(
  const ANeuralNetworksDevice *device,
  const char **name
)

Get the name of the specified device.

Available since NNAPI feature level 3.

Details
Parameters
device
The representation of the specified device.
name
The returned name of the specified device. The name will be in UTF-8 and will be null-terminated. It will be recognizable as a known device name rather than a cryptic string. For devices with feature level reported by ANeuralNetworksDevice_getFeatureLevel that is ANEURALNETWORKS_FEATURE_LEVEL_3 and higher, the format of the name is {VENDOR}-{DEVICE}. For devices with feature level ANEURALNETWORKS_FEATURE_LEVEL_2 or lower, the format of the name is undefined. The name will remain valid for the duration of the application.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksDevice_getType

Declared in android/NeuralNetworks.h
int ANeuralNetworksDevice_getType(
  const ANeuralNetworksDevice *device,
  int32_t *type
)

Get the type of a given device.

The device type can be used to help application developers to distribute Machine Learning workloads and other workloads such as graphical rendering. E.g., for an app which renders AR scenes based on real time object detection results, the developer could choose an ACCELERATOR type device for ML workloads, and reserve GPU for graphical rendering.

Available since NNAPI feature level 3.

Details
Parameters
device
The representation of the specified device.
type
The returned DeviceTypeCode of the specified device.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksDevice_getVersion

Declared in android/NeuralNetworks.h
int ANeuralNetworksDevice_getVersion(
  const ANeuralNetworksDevice *device,
  const char **version
)

Get the version of the driver implementation of the specified device.

It’s the responsibility of the driver implementor to ensure that this version string uniquely distinguishes this implementation from all previous implementations.

This version string must not be confused with the feature level which is solely defined by ANeuralNetworksDevice_getFeatureLevel. There is no implicit ordering of the versions. For example, it is not possible to filter all drivers older than a certain version.

Application developers may use this version string to avoid or prefer specific driver implementations. For example, an application may want to do so because:

  • A specific version of the driver does not provide the required performance, perhaps because of a performance regression.
  • A specific version of the driver has a bug or returns results that don’t match the minimum precision requirement for the application.

Available since NNAPI feature level 3.

Details
Parameters
device
The representation of the specified device.
version
The returned version string of the driver for the specified device. The string will be in UTF-8 and will be null-terminated. For devices with feature level 28 or lower, "UNKNOWN" will be returned. The version string will remain valid for the duration of the application.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksDevice_wait

Declared in android/NeuralNetworks.h
int ANeuralNetworksDevice_wait(
  const ANeuralNetworksDevice *device
)

Wait until the device is in a live state.

A device may encounter internal errors and temporarily enter a dead state. A call that uses a device in such a state will return with the error ANEURALNETWORKS_DEAD_OBJECT. ANeuralNetworksDevice_wait will block until the device is in a live state.

Available since NNAPI feature level 4.

Details
Parameters
device
The representation of the specified device.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksEvent_createFromSyncFenceFd

Declared in android/NeuralNetworks.h
int ANeuralNetworksEvent_createFromSyncFenceFd(
  int sync_fence_fd,
  ANeuralNetworksEvent **event
)

Create an ANeuralNetworksEvent from a sync_fence file descriptor.

The newly created ANeuralNetworksEvent does not take ownership of the provided sync_fence_fd; instead, it will dup the provided sync_fence_fd and own the duplicate.

Available since NNAPI feature level 4.

Details
Parameters
sync_fence_fd
The sync_fence file descriptor.
event
The newly created object or NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksEvent_free

Declared in android/NeuralNetworks.h
void ANeuralNetworksEvent_free(
  ANeuralNetworksEvent *event
)

Destroys the event.

See ANeuralNetworksExecution for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
event
The event object to be destroyed. Passing NULL is acceptable and results in no operation.

ANeuralNetworksEvent_getSyncFenceFd

Declared in android/NeuralNetworks.h
int ANeuralNetworksEvent_getSyncFenceFd(
  const ANeuralNetworksEvent *event,
  int *sync_fence_fd
)

Get sync_fence file descriptor from the event.

If the ANeuralNetworksEvent is not backed by a sync fence, the sync_fence_fd will be set to -1, and ANEURALNETWORKS_BAD_DATA will be returned.

See ANeuralNetworksEvent_createFromSyncFenceFd and ANeuralNetworksExecution_startComputeWithDependencies to see how to create an event backed by a sync fence.

The user takes ownership of the returned fd, and must close the returned file descriptor when it is no longer needed.

Available since NNAPI feature level 4.

Details
Parameters
event
An event that is backed by a sync fence.
sync_fence_fd
The sync_fence file descriptor. The file descriptor will be set to -1 if there is an error.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksEvent_wait

Declared in android/NeuralNetworks.h
int ANeuralNetworksEvent_wait(
  ANeuralNetworksEvent *event
)

Waits until the execution completes.

More than one thread can wait on an event. When the execution completes, all threads will be released.

If ANeuralNetworksExecution_setTimeout was called on the execution corresponding to this event, and the execution is not able to complete before the duration is exceeded, the execution may be aborted, in which case ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned here.

If the execution contains an ANEURALNETWORKS_WHILE operation, and the condition model does not output false within the loop timeout duration, the execution will be aborted, and ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned here.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
event
The event that will be signaled on completion.
Returns
ANEURALNETWORKS_NO_ERROR if the execution completed normally. ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory cannot be properly mapped.

ANeuralNetworksExecution_burstCompute

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_burstCompute(
  ANeuralNetworksExecution *execution,
  ANeuralNetworksBurst *burst
)

Schedule synchronous evaluation of the execution on a burst object.

Schedules synchronous evaluation of the execution. Returns once the execution has completed and the outputs are ready to be consumed.

If ANeuralNetworksExecution_setTimeout was called on the execution, and the execution is not able to complete before the timeout duration is exceeded, then execution may be aborted, in which case ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned.

If the execution contains an ANEURALNETWORKS_WHILE operation, and the condition model does not output false within the loop timeout duration, then execution will be aborted and ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned. If the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than ANEURALNETWORKS_FEATURE_LEVEL_4, then the timeout duration hint will be ignored.

There must be at most one ANeuralNetworksExecution processing at any given time for any given burst object. Any ANeuralNetworksExecution launched before the previous has finished will result in ANEURALNETWORKS_BAD_STATE.

Before NNAPI feature level 5, this function may only be invoked when the execution is in the preparation state. Starting at NNAPI feature level 5, if the user sets the execution to be reusable by ANeuralNetworksExecution_setReusable, this function may also be invoked when the execution is in the completed state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

See ANeuralNetworksExecution_compute for synchronous execution. See ANeuralNetworksExecution_startCompute for regular asynchronous execution. See ANeuralNetworksExecution_startComputeWithDependencies for asynchronous execution with dependencies.

Available since NNAPI feature level 3.

Details
Parameters
burst
The burst object to execute on.
execution
The execution to be scheduled and executed. The execution must be created from the same ANeuralNetworksCompilation as the burst object.
Returns
ANEURALNETWORKS_NO_ERROR if the execution completed normally.

ANeuralNetworksExecution_compute

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_compute(
  ANeuralNetworksExecution *execution
)

Schedule synchronous evaluation of the execution.

Schedules synchronous evaluation of the execution. Returns once the execution has completed and the outputs are ready to be consumed.

If ANeuralNetworksExecution_setTimeout was called on this execution, and the execution is not able to complete before the timeout duration is exceeded, then execution may be aborted, in which case ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned. If the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than 30, then the timeout duration hint will be ignored.

If this execution contains an ANEURALNETWORKS_WHILE operation, and the condition model does not output false within the loop timeout duration, then execution will be aborted and ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned.

Before NNAPI feature level 5, this function may only be invoked when the execution is in the preparation state. Starting at NNAPI feature level 5, if the user sets the execution to be reusable by ANeuralNetworksExecution_setReusable, this function may also be invoked when the execution is in the completed state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

See ANeuralNetworksExecution_burstCompute for burst synchronous execution. See ANeuralNetworksExecution_startCompute for regular asynchronous execution. See ANeuralNetworksExecution_startComputeWithDependencies for asynchronous execution with dependencies.

Available since NNAPI feature level 3.

Details
Parameters
execution
The execution to be scheduled and executed.
Returns
ANEURALNETWORKS_NO_ERROR if the execution completed normally. ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory cannot be properly mapped.

ANeuralNetworksExecution_create

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_create(
  ANeuralNetworksCompilation *compilation,
  ANeuralNetworksExecution **execution
)

Create an ANeuralNetworksExecution to apply the given compilation.

This only creates the object. Computation is only performed once ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, ANeuralNetworksExecution_startCompute or ANeuralNetworksExecution_startComputeWithDependencies is invoked.

The provided compilation must outlive the execution.

See ANeuralNetworksExecution for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
compilation
The ANeuralNetworksCompilation to be evaluated.
execution
The newly created object or NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the compilation is invalid.

ANeuralNetworksExecution_enableInputAndOutputPadding

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_enableInputAndOutputPadding(
  ANeuralNetworksExecution *execution,
  bool enable
)

Specifies whether the ANeuralNetworksExecution is able to accept padded input and output buffers and memory objects.

By default, the input and output buffers and memory objects of ANeuralNetworksExecution do not allow padding.

Setting the execution to accept padded input and output buffers and memory objects enables the length argument of ANeuralNetworksExecution_setInput, ANeuralNetworksExecution_setInputFromMemory, ANeuralNetworksExecution_setOutput, and ANeuralNetworksExecution_setOutputFromMemory to be greater than the raw size of the operand (i.e. the size of an element multiplied by the number of elements). The extra bytes at the end of the buffer or memory region may be used by the driver to access data in chunks, for efficiency.

This method must not be called after ANeuralNetworksExecution_setInput, ANeuralNetworksExecution_setInputFromMemory, ANeuralNetworksExecution_setOutput, or ANeuralNetworksExecution_setOutputFromMemory.

See ANeuralNetworksExecution for information on multithreaded usage.

Available since NNAPI feature level 5.

Details
Parameters
execution
The execution to be modified.
enable
'true' if the execution is to be able to accept padded input and output buffers and memory objects, 'false' if not.
Returns
ANEURALNETWORKS_NO_ERROR if successful. ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL. ANEURALNETWORKS_BAD_STATE if ANeuralNetworksExecution_setInput, ANeuralNetworksExecution_setInputFromMemory, ANeuralNetworksExecution_setOutput, or ANeuralNetworksExecution_setOutputFromMemory has been called on the execution.

ANeuralNetworksExecution_free

Declared in android/NeuralNetworks.h
void ANeuralNetworksExecution_free(
  ANeuralNetworksExecution *execution
)

Destroy an execution.

The execution need not have been scheduled by a call to ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, ANeuralNetworksExecution_startCompute or ANeuralNetworksExecution_startComputeWithDependencies; but if it has been scheduled, then the application must not call ANeuralNetworksExecution_free until the execution has completed (i.e., ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, or ANeuralNetworksEvent_wait has returned).

See ANeuralNetworksExecution for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
execution
The execution to be destroyed. Passing NULL is acceptable and results in no operation.

ANeuralNetworksExecution_getDuration

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_getDuration(
  const ANeuralNetworksExecution *execution,
  int32_t durationCode,
  uint64_t *duration
)

Get the time spent in the latest computation evaluated on the specified ANeuralNetworksExecution, in nanoseconds.

This function may only be invoked when the execution is in the completed state.

See ANeuralNetworksExecution for information on execution states.

Available since NNAPI feature level 3.

Details
Parameters
execution
The execution to be queried.
durationCode
The measurement to be queried, specified by DurationCode.
duration
The returned duration. If no measurement was requested by ANeuralNetworksExecution_setMeasureTiming, if the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than ANEURALNETWORKS_FEATURE_LEVEL_3, or for some other reason the duration is not available, UINT64_MAX will be returned. A particular device need not support any given measurement.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksExecution_getOutputOperandDimensions

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_getOutputOperandDimensions(
  ANeuralNetworksExecution *execution,
  int32_t index,
  uint32_t *dimensions
)

Get the dimensional information of the specified output operand of the model of the latest computation evaluated on ANeuralNetworksExecution.

The target output operand cannot be a scalar.

This function may only be invoked when the execution is in the completed state.

See ANeuralNetworksExecution for information on execution states.

Available since NNAPI feature level 3.

Details
Parameters
execution
The execution to be queried.
index
The index of the output argument we are querying. It is an index into the lists passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
dimensions
The dimension array to be filled. The size of the array must be exactly as large as the rank of the output operand to be queried in the model.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index is invalid or if the target is a scalar.

ANeuralNetworksExecution_getOutputOperandRank

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_getOutputOperandRank(
  ANeuralNetworksExecution *execution,
  int32_t index,
  uint32_t *rank
)

Get the rank (number of dimensions) of the specified output operand of the model of the latest computation evaluated on ANeuralNetworksExecution.

This function may only be invoked when the execution is in the completed state.

See ANeuralNetworksExecution for information on execution states.

Available since NNAPI feature level 3.

Details
Parameters
execution
The execution to be queried.
index
The index of the output argument we are querying. It is an index into the lists passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
rank
The rank of the output operand.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index is invalid.

ANeuralNetworksExecution_setInput

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setInput(
  ANeuralNetworksExecution *execution,
  int32_t index,
  const ANeuralNetworksOperandType *type,
  const void *buffer,
  size_t length
)

Associate a user buffer with an input of the model of the ANeuralNetworksExecution.

Evaluation of the execution must not have been scheduled. Once evaluation of the execution has been scheduled, the application must not change the content of the buffer until the execution has completed. Evaluation of the execution will not change the content of the buffer.

The provided buffer must outlive the execution.

If the input is optional, you can indicate that it is omitted by passing nullptr for buffer and 0 for length.

Otherwise, if the user has not set the execution to accept padded input buffers by calling ANeuralNetworksExecution_enableInputAndOutputPadding, then the length argument must be equal to the raw size of the input (i.e. the size of an element multiplied by the number of elements). Passing a length argument with value not equal to the raw size of the input will result in ANEURALNETWORKS_BAD_DATA.

Otherwise, if the user has set the execution to accept padded input buffers by calling ANeuralNetworksExecution_enableInputAndOutputPadding, the length argument may be greater than the raw size of the input, and the extra bytes at the end of the buffer may be used by the driver to access data in chunks, for efficiency. Passing a length argument with value less than the raw size of the input will result in ANEURALNETWORKS_BAD_DATA.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage. See ANeuralNetworksCompilation_getPreferredMemoryAlignmentForInput and ANeuralNetworksCompilation_getPreferredMemoryPaddingForInput for information on getting preferred buffer alignment and padding, to improve performance.

Available since NNAPI feature level 1.

Details
Parameters
execution
The execution to be modified.
index
The index of the input argument we are setting. It is an index into the lists passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
type
The ANeuralNetworksOperandType of the operand. Unless the input is omitted, this should be used to specify the dimensions that were left unspecified when the operand was added to the model. All other properties of the type must be the same as specified in the model. If the type is the same as specified when the model was built, NULL can be passed. Neither the ANeuralNetworksOperandType nor the dimensions it points to need to outlive the call to ANeuralNetworksExecution_setInput.
buffer
The buffer containing the data.
length
The size of the data value in bytes plus any end padding.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the index is not recognized or the buffer is too small for the input.

ANeuralNetworksExecution_setInputFromMemory

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setInputFromMemory(
  ANeuralNetworksExecution *execution,
  int32_t index,
  const ANeuralNetworksOperandType *type,
  const ANeuralNetworksMemory *memory,
  size_t offset,
  size_t length
)

Associate a region of a memory object with an input of the model of the ANeuralNetworksExecution.

Evaluation of the execution must not have been scheduled. Once evaluation of the execution has been scheduled, the application must not change the content of the region until the execution has completed. Evaluation of the execution will not change the content of the region.

The provided memory must outlive the execution.

If the input is optional, you can indicate that it is omitted by using ANeuralNetworksExecution_setInput instead, passing nullptr for buffer and 0 for length.

If the memory is an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB created from ANeuralNetworksMemory_createFromAHardwareBuffer, or an opaque memory object created from ANeuralNetworksMemory_createFromDesc, both offset and length must be 0, indicating the whole memory is used.

Otherwise, if the user has not set the execution to accept padded input memory objects by calling ANeuralNetworksExecution_enableInputAndOutputPadding, then the length argument must be equal to the raw size of the input (i.e. the size of an element multiplied by the number of elements). Passing a length argument with value not equal to the raw size of the input will result in ANEURALNETWORKS_BAD_DATA.

Otherwise, if the user has set the execution to accept padded input memory objects by calling ANeuralNetworksExecution_enableInputAndOutputPadding, the length argument may be greater than the raw size of the input, and the extra bytes at the end of the memory region may be used by the driver to access data in chunks, for efficiency. Passing a length argument with value less than the raw size of the input will result in ANEURALNETWORKS_BAD_DATA.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage. See ANeuralNetworksMemory_createFromAHardwareBuffer for information on AHardwareBuffer usage. See ANeuralNetworksMemory_createFromDesc for information on usage of memory objects created from memory descriptors. See ANeuralNetworksCompilation_getPreferredMemoryAlignmentForInput and ANeuralNetworksCompilation_getPreferredMemoryPaddingForInput for information on getting preferred memory alignment and padding, to improve performance.

Available since NNAPI feature level 1.

Details
Parameters
execution
The execution to be modified.
index
The index of the input argument we are setting. It is an index into the lists passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
type
The ANeuralNetworksOperandType of the operand. This should be used to specify the dimensions that were left unspecified when the operand was added to the model. All other properties of the type must be the same as specified in the model. If the type is the same as specified when the model was built, NULL can be passed. Neither the ANeuralNetworksOperandType nor the dimensions it points to need to outlive the call to ANeuralNetworksExecution_setInputFromMemory.
memory
The memory containing the data.
offset
This specifies the location of the data within the memory. The offset is in bytes from the start of memory.
length
The size of the data value in bytes plus any end padding.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the index is not recognized or the memory region is too small for the input.

ANeuralNetworksExecution_setLoopTimeout

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setLoopTimeout(
  ANeuralNetworksExecution *execution,
  uint64_t duration
)

Set the maximum duration of WHILE loops in the specified execution.

This is a fuzzy per-loop timeout intended to prevent infinite loops.

If a WHILE loop condition model does not output false within the specified duration, the execution will be aborted.

See ANeuralNetworks_getDefaultLoopTimeout and ANeuralNetworks_getMaximumLoopTimeout for the default and maximum timeout values.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
execution
The execution to be modified.
duration
The maximum amount of time in nanoseconds that can be spent executing a WHILE loop. If the specified duration value exceeds the value produced by ANeuralNetworks_getMaximumLoopTimeout, it will be overridden by that value.
Returns
ANEURALNETWORKS_NO_ERROR if successful. ANEURALNETWORKS_BAD_STATE if execution has started. ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.

ANeuralNetworksExecution_setMeasureTiming

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setMeasureTiming(
  ANeuralNetworksExecution *execution,
  bool measure
)

Specifies whether duration of the ANeuralNetworksExecution is to be measured.

Evaluation of the execution must not have been scheduled.

By default, duration is not measured.

The ANeuralNetworksExecution must have been created from an ANeuralNetworksCompilation which in turn was created from ANeuralNetworksCompilation_createForDevices with numDevices = 1. If the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than ANEURALNETWORKS_FEATURE_LEVEL_3, then the duration will not be measured.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

Available since NNAPI feature level 3.

Details
Parameters
execution
The execution to be modified.
measure
'true' if duration is to be measured, 'false' if not.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksExecution_setOutput

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setOutput(
  ANeuralNetworksExecution *execution,
  int32_t index,
  const ANeuralNetworksOperandType *type,
  void *buffer,
  size_t length
)

Associate a user buffer with an output of the model of the ANeuralNetworksExecution.

Evaluation of the execution must not have been scheduled. Once evaluation of the execution has been scheduled, the application must not change the content of the buffer until the execution has completed.

The provided buffer must outlive the execution.

If the output is optional, you can indicate that it is omitted by passing nullptr for buffer and 0 for length.

Otherwise, if the user has not set the execution to accept padded output buffers by calling ANeuralNetworksExecution_enableInputAndOutputPadding, then the length argument must be equal to the raw size of the output (i.e. the size of an element multiplied by the number of elements). Passing a length argument with value not equal to the raw size of the output will result in ANEURALNETWORKS_BAD_DATA.

Otherwise, if the user has set the execution to accept padded output buffers by calling ANeuralNetworksExecution_enableInputAndOutputPadding, the length argument may be greater than the raw size of the output, and the extra bytes at the end of the buffer may be used by the driver to access data in chunks, for efficiency. Passing a length argument with value less than the raw size of the output will result in ANEURALNETWORKS_BAD_DATA.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage. See ANeuralNetworksCompilation_getPreferredMemoryAlignmentForOutput and ANeuralNetworksCompilation_getPreferredMemoryPaddingForOutput for information on getting preferred buffer alignment and padding, to improve performance.

Available since NNAPI feature level 1.

Details
Parameters
execution
The execution to be modified.
index
The index of the output argument we are setting. It is an index into the lists passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
type
The ANeuralNetworksOperandType of the operand. Unless the output is omitted, this should be used to specify the dimensions that were left unspecified when the operand was added to the model. All other properties of the type must be the same as specified in the model. If the type is the same as specified when the model was built, NULL can be passed. Neither the ANeuralNetworksOperandType nor the dimensions it points to need to outlive the call to ANeuralNetworksExecution_setOutput. Since NNAPI feature level 3, the output operand can have unspecified dimensions or rank to be deduced dynamically during the execution. However, the user must provide a large enough buffer. The user can retrieve the output dimensional information after the execution by ANeuralNetworksExecution_getOutputOperandRank and ANeuralNetworksExecution_getOutputOperandDimensions.
buffer
The buffer where the data is to be written.
length
The size of the data value in bytes plus any end padding.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the index is not recognized or the buffer is too small for the output.

ANeuralNetworksExecution_setOutputFromMemory

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setOutputFromMemory(
  ANeuralNetworksExecution *execution,
  int32_t index,
  const ANeuralNetworksOperandType *type,
  const ANeuralNetworksMemory *memory,
  size_t offset,
  size_t length
)

Associate a region of a memory object with an output of the model of the ANeuralNetworksExecution.

Evaluation of the execution must not have been scheduled. Once evaluation of the execution has been scheduled, the application must not change the content of the region until the execution has completed.

The provided memory must outlive the execution.

If the output is optional, you can indicate that it is omitted by using ANeuralNetworksExecution_setOutput instead, passing nullptr for buffer and 0 for length.

If the memory is an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB created from ANeuralNetworksMemory_createFromAHardwareBuffer, or an opaque memory object created from ANeuralNetworksMemory_createFromDesc, both offset and length must be 0, indicating the whole memory is used.

Otherwise, if the user has not set the execution to accept padded output memory objects by calling ANeuralNetworksExecution_enableInputAndOutputPadding, then the length argument must be equal to the raw size of the output (i.e. the size of an element multiplied by the number of elements). Passing a length argument with value not equal to the raw size of the output will result in ANEURALNETWORKS_BAD_DATA.

Otherwise, if the user has set the execution to accept padded output memory objects by calling ANeuralNetworksExecution_enableInputAndOutputPadding, the length argument may be greater than the raw size of the output, and the extra bytes at the end of the memory region may be used by the driver to access data in chunks, for efficiency. Passing a length argument with value less than the raw size of the output will result in ANEURALNETWORKS_BAD_DATA.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage. See ANeuralNetworksMemory_createFromAHardwareBuffer for information on AHardwareBuffer usage. See ANeuralNetworksMemory_createFromDesc for information on usage of memory objects created from memory descriptors. See ANeuralNetworksCompilation_getPreferredMemoryAlignmentForOutput and ANeuralNetworksCompilation_getPreferredMemoryPaddingForOutput for information on getting preferred memory alignment and padding, to improve performance.

Available since NNAPI feature level 1.

Details
Parameters
execution
The execution to be modified.
index
The index of the output argument we are setting. It is an index into the lists passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
type
The ANeuralNetworksOperandType of the operand. This should be used to specify the dimensions that were left unspecified when the operand was added to the model. All other properties of the type must be the same as specified in the model. If the type is the same as specified when the model was built, NULL can be passed. Neither the ANeuralNetworksOperandType nor the dimensions it points to need to outlive the call to ANeuralNetworksExecution_setOutputFromMemory. Since NNAPI feature level 3, the output operand can have unspecified dimensions or rank to be deduced dynamically during the execution. However, the user must provide a large enough memory. The user can retrieve the output dimensional information after the execution by ANeuralNetworksExecution_getOutputOperandRank and ANeuralNetworksExecution_getOutputOperandDimensions.
memory
The memory where the data is to be stored.
offset
This specifies the location of the data within the memory. The offset is in bytes from the start of memory.
length
The size of the data value in bytes plus any end padding.
Returns
ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the index is not recognized or the memory region is too small for the output.

ANeuralNetworksExecution_setReusable

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setReusable(
  ANeuralNetworksExecution *execution,
  bool reusable
)

Specifies whether the ANeuralNetworksExecution can be reused for multiple computations.

By default, the ANeuralNetworksExecution is not reusable.

Setting the execution to be reusable enables multiple computations to be scheduled and evaluated on the same execution sequentially, either by means of ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, ANeuralNetworksExecution_startCompute or ANeuralNetworksExecution_startComputeWithDependencies: The application may schedule and evaluate a computation again from the completed state of a reusable execution.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

Available since NNAPI feature level 5.

Details
Parameters
execution
The execution to be modified.
reusable
'true' if the execution is to be reusable, 'false' if not.
Returns
ANEURALNETWORKS_NO_ERROR if successful. ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL. ANEURALNETWORKS_BAD_STATE if the execution is not in the preparation state.

ANeuralNetworksExecution_setTimeout

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_setTimeout(
  ANeuralNetworksExecution *execution,
  uint64_t duration
)

Set the maximum expected duration of the specified execution.

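If the device is not able to complete the execution within the specified duration, the execution may be aborted. The timeout duration begins at a call to one of: ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, ANeuralNetworksExecution_startCompute, or ANeuralNetworksExecution_startComputeWithDependencies.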

This timeout duration acts as a hint to drivers, and can be used to both free up compute resources within the driver and return control back to the application quicker than is possible without the hint. It enables drivers that are able to estimate how long an execution will take to abort the execution before it has even started if the driver believes the execution cannot be completed within the timeout duration. Similarly, it enables drivers to abort an ongoing execution if it is taking too long. However, this call does not guarantee that the execution will complete or abort within the timeout duration.

By default (i.e., unless ANeuralNetworksExecution_setTimeout is called), the timeout duration for execution is considered infinite.

The ANeuralNetworksExecution must have been created from an ANeuralNetworksCompilation which in turn was created from ANeuralNetworksCompilation_createForDevices with numDevices = 1, otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than ANEURALNETWORKS_FEATURE_LEVEL_4, then the timeout duration hint will be ignored.

This function may only be invoked when the execution is in the preparation state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
execution
The execution to be modified.
duration
The maximum amount of time in nanoseconds that is expected to be spent executing a model. If this duration is exceeded, the execution may be aborted. If set to 0, the timeout duration is considered infinite.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksExecution_startCompute

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_startCompute(
  ANeuralNetworksExecution *execution,
  ANeuralNetworksEvent **event
)

Schedule asynchronous evaluation of the execution.

Schedules asynchronous evaluation of the execution. Once the execution has completed and the outputs are ready to be consumed, the returned event will be signaled. Use ANeuralNetworksEvent_wait to wait for that event.

ANeuralNetworksEvent_wait must be called to recuperate the resources used by the execution.

If ANeuralNetworksExecution_setTimeout was called on this execution, and the execution is not able to complete before the timeout duration is exceeded, then execution may be aborted, in which case ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned through ANeuralNetworksExecution_startCompute or ANeuralNetworksEvent_wait on the event object. If the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than ANEURALNETWORKS_FEATURE_LEVEL_4, then the timeout duration hint will be ignored.

If this execution contains an ANEURALNETWORKS_WHILE operation, and the condition model does not output false within the loop timeout duration, then execution will be aborted and ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned through ANeuralNetworksEvent_wait on the event object.

If the device can detect before the execution has started that the execution will not complete within the timeout duration, the device may choose to skip the execution and instead return ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode.

Before NNAPI feature level 5, this function may only be invoked when the execution is in the preparation state. Starting at NNAPI feature level 5, if the user sets the execution to be reusable by ANeuralNetworksExecution_setReusable, this function may also be invoked when the execution is in the completed state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

See ANeuralNetworksExecution_compute for synchronous execution. See ANeuralNetworksExecution_burstCompute for burst synchronous execution. See ANeuralNetworksExecution_startComputeWithDependencies for asynchronous execution with dependencies.

Available since NNAPI feature level 1.

Details
Parameters
execution
The execution to be scheduled and executed.
event
The event that will be signaled on completion. event is set to NULL if there's an error.
Returns
ANEURALNETWORKS_NO_ERROR if the evaluation is successfully scheduled.

ANeuralNetworksExecution_startComputeWithDependencies

Declared in android/NeuralNetworks.h
int ANeuralNetworksExecution_startComputeWithDependencies(
  ANeuralNetworksExecution *execution,
  const ANeuralNetworksEvent *const *dependencies,
  uint32_t num_dependencies,
  uint64_t duration,
  ANeuralNetworksEvent **event
)

Schedule asynchronous evaluation of the execution with dependencies.

The execution will wait for all the depending events to be signaled before starting the evaluation. Once the execution has completed and the outputs are ready to be consumed, the returned event will be signaled. Depending on which devices are handling the execution, the event could be backed by a sync fence. Use ANeuralNetworksEvent_wait to wait for that event.

ANeuralNetworksEvent_wait must be called to recuperate the resources used by the execution.

If parts of the execution are scheduled on devices that do not support fenced execution, the function call may wait for such parts to finish before returning.

The function will return an error if any of the events in dependencies is already in a bad state. After the execution is scheduled, if any of the events in dependencies does not complete normally, the execution will fail, and ANeuralNetworksEvent_wait on the returned event will return an error.

The function will return an error if any of the execution outputs has a tensor operand type that is not fully specified.

The function can be passed a timeout duration in nanoseconds. This timeout duration acts as a hint to drivers in the same way that the timeout durations in ANeuralNetworksCompilation_setTimeout and ANeuralNetworksExecution_setTimeout act as hints to drivers. The duration begins when all events in dependencies have been signaled, and can be used together with ANeuralNetworksExecution_setTimeout which specifies the maximum timeout duration beginning at the call to ANeuralNetworksExecution_startComputeWithDependencies. If the duration is non-zero, the ANeuralNetworksExecution must have been created from an ANeuralNetworksCompilation which in turn was created from ANeuralNetworksCompilation_createForDevices with numDevices = 1, otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If either the timeout duration from ANeuralNetworksExecution_setTimeout or the timeout duration passed to this call is exceeded, the execution may be aborted, in which case ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned through ANeuralNetworksExecution_startComputeWithDependencies or ANeuralNetworksEvent_wait on the event object. If the device has a feature level reported by ANeuralNetworksDevice_getFeatureLevel that is lower than ANEURALNETWORKS_FEATURE_LEVEL_4, then the timeout duration hints will be ignored.

If this execution contains an ANEURALNETWORKS_WHILE operation, and the condition model does not output false within the loop timeout duration, then execution will be aborted and ANEURALNETWORKS_MISSED_DEADLINE_* ResultCode will be returned through ANeuralNetworksEvent_wait on the event object.

Before NNAPI feature level 5, this function may only be invoked when the execution is in the preparation state. Starting at NNAPI feature level 5, if the user sets the execution to be reusable by ANeuralNetworksExecution_setReusable, this function may also be invoked when the execution is in the completed state.

See ANeuralNetworksExecution for information on execution states and multithreaded usage.

See ANeuralNetworksExecution_compute for synchronous execution. See ANeuralNetworksExecution_burstCompute for burst synchronous execution. See ANeuralNetworksExecution_startCompute for regular asynchronous execution.

Available since NNAPI feature level 4.

Details
Parameters
execution
The execution to be scheduled and executed.
dependencies
A set of depending events. The actual evaluation will not start until all the events are signaled.
num_dependencies
The number of events in the dependencies set.
duration
The maximum amount of time in nanoseconds that is expected to be spent executing the model after all dependencies are signaled. If set to 0, the timeout duration is considered infinite.
event
The event that will be signaled on completion. event is set to NULL if there's an error.
Returns
ANEURALNETWORKS_NO_ERROR if the evaluation is successfully scheduled.

ANeuralNetworksMemoryDesc_addInputRole

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemoryDesc_addInputRole(
  ANeuralNetworksMemoryDesc *desc,
  const ANeuralNetworksCompilation *compilation,
  uint32_t index,
  float frequency
)

Specify that a memory object will be playing the role of an input to an execution created from a particular compilation.

The compilation and the input index fully specify an input operand. This function may be invoked multiple times on the same memory descriptor with different input operands, and the same input operand may be specified on multiple memory descriptors. However, specifying the same input operand on the same memory descriptor more than once will return an error.

The dimensions of the corresponding model operands of all the roles specified by ANeuralNetworksMemoryDesc_addInputRole and ANeuralNetworksMemoryDesc_addOutputRole must be compatible with each other. Two dimensions are incompatible if both ranks are fully specified but have different values, or if there is at least one axis that is fully specified in both but has different values.

At least one of ANeuralNetworksMemoryDesc_addInputRole and ANeuralNetworksMemoryDesc_addOutputRole must be called on a memory descriptor before invoking ANeuralNetworksMemoryDesc_finish.

Attempting to modify a memory descriptor once ANeuralNetworksMemoryDesc_finish has been called will return an error.

See ANeuralNetworksMemoryDesc for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
desc
The memory descriptor to be modified.
compilation
The compilation object. It must already have been finished by calling ANeuralNetworksCompilation_finish, and must outlive the memory descriptor.
index
The index of the input argument we are referencing from the compilation. It is an index into the inputs list passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
frequency
A floating-point value within the range (0.0, 1.0]. Describes how likely the memory is to be used in the specified role. This is provided as a hint to optimize the case when different roles prefer different memory locations or data layouts.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksMemoryDesc_addOutputRole

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemoryDesc_addOutputRole(
  ANeuralNetworksMemoryDesc *desc,
  const ANeuralNetworksCompilation *compilation,
  uint32_t index,
  float frequency
)

Specify that a memory object will be playing the role of an output to an execution created from a particular compilation.

The compilation and the output index fully specify an output operand. This function may be invoked multiple times on the same memory descriptor with different output operands, and the same output operand may be specified on multiple memory descriptors. However, specifying the same output operand on the same memory descriptor object more than once will return an error.

The dimensions of the corresponding model operands of all the roles specified by ANeuralNetworksMemoryDesc_addInputRole and ANeuralNetworksMemoryDesc_addOutputRole must be compatible with each other. Two dimensions are incompatible if both ranks are fully specified but have different values, or if there is at least one axis that is fully specified in both but has different values.

At least one of ANeuralNetworksMemoryDesc_addInputRole and ANeuralNetworksMemoryDesc_addOutputRole must be called on the memory descriptor before invoking ANeuralNetworksMemoryDesc_finish.

Attempting to modify a memory descriptor once ANeuralNetworksMemoryDesc_finish has been called will return an error.

See ANeuralNetworksMemoryDesc for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
desc
The memory descriptor to be modified.
compilation
The compilation object. It must already have been finished by calling ANeuralNetworksCompilation_finish, and must outlive the memory descriptor.
index
The index of the output argument we are referencing from the compilation. It is an index into the outputs list passed to ANeuralNetworksModel_identifyInputsAndOutputs. It is not the index associated with ANeuralNetworksModel_addOperand.
frequency
A floating-point value within the range (0.0, 1.0]. Describes how likely the memory is to be used in the specified role. This is provided as a hint to optimize the case when multiple roles prefer different memory locations or data layouts.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksMemoryDesc_create

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemoryDesc_create(
  ANeuralNetworksMemoryDesc **desc
)

Create an ANeuralNetworksMemoryDesc with no properties.

This only creates the memory descriptor. Its properties should be set with calls to ANeuralNetworksMemoryDesc_addInputRole, ANeuralNetworksMemoryDesc_addOutputRole, and ANeuralNetworksMemoryDesc_setDimensions.

ANeuralNetworksMemoryDesc_finish must be called once all properties have been set.

ANeuralNetworksMemoryDesc_free must be called once the memory descriptor is no longer needed.

Available since NNAPI feature level 4.

Details
Parameters
desc
The ANeuralNetworksMemoryDesc to be created. Set to NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksMemoryDesc_finish

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemoryDesc_finish(
  ANeuralNetworksMemoryDesc *desc
)

Indicate that we have finished modifying a memory descriptor.

Required before calling ANeuralNetworksMemory_createFromDesc.

This function must only be called once for a given memory descriptor.

See ANeuralNetworksMemoryDesc for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
desc
The memory descriptor to be finished.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksMemoryDesc_free

Declared in android/NeuralNetworks.h
void ANeuralNetworksMemoryDesc_free(
  ANeuralNetworksMemoryDesc *desc
)

Destroy a memory descriptor.

The memory descriptor need not have been finished by a call to ANeuralNetworksMemoryDesc_finish.

See ANeuralNetworksMemoryDesc for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
desc
The memory descriptor to be destroyed. Passing NULL is acceptable and results in no operation.

ANeuralNetworksMemoryDesc_setDimensions

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemoryDesc_setDimensions(
  ANeuralNetworksMemoryDesc *desc,
  uint32_t rank,
  const uint32_t *dimensions
)

Set the dimensional information of the memory descriptor.

The specified dimensions must be compatible with the dimensions of the corresponding model operands of all the roles specified by ANeuralNetworksMemoryDesc_addInputRole and ANeuralNetworksMemoryDesc_addOutputRole. Two dimensions are incompatible if both ranks are fully specified but have different values, or if there is at least one axis that is fully specified in both but has different values.

Attempting to modify a memory descriptor once ANeuralNetworksMemoryDesc_finish has been called will return an error.

See ANeuralNetworksMemoryDesc for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
desc
The memory descriptor to be modified.
rank
The number of dimensions. Must be 0 for scalars.
dimensions
An array of dimensions. An entry with the value 0 indicates that the corresponding axis has an unknown size.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksMemory_copy

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemory_copy(
  const ANeuralNetworksMemory *src,
  const ANeuralNetworksMemory *dst
)

Copies data from one memory object to another.

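If at most one of the src and dst is created from ANeuralNetworksMemory_createFromDesc, the src and dst must have the same logical size:
- If the memory is created from ANeuralNetworksMemory_createFromFd, or if it is created from ANeuralNetworksMemory_createFromAHardwareBuffer with a format of AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size of the memory.
- If the memory is created from ANeuralNetworksMemory_createFromAHardwareBuffer with a format other than AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size when there is no padding and the data is tightly packed. This function may fail if the AHardwareBuffer cannot be accessed.
- If the memory is created from ANeuralNetworksMemory_createFromDesc, the logical size equals the size indicated by the OperandCode multiplied by the number of elements. This function will fail if the number of elements is unknown.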

If both src and dst are created from ANeuralNetworksMemory_createFromDesc, they must have compatible dimensions. Two dimensions are incompatible if both ranks are fully specified but have different values, or if there is at least one axis that is fully specified in both but has different values. The dst may have unspecified dimensions or rank. In such a case, the dimensions of dst will get updated according to the dimensions of the src.

In both cases, if the src is created from ANeuralNetworksMemory_createFromDesc, it must have been used as an output in a successful execution, or used as the destination memory in a successful ANeuralNetworksMemory_copy.

The src and dst may have different data layout, in which case the data copying is performed logically with data layout transformation.

Available since NNAPI feature level 4.

Details
Parameters
src
The source memory object.
dst
The destination memory object.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksMemory_createFromAHardwareBuffer

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemory_createFromAHardwareBuffer(
  const AHardwareBuffer *ahwb,
  ANeuralNetworksMemory **memory
)

Creates a shared memory object from an AHardwareBuffer handle.

If the shared memory is backed by an AHardwareBuffer of AHARDWAREBUFFER_FORMAT_BLOB format, it can be used the same way as shared memory created from a file handle. See ANeuralNetworksMemory for a description of how to use this shared memory.

If the shared memory is backed by an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB, it can only be used for model inputs and outputs. When calling ANeuralNetworksExecution_setInputFromMemory or ANeuralNetworksExecution_setOutputFromMemory with the shared memory, both offset and length must be set to zero and the entire memory region will be associated with the specified input or output operand. There is no guarantee that an arbitrary AHardwareBuffer_Format and AHardwareBuffer_UsageFlags combination can be used by arbitrary devices. The execution will fail if the selected set of devices cannot consume the buffer.

Calling ANeuralNetworksModel_setOperandValueFromMemory with shared memory backed by an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB is disallowed.

The provided AHardwareBuffer must outlive the ANeuralNetworksMemory object.

Available since NNAPI feature level 3.

See also: AHardwareBuffer

Details
Parameters
ahwb
The AHardwareBuffer handle.
memory
The memory object to be created. Set to NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if the request completed normally.

ANeuralNetworksMemory_createFromDesc

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemory_createFromDesc(
  const ANeuralNetworksMemoryDesc *desc,
  ANeuralNetworksMemory **memory
)

Creates a memory object from a memory descriptor.

The memory object is created with an uninitialized buffer. A memory object with an uninitialized buffer may only be used according to the roles specified by ANeuralNetworksMemoryDesc_addOutputRole, or as the destination memory in ANeuralNetworksMemory_copy. The buffer of a memory object is initialized after the memory object is used as an output in a successful execution, or used as the destination memory in a successful ANeuralNetworksMemory_copy. A memory object with an initialized buffer may be used according to all roles specified in ANeuralNetworksMemoryDesc, or as the source or destination memory in ANeuralNetworksMemory_copy. The buffer of a memory object will return to the uninitialized state if the memory object is used as an output in a failed execution, or used as the destination memory in a failed ANeuralNetworksMemory_copy.

The dimensions of the memory descriptor are deduced from the dimensions of the corresponding model operands of all the roles specified by ANeuralNetworksMemoryDesc_addInputRole and ANeuralNetworksMemoryDesc_addOutputRole, as well as the dimensions set by the call to ANeuralNetworksMemoryDesc_setDimensions, if any. The memory descriptor may have unspecified dimensions or rank. In such a case, the same memory object may be used with different shapes of outputs in different executions. When the memory is used as an input, the input shape must be the same as the output shape from the last execution using this memory object as an output, or the last ANeuralNetworksMemory_copy using this memory object as the destination memory. Creating a memory object with unspecified dimensions or rank may fail for certain sets of roles.

Using the memory in roles or shapes that are not compatible with the rules specified above will return an error.

When calling ANeuralNetworksExecution_setInputFromMemory or ANeuralNetworksExecution_setOutputFromMemory with the memory object, both offset and length must be set to zero and the entire memory region will be associated with the specified input or output operand.

Calling ANeuralNetworksModel_setOperandValueFromMemory with the memory created from this function will return an error.

ANeuralNetworksMemory_free must be called once the memory is no longer needed.

Attempting to create memory from an unfinished memory descriptor will return an error.

The provided ANeuralNetworksMemoryDesc need not outlive the ANeuralNetworksMemory object.

Available since NNAPI feature level 4.

Details
Parameters
desc
The memory descriptor.
memory
The memory object to be created. Set to NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful; ANEURALNETWORKS_OP_FAILED if the memory is created with unspecified dimensions or rank and it is not supported for this set of roles.

ANeuralNetworksMemory_createFromFd

Declared in android/NeuralNetworks.h
int ANeuralNetworksMemory_createFromFd(
  size_t size,
  int protect,
  int fd,
  size_t offset,
  ANeuralNetworksMemory **memory
)

Creates a shared memory object from a file descriptor.

The shared memory is backed by a file descriptor via mmap. See ANeuralNetworksMemory for a description of how to use this shared memory.

Available since NNAPI feature level 1.

Details
Parameters
size
The requested size in bytes. Must not be larger than the file size.
protect
The desired memory protection for the mapping. It is either PROT_NONE or the bitwise OR of one or more of the following flags: PROT_READ, PROT_WRITE.
fd
The requested file descriptor. The file descriptor has to be mmap-able. The file descriptor will be duplicated.
offset
The offset to the beginning of the file of the area to map. The offset has to be aligned to a page size.
memory
The memory object to be created. Set to NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if the request completed normally.

ANeuralNetworksMemory_free

Declared in android/NeuralNetworks.h
void ANeuralNetworksMemory_free(
  ANeuralNetworksMemory *memory
)

Delete a memory object.

Destroys the object used by the run time to keep track of the memory. This will free the underlying actual memory if no other code has open handles to this memory.

Available since NNAPI feature level 1.

Details
Parameters
memory
The memory object to be freed. Passing NULL is acceptable and results in no operation.

ANeuralNetworksModel_addOperand

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_addOperand(
  ANeuralNetworksModel *model,
  const ANeuralNetworksOperandType *type
)

Add an operand to a model.

The order in which the operands are added is important. The first one added to a model will have the index value 0, the second 1, etc. These indexes are used as operand identifiers in ANeuralNetworksModel_addOperation, ANeuralNetworksModel_identifyInputsAndOutputs, ANeuralNetworksModel_setOperandValue, ANeuralNetworksModel_setOperandValueFromMemory, ANeuralNetworksExecution_setInput, ANeuralNetworksExecution_setInputFromMemory, ANeuralNetworksExecution_setOutput, and ANeuralNetworksExecution_setOutputFromMemory.

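Every operand must be referenced in exactly one of the following ways:
- It is identified as a model input with ANeuralNetworksModel_identifyInputsAndOutputs.
- It is identified as a constant with ANeuralNetworksModel_setOperandValue, ANeuralNetworksModel_setOperandValueFromMemory, or ANeuralNetworksModel_setOperandValueFromModel.
- It is identified as an output of exactly one operation with ANeuralNetworksModel_addOperation.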

An operand that is identified as a model input or as a constant must not also be identified as a model output with ANeuralNetworksModel_identifyInputsAndOutputs.

To build a model that can accommodate inputs of various sizes, as you may want to do for a CNN, leave unspecified the dimensions that will vary at run time. If you do so, fully specify dimensions when calling ANeuralNetworksExecution_setInput or ANeuralNetworksExecution_setInputFromMemory.

Attempting to modify a model once ANeuralNetworksModel_finish has been called will return an error.

See ANeuralNetworksModel for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
model
The model to be modified.
type
The ANeuralNetworksOperandType that describes the shape of the operand. Neither the ANeuralNetworksOperandType nor the dimensions it points to need to outlive the call to ANeuralNetworksModel_addOperand.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_addOperation

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_addOperation(
  ANeuralNetworksModel *model,
  ANeuralNetworksOperationType type,
  uint32_t inputCount,
  const uint32_t *inputs,
  uint32_t outputCount,
  const uint32_t *outputs
)

Add an operation to a model.

The operands specified by inputs and outputs must have been previously added by calls to ANeuralNetworksModel_addOperand.

Details
Parameters
model
The model to be modified.
type
The ANeuralNetworksOperationType of the operation.
inputCount
The number of entries in the inputs array.
inputs
An array of indexes identifying each operand.
outputCount
The number of entries in the outputs array.
outputs
An array of indexes identifying each operand.

Attempting to modify a model once ANeuralNetworksModel_finish has been called will return an error.

See ANeuralNetworksModel for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_create

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_create(
  ANeuralNetworksModel **model
)

Create an empty ANeuralNetworksModel.

This only creates the object. Computation is performed once ANeuralNetworksExecution_burstCompute, ANeuralNetworksExecution_compute, ANeuralNetworksExecution_startCompute or ANeuralNetworksExecution_startComputeWithDependencies is invoked.

The model should be constructed with calls to ANeuralNetworksModel_addOperation and ANeuralNetworksModel_addOperand.

ANeuralNetworksModel_finish should be called once the model has been fully constructed.

ANeuralNetworksModel_free should be called once the model is no longer needed.

Available since NNAPI feature level 1.

Details
Parameters
model
The ANeuralNetworksModel to be created. Set to NULL if unsuccessful.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_finish

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_finish(
  ANeuralNetworksModel *model
)

Indicate that we have finished modifying a model.

Required before calling ANeuralNetworksCompilation_create and ANeuralNetworksCompilation_createForDevices.

An application must ensure that no other thread uses the model at the same time.

This function must only be called once for a given model.

See ANeuralNetworksModel for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
model
The model to be finished.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_free

Declared in android/NeuralNetworks.h
void ANeuralNetworksModel_free(
  ANeuralNetworksModel *model
)

Destroy a model.

The model need not have been finished by a call to ANeuralNetworksModel_finish.

See ANeuralNetworksModel for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
model
The model to be destroyed. Passing NULL is acceptable and results in no operation.

ANeuralNetworksModel_getSupportedOperationsForDevices

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_getSupportedOperationsForDevices(
  const ANeuralNetworksModel *model,
  const ANeuralNetworksDevice *const *devices,
  uint32_t numDevices,
  bool *supportedOps
)

Get the supported operations for a specified set of devices.

If multiple devices are selected, the supported operation list is a union of supported operations of all selected devices.

Available since NNAPI feature level 3.

Details
Parameters
model
The model to be queried.
devices
The set of devices. Must not contain duplicates.
numDevices
The number of devices in the set.
supportedOps
The boolean array to be filled. True means supported. The size of the boolean array must be at least as large as the number of operations in the model. The order of elements in the supportedOps array matches the order in which the corresponding operations were added to the model.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_identifyInputsAndOutputs

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_identifyInputsAndOutputs(
  ANeuralNetworksModel *model,
  uint32_t inputCount,
  const uint32_t *inputs,
  uint32_t outputCount,
  const uint32_t *outputs
)

Specifies which operands will be the model's inputs and outputs.

Every model must have at least one input and one output.

An operand cannot be used for both input and output. Doing so will return an error.

The operands specified by inputs and outputs must have been previously added by calls to ANeuralNetworksModel_addOperand.

Details
Parameters
model
The model to be modified.
inputCount
The number of entries in the inputs array.
inputs
An array of indexes identifying the input operands.
outputCount
The number of entries in the outputs array.
outputs
An array of indexes identifying the output operands.

Attempting to modify a model once ANeuralNetworksModel_finish has been called will return an error.

See ANeuralNetworksModel for information on multithreaded usage.

Available since NNAPI feature level 1.

ANeuralNetworksModel_relaxComputationFloat32toFloat16

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
  ANeuralNetworksModel *model,
  bool allow
)

Specifies whether ANEURALNETWORKS_TENSOR_FLOAT32 is allowed to be calculated with range and/or precision as low as that of the IEEE 754 16-bit floating-point format.

By default, ANEURALNETWORKS_TENSOR_FLOAT32 must be calculated using at least the range and precision of the IEEE 754 32-bit floating-point format.

The relaxComputationFloat32toFloat16 setting of the main model of a compilation overrides the values of the referenced models.

Attempting to modify a model once ANeuralNetworksModel_finish has been called will return an error.

Details
Parameters
model
The model to be modified.
allow
'true' indicates ANEURALNETWORKS_TENSOR_FLOAT32 may be calculated with range and/or precision as low as that of the IEEE 754 16-bit floating-point format. 'false' indicates ANEURALNETWORKS_TENSOR_FLOAT32 must be calculated using at least the range and precision of the IEEE 754 32-bit floating-point format.

Available since NNAPI feature level 2.

See ANeuralNetworksModel for information on multithreaded usage.

ANeuralNetworksModel_setOperandSymmPerChannelQuantParams

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
  ANeuralNetworksModel *model,
  int32_t index,
  const ANeuralNetworksSymmPerChannelQuantParams *channelQuant
)

Sets an operand's per channel quantization parameters.

Sets parameters required by a tensor of type ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL. This function must be called for every tensor of type ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL before calling ANeuralNetworksModel_finish.

Available since NNAPI feature level 3.

Details
Parameters
model
The model to be modified.
index
The index of the model operand we're setting.
channelQuant
The per channel quantization parameters for the operand. No memory in this struct needs to outlive the call to this function.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_setOperandValue

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_setOperandValue(
  ANeuralNetworksModel *model,
  int32_t index,
  const void *buffer,
  size_t length
)

Sets an operand to a constant value.

Values of length smaller than or equal to ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES are immediately copied into the model.

For values of length greater than ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES, a pointer to the buffer is stored within the model. The application must not change the content of this region until all executions using this model have completed. As the data may be copied during processing, modifying the data after this call yields undefined results. The provided buffer must outlive this model.

For large tensors, using ANeuralNetworksModel_setOperandValueFromMemory is likely to be more efficient.

To indicate that an optional operand should be considered missing, pass nullptr for buffer and 0 for length.

Attempting to modify a model once ANeuralNetworksModel_finish has been called will return an error.

See ANeuralNetworksModel for information on multithreaded usage.

Available since NNAPI feature level 1.

Details
Parameters
model
The model to be modified.
index
The index of the model operand we're setting.
buffer
A pointer to the data to use.
length
The size in bytes of the data value.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_setOperandValueFromMemory

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_setOperandValueFromMemory(
  ANeuralNetworksModel *model,
  int32_t index,
  const ANeuralNetworksMemory *memory,
  size_t offset,
  size_t length
)

Sets an operand to a value stored in a memory object.

The content of the memory is not copied. A reference to that memory is stored inside the model. The application must not change the content of the memory region until all executions using this model have completed. As the data may be copied during processing, modifying the data after this call yields undefined results.

The provided memory must outlive this model.

To indicate that an optional operand should be considered missing, use ANeuralNetworksModel_setOperandValue instead, passing nullptr for buffer.

It is disallowed to set an operand value with shared memory backed by an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB.

It is disallowed to set an operand value with memory created from ANeuralNetworksMemory_createFromDesc.

Attempting to modify a model once ANeuralNetworksModel_finish has been called will return an error.

See ANeuralNetworksModel for information on multithreaded usage. See ANeuralNetworksMemory_createFromAHardwareBuffer for information on AHardwareBuffer usage.

Available since NNAPI feature level 1.

Details
Parameters
model
The model to be modified.
index
The index of the model operand we're setting.
memory
The memory containing the data.
offset
This specifies the location of the data within the memory. The offset is in bytes from the start of memory.
length
The size in bytes of the data value.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworksModel_setOperandValueFromModel

Declared in android/NeuralNetworks.h
int ANeuralNetworksModel_setOperandValueFromModel(
  ANeuralNetworksModel *model,
  int32_t index,
  const ANeuralNetworksModel *value
)

Sets an operand to a value that is a reference to another NNAPI model.

The referenced model must already have been finished by a call to ANeuralNetworksModel_finish.

The ANeuralNetworksModel_relaxComputationFloat32toFloat16 setting of referenced models is overridden by that setting of the main model of a compilation.

The referenced model must outlive the model referring to it.

Attempting to modify a model once ANeuralNetworksModel_finish has been called will return an error.

See ANeuralNetworksModel for information on multithreaded usage.

Available since NNAPI feature level 4.

Details
Parameters
model
The model to be modified.
index
The index of the model operand we're setting.
value
The model to be referenced.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworks_getDefaultLoopTimeout

Declared in android/NeuralNetworks.h
uint64_t ANeuralNetworks_getDefaultLoopTimeout()

Get the default timeout value for WHILE loops.

Available since NNAPI feature level 4.

Details
Returns
The default timeout value in nanoseconds.

ANeuralNetworks_getDevice

Declared in android/NeuralNetworks.h
int ANeuralNetworks_getDevice(
  uint32_t devIndex,
  ANeuralNetworksDevice **device
)

Get the representation of the specified device.

Available since NNAPI feature level 3.

Details
Parameters
devIndex
The index of the specified device. Must be less than the number of available devices.
device
The representation of the specified device. The same representation will always be returned for the specified device.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworks_getDeviceCount

Declared in android/NeuralNetworks.h
int ANeuralNetworks_getDeviceCount(
  uint32_t *numDevices
)

Get the number of available devices.

Available since NNAPI feature level 3.

Details
Parameters
numDevices
Used to return the number of devices.
Returns
ANEURALNETWORKS_NO_ERROR if successful.

ANeuralNetworks_getMaximumLoopTimeout

Declared in android/NeuralNetworks.h
uint64_t ANeuralNetworks_getMaximumLoopTimeout()

Get the maximum timeout value for WHILE loops.

Available since NNAPI feature level 4.

Details
Returns
The maximum timeout value in nanoseconds.

ANeuralNetworks_getRuntimeFeatureLevel

Declared in android/NeuralNetworks.h
int64_t ANeuralNetworks_getRuntimeFeatureLevel()

Get the NNAPI runtime feature level.

Since API level 31 (NNAPI feature level 5), the NNAPI runtime (libneuralnetworks.so) and its API specification can be updated between Android API releases.

On Android devices with API level 31 and newer, for NNAPI runtime feature discovery, the NNAPI runtime feature level must be used instead of the Android device API level.

On Android devices with API level 30 and older, the Android API level of the Android device must be used for NNAPI runtime feature discovery. Enum values in FeatureLevelCode from feature level 1 to 5 have their corresponding Android API levels listed in their documentation, and each such enum value equals the corresponding API level. This allows using the Android API level as the feature level. This mapping between enum value and Android API level does not exist for feature levels after NNAPI feature level 5 and API levels after S (31).

Example usage:
    int device_api_level = android_get_device_api_level();
    int64_t runtime_feature_level = (device_api_level < __ANDROID_API_S__) ?
                                    device_api_level : ANeuralNetworks_getRuntimeFeatureLevel();

Runtime feature level is closely related to NNAPI device feature level (ANeuralNetworksDevice_getFeatureLevel), which indicates an NNAPI device feature level (the most advanced NNAPI specification and features that the driver implements). This function expresses NNAPI runtime feature level, which indicates the most advanced NNAPI specification and features the runtime implements. An NNAPI device feature level is always less than or equal to the runtime feature level.

This function returns a FeatureLevelCode enum value, which is the NNAPI specification version that this NNAPI runtime implements. It is NOT an Android API level.

Available since NNAPI feature level 5.