From 2ba45a03049e8a47a5774c0c8acd4349e0ef22c1 Mon Sep 17 00:00:00 2001 From: ddavis-2015 Date: Tue, 16 Jul 2024 14:01:48 -0700 Subject: [PATCH] Draft PR: testing --- tensorflow/lite/micro/compression.h | 70 ++ tensorflow/lite/micro/compression/BUILD | 94 ++ tensorflow/lite/micro/compression/compress.py | 244 +++++ .../lite/micro/compression/metadata.fbs | 38 + .../micro/compression/metadata_generated.h | 148 +++ .../lite/micro/compression/metadata_test.cc | 71 ++ .../lite/micro/compression/metadata_test.py | 67 ++ .../lite/micro/compression/original.fbs | 82 ++ .../lite/micro/compression/original_test.py | 76 ++ tensorflow/lite/micro/compression/view.py | 155 ++++ .../micro_speech/micro_speech_test.cc | 7 - tensorflow/lite/micro/fake_micro_context.cc | 82 +- tensorflow/lite/micro/fake_micro_context.h | 36 +- tensorflow/lite/micro/kernels/conv.cc | 51 +- tensorflow/lite/micro/kernels/conv.h | 10 +- tensorflow/lite/micro/kernels/conv_common.cc | 19 +- tensorflow/lite/micro/kernels/conv_test.cc | 349 +++++++- tensorflow/lite/micro/kernels/conv_test.h | 276 +++++- .../lite/micro/kernels/conv_test_common.cc | 104 +-- .../lite/micro/kernels/fully_connected.cc | 62 +- .../lite/micro/kernels/fully_connected.h | 10 +- .../micro/kernels/fully_connected_test.cc | 336 ++++++- .../lite/micro/kernels/kernel_runner.cc | 16 +- tensorflow/lite/micro/kernels/kernel_runner.h | 9 +- tensorflow/lite/micro/kernels/kernel_util.h | 27 +- .../lite/micro/kernels/transpose_conv.cc | 75 +- .../lite/micro/kernels/transpose_conv_test.cc | 840 +++++++++++++++--- tensorflow/lite/micro/micro_allocator.cc | 250 +++++- tensorflow/lite/micro/micro_allocator.h | 20 +- tensorflow/lite/micro/micro_context.cc | 135 ++- tensorflow/lite/micro/micro_context.h | 32 +- .../lite/micro/micro_interpreter_context.cc | 102 ++- .../lite/micro/micro_interpreter_context.h | 27 +- .../lite/micro/micro_interpreter_test.cc | 55 +- .../lite/micro/recording_micro_allocator.cc | 30 +- .../lite/micro/recording_micro_allocator.h | 17 +- .../micro/recording_micro_allocator_test.cc | 66 +- .../lite/micro/test_helper_custom_ops.cc | 189 +++- .../lite/micro/test_helper_custom_ops.h | 19 +- tensorflow/lite/micro/test_helpers.cc | 173 +++- tensorflow/lite/micro/test_helpers.h | 110 ++- tensorflow/lite/micro/testing/micro_test.h | 9 +- .../lite/micro/tools/benchmarking/metrics.cc | 30 +- .../micro/tools/ci_build/test_x86_default.sh | 6 + tensorflow/lite/micro/tools/make/Makefile | 14 + 45 files changed, 4261 insertions(+), 377 deletions(-) create mode 100644 tensorflow/lite/micro/compression.h create mode 100644 tensorflow/lite/micro/compression/BUILD create mode 100644 tensorflow/lite/micro/compression/compress.py create mode 100644 tensorflow/lite/micro/compression/metadata.fbs create mode 100644 tensorflow/lite/micro/compression/metadata_generated.h create mode 100644 tensorflow/lite/micro/compression/metadata_test.cc create mode 100644 tensorflow/lite/micro/compression/metadata_test.py create mode 100644 tensorflow/lite/micro/compression/original.fbs create mode 100644 tensorflow/lite/micro/compression/original_test.py create mode 100644 tensorflow/lite/micro/compression/view.py diff --git a/tensorflow/lite/micro/compression.h b/tensorflow/lite/micro/compression.h new file mode 100644 index 00000000000..d6a2b27b091 --- /dev/null +++ b/tensorflow/lite/micro/compression.h @@ -0,0 +1,70 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_ + +#ifdef USE_TFLM_COMPRESSION + +#include "tensorflow/lite/c/common.h" + +namespace tflite { + +// +// Compressed tensors +// + +static constexpr const char* kCompressionMetadataString = "TFLM_COMPRESSION"; + +enum class CompressionScheme : uint8_t { + kBinQuant, +}; + +// TODO(ddavis-2015): pack struct +struct BinQuantData { + static constexpr size_t kMaxBitWidth = 7; + static constexpr size_t kMaxValueTableChannelStride = 128; + + const void* value_table; // Pointer into FlatBuffer Values. + uint8_t value_table_channel_stride; // elements per channel + uint8_t compressed_bit_width : 3; // 1 to 7 bits + bool is_per_channel_quantized : 1; // tensor is per-channel quantized + bool use_alternate_axis : 1; // shape default channel: + // 0 = first, 1 = last + uint8_t reserved : 3; +}; + +union CompressionData { + BinQuantData bin_quant; +}; + +// TODO(ddavis-2015): pack struct +struct CompressionTensorData { + CompressionScheme scheme; + CompressionData data; +}; + +// TODO(ddavis-2015): pack struct +struct CompressedTensorList { + // Sparsely populated array with the same number of elements as there are + // tensors in the Subgraph. An alternative would include a tensor index in + // the struct for each and walk the list on look up. This could be slow. 
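+  //
+  // Illustrative lookup sketch (not part of this change; assumes `list` points
+  // to a populated CompressedTensorList and `i` is a subgraph tensor index):
+  //
+  //   const CompressionTensorData* ctd = list->tensors[i];
+  //   if (ctd != nullptr && ctd->scheme == CompressionScheme::kBinQuant) {
+  //     const void* values = ctd->data.bin_quant.value_table;
+  //     // ... decompress using `values` and compressed_bit_width ...
+  //   }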
+ CompressionTensorData** tensors; +}; + +} // namespace tflite + +#endif // USE_TFLM_COMPRESSION +#endif // TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_ diff --git a/tensorflow/lite/micro/compression/BUILD b/tensorflow/lite/micro/compression/BUILD new file mode 100644 index 00000000000..cde1b55bb15 --- /dev/null +++ b/tensorflow/lite/micro/compression/BUILD @@ -0,0 +1,94 @@ +load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library", "flatbuffer_py_library") +load("@rules_python//python:defs.bzl", "py_test") +load("@tflm_pip_deps//:requirements.bzl", "requirement") + +package( + default_visibility = [ + "//visibility:public", + ], +) + +flatbuffer_cc_library( + name = "metadata_flatbuffer_cc", + srcs = ["metadata.fbs"], +) + +flatbuffer_py_library( + name = "original_flatbuffer_py", + srcs = ["original.fbs"], +) + +flatbuffer_py_library( + name = "metadata_flatbuffer_py", + srcs = ["metadata.fbs"], +) + +cc_test( + name = "metadata_test_cc", + srcs = ["metadata_test.cc"], + deps = [ + "metadata_flatbuffer_cc", + "//tensorflow/lite/micro:hexdump", + "@flatbuffers//:runtime_cc", + ], + size = "small", +) + +py_binary( + name = "compress", + srcs = ["compress.py"], + deps = [ + "@absl_py//absl:app", + "@absl_py//absl/flags", + "@absl_py//absl/logging", + "@flatbuffers//:runtime_py", + "metadata_flatbuffer_py", + "//tensorflow/lite/python:schema_py", + requirement("bitarray"), + requirement("numpy"), + requirement("scikit-learn"), + ], +) + +py_binary( + name = "view", + srcs = [ + "view.py", + ], + deps = [ + "metadata_flatbuffer_py", + "//tensorflow/lite/python:schema_py", + ], +) + +py_test( + name = "metadata_test_py", + main = "metadata_test.py", + srcs = ["metadata_test.py"], + deps = [ + "metadata_flatbuffer_py", + "@flatbuffers//:runtime_py", + requirement("hexdump"), + ], + size = "small", +) + +py_test( + name = "original_test_py", + main = "original_test.py", + srcs = ["original_test.py"], + deps = [ + "original_flatbuffer_py", + "@flatbuffers//:runtime_py", + requirement("hexdump"), + ], + size = "small", +) + +genrule( + name = "hello_world_int8.compressed", + srcs = ["//tensorflow/lite/micro/examples/hello_world/models:hello_world_int8.tflite"], + outs = ["hello_world_int8.compressed.tflite"], + cmd = "$(location :compress) --input_model_path $< --output_model_path $@", + tools = [":compress"], +) diff --git a/tensorflow/lite/micro/compression/compress.py b/tensorflow/lite/micro/compression/compress.py new file mode 100644 index 00000000000..18834982f24 --- /dev/null +++ b/tensorflow/lite/micro/compression/compress.py @@ -0,0 +1,244 @@ +# Copyright 2024 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Reduces the number of weights in a .tflite model using various strategies.""" + +# Usage information: +# Default: +# `bazel run tensorflow/lite/micro/tools:compress -- \ +# --input_model_path=` \ +# --output_model_path=` + + +from tensorflow.lite.micro.compression import metadata_flatbuffer_py_generated as compression_schema +from tensorflow.lite.python import schema_py_generated as tflite_schema + +from absl import app +from absl import flags +from absl import logging +import bitarray +import bitarray.util +import numpy as np +import flatbuffers +import sklearn.cluster +import struct + + +_INPUT_MODEL_PATH = flags.DEFINE_string( + "input_model_path", + None, + ".tflite input model path", + required=True, +) + +_TEST_COMPRESSED_MODEL = flags.DEFINE_bool( + "test_compressed_model", + False, + "optional config to test models with random data and" + " report on the differences in output.", +) + +_OUTPUT_MODEL_PATH = flags.DEFINE_string( + "output_model_path", + None, + ".tflite output path. Leave blank if same as input+.compressed.tflite", +) + + +def read_model(path): + with open(path, 'rb') as file: + buffer = bytearray(file.read()) + return tflite_schema.ModelT.InitFromPackedBuf(buffer, 0) + + +def write_model(model, path): + builder = flatbuffers.Builder(32) + root = model.Pack(builder) + builder.Finish(root) + buffer: bytearray = builder.Output() + + with open(path, 'wb') as file: + file.write(buffer) + + +def pack_compression_metadata(m): + builder = flatbuffers.Builder(32) + root = m.Pack(builder) + builder.Finish(root) + buffer: bytearray = builder.Output() + return buffer + + +def pack_lut_indexes(indexes, bitwidth): + """Pack the sequence of integers given in `indexes` into bitwidth-wide fields + in a buffer, and return the buffer. Raise an OverflowError if any element + does not fit into a bitwidth-wide field. """ + ba = bitarray.bitarray(endian="big") + for i in indexes: + field = bitarray.util.int2ba(i, length=bitwidth, endian="big") + ba.extend(field) + return ba.tobytes() + + +def pack_lut_values(values, struct_format): + """Pack the `values` into a buffer of bytes, using a `struct_format` + character from the standard module `struct` to determine the type of values + and corresponding encoding into bytes. Always little-endian byte order. + """ + buffer = bytearray() + little_endian = "<" + packer = struct.Struct(little_endian + struct_format) + for v in values: + buffer.extend(packer.pack(v)) + return buffer + + +def unpack_buffer_values(data, struct_format): + little_endian = "<" + unpacker = struct.Struct(little_endian + struct_format) + values = [v[0] for v in unpacker.iter_unpack(bytes(data))] + return values + + +def tensor_type_to_struct_format(type): + m = { + tflite_schema.TensorType.INT8: "b", + tflite_schema.TensorType.INT16: "h", + tflite_schema.TensorType.FLOAT32: "f", + } + return m[type] + + +def bq(sequence, num_values): + """Quantize a sequence of integers, minimizing the total error using k-means + clustering. 
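+
+  For example (illustrative; the exact cluster centers and label order depend
+  on the k-means run): bq([10, 12, 30, 31], 2) groups the values into two
+  clusters and returns roughly ([0, 0, 1, 1], [11, 30]).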
+
+  Parameters:
+    sequence :list - a sequence of integers to be quantized
+    num_values :int - the number of quantization levels
+
+  Returns:
+    (indexes, values): a tuple with the list of indexes and list of values
+  """
+  sequence = np.array(sequence).reshape(-1, 1)
+  kmeans = sklearn.cluster.KMeans(n_clusters=num_values,
+                                  random_state=0).fit(sequence)
+  values = kmeans.cluster_centers_.flatten()
+  values = np.round(values).astype(int).tolist()
+  indexes = kmeans.predict(sequence).tolist()
+  return (indexes, values)
+
+
+def compress_tensor(subgraph_id, tensor_id, model):
+  subgraph = model.subgraphs[subgraph_id]
+  tensor = subgraph.tensors[tensor_id]
+  struct_format = tensor_type_to_struct_format(tensor.type)
+  buffer_id = tensor.buffer
+  buffer = model.buffers[buffer_id]
+  sequence = unpack_buffer_values(buffer.data, struct_format)
+  bitwidth = 2
+  indexes, values = bq(sequence, 2 ** bitwidth)
+
+  # append index buffer
+  buffer = tflite_schema.BufferT()
+  buffer.data = pack_lut_indexes(indexes, bitwidth)
+  model.buffers.append(buffer)
+  index_id = len(model.buffers) - 1
+
+  # append value buffer
+  buffer = tflite_schema.BufferT()
+  buffer.data = pack_lut_values(values, struct_format)
+  model.buffers.append(buffer)
+  value_id = len(model.buffers) - 1
+
+  # create metadata
+  lut_tensor = compression_schema.LutTensorT()
+  lut_tensor.subgraph = subgraph_id
+  lut_tensor.tensor = tensor_id
+  lut_tensor.indexBitwidth = bitwidth
+  lut_tensor.indexBuffer = index_id
+  lut_tensor.valueBuffer = value_id
+
+  return lut_tensor
+
+
+def compress_fully_connected(subgraph_id, operator_id, model):
+  # On a fully_connected operator, we compress the 2nd input (the weights
+  # tensor). The 3rd input (the bias tensor) is left uncompressed for now.
+  subgraph = model.subgraphs[subgraph_id]
+  operator = subgraph.operators[operator_id]
+  tensor_id_2 = operator.inputs[1]
+  # tensor_id_3 = operator.inputs[2]
+  lut_tensor_2 = compress_tensor(subgraph_id, tensor_id_2, model)
+  # lut_tensor_3 = compress_tensor(subgraph_id, tensor_id_2, model)
+  return (lut_tensor_2,)
+
+
+def get_opcode_compressions(model):
+  """Return a map of operator_code indexes to compression functions, for those
+  operators we wish to and know how to compress.
+ """ + compressable = {tflite_schema.BuiltinOperator.FULLY_CONNECTED: compress_fully_connected} + compressions = {} + for index, code in enumerate(model.operatorCodes): + if code.builtinCode in compressable: + compressions[index] = compressable[code.builtinCode] + return compressions + + +def compress(model): + # Walk op codes, identify those we compress, note index + # Walk operators, match op code indexes, note tensors to compress + # Walk those tensors, creating LUTs in buffers and metadata + + compressions = get_opcode_compressions(model) + + lut_tensors = [] + + for subgraph_id, subgraph in enumerate(model.subgraphs): + for operator_id, operator in enumerate(subgraph.operators): + fn = compressions.get(operator.opcodeIndex) + if fn is not None: + result = fn(subgraph_id, operator_id, model) + if result is not None: + lut_tensors.extend(result) + + compression_metadata = compression_schema.MetadataT() + compression_metadata.lutTensors = lut_tensors + + return compression_metadata + + +def main(_) -> None: + output_model_path = _OUTPUT_MODEL_PATH.value or ( + _INPUT_MODEL_PATH.value.split(".tflite")[0] + ".compressed.tflite") + logging.info("compressing %s to %s", _INPUT_MODEL_PATH.value, output_model_path) + + model = read_model(_INPUT_MODEL_PATH.value) + + compression_metadata = compress(model) + + buffer = tflite_schema.BufferT() + buffer.data = pack_compression_metadata(compression_metadata) + model.buffers.append(buffer) + + metadata = tflite_schema.MetadataT() + metadata.name = "COMPRESSION_METADATA" + metadata.buffer = len(model.buffers) - 1 + model.metadata.append(metadata) + + write_model(model, output_model_path) + + +if __name__ == "__main__": + app.run(main) diff --git a/tensorflow/lite/micro/compression/metadata.fbs b/tensorflow/lite/micro/compression/metadata.fbs new file mode 100644 index 00000000000..dcfb1ccafb9 --- /dev/null +++ b/tensorflow/lite/micro/compression/metadata.fbs @@ -0,0 +1,38 @@ +// Copyright 2024 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Flatbuffer schema describing a TFLM compressed model. Use as the value for +// the key "TFLM_COMPRESSION" in the metadata table in a .tflite flatbuffer. + +namespace tflite.micro.compression; + +table Metadata { + lut_tensors:[LutTensor]; // list of tensors that are compressed by LUT +} + +struct LutTensor { + subgraph:uint16; // the index of the subgraph + tensor:uint16; // the index of the tensor in its subgraph + index_bitwidth:uint8; // the bit-width of LUT indexes + index_buffer:uint16; // the index of the buffer containing LUT indexes + value_buffer:uint16; // the index of the buffer containing LUT values +} +// Look-Up-Table tensors are encoded in two buffers: an index buffer and a +// value buffer. The indexes are unsigned integers packed into the index buffer +// in bitwidth-wide bit fields with a big-endian bit order. The data in the +// value buffer is encoded as usual according to the type of the tensor. 
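+//
+// Worked example (illustrative, not normative): with index_bitwidth = 2, the
+// index sequence [2, 1, 0, 3] packs into one byte as the bit fields
+// 10 01 00 11, i.e. 0x93; element i then decodes to the value at position
+// index(i) in the value buffer.
+//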
+// Tensors with multiple channels have distinct values tables for each channel, +// concatinated into one value buffer. (Will elaborate this comment.) + +root_type Metadata; diff --git a/tensorflow/lite/micro/compression/metadata_generated.h b/tensorflow/lite/micro/compression/metadata_generated.h new file mode 100644 index 00000000000..eaa03cb21e8 --- /dev/null +++ b/tensorflow/lite/micro/compression/metadata_generated.h @@ -0,0 +1,148 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_METADATA_TFLITE_MICRO_COMPRESSION_H_ +#define FLATBUFFERS_GENERATED_METADATA_TFLITE_MICRO_COMPRESSION_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace tflite { +namespace micro { +namespace compression { + +struct Metadata; +struct MetadataBuilder; + +struct LutTensor; + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(2) LutTensor FLATBUFFERS_FINAL_CLASS { + private: + uint16_t subgraph_; + uint16_t tensor_; + uint8_t index_bitwidth_; + int8_t padding0__; + uint16_t index_buffer_; + uint16_t value_buffer_; + + public: + LutTensor() + : subgraph_(0), + tensor_(0), + index_bitwidth_(0), + padding0__(0), + index_buffer_(0), + value_buffer_(0) { + (void)padding0__; + } + LutTensor(uint16_t _subgraph, uint16_t _tensor, uint8_t _index_bitwidth, uint16_t _index_buffer, uint16_t _value_buffer) + : subgraph_(flatbuffers::EndianScalar(_subgraph)), + tensor_(flatbuffers::EndianScalar(_tensor)), + index_bitwidth_(flatbuffers::EndianScalar(_index_bitwidth)), + padding0__(0), + index_buffer_(flatbuffers::EndianScalar(_index_buffer)), + value_buffer_(flatbuffers::EndianScalar(_value_buffer)) { + } + uint16_t subgraph() const { + return flatbuffers::EndianScalar(subgraph_); + } + uint16_t tensor() const { + return flatbuffers::EndianScalar(tensor_); + } + uint8_t index_bitwidth() const { + return flatbuffers::EndianScalar(index_bitwidth_); + } + uint16_t index_buffer() const { + return flatbuffers::EndianScalar(index_buffer_); + } + uint16_t value_buffer() const { + return flatbuffers::EndianScalar(value_buffer_); + } +}; +FLATBUFFERS_STRUCT_END(LutTensor, 10); + +struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_LUT_TENSORS = 4 + }; + const flatbuffers::Vector *lut_tensors() const { + return GetPointer *>(VT_LUT_TENSORS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_LUT_TENSORS) && + verifier.VerifyVector(lut_tensors()) && + verifier.EndTable(); + } +}; + +struct MetadataBuilder { + typedef Metadata Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_lut_tensors(flatbuffers::Offset> lut_tensors) { + fbb_.AddOffset(Metadata::VT_LUT_TENSORS, lut_tensors); + } + explicit MetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMetadata( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> lut_tensors = 0) { + MetadataBuilder builder_(_fbb); + builder_.add_lut_tensors(lut_tensors); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateMetadataDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *lut_tensors = nullptr) { + auto lut_tensors__ = lut_tensors ? 
_fbb.CreateVectorOfStructs(*lut_tensors) : 0; + return tflite::micro::compression::CreateMetadata( + _fbb, + lut_tensors__); +} + +inline const tflite::micro::compression::Metadata *GetMetadata(const void *buf) { + return flatbuffers::GetRoot(buf); +} + +inline const tflite::micro::compression::Metadata *GetSizePrefixedMetadata(const void *buf) { + return flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyMetadataBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedMetadataBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishMetadataBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.Finish(root); +} + +inline void FinishSizePrefixedMetadataBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root); +} + +} // namespace compression +} // namespace micro +} // namespace tflite + +#endif // FLATBUFFERS_GENERATED_METADATA_TFLITE_MICRO_COMPRESSION_H_ diff --git a/tensorflow/lite/micro/compression/metadata_test.cc b/tensorflow/lite/micro/compression/metadata_test.cc new file mode 100644 index 00000000000..74b567c7d14 --- /dev/null +++ b/tensorflow/lite/micro/compression/metadata_test.cc @@ -0,0 +1,71 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +// Test validity of the flatbuffer schema and illustrate use of the flatbuffer +// machinery with C++. 
+ +#include +#include + +#include "metadata_generated.h" +#include "tensorflow/lite/micro/hexdump.h" + +using tflite::micro::compression::LutTensor; +using tflite::micro::compression::Metadata; +using tflite::micro::compression::MetadataT; + +bool operator==(const LutTensor& a, const LutTensor& b) { + return + a.subgraph() == b.subgraph() && + a.tensor() == b.tensor() && + a.index_bitwidth() == b.index_bitwidth() && + a.index_buffer() == b.index_buffer() && + a.value_buffer() == b.value_buffer(); +} + +int main(int argc, char* argv[]) { + const LutTensor lut_tensor0 { + 0, // subgraph + 127, // tensor + 2, // index_bitwidth + 128, // index_buffer + 129, // value_buffer + }; + const LutTensor lut_tensor1 { + 1, // subgraph + 164, // tensor + 2, // index_bitwidth + 136, // index_buffer + 129, // value_buffer + }; + MetadataT metadata; + metadata.lut_tensors = {lut_tensor0, lut_tensor1}; + + flatbuffers::FlatBufferBuilder builder; + auto root = Metadata::Pack(builder, &metadata); + builder.Finish(root); + const uint8_t* buffer = builder.GetBufferPointer(); + + tflite::hexdump( + {reinterpret_cast(buffer), builder.GetSize()}); + std::cout << "length: " << builder.GetSize() << "\n"; + + auto readback = tflite::micro::compression::GetMetadata(buffer); + auto& read_lut_tensor0 = *readback->lut_tensors()->Get(0); + auto& read_lut_tensor1 = *readback->lut_tensors()->Get(1); + assert(read_lut_tensor0 == lut_tensor0); + assert(read_lut_tensor1 == lut_tensor1); + + return 0; +} diff --git a/tensorflow/lite/micro/compression/metadata_test.py b/tensorflow/lite/micro/compression/metadata_test.py new file mode 100644 index 00000000000..3d954154b8a --- /dev/null +++ b/tensorflow/lite/micro/compression/metadata_test.py @@ -0,0 +1,67 @@ +# Copyright 2024 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test validity of the flatbuffer schema and illustrate use of the flatbuffer +# machinery with Python + +import sys +import hexdump +import flatbuffers + +# `.*_generated` is the name of the module created by the Bazel rule +# `flatbuffer_py_library' based on the schema. +from tensorflow.lite.micro.compression import metadata_flatbuffer_py_generated as schema + + +def main(): + # The classes with a `T` suffix provide an object-oriented representation of + # the object tree in the flatbuffer using native data structures. + lut_tensor0 = schema.LutTensorT() + lut_tensor0.subgraph = 1 + lut_tensor0.tensor = 127 + lut_tensor0.indexBitwidth = 2 + lut_tensor0.indexBuffer = 128 + lut_tensor0.valueBuffer = 129 + + lut_tensor1 = schema.LutTensorT() + lut_tensor1.subgraph = 1 + lut_tensor1.tensor = 164 + lut_tensor1.indexBitwidth = 2 + lut_tensor1.indexBuffer = 136 + lut_tensor1.valueBuffer = 129 + + metadata = schema.MetadataT() + metadata.lutTensors = [lut_tensor0, lut_tensor1] + + # Build the flatbuffer itself using the flatbuffers runtime module. 
+ builder = flatbuffers.Builder(32) + root = metadata.Pack(builder) + builder.Finish(root) + buffer: bytearray = builder.Output() + + print(hexdump.hexdump(buffer, result='return')) + print(f"length: {len(buffer)}") + + def attrs_equal(a, b): + return all(vars(a)[key] == vars(b)[key] for key in vars(a)) + + readback = schema.MetadataT.InitFromPackedBuf(buffer, 0) + assert attrs_equal(readback.lutTensors[0], lut_tensor0) + assert attrs_equal(readback.lutTensors[1], lut_tensor1) + + sys.exit() + + +if __name__ == "__main__": + main() diff --git a/tensorflow/lite/micro/compression/original.fbs b/tensorflow/lite/micro/compression/original.fbs new file mode 100644 index 00000000000..3a05a6cd4f2 --- /dev/null +++ b/tensorflow/lite/micro/compression/original.fbs @@ -0,0 +1,82 @@ +// Copyright 2024 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +namespace tflite.micro; + +table ValuesInt8 { + values:[int8]; +} + +table ValuesInt16 { + values:[int16]; +} + +table ValuesInt32 { + values:[int32]; +} + +table ValuesInt64 { + values:[int64]; +} + +table ValuesFloat32 { + values:[float32]; +} + +union ValuesUnion { + ValuesFloat32, + ValuesInt8, + ValuesInt16, + ValuesInt32, + ValuesInt64 +} + +table Values { + values:ValuesUnion; +} + +table BinQuantBufferOptions { + value_table_index:int; + compressed_bit_width:uint8; // Should be 2 or 4 +} + +union CompressedBufferOptions { + BinQuantBufferOptions, + // HuffmanBufferOptions, // Future +} + +table CompressedBuffer { + buffer_index:int; // Buffer index from the top-level Model buffer vector + options:CompressedBufferOptions; +} + +table BinQuantCompression { + version:uint8; + // For a given value table, if the corresponding buffer was per-tensor quantized, there should be 4 or 16 elements (2 bit or 4 bit indexes). + // If the buffer was per-channel quantized, there should be 4/16 x number of channels elements. These will be laid out in the table as: + // [c0v0, c0v1, c0v2, c0v3, c1v0, c1v1, ... cNv3] + value_tables:[Values]; +} + +table CompressionMetadata { + // List of compressed buffers + buffers:[CompressedBuffer]; + + // (Optional) Model-wide Bin & Quant compression parameters. Only needed if a + // CompressedBuffer contains BinQuantBufferOptions. + bin_quant_compression:BinQuantCompression; +} + +root_type CompressionMetadata; diff --git a/tensorflow/lite/micro/compression/original_test.py b/tensorflow/lite/micro/compression/original_test.py new file mode 100644 index 00000000000..edc8ad4d11f --- /dev/null +++ b/tensorflow/lite/micro/compression/original_test.py @@ -0,0 +1,76 @@ +# Copyright 2024 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test validity of the flatbuffer schema and illustrate use of the flatbuffer
+# machinery with Python
+
+import sys
+import hexdump
+import flatbuffers
+
+# `.*_generated` is the name of the module created by the Bazel rule
+# `flatbuffer_py_library' based on the schema.
+from tensorflow.lite.micro.compression import original_flatbuffer_py_generated as schema
+
+
+def main():
+  # The classes with a `T` suffix provide an object-oriented representation of
+  # the object tree in the flatbuffer using native data structures.
+  bq0_options = schema.BinQuantBufferOptionsT()
+  bq0_options.valueTableIndex = 0
+  bq0_options.compressedBitWidth = 2
+
+  bq1_options = schema.BinQuantBufferOptionsT()
+  bq1_options.valueTableIndex = 1
+  bq1_options.compressedBitWidth = 4
+
+  buffer0 = schema.CompressedBufferT()
+  buffer0.bufferIndex = 0
+  buffer0.options = bq0_options
+  buffer0.optionsType = schema.CompressedBufferOptions.BinQuantBufferOptions
+
+  buffer1 = schema.CompressedBufferT()
+  buffer1.bufferIndex = 1
+  buffer1.options = bq1_options
+  buffer1.optionsType = schema.CompressedBufferOptions.BinQuantBufferOptions
+
+  valuesInt8 = schema.ValuesInt8T()
+  valuesInt8.values = [65]
+  values0 = schema.ValuesT()
+  values0.values = valuesInt8
+  values0.valuesType = schema.ValuesUnion.ValuesInt8
+
+  bq_compression = schema.BinQuantCompressionT()
+  bq_compression.valueTables = [values0]
+
+  metadata = schema.CompressionMetadataT()
+  metadata.buffers = [buffer0, buffer1]
+  metadata.binQuantCompression = bq_compression
+
+  # Build the flatbuffer itself using the flatbuffers runtime module.
+  builder = flatbuffers.Builder(32)
+  root = metadata.Pack(builder)
+  builder.Finish(root)
+  buffer: bytearray = builder.Output()
+
+  print(hexdump.hexdump(buffer, result='return'))
+  print(f"length: {len(buffer)}")
+
+  readback = schema.CompressionMetadataT.InitFromPackedBuf(buffer, 0)
+
+  sys.exit()
+
+
+if __name__ == "__main__":
+  main()
diff --git a/tensorflow/lite/micro/compression/view.py b/tensorflow/lite/micro/compression/view.py
new file mode 100644
index 00000000000..55c4255ede1
--- /dev/null
+++ b/tensorflow/lite/micro/compression/view.py
@@ -0,0 +1,155 @@
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ + +import pprint + +from tensorflow.lite.micro.compression import metadata_flatbuffer_py_generated as compression_schema +from tensorflow.lite.python import schema_py_generated as tflite_schema + + +def read_model(path): + with open(path, 'rb') as file: + buffer = bytearray(file.read()) + return tflite_schema.ModelT.InitFromPackedBuf(buffer, 0) + + +def unpack_list(source): + result = [] + for index, s in enumerate(source): + d = {"_index": index} | vars(s) + result.append(d) + return result + + +def unpack_operators(operators): + result = [] + for index, o in enumerate(operators): + d = {"_index": index, + "opcode_index": o.opcodeIndex, + "inputs": unpack_array(o.inputs), + "outputs": unpack_array(o.outputs), + } + result.append(d) + return result + + +def unpack_TensorType(type): + attrs = [attr for attr in dir(tflite_schema.TensorType) if not + attr.startswith("__")] + lut = {getattr(tflite_schema.TensorType, attr): attr for attr in attrs} + return lut[type] + + +def unpack_tensors(tensors): + result = [] + for index, t in enumerate(tensors): + d = {"_index": index, + "name": t.name.decode("utf-8"), + "type": unpack_TensorType(t.type), + "shape": unpack_array(t.shape), + "quantization": [unpack_array(t.quantization.scale), unpack_array(t.quantization.zeroPoint)], + "buffer": t.buffer, + } + result.append(d) + return result + + +def unpack_subgraphs(subgraphs): + result = [] + for index, s in enumerate(subgraphs): + d = {"_index": index, + "name": s.name, + # "inputs": s.inputs, + # "outputs": s.outputs, + "operators": unpack_operators(s.operators), + "tensors": unpack_tensors(s.tensors), + } + result.append(d) + return result + + +def unpack_metadata(metadata): + return [{"name": m.name.decode("utf-8"), "buffer": m.buffer} for m in + metadata] + + +def unpack_compression_metadata(buffer): + metadata = compression_schema.MetadataT.InitFromPackedBuf(buffer, 0) + result = [] + for index, t in enumerate(metadata.lutTensors): + d = {"_index": index, + "subgraph": t.subgraph, + "tensor": t.tensor, + "indexBitwidth": t.indexBitwidth, + "indexBuffer": t.indexBuffer, + "valueBuffer": t.valueBuffer, + } + result.append(d) + return {"lut_tensors": result} + + +def unpack_array(a): + try: + # Avoid printing as numpy arrays if possible. The pprint module does not + # format them well. 
+ a = a.tolist() + except AttributeError: + pass + return a + + +def unpack_buffers(buffers, compression_metadata=None): + result = [] + for index, b in enumerate(buffers): + d = {"_index": index} + d = d | {"data": unpack_array(b.data)} + if index == compression_metadata: d = d | {"_compression_metadata_decoded": + unpack_compression_metadata(bytes(b.data))} + result.append(d) + return result + + +def get_compression_metadata_buffer(model): + # Return the metadata buffer data or None + for item in model.metadata: + if item.name.decode("utf-8") == "COMPRESSION_METADATA": + return item.buffer + else: + return None + + +def print_model(model, format=None): + output = { + "description": model.description.decode("utf-8"), + "version": model.version, + "operator_codes": unpack_list(model.operatorCodes), + "metadata": unpack_metadata(model.metadata), + "subgraphs": unpack_subgraphs(model.subgraphs), + "buffers": unpack_buffers(model.buffers, + get_compression_metadata_buffer(model)), + } + + pprint.pprint(output, width=90, sort_dicts=False, compact=True) + + +def main(argv=None): + filename = argv[1] + model = read_model(filename) + print_model(model) + + +if __name__ == "__main__": + import sys + main(sys.argv) diff --git a/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc index f31728c3707..6fe75c18c15 100644 --- a/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc @@ -32,13 +32,6 @@ limitations under the License. #include "tensorflow/lite/micro/micro_mutable_op_resolver.h" #include "tensorflow/lite/micro/testing/micro_test.h" -#define TF_LITE_MICRO_CHECK_FAIL() \ - do { \ - if (micro_test::did_test_fail) { \ - return kTfLiteError; \ - } \ - } while (false) - namespace { // Arena size is a guesstimate, followed by use of diff --git a/tensorflow/lite/micro/fake_micro_context.cc b/tensorflow/lite/micro/fake_micro_context.cc index 5787ffd0648..1ee2c65f5e1 100644 --- a/tensorflow/lite/micro/fake_micro_context.cc +++ b/tensorflow/lite/micro/fake_micro_context.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,18 +15,34 @@ limitations under the License. 
#include "tensorflow/lite/micro/fake_micro_context.h" +#include + #include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h" #include "tensorflow/lite/micro/micro_arena_constants.h" #include "tensorflow/lite/micro/micro_log.h" +#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { -FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors, - SingleArenaBufferAllocator* allocator, - MicroGraph* micro_graph) - : graph_(*micro_graph), tensors_(tensors), allocator_(allocator) {} +FakeMicroContext::FakeMicroContext( + TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator, + MicroGraph* micro_graph +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* compressed_tensors +#endif // USE_TFLM_COMPRESSION + ) + : graph_(*micro_graph), + tensors_(tensors), + allocator_(allocator) +#ifdef USE_TFLM_COMPRESSION + , + compressed_tensors_(compressed_tensors) +#endif // USE_TFLM_COMPRESSION +{ +} TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) { allocated_temp_count_++; @@ -112,4 +128,60 @@ void* FakeMicroContext::external_context() { return nullptr; } MicroGraph& FakeMicroContext::graph() { return graph_; } +#ifdef USE_TFLM_COMPRESSION + +// Available during Prepare & Eval. Returns false if tensor is not +// compressed. +bool FakeMicroContext::IsTensorCompressed(const TfLiteNode* node, + int tensor_idx) { + if (compressed_tensors_ != nullptr && tensor_idx < node->inputs->size) { + int index = node->inputs->data[tensor_idx]; + if (index >= 0 && compressed_tensors_->tensors[index] != nullptr) { + return true; + } + } + + return false; +} + +// Only available during Prepare. The kernel is responsible for storing the +// scratch buffer handle. +int FakeMicroContext::AllocateDecompressionScratchBuffer(const TfLiteNode* node, + int tensor_idx) { + if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) { + return -1; + } + int index = node->inputs->data[tensor_idx]; + if (index < 0 || compressed_tensors_->tensors[index] == nullptr) { + return -1; + } + TfLiteTensor* tensor = &tensors_[index]; + int scratch_index = -1; + TfLiteStatus result = + RequestScratchBufferInArena(tensor->bytes, &scratch_index); + if (result != kTfLiteOk) { + return -1; + } + + return scratch_index; +} + +// Available during Prepare & Eval. Returns nullptr if tensor is not +// compressed. +const CompressionTensorData* FakeMicroContext::GetTensorCompressionData( + const TfLiteNode* node, int tensor_idx) { + if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) { + return nullptr; + } + + int index = node->inputs->data[tensor_idx]; + if (index < 0) { + return nullptr; + } + + return compressed_tensors_->tensors[index]; +} + +#endif // USE_TFLM_COMPRESSION + } // namespace tflite diff --git a/tensorflow/lite/micro/fake_micro_context.h b/tensorflow/lite/micro/fake_micro_context.h index 46d8a9b1ec4..7cf9c682e5c 100644 --- a/tensorflow/lite/micro/fake_micro_context.h +++ b/tensorflow/lite/micro/fake_micro_context.h @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -30,7 +30,12 @@ class FakeMicroContext : public MicroContext { ~FakeMicroContext() = default; FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator, - MicroGraph* micro_graph); + MicroGraph* micro_graph +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* compressed_tensors = nullptr +#endif // USE_TFLM_COMPRESSION + ); void* AllocatePersistentBuffer(size_t bytes) override; TfLiteStatus RequestScratchBufferInArena(size_t bytes, @@ -50,6 +55,24 @@ class FakeMicroContext : public MicroContext { void* external_context() override; MicroGraph& graph() override; +#ifdef USE_TFLM_COMPRESSION + + // Available during Prepare & Eval. Returns false if tensor is not + // compressed. + bool IsTensorCompressed(const TfLiteNode* node, int tensor_idx) override; + + // Only available during Prepare. The kernel is responsible for storing the + // scratch buffer handle. + int AllocateDecompressionScratchBuffer(const TfLiteNode* node, + int tensor_idx) override; + + // Available during Prepare & Eval. Returns nullptr if tensor is not + // compressed. + const CompressionTensorData* GetTensorCompressionData( + const TfLiteNode* node, int tensor_idx) override; + +#endif // USE_TFLM_COMPRESSION + private: static constexpr int kNumScratchBuffers_ = 12; @@ -62,6 +85,15 @@ class FakeMicroContext : public MicroContext { SingleArenaBufferAllocator* allocator_; +#ifdef USE_TFLM_COMPRESSION + + // + // Compression + // + const CompressedTensorList* compressed_tensors_; + +#endif // USE_TFLM_COMPRESSION + TF_LITE_REMOVE_VIRTUAL_DELETE }; diff --git a/tensorflow/lite/micro/kernels/conv.cc b/tensorflow/lite/micro/kernels/conv.cc index 0df35fce4eb..7364d609e5b 100644 --- a/tensorflow/lite/micro/kernels/conv.cc +++ b/tensorflow/lite/micro/kernels/conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -45,15 +45,36 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const auto& data = *(static_cast(node->user_data)); +#ifdef USE_TFLM_COMPRESSION + + // TODO(ddavis-2015): make micro_context a const pointer + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* weights_comp_td = + micro_context->GetTensorCompressionData(node, kConvWeightsTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION + switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: { tflite::reference_ops::Conv( ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(micro_context, bias, bias_comp_td, + data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr); @@ -67,9 +88,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); } else if (bias->type == kTfLiteInt64) { @@ -79,9 +109,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); } else { @@ -119,9 +158,19 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; diff --git a/tensorflow/lite/micro/kernels/conv.h b/tensorflow/lite/micro/kernels/conv.h index 0c8073f48f0..0090053e03c 100644 --- a/tensorflow/lite/micro/kernels/conv.h +++ b/tensorflow/lite/micro/kernels/conv.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -49,6 +49,14 @@ struct OpDataConv { // A buffer used to store unpacked filter values. This is used if the source // tensor is of n-bit precision that cannot be easily processed by kernels. int filter_buffer_index; + +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int weights_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION }; extern const int kConvInputTensor; diff --git a/tensorflow/lite/micro/kernels/conv_common.cc b/tensorflow/lite/micro/kernels/conv_common.cc index 51c7a6ff2d6..9f0f2f79588 100644 --- a/tensorflow/lite/micro/kernels/conv_common.cc +++ b/tensorflow/lite/micro/kernels/conv_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -209,6 +209,23 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) { &data->filter_buffer_index); } +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. + if (micro_context->IsTensorCompressed(node, kConvWeightsTensor) && + filter->type == kTfLiteInt4) { + MicroPrintf("Compression not supported with INT4 tensors"); + return kTfLiteError; + } + data->weights_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, + kConvWeightsTensor); + data->bias_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, kConvBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(filter); micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(output); diff --git a/tensorflow/lite/micro/kernels/conv_test.cc b/tensorflow/lite/micro/kernels/conv_test.cc index 0fb9411a3f0..0c3e0f06937 100644 --- a/tensorflow/lite/micro/kernels/conv_test.cc +++ b/tensorflow/lite/micro/kernels/conv_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/conv_test.h" +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/kernels/kernel_runner.h" @@ -46,6 +48,83 @@ static int kOutputShape[] = {4, 2, 1, 2, 3}; static const float kGoldenData[kOutputElements] = {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}; +#ifdef USE_TFLM_COMPRESSION + +// compressed filter data for kBinQuant scheme, matches kFilterData +constexpr uint8_t kBinQuantFilterData[] = { + 0x05, 0x38, 0x20, 0x90, 0x00, +}; +constexpr float kBinQuantFilterValueTable[] = { + 1, 2, 3, 4, -1, +}; +constexpr int kBinQuantFilterBitWidth = 3; +// compressed bias data for kBinQuant scheme, matches kBiasData +constexpr uint8_t kBinQuantBiasData[] = {0x18}; +constexpr int kBinQuantBiasBitWidth = 2; + +// Common inputs and outputs for quantized compressed tensor tests. +// Values from TfLite conv_test.cc SimplePerChannelTest. 
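+//
+// Note on the compressed arrays below (an illustrative derivation, assuming
+// the kBinQuant LUT packing described in compression/metadata.fbs): each
+// output channel's filter values map to 3-bit indexes into that channel's
+// 8-element slice of kBinQuantFilterValueTableQ1, packed big-endian. E.g.
+// channel 0 values {1, 2, 3, 4, 3, 4, 5, 6} -> indexes {0, 1, 2, 3, 2, 3, 4, 5}
+// -> bytes {0x05, 0x34, 0xE5}.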
+static int kInputShapeQ1[] = {4, 1, 2, 3, 2}; +static const float kInputDataQ1[] = { + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 +}; +constexpr size_t kInputElementsQ1 = std::extent::value; + +constexpr int kFilterNumChannelsQ1 = 2; +static int kFilterShapeQ1[] = {4, 2, 2, 2, 2}; +static const float kFilterDataQ1[] = { + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 +}; +constexpr size_t kFilterElementsQ1 = + std::extent::value; + +static int kBiasShapeQ1[] = {1, 2}; +static const float kBiasDataQ1[] = {3, -2}; +constexpr size_t kBiasElementsQ1 = std::extent::value; + +static int kOutputShapeQ1[] = {4, 1, 1, 2, 2}; +static const float kGoldenDataQ1[] = {31, 64, -57, -46}; +constexpr int kOutputElementsQ1 = std::extent::value; +static const float kGoldenDataQ1_16[] = {31, 63.99804688, -57, -46}; + +// compressed filter data for kBinQuant scheme, matches kFilterDataQ1 +constexpr uint8_t kBinQuantFilterDataQ1[] = { + 0x05, 0x34, 0xE5, 0xDE, 0x54, 0xC1, +}; +constexpr float kBinQuantFilterValueTableQ1[] = { + 1, 2, 3, 4, 5, 6, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, +}; +constexpr int kBinQuantFilterBitWidthQ1 = 3; +// compressed bias data for kBinQuant scheme, matches kBiasDataQ1 +constexpr uint8_t kBinQuantBiasDataQ1[] = {0x00}; +constexpr int kBinQuantBiasBitWidthQ1 = 1; + +static TfLiteConvParams common_conv_params_q1 = { + kTfLitePaddingValid, // padding + 1, // stride_width + 1, // stride_height + kTfLiteActNone, // activation + 1, // dilation_width_factor + 1, // dilation_height_factor + kTfLiteNoType // quantized_bias_type +}; + +#endif // USE_TFLM_COMPRESSION + static TfLiteConvParams common_conv_params = { kTfLitePaddingValid, // padding 2, // stride_width @@ -122,6 +201,66 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelCompressed) { + const float input_scale = 0.5f; + const float output_scale = 0.5f; + const int input_zero_point = -1; + const int output_zero_point = -1; + constexpr float filter_scales[] = {tflite::testing::kFilterNumChannelsQ1, + 1.0f, 2.0f}; + constexpr int filter_zero_points[] = {tflite::testing::kFilterNumChannelsQ1, + 0, 0}; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int8_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int8_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int8_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ1)>::value / + tflite::testing::kFilterNumChannelsQ1; + 
comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ1; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ1; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvQuantizedPerChannelCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params_q1, tflite::Register_CONV_2D(), + &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestFloat) { float output_data[tflite::testing::kOutputElements]; @@ -136,6 +275,37 @@ TF_LITE_MICRO_TEST(SimpleTestFloat) { output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestFloatCompressed) { + tflite::testing::TestCompressionInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.bias_value_table = tflite::testing::kBiasData; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + + float output_data[tflite::testing::kOutputElements]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvFloat( + tflite::testing::kInputShape, tflite::testing::kInputData, + tflite::testing::kFilterShape, + reinterpret_cast(tflite::testing::kBinQuantFilterData), + tflite::testing::kBiasShape, + reinterpret_cast(tflite::testing::kBinQuantBiasData), + tflite::testing::kOutputShape, tflite::testing::kGoldenData, + &tflite::testing::common_conv_params, tflite::Register_CONV_2D(), + output_data, &comp_info)); +} + +#endif + TF_LITE_MICRO_TEST(InputAndFilterSameWidthHeight) { const int output_dims_count = 2; float output_data[output_dims_count]; @@ -246,6 +416,66 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel64bBias) { output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel64bBiasCompressed) { + const float input_scale = 128.0f / 65536; + const float output_scale = 128.0f / 65536; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = {tflite::testing::kFilterNumChannelsQ1, + 1.0f, 2.0f}; + constexpr int filter_zero_points[] = {tflite::testing::kFilterNumChannelsQ1, + 0, 0}; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t 
filter_quantized[tflite::testing::kFilterElementsQ1]; + int64_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int16_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ1)>::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterDataQ1; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ1; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvQuantizedPerChannelCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1_16, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params_q1, tflite::Register_CONV_2D(), + &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel32bBias) { const int output_dims_count = 12; int16_t output_data[output_dims_count]; @@ -276,6 +506,66 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel32bBias) { output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel32bBiasCompressed) { + const float input_scale = 128.0f / 65536; + const float output_scale = 128.0f / 65536; + const int input_zero_point = 0; + const int output_zero_point = 0; + constexpr float filter_scales[] = {tflite::testing::kFilterNumChannelsQ1, + 1.0f, 2.0f}; + constexpr int filter_zero_points[] = {tflite::testing::kFilterNumChannelsQ1, + 0, 0}; + // bias scales and zero points will be computed + float bias_scales[std::extent::value] = {}; + int bias_zero_points[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int16_t golden_quantized[tflite::testing::kOutputElementsQ1]; + int16_t output_quantized[tflite::testing::kOutputElementsQ1]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent< + decltype(tflite::testing::kBinQuantFilterValueTableQ1)>::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidthQ1; + comp_info.filter_compressed = 
tflite::testing::kBinQuantFilterDataQ1; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTableQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = filter_scales; + comp_info.filter_zero_points = filter_zero_points; + + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / + tflite::testing::kFilterNumChannelsQ1; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidthQ1; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasDataQ1; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = bias_scales; + comp_info.bias_zero_points = bias_zero_points; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestConvQuantizedPerChannelCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1_16, + golden_quantized, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params_q1, tflite::Register_CONV_2D(), + &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestDilatedQuantizedPerChannel) { const int output_dims_count = 24; int8_t output_data[output_dims_count]; @@ -1190,3 +1480,60 @@ TF_LITE_MICRO_TEST(Int8Filter8x3x3x3PerChannelScaleRelu6ShouldMatchGolden) { } TF_LITE_MICRO_TESTS_END + +// {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1}, +// {TensorType_INT8, +// // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] +// {2, 2, 2, 2}, +// 0, +// 0, +// 0, +// 0, +// /*per_channel_quantization=*/true, +// /*per_channel_quantization_scales=*/{1, 2}, +// /*per_channel_quantization_offsets=*/{0, 0}, +// /*channel_index=*/0}, +// {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, +// /*stride_width=*/1, /*stride_height=*/1); +// m.SetInput({ +// // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] +// 3, 2, // batch = 0, y = 0, x = 0 +// 1, -1, // batch = 0, y = 0, x = 1 +// -2, -3, // batch = 0, y = 0, x = 2 +// 4, 3, // batch = 0, y = 1, x = 0 +// 2, -2, // batch = 0, y = 1, x = 1 +// -3, -4, // batch = 0, y = 1, x = 2 +// }); +// m.SetFilter( +// // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] +// { +// 1, 2, // out channel = 0, y = 0, x = 0 +// 3, 4, // out channel = 0, y = 0, x = 1 +// 3, 4, // out channel = 0, y = 1, x = 0 +// 5, 6, // out channel = 0, y = 1, x = 1 +// 7, 8, // out channel = 1, y = 0, x = 0 +// 5, 6, // out channel = 1, y = 0, x = 1 +// 3, 4, // out channel = 1, y = 1, x = 0 +// 1, 2, // out channel = 1, y = 1, x = 1 +// }); +// m.SetBias({3, -2}); +// // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] +// EXPECT_THAT(m.GetDequantizedOutput(), +// ElementsAreArray(ArrayFloatNear({31, 64, -57, -46}))); +// EXPECT_THAT(m.GetOutput(), ElementsAreArray({61, 127, -115, -93})); + +// TEST_P(ConvolutionOpTest, SimplePerChannel16x8Bias32) { +// const float scale = 128.0 / 65536; +// // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] +// EXPECT_THAT(m.GetDequantizedOutput(), +// ElementsAreArray(ArrayFloatNear({31, 63.99804688, -57, -46}))); +// EXPECT_THAT(m.GetOutput(), +// ElementsAreArray({15872, 32767, -29184, -23552})); + +// TEST_P(ConvolutionOpTest, SimplePerChannel16x8Bias64) { +// const float scale = 128.0 / 65536; +// // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] +// 
EXPECT_THAT(m.GetDequantizedOutput(), +// ElementsAreArray(ArrayFloatNear({31, 63.99804688, -57, -46}))); +// EXPECT_THAT(m.GetOutput(), +// ElementsAreArray({15872, 32767, -29184, -23552})); \ No newline at end of file diff --git a/tensorflow/lite/micro/kernels/conv_test.h b/tensorflow/lite/micro/kernels/conv_test.h index c655f043bcc..9df52b6b250 100644 --- a/tensorflow/lite/micro/kernels/conv_test.h +++ b/tensorflow/lite/micro/kernels/conv_test.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/conv.h" #include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" #include "tensorflow/lite/micro/test_helpers.h" @@ -26,35 +27,180 @@ limitations under the License. namespace tflite { namespace testing { -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data); +constexpr int kMaxTensors = 4; + +#ifdef USE_TFLM_COMPRESSION + +template +struct TestCompressionInfo { + TFILTER* filter_value_table; + size_t filter_value_table_stride; + int filter_bit_width; + TBIAS* bias_value_table; + size_t bias_value_table_stride; + int bias_bit_width; + CompressionScheme scheme; +}; + +template +struct TestCompressionQuantizedInfo : TestCompressionInfo { + const uint8_t* filter_compressed; + const float* filter_data; + const int* filter_dims_data; // TfLiteIntArray + const float* filter_scales; // TfLiteFloatArray + const int* filter_zero_points; // TfLiteIntArray + const uint8_t* bias_compressed; + const float* bias_data; + const int* bias_dims_data; // TfLiteIntArray + float* bias_scales; // TfLiteFloatArray (computed) + int* bias_zero_points; // TfLiteIntArray (computed) +}; + +#endif // USE_TFLM_COMPRESSION + +template TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, int8_t* output_data); - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const float* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - float* output_data, float tolerance = 1e-5); - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const int8_t* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - int8_t* output_data, float tolerance = 1e-5); - -TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, - int* filter_dims_data, const float* filter_data, - int* bias_dims_data, const float* bias_data, - int* output_dims_data, - const float* expected_output_data, - TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data); + int output_length, const TfLiteConvParams* conv_params, + TFLMRegistration registration, T* output_data +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* comp_list_p = nullptr +#endif // USE_TFLM_COMPRESSION +) { + // TODO(ddavis-2015): support optional bias tensor + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = 
IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, conv_params +#ifdef USE_TFLM_COMPRESSION + , + nullptr, comp_list_p +#endif // USE_TFLM_COMPRESSION + ); + + const char* init_data = reinterpret_cast(conv_params); + TfLiteStatus status = runner.InitAndPrepare(init_data); + if (status != kTfLiteOk) { + return status; + } + return runner.Invoke(); +} + +template +TfLiteStatus ValidateConvGoldens( + TfLiteTensor* tensors, int tensors_size, const T* expected_output_data, + int output_length, const TfLiteConvParams* conv_params, + TFLMRegistration registration, T* output_data, float tolerance = 1e-5 +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { +#ifdef USE_TFLM_COMPRESSION + + CompressionTensorData* compressed_tensors[kMaxTensors] = {}; + CompressionTensorData filter_comp_data = {}; + CompressionTensorData bias_comp_data = {}; + CompressedTensorList comp_list = {compressed_tensors}; + CompressedTensorList* comp_list_p = nullptr; + + if (comp_info != nullptr) { + if (comp_info->scheme == CompressionScheme::kBinQuant) { + if (comp_info->filter_value_table != nullptr) { + bool is_per_channel = + tensors[kConvWeightsTensor].type != kTfLiteFloat32 && + tensors[kConvWeightsTensor].dims->data[kConvQuantizedDimension] > 1; + compressed_tensors[kConvWeightsTensor] = &filter_comp_data; + filter_comp_data.scheme = CompressionScheme::kBinQuant; + filter_comp_data.data.bin_quant.compressed_bit_width = + comp_info->filter_bit_width; + filter_comp_data.data.bin_quant.value_table = + comp_info->filter_value_table; + filter_comp_data.data.bin_quant.value_table_channel_stride = + comp_info->filter_value_table_stride; + filter_comp_data.data.bin_quant.is_per_channel_quantized = + is_per_channel; + filter_comp_data.data.bin_quant.use_alternate_axis = false; + } + if (comp_info->bias_value_table != nullptr) { + bool is_per_channel = + tensors[kConvBiasTensor].type != kTfLiteFloat32 && + tensors[kConvBiasTensor].dims->data[kConvQuantizedDimension] > 1; + compressed_tensors[kConvBiasTensor] = &bias_comp_data; + bias_comp_data.scheme = CompressionScheme::kBinQuant; + bias_comp_data.data.bin_quant.compressed_bit_width = + comp_info->bias_bit_width; + bias_comp_data.data.bin_quant.value_table = comp_info->bias_value_table; + bias_comp_data.data.bin_quant.value_table_channel_stride = + comp_info->bias_value_table_stride; + bias_comp_data.data.bin_quant.is_per_channel_quantized = is_per_channel; + bias_comp_data.data.bin_quant.use_alternate_axis = false; + } + comp_list_p = &comp_list; + } else { + return kTfLiteError; + } + } + +#endif // USE_TFLM_COMPRESSION + + TfLiteStatus status = InvokeConv(tensors, tensors_size, output_length, + conv_params, registration, output_data +#ifdef USE_TFLM_COMPRESSION + , + comp_list_p +#endif // USE_TFLM_COMPRESSION + ); + if (status != kTfLiteOk) { + return status; + } + for (int i = 0; i < output_length; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], + tolerance); + } + return kTfLiteOk; +} + +template +TfLiteStatus TestConvFloat( + int* input_dims_data, const float* input_data, int* filter_dims_data, + const float* filter_data, int* bias_dims_data, const float* bias_data, + int* output_dims_data, const float* expected_output_data, + TfLiteConvParams* conv_params, TFLMRegistration 
registration, + float* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateTensor(input_data, input_dims), + CreateTensor(filter_data, filter_dims), + CreateTensor(bias_data, bias_dims), + CreateTensor(output_data, output_dims), + }; + + return ValidateConvGoldens(tensors, tensors_size, expected_output_data, + output_dims_count, conv_params, registration, + output_data +#ifdef USE_TFLM_COMPRESSION + , + 1e-5, comp_info +#endif // USE_TFLM_COMPRESSION + ); +} TfLiteStatus TestConvQuantizedPerChannel( int* input_dims_data, const float* input_data, int8_t* input_quantized, @@ -88,6 +234,80 @@ TfLiteStatus TestConvQuantizedPerChannel( float output_scale, int output_zero_point, TfLiteConvParams* conv_params, TFLMRegistration registration, int16_t* output_data); +#ifdef USE_TFLM_COMPRESSION + +template +TfLiteStatus TestConvQuantizedPerChannelCompressed( + int* input_dims_data, const float* input_data, TIO* input_quantized, + float input_scale, int input_zero_point, int* output_dims_data, + const float* expected_output_data, TIO* expected_output_quantized, + TIO* output_quantized, float output_scale, int output_zero_point, + const TfLiteConvParams* conv_params, TFLMRegistration registration, + const TestCompressionQuantizedInfo* comp_info) { + // TODO(ddavis-2015): account for optional bias tensor + // bool null_bias = comp_info->bias_data == nullptr ? 
true : false; + + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(comp_info->filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(comp_info->bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + + TfLiteFloatArray* filter_scales = + FloatArrayFromFloats(comp_info->filter_scales); + TfLiteIntArray* filter_zero_points = + IntArrayFromInts(comp_info->filter_zero_points); + TfLiteFloatArray* bias_scales = FloatArrayFromFloats(comp_info->bias_scales); + TfLiteIntArray* bias_zero_points = + IntArrayFromInts(comp_info->bias_zero_points); + + TfLiteAffineQuantization filter_quant = {}; + TfLiteTensor filter_tensor = CreatePerChannelQuantizedTensor( + comp_info->filter_compressed, filter_dims, filter_scales, + filter_zero_points, &filter_quant, kConvQuantizedDimension, + false /* is_variable */, kTfLiteInt8); + SymmetricPerChannelQuantize( + comp_info->filter_data, comp_info->filter_value_table, + ElementCount(*filter_dims), filter_scales->size, filter_scales->data); + + TfLiteAffineQuantization bias_quant = {}; + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( + comp_info->bias_compressed, bias_dims, input_scale, filter_scales, + bias_scales, bias_zero_points, &bias_quant, kConvQuantizedDimension, + false /* is_variable */, typeToTfLiteType()); + SymmetricPerChannelQuantize(comp_info->bias_data, comp_info->bias_value_table, + ElementCount(*bias_dims), bias_scales->size, + bias_scales->data); + + for (int i = 0; i < ElementCount(*bias_dims); i++) { + int64_t bias_data0 = comp_info->bias_value_table[i]; + MicroPrintf( + "bias scale %f bias zero_point %d" + " bias data %f bias data quantized %lld", + (double)bias_scales->data[i], bias_zero_points->data[i], + (double)comp_info->bias_data[i], bias_data0); + } + + constexpr int tensors_size = kMaxTensors; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point), + filter_tensor, + bias_tensor, + CreateQuantizedTensor(output_quantized, output_dims, output_scale, + output_zero_point), + }; + + const int output_dims_count = ElementCount(*output_dims); + Quantize(expected_output_data, expected_output_quantized, output_dims_count, + output_scale, output_zero_point); + return ValidateConvGoldens(tensors, tensors_size, expected_output_quantized, + output_dims_count, conv_params, registration, + output_quantized, 1.0e-5f /* tolerance */, + comp_info); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/conv_test_common.cc b/tensorflow/lite/micro/kernels/conv_test_common.cc index a0f733b8e42..7b6f71a8fc3 100644 --- a/tensorflow/lite/micro/kernels/conv_test_common.cc +++ b/tensorflow/lite/micro/kernels/conv_test_common.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,108 +18,6 @@ limitations under the License. 
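Two notes on TestConvQuantizedPerChannelCompressed above. First, the raw arrays handed to IntArrayFromInts and FloatArrayFromFloats follow the test-helper convention that element 0 of the array is its length, so filter_scales = {2, 1.0f, 2.0f} and filter_zero_points = {2, 0, 0} describe two per-channel scales and zero points; the helpers reinterpret that storage as TfLiteIntArray / TfLiteFloatArray rather than copying it. A minimal sketch of the convention (the array names here are illustrative):

#include "tensorflow/lite/micro/test_helpers.h"

void LengthPrefixedArraySketch() {
  // Element 0 is the count; the remaining elements are the payload.
  static int zero_points_raw[] = {2, 0, 0};     // two channels, zero points {0, 0}
  static float scales_raw[] = {2, 1.0f, 2.0f};  // two channels, scales {1.0, 2.0}
  TfLiteIntArray* zero_points =
      tflite::testing::IntArrayFromInts(zero_points_raw);
  TfLiteFloatArray* scales =
      tflite::testing::FloatArrayFromFloats(scales_raw);
  // zero_points->size == 2, scales->data[0] == 1.0f
  (void)zero_points;
  (void)scales;
}

Second, the caller passes in zero-initialized bias_scales and bias_zero_points because CreatePerChannelQuantizedBiasTensor computes them (hence the "will be computed" comment in the tests): following the usual TfLite rule, each bias channel is quantized with scale input_scale * filter_scale[c] and zero point 0, which for the Q1 data gives bias scales of 0.5 * 1.0 = 0.5 and 0.5 * 2.0 = 1.0 (and 128/65536 times 1.0 and 2.0 in the 16x8 variants). The MicroPrintf loop in that helper simply dumps the computed values for debugging.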
namespace tflite { namespace testing { -template -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, T* output_data) { - int inputs_array_data[] = {3, 0, 1, 2}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 3}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, - outputs_array, conv_params); - - const char* init_data = reinterpret_cast(conv_params); - TfLiteStatus status = runner.InitAndPrepare(init_data); - if (status != kTfLiteOk) { - return status; - } - return runner.Invoke(); -} - -template -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const T* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, T* output_data, - float tolerance) { - TfLiteStatus status = InvokeConv(tensors, tensors_size, output_length, - conv_params, registration, output_data); - if (status != kTfLiteOk) { - return status; - } - for (int i = 0; i < output_length; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], - tolerance); - } - return kTfLiteOk; -} - -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data) { - return InvokeConv(tensors, tensors_size, output_length, conv_params, - registration, output_data); -} - -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TFLMRegistration registration, int8_t* output_data) { - return InvokeConv(tensors, tensors_size, output_length, conv_params, - registration, output_data); -} - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const float* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - float* output_data, float tolerance) { - return ValidateConvGoldens(tensors, tensors_size, expected_output_data, - output_length, conv_params, registration, - output_data, tolerance); -} - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const int8_t* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TFLMRegistration registration, - int8_t* output_data, float tolerance) { - return ValidateConvGoldens( - tensors, tensors_size, expected_output_data, output_length, conv_params, - registration, output_data, tolerance); -} - -TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, - int* filter_dims_data, const float* filter_data, - int* bias_dims_data, const float* bias_data, - int* output_dims_data, - const float* expected_output_data, - TfLiteConvParams* conv_params, - TFLMRegistration registration, float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateTensor(input_data, input_dims), - CreateTensor(filter_data, filter_dims), 
- CreateTensor(bias_data, bias_dims), - CreateTensor(output_data, output_dims), - }; - - return ValidateConvGoldens(tensors, tensors_size, expected_output_data, - output_dims_count, conv_params, registration, - output_data); -} - template TfLiteStatus TestConvQuantizedPerChannel( int* input_dims_data, const float* input_data, T* input_quantized, diff --git a/tensorflow/lite/micro/kernels/fully_connected.cc b/tensorflow/lite/micro/kernels/fully_connected.cc index 65c83792e87..c779ea329f3 100644 --- a/tensorflow/lite/micro/kernels/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/fully_connected.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -60,7 +60,7 @@ TfLiteStatus FullyConnectedPrepare(TfLiteContext* context, TfLiteNode* node) { (input->type == kTfLiteInt8 && (filter->type != kTfLiteInt8 && filter->type != kTfLiteInt4)) || (input->type == kTfLiteInt16 && filter->type != kTfLiteInt8)) { - MicroPrintf("Input type: %s with filter type : %s not supported.", + MicroPrintf("Input type: %s with filter type: %s not supported.", TfLiteTypeGetName(input->type), TfLiteTypeGetName(filter->type)); return kTfLiteError; @@ -79,6 +79,23 @@ TfLiteStatus FullyConnectedPrepare(TfLiteContext* context, TfLiteNode* node) { context, params->activation, input->type, input, filter, bias, output, data)); +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. + if (micro_context->IsTensorCompressed(node, kFullyConnectedWeightsTensor) && + filter->type == kTfLiteInt4) { + MicroPrintf("Compression not supported with INT4 tensors"); + return kTfLiteError; + } + data->weights_scratch_index = + micro_context->AllocateDecompressionScratchBuffer( + node, kFullyConnectedWeightsTensor); + data->bias_scratch_index = micro_context->AllocateDecompressionScratchBuffer( + node, kFullyConnectedBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(filter); if (bias != nullptr) { @@ -102,8 +119,20 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); - TFLITE_DCHECK(node->user_data != nullptr); +#ifdef USE_TFLM_COMPRESSION + + // TODO(ddavis-2015): make micro_context a const pointer + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* weights_comp_td = + micro_context->GetTensorCompressionData(node, + kFullyConnectedWeightsTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kFullyConnectedBiasTensor); +#endif // USE_TFLM_COMPRESSION + + TFLITE_DCHECK(node->user_data != nullptr); const auto& data = *(static_cast(node->user_data)); @@ -115,9 +144,18 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(micro_context, bias, bias_comp_td, + data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION 
tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; @@ -149,9 +187,18 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; @@ -173,9 +220,18 @@ TfLiteStatus FullyConnectedEval(TfLiteContext* context, TfLiteNode* node) { tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + weights_comp_td, + data.weights_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); break; diff --git a/tensorflow/lite/micro/kernels/fully_connected.h b/tensorflow/lite/micro/kernels/fully_connected.h index 8308838ec6d..d7ea705964c 100644 --- a/tensorflow/lite/micro/kernels/fully_connected.h +++ b/tensorflow/lite/micro/kernels/fully_connected.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -46,6 +46,14 @@ struct OpDataFullyConnected { // tensor is of n-bit precision that cannot be easily processed by kernels. int filter_buffer_index; #endif + +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int weights_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION }; extern const int kFullyConnectedInputTensor; diff --git a/tensorflow/lite/micro/kernels/fully_connected_test.cc b/tensorflow/lite/micro/kernels/fully_connected_test.cc index 2ad132055b8..b88d5635815 100644 --- a/tensorflow/lite/micro/kernels/fully_connected_test.cc +++ b/tensorflow/lite/micro/kernels/fully_connected_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
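A note on the compression constants introduced in the next hunk: they follow the same packing as the conv tests. kBinQuantFilterData repeats the byte sequence 0x01 0x23 0x45 0x67 0x89 three times, i.e. the 4-bit indices 0 through 9 three times over; looked up in kBinQuantFilterValueTable {1, ..., 10} this appears to reproduce the three 10-element rows of simple_weights_data. kBinQuantBiasData's single byte 0x18 holds the three 2-bit indices 0, 1, 2 (0b00 01 10 plus two padding bits), selecting the bias values in order.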
@@ -42,6 +42,20 @@ const float simple_weights_data[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 }; +#ifdef USE_TFLM_COMPRESSION + +// compressed filter data for kBinQuant scheme +constexpr uint8_t kBinQuantFilterData[] = {0x01, 0x23, 0x45, 0x67, 0x89, + 0x01, 0x23, 0x45, 0x67, 0x89, + 0x01, 0x23, 0x45, 0x67, 0x89}; +constexpr float kBinQuantFilterValueTable[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; +constexpr int kBinQuantFilterBitWidth = 4; +// compressed bias data for kBinQuant scheme +constexpr uint8_t kBinQuantBiasData[] = {0x18}; +constexpr int kBinQuantBiasBitWidth = 2; + +#endif // USE_TFLM_COMPRESSION + // TODO(b/258710417): INT4 isn't currently supported on Hexagon. #if !defined(HEXAGON) const float simple_int4_weights_data[] = { @@ -241,11 +255,50 @@ const float representative_64x16_golden[] = { const int representative_64x16_output_size = 16; int representative_64x16_output_dims[] = {2, 1, 16}; -template +constexpr int kMaxTensors = 4; + +#ifdef USE_TFLM_COMPRESSION + +template +struct TestCompressionInfo { + TFILTER* filter_value_table; + size_t filter_value_table_stride; + int filter_bit_width; + bool use_filter_alt_axis; + TBIAS* bias_value_table; + size_t bias_value_table_stride; + int bias_bit_width; + bool use_bias_alt_axis; + CompressionScheme scheme; +}; + +template +struct TestCompressionQuantizedInfo : TestCompressionInfo { + const uint8_t* filter_compressed; + const float* filter_data; + const int* filter_dims_data; // TfLiteIntArray + const float* filter_scales; // TfLiteFloatArray + const int* filter_zero_points; // TfLiteIntArray + + const uint8_t* bias_compressed; + const float* bias_data; + const int* bias_dims_data; // TfLiteIntArray + float* bias_scales; // TfLiteFloatArray (computed) + int* bias_zero_points; // TfLiteIntArray (computed) +}; + +#endif // USE_TFLM_COMPRESSION + +template TfLiteStatus ValidateFullyConnectedGoldens( TfLiteTensor* tensors, const int tensors_size, bool null_bias, const TfLiteFusedActivation activation, const float tolerance, - const int output_len, const T* golden, T* output_data) { + const int output_len, const T* golden, T* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { TfLiteFullyConnectedParams builtin_data = { activation, kTfLiteFullyConnectedWeightsFormatDefault, false, false, kTfLiteNoType}; @@ -272,10 +325,56 @@ TfLiteStatus ValidateFullyConnectedGoldens( TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); +#ifdef USE_TFLM_COMPRESSION + + CompressionTensorData* compressed_tensors[kMaxTensors] = {}; + CompressionTensorData filter_comp_data = {}; + CompressionTensorData bias_comp_data = {}; + CompressedTensorList comp_list = {compressed_tensors}; + CompressedTensorList* comp_list_p = nullptr; + + if (comp_info != nullptr) { + if (comp_info->scheme == CompressionScheme::kBinQuant) { + if (comp_info->filter_value_table != nullptr) { + compressed_tensors[kFullyConnectedWeightsTensor] = &filter_comp_data; + filter_comp_data.scheme = CompressionScheme::kBinQuant; + filter_comp_data.data.bin_quant.compressed_bit_width = + comp_info->filter_bit_width; + filter_comp_data.data.bin_quant.value_table = + comp_info->filter_value_table; + filter_comp_data.data.bin_quant.value_table_channel_stride = + comp_info->filter_value_table_stride; + filter_comp_data.data.bin_quant.is_per_channel_quantized = false; + filter_comp_data.data.bin_quant.use_alternate_axis = false; + } + if 
(comp_info->bias_value_table != nullptr) { + compressed_tensors[kFullyConnectedBiasTensor] = &bias_comp_data; + bias_comp_data.scheme = CompressionScheme::kBinQuant; + bias_comp_data.data.bin_quant.compressed_bit_width = + comp_info->bias_bit_width; + bias_comp_data.data.bin_quant.value_table = comp_info->bias_value_table; + bias_comp_data.data.bin_quant.value_table_channel_stride = + comp_info->bias_value_table_stride; + bias_comp_data.data.bin_quant.is_per_channel_quantized = false; + bias_comp_data.data.bin_quant.use_alternate_axis = false; + } + comp_list_p = &comp_list; + } else { + return kTfLiteError; + } + } + +#endif // USE_TFLM_COMPRESSION + const TFLMRegistration registration = Register_FULLY_CONNECTED(); micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data)); + reinterpret_cast(&builtin_data), nullptr +#ifdef USE_TFLM_COMPRESSION + , + comp_list_p +#endif // USE_TFLM_COMPRESSION + ); TfLiteStatus status = runner.InitAndPrepare(); if (status != kTfLiteOk) { @@ -293,11 +392,17 @@ TfLiteStatus ValidateFullyConnectedGoldens( return kTfLiteOk; } +template TfLiteStatus TestFullyConnectedFloat( int* input_dims_data, const float* input_data, int* weights_dims_data, const float* weights_data, int* bias_dims_data, const float* bias_data, const float* golden, int* output_dims_data, - TfLiteFusedActivation activation, float* output_data) { + TfLiteFusedActivation activation, float* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* weights_dims = IntArrayFromInts(weights_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -305,16 +410,15 @@ TfLiteStatus TestFullyConnectedFloat( const int output_dims_count = ElementCount(*output_dims); bool null_bias = bias_data == nullptr ? true : false; - constexpr int array_size = 4; // Avoid variable length array warning. - const int inputs_size = bias_data == nullptr ? 2 : 3; + const int inputs_size = null_bias ? 2 : 3; constexpr int outputs_size = 1; const int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[array_size]; + TfLiteTensor tensors[kMaxTensors]; tensors[0] = CreateTensor(input_data, input_dims); tensors[1] = CreateTensor(weights_data, weights_dims); - if (bias_data == nullptr) { + if (null_bias) { tensors[2] = CreateTensor(output_data, output_dims); } else { tensors[2] = CreateTensor(bias_data, bias_dims); @@ -323,7 +427,12 @@ TfLiteStatus TestFullyConnectedFloat( return ValidateFullyConnectedGoldens(tensors, tensors_size, null_bias, activation, 1e-4f, output_dims_count, - golden, output_data); + golden, output_data +#ifdef USE_TFLM_COMPRESSION + , + comp_info +#endif // USE_TFLM_COMPRESSION + ); } template @@ -345,7 +454,7 @@ TfLiteStatus TestFullyConnectedQuantized( bool null_bias = bias_data == nullptr ? true : false; constexpr int array_size = 4; // Avoid variable length array warning. - const int inputs_size = bias_data == nullptr ? 2 : 3; + const int inputs_size = null_bias ? 
2 : 3; constexpr int outputs_size = 1; const int tensors_size = inputs_size + outputs_size; TfLiteTensor tensors[array_size]; @@ -355,7 +464,7 @@ TfLiteStatus TestFullyConnectedQuantized( tensors[1] = CreateQuantizedTensor( weights_data, weights_quantized, weights_dims, weights_scale, weights_zero_point, false, weights_packed_type); - if (bias_data == nullptr) { + if (null_bias) { tensors[2] = CreateQuantizedTensor(output_data, output_dims, output_scale, output_zero_point); } else { @@ -373,6 +482,71 @@ TfLiteStatus TestFullyConnectedQuantized( golden_quantized, output_data); } +#ifdef USE_TFLM_COMPRESSION + +template +TfLiteStatus TestFullyConnectedQuantizedCompressed( + int* input_dims_data, const float* input_data, TIO* input_quantized, + float input_scale, int input_zero_point, int* output_dims_data, + const float* expected_output_data, TIO* expected_output_quantized, + TIO* output_quantized, float output_scale, int output_zero_point, + const TfLiteFusedActivation activation, + const TestCompressionQuantizedInfo* comp_info) { + // TODO(ddavis-2015): account for optional bias tensor + + bool null_bias = comp_info->bias_data == nullptr ? true : false; + + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(comp_info->filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(comp_info->bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + + TfLiteFloatArray* filter_scales = + FloatArrayFromFloats(comp_info->filter_scales); + TfLiteIntArray* filter_zero_points = + IntArrayFromInts(comp_info->filter_zero_points); + + TfLiteTensor filter_tensor = CreateQuantizedTensor( + comp_info->filter_compressed, filter_dims, filter_scales->data[0], + filter_zero_points->data[0], false, kTfLiteInt8); + SymmetricQuantize(comp_info->filter_data, comp_info->filter_value_table, + ElementCount(*filter_dims), filter_scales->data[0]); + + TfLiteTensor bias_tensor = CreateQuantizedTensor( + comp_info->bias_compressed, bias_dims, + input_scale * filter_scales->data[0], 0, false, typeToTfLiteType()); + SymmetricQuantize(comp_info->bias_data, comp_info->bias_value_table, + ElementCount(*bias_dims), bias_tensor.params.scale); + + for (int i = 0; i < ElementCount(*bias_dims); i++) { + int64_t bias_data0 = comp_info->bias_value_table[i]; + MicroPrintf( + "bias scale %f bias zero_point %d" + " bias data %f bias data quantized %lld", + (double)bias_tensor.params.scale, bias_tensor.params.zero_point, + (double)comp_info->bias_data[i], bias_data0); + } + + constexpr int tensors_size = kMaxTensors; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point), + filter_tensor, + bias_tensor, + CreateQuantizedTensor(output_quantized, output_dims, output_scale, + output_zero_point), + }; + + const int output_dims_count = ElementCount(*output_dims); + Quantize(expected_output_data, expected_output_quantized, output_dims_count, + output_scale, output_zero_point); + return ValidateFullyConnectedGoldens( + tensors, tensors_size, null_bias, activation, 0.0f, output_dims_count, + expected_output_quantized, output_quantized, comp_info); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace } // namespace testing } // namespace tflite @@ -393,6 +567,37 @@ TF_LITE_MICRO_TEST(SimpleTest) { kTfLiteOk); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestCompressed) { + float output_data[tflite::testing::simple_output_size]; + + 
tflite::testing::TestCompressionInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.bias_value_table = tflite::testing::simple_bias_data; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + + TF_LITE_MICRO_EXPECT_EQ( + tflite::testing::TestFullyConnectedFloat( + tflite::testing::simple_input_dims, + tflite::testing::simple_input_data, + tflite::testing::simple_weights_dims, + reinterpret_cast(tflite::testing::kBinQuantFilterData), + tflite::testing::simple_bias_dims, + reinterpret_cast(tflite::testing::kBinQuantBiasData), + tflite::testing::simple_golden, tflite::testing::simple_output_dims, + kTfLiteActNone, output_data, &comp_info), + kTfLiteOk); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(SimpleTestNullBias) { float output_data[tflite::testing::simple_output_size]; TF_LITE_MICRO_EXPECT_EQ( @@ -434,6 +639,54 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8) { kTfLiteOk); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8Compressed) { + const float input_scale = 1.0f; + const int input_zero_point = -1; + constexpr float weights_scale[] = {1, 1.0f}; + constexpr int weights_zero_point[] = {1, 0}; + const float output_scale = 0.5f; + const int output_zero_point = -1; + + int8_t input_quantized[tflite::testing::simple_input_size]; + int8_t weights_quantized[tflite::testing::simple_weights_size]; + int32_t bias_quantized[tflite::testing::simple_output_size]; + int8_t golden_quantized[tflite::testing::simple_output_size]; + int8_t output_data[tflite::testing::simple_output_size]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = weights_quantized; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterData; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_dims_data = tflite::testing::simple_weights_dims; + comp_info.filter_scales = weights_scale; + comp_info.filter_zero_points = weights_zero_point; + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasData; + comp_info.bias_data = tflite::testing::simple_bias_data; + comp_info.bias_dims_data = tflite::testing::simple_bias_dims; + // bias_scales and bias_zero_points are not used + + TF_LITE_MICRO_EXPECT_EQ( + tflite::testing::TestFullyConnectedQuantizedCompressed( + tflite::testing::simple_input_dims, + tflite::testing::simple_input_data, input_quantized, input_scale, + input_zero_point, tflite::testing::simple_output_dims, + tflite::testing::simple_golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone, &comp_info), + kTfLiteOk); +} + +#endif // USE_TFLM_COMPRESSION + #if !defined(HEXAGON) TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16) { const float input_scale = 128.0 / 65536; @@ -443,7 +696,6 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16) { const float output_scale = 128.0 / 65536; 
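Unlike the per-channel conv tests, the compressed fully connected tests use whole-tensor metadata: ValidateFullyConnectedGoldens sets is_per_channel_quantized and use_alternate_axis to false, so filter_value_table_stride is the full table length (the 10 entries of kBinQuantFilterValueTable) and the bias stride is the full bias length, with no division by a channel count. That is also why a single length-one scale array such as weights_scale[] = {1, 1.0f} is sufficient for the compressed int8 and int16 variants.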
const int output_zero_point = 0; - const float simple_golden[] = {24, 25, 26, 58, 59, 60}; int16_t input_quantized[tflite::testing::simple_input_size]; int8_t weights_quantized[tflite::testing::simple_weights_size]; int64_t bias_quantized[tflite::testing::simple_output_size]; @@ -457,12 +709,62 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16) { input_zero_point, tflite::testing::simple_weights_dims, tflite::testing::simple_weights_data, weights_quantized, weights_scale, weights_zero_point, tflite::testing::simple_bias_dims, - tflite::testing::simple_bias_data, bias_quantized, simple_golden, - golden_quantized, tflite::testing::simple_output_dims, output_scale, - output_zero_point, kTfLiteActNone, output_data), + tflite::testing::simple_bias_data, bias_quantized, + tflite::testing::simple_golden, golden_quantized, + tflite::testing::simple_output_dims, output_scale, output_zero_point, + kTfLiteActNone, output_data), kTfLiteOk); } -#endif + +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedInt16Compressed) { + const float input_scale = 128.0 / 65536; + const int input_zero_point = 0; + constexpr float weights_scale[] = {1, 1.0f}; + constexpr int weights_zero_point[] = {1, 0}; + const float output_scale = 128.0 / 65536; + const int output_zero_point = 0; + + int16_t input_quantized[tflite::testing::simple_input_size]; + int8_t weights_quantized[tflite::testing::simple_weights_size]; + int64_t bias_quantized[tflite::testing::simple_output_size]; + int16_t golden_quantized[tflite::testing::simple_output_size]; + int16_t output_data[tflite::testing::simple_output_size]; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = weights_quantized; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = tflite::testing::kBinQuantFilterBitWidth; + comp_info.filter_compressed = tflite::testing::kBinQuantFilterData; + comp_info.filter_data = tflite::testing::kBinQuantFilterValueTable; + comp_info.filter_dims_data = tflite::testing::simple_weights_dims; + comp_info.filter_scales = weights_scale; + comp_info.filter_zero_points = weights_zero_point; + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = tflite::testing::kBinQuantBiasBitWidth; + comp_info.bias_compressed = tflite::testing::kBinQuantBiasData; + comp_info.bias_data = tflite::testing::simple_bias_data; + comp_info.bias_dims_data = tflite::testing::simple_bias_dims; + // bias_scales and bias_zero_points are not used + + TF_LITE_MICRO_EXPECT_EQ( + tflite::testing::TestFullyConnectedQuantizedCompressed( + tflite::testing::simple_input_dims, + tflite::testing::simple_input_data, input_quantized, input_scale, + input_zero_point, tflite::testing::simple_output_dims, + tflite::testing::simple_golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone, &comp_info), + kTfLiteOk); +} + +#endif // USE_TFLM_COMPRESSION + +#endif // !defined(HEXAGON) TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedInt8) { const float input_scale = 1.0f; diff --git a/tensorflow/lite/micro/kernels/kernel_runner.cc b/tensorflow/lite/micro/kernels/kernel_runner.cc index 602778d7c50..da797d03aa3 100644 --- a/tensorflow/lite/micro/kernels/kernel_runner.cc +++ b/tensorflow/lite/micro/kernels/kernel_runner.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -38,12 +38,22 @@ KernelRunner::KernelRunner(const TFLMRegistration& registration, TfLiteTensor* tensors, int tensors_size, TfLiteIntArray* inputs, TfLiteIntArray* outputs, const void* builtin_data, - TfLiteIntArray* intermediates) + TfLiteIntArray* intermediates +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* compressed_tensors +#endif // USE_TFLM_COMPRESSION + ) : registration_(registration), allocator_(SingleArenaBufferAllocator::Create(kKernelRunnerBuffer_, kKernelRunnerBufferSize_)), mock_micro_graph_(allocator_), - fake_micro_context_(tensors, allocator_, &mock_micro_graph_) { + fake_micro_context_(tensors, allocator_, &mock_micro_graph_ +#ifdef USE_TFLM_COMPRESSION + , + compressed_tensors +#endif // USE_TFLM_COMPRESSION + ) { // Prepare TfLiteContext: context_.impl_ = static_cast(&fake_micro_context_); context_.ReportError = MicroContextReportOpError; diff --git a/tensorflow/lite/micro/kernels/kernel_runner.h b/tensorflow/lite/micro/kernels/kernel_runner.h index 25b97c11302..8dbd7f8b015 100644 --- a/tensorflow/lite/micro/kernels/kernel_runner.h +++ b/tensorflow/lite/micro/kernels/kernel_runner.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,7 +36,12 @@ class KernelRunner { KernelRunner(const TFLMRegistration& registration, TfLiteTensor* tensors, int tensors_size, TfLiteIntArray* inputs, TfLiteIntArray* outputs, const void* builtin_data, - TfLiteIntArray* intermediates = nullptr); + TfLiteIntArray* intermediates = nullptr +#ifdef USE_TFLM_COMPRESSION + , + const CompressedTensorList* compressed_tensors = nullptr +#endif // USE_TFLM_COMPRESSION + ); // Calls init and prepare on the kernel (i.e. TFLMRegistration) struct. // Any exceptions will be DebugLog'd and returned as a status code. diff --git a/tensorflow/lite/micro/kernels/kernel_util.h b/tensorflow/lite/micro/kernels/kernel_util.h index f14c927133d..977ed9563e1 100644 --- a/tensorflow/lite/micro/kernels/kernel_util.h +++ b/tensorflow/lite/micro/kernels/kernel_util.h @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -91,6 +91,31 @@ const T* GetOptionalTensorData(const TfLiteEvalTensor* tensor) { : reinterpret_cast(tensor->data.raw); } +#ifdef USE_TFLM_COMPRESSION + +// Overloads existing GetTensorData. If not compressed, this will return +// tensor->data. 
+// +// TODO(ddavis-2015): make micro_context a const pointer +template +const T* GetTensorData(MicroContext* micro_context, + const TfLiteEvalTensor* tensor, + const CompressionTensorData* compression_data, + int scratch_buffer_handle) { + if (tensor == nullptr) { + return nullptr; + } + if (compression_data == nullptr) { + return reinterpret_cast(tensor->data.data); + } + + void* uncompressed_data = micro_context->DecompressTensorToScratchBuffer( + *tensor, *compression_data, scratch_buffer_handle); + return reinterpret_cast(uncompressed_data); +} + +#endif // USE_TFLM_COMPRESSION + // Returns the shape of a TfLiteEvalTensor struct. const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor); diff --git a/tensorflow/lite/micro/kernels/transpose_conv.cc b/tensorflow/lite/micro/kernels/transpose_conv.cc index ea0efae0607..7932f290f81 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv.cc +++ b/tensorflow/lite/micro/kernels/transpose_conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -51,6 +51,14 @@ struct OpData { // A scratch buffer is required for quantized implementations. int scratch_buffer_index; +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int filter_scratch_index; + int bias_scratch_index; + +#endif // USE_TFLM_COMPRESSION + // Index to the converted 64-bit bias buffer from 16-bit bias. This is // required to handle 16x8 transpose convolutions where a 16-bit bias is // provided, whereas the kernel expects 64-bit biases. @@ -244,6 +252,17 @@ TfLiteStatus TransposeConvPrepare(TfLiteContext* context, TfLiteNode* node) { data->params.stride_width = params->stride_width; data->params.stride_height = params->stride_height; +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. 
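Taken together, the GetTensorData overload above and the per-kernel scratch indices define the pattern a kernel follows to read possibly-compressed tensors: reserve a decompression scratch buffer per compressible input in Prepare, then in Eval look up the tensor's CompressionTensorData and let GetTensorData either return the raw data or decompress into the scratch buffer. The condensed sketch below restates that pattern; ExampleOpData, kWeightsTensor and the two functions are illustrative names rather than code from this patch (see conv.cc, fully_connected.cc and transpose_conv.cc for the real versions).

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/compression.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"

#ifdef USE_TFLM_COMPRESSION

struct ExampleOpData {
  int weights_scratch_index;  // scratch buffer for the decompressed weights
};

constexpr int kWeightsTensor = 1;  // illustrative input index

TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  auto* data = static_cast<ExampleOpData*>(node->user_data);
  // Only results in an allocation if the tensor is actually compressed.
  data->weights_scratch_index =
      micro_context->AllocateDecompressionScratchBuffer(node, kWeightsTensor);
  return kTfLiteOk;
}

TfLiteStatus ExampleEval(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  const auto* data = static_cast<const ExampleOpData*>(node->user_data);
  const TfLiteEvalTensor* weights =
      tflite::micro::GetEvalInput(context, node, kWeightsTensor);
  const tflite::CompressionTensorData* weights_comp_td =
      micro_context->GetTensorCompressionData(node, kWeightsTensor);
  // Decompresses into the scratch buffer when weights_comp_td != nullptr,
  // otherwise returns weights->data unchanged.
  const int8_t* weights_data = tflite::micro::GetTensorData<int8_t>(
      micro_context, weights, weights_comp_td, data->weights_scratch_index);
  (void)weights_data;
  return kTfLiteOk;
}

#endif  // USE_TFLM_COMPRESSION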
+ data->filter_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, kFilterTensor); + data->bias_scratch_index = + micro_context->AllocateDecompressionScratchBuffer(node, kBiasTensor); + +#endif // USE_TFLM_COMPRESSION + micro_context->DeallocateTempTfLiteTensor(output); micro_context->DeallocateTempTfLiteTensor(input); micro_context->DeallocateTempTfLiteTensor(filter); @@ -262,6 +281,18 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); +#ifdef USE_TFLM_COMPRESSION + + // TODO(ddavis-2015): make micro_context a const pointer + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* filter_comp_td = + micro_context->GetTensorCompressionData(node, kFilterTensor); + const CompressionTensorData* bias_comp_td = + micro_context->GetTensorCompressionData(node, kBiasTensor); + +#endif // USE_TFLM_COMPRESSION + TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); @@ -280,9 +311,17 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { op_params, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(micro_context, bias, bias_comp_td, + data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr); @@ -296,9 +335,17 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, filter, filter_comp_td, data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); @@ -311,16 +358,29 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { auto* bias_converted_buffer = static_cast(context->GetScratchBuffer( context, data.bias_converted_buffer_index)); + const int16_t* const bias_int16_data = +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index); +#else // USE_TFLM_COMPRESSION + static_cast(bias->data.data); +#endif // USE_TFLM_COMPRESSION for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize(); i++) { - bias_converted_buffer[i] = bias->data.i16[i]; + bias_converted_buffer[i] = bias_int16_data[i]; } reference_integer_ops::TransposeConv( data.params, data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), 
tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(bias), bias_converted_buffer, tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), @@ -331,9 +391,18 @@ TfLiteStatus TransposeConvEval(TfLiteContext* context, TfLiteNode* node) { data.per_channel_output_shift, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(micro_context, filter, + filter_comp_td, + data.filter_scratch_index), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData( + micro_context, bias, bias_comp_td, data.bias_scratch_index), +#else // USE_TFLM_COMPRESSION tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetOptionalTensorData(bias), +#endif // USE_TFLM_COMPRESSION tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output), tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); diff --git a/tensorflow/lite/micro/kernels/transpose_conv_test.cc b/tensorflow/lite/micro/kernels/transpose_conv_test.cc index 49d2c90f439..64dded4dba0 100644 --- a/tensorflow/lite/micro/kernels/transpose_conv_test.cc +++ b/tensorflow/lite/micro/kernels/transpose_conv_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/kernels/conv_test.h" #include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/test_helpers.h" @@ -25,28 +27,88 @@ namespace tflite { namespace testing { namespace { +constexpr float kTolerance = 1e-5; + // Common inputs and outputs. 
constexpr int kInputElements = 32; static int kInputShape[] = {4, 1, 4, 4, 2}; -static const float kInputData[kInputElements] = { +static constexpr float kInputData[kInputElements] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; constexpr int kFilterElements = 18; static int kFilterShape[] = {4, 1, 3, 3, 2}; -static const float kFilterData[kFilterElements] = { +static constexpr float kFilterData[kFilterElements] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; constexpr int kBiasElements = 1; static int kBiasShape[] = {4, 1, 1, 1, 1}; -static const float kBiasData[kBiasElements] = {0}; +static constexpr float kBiasData[kBiasElements] = {0}; constexpr int kOutputElements = 16; static int kOutputShape[] = {4, 1, 4, 4, 1}; -static const float kGoldenData[kOutputElements] = { +static constexpr float kGoldenData[kOutputElements] = { 184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}; +// Common inputs and outputs (quantized single channel). +// data from TfLite test: SimpleBiasTestQuantizedPerChannelSingleChannel +constexpr int kInputElementsQ1 = 16; +static int kInputShapeQ1[] = {4, 1, 4, 4, 1}; +static constexpr float kInputDataQ1[kInputElementsQ1] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + +constexpr int kFilterElementsQ1 = 9; +static int kFilterShapeQ1[] = {4, 1, 3, 3, 1}; +static constexpr float kFilterDataQ1[kFilterElementsQ1] = {1, 2, 3, 4, 5, + 6, 7, 8, 9}; + +constexpr int kBiasElementsQ1 = 1; +static int kBiasShapeQ1[] = {1, 1}; +static constexpr float kBiasDataQ1[kBiasElementsQ1] = {1}; + +constexpr int kOutputElementsQ1 = 16; +static int kOutputShapeQ1[] = {4, 1, 4, 4, 1}; +static constexpr float kGoldenDataQ1[kOutputElementsQ1] = { + 30, 62, 84, 76, 100, 192, 238, 198, 206, 372, 416, 330, 262, 446, 484, 366}; + +// Common inputs and outputs (quantized multi channel). +// data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 +constexpr int kInputElementsQ2 = 12; +static int kInputShapeQ2[] = {4, 1, 2, 3, 2}; +static constexpr float kInputDataQ2[kInputElementsQ2] = { + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 +}; + +constexpr int kFilterElementsQ2 = 16; +static int kFilterShapeQ2[] = {4, 2, 2, 2, 2}; +static constexpr float kFilterDataQ2[kFilterElementsQ2] = { + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 +}; + +constexpr int kBiasElementsQ2 = 2; +static int kBiasShapeQ2[] = {1, 2}; +static constexpr float kBiasDataQ2[kBiasElementsQ2] = {3, -2}; + +constexpr int kOutputElementsQ2 = 12; +static int kOutputShapeQ2[] = {4, 1, 2, 3, 2}; +static constexpr float kGoldenDataQ2[kOutputElementsQ2] = { + 10, 35, 19, 24, -6, -41, 30, 64, 51, 40, -29, -64}; + // Transpose conv uses TfLiteConvParams. 
static TfLiteConvParams common_conv_params = {kTfLitePaddingSame, // padding 1, // stride_width @@ -56,19 +118,114 @@ static TfLiteConvParams common_conv_params = {kTfLitePaddingSame, // padding 1, kTfLiteNoType}; -template -TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, - TfLiteConvParams* conv_params, - T* output_data) { +// Compression inputs and associated data +constexpr int kMaxTensors = 5; +constexpr int kOutputTensor = 4; // physical index + +#ifdef USE_TFLM_COMPRESSION + +constexpr int kFilterTensor = 1; // physical index +constexpr int kBiasTensor = 3; // physical index + +template +struct TestCompressionInfo { + TFILTER* filter_value_table; + size_t filter_value_table_stride; + int filter_bit_width; + bool use_filter_alt_axis; + TBIAS* bias_value_table; + size_t bias_value_table_stride; + int bias_bit_width; + bool use_bias_alt_axis; + CompressionScheme scheme; +}; + +template +struct TestCompressionQuantizedInfo : TestCompressionInfo { + const uint8_t* filter_compressed; + const float* filter_data; + const int* filter_dims_data; // TfLiteIntArray + const float* filter_scales; // TfLiteFloatArray + const int* filter_zero_points; // TfLiteIntArray + + const uint8_t* bias_compressed; + const float* bias_data; + const int* bias_dims_data; // TfLiteIntArray + float* bias_scales; // TfLiteFloatArray (computed) + int* bias_zero_points; // TfLiteIntArray (computed) +}; + +#endif // USE_TFLM_COMPRESSION + +template +TfLiteStatus InvokeTransposeConv( + TfLiteTensor* tensors, int tensors_size, const TfLiteConvParams* conv_params +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { int inputs_array_data[] = {4, 0, 1, 2, 3}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 4}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + // TODO(ddavis-2015): account for optional bias tensor + +#ifdef USE_TFLM_COMPRESSION + + CompressionTensorData* compressed_tensors[kMaxTensors] = {}; + CompressionTensorData filter_comp_data = {}; + CompressionTensorData bias_comp_data = {}; + CompressedTensorList comp_list = {compressed_tensors}; + CompressedTensorList* comp_list_p = nullptr; + + if (comp_info != nullptr) { + if (comp_info->scheme == CompressionScheme::kBinQuant) { + bool is_per_channel_quantized = + std::is_same::value ? 
false : true; + if (comp_info->filter_value_table != nullptr) { + compressed_tensors[kFilterTensor] = &filter_comp_data; + filter_comp_data.scheme = CompressionScheme::kBinQuant; + filter_comp_data.data.bin_quant.compressed_bit_width = + comp_info->filter_bit_width; + filter_comp_data.data.bin_quant.value_table = + comp_info->filter_value_table; + filter_comp_data.data.bin_quant.value_table_channel_stride = + comp_info->filter_value_table_stride; + filter_comp_data.data.bin_quant.is_per_channel_quantized = + is_per_channel_quantized; + filter_comp_data.data.bin_quant.use_alternate_axis = + comp_info->use_filter_alt_axis; + } + if (comp_info->bias_value_table != nullptr) { + compressed_tensors[kBiasTensor] = &bias_comp_data; + bias_comp_data.scheme = CompressionScheme::kBinQuant; + bias_comp_data.data.bin_quant.compressed_bit_width = + comp_info->bias_bit_width; + bias_comp_data.data.bin_quant.value_table = comp_info->bias_value_table; + bias_comp_data.data.bin_quant.value_table_channel_stride = + comp_info->bias_value_table_stride; + bias_comp_data.data.bin_quant.is_per_channel_quantized = + is_per_channel_quantized; + bias_comp_data.data.bin_quant.use_alternate_axis = + comp_info->use_bias_alt_axis; + } + comp_list_p = &comp_list; + } else { + return kTfLiteError; + } + } + +#endif // USE_TFLM_COMPRESSION const TFLMRegistration registration = tflite::Register_TRANSPOSE_CONV(); micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, - outputs_array, conv_params); + outputs_array, conv_params, nullptr +#ifdef USE_TFLM_COMPRESSION + , + comp_list_p +#endif // USE_TFLM_COMPRESSION + ); const char* init_data = reinterpret_cast(conv_params); TfLiteStatus status = runner.InitAndPrepare(init_data); @@ -78,43 +235,65 @@ TfLiteStatus InvokeTransposeConv(TfLiteTensor* tensors, int tensors_size, return runner.Invoke(); } -template -TfLiteStatus ValidateTransposeConvGoldens(TfLiteTensor* tensors, - int tensors_size, - const T* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - T* output_data, float tolerance) { - TfLiteStatus status = InvokeTransposeConv( - tensors, tensors_size, output_length, conv_params, output_data); +template +TfLiteStatus ValidateTransposeConvGoldens( + TfLiteTensor* tensors, int tensors_size, const float* expected_output_data, + int output_length, float* output_data, T* output_quantized, + TfLiteConvParams* conv_params, float tolerance +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { + TfLiteStatus status = InvokeTransposeConv(tensors, tensors_size, conv_params +#ifdef USE_TFLM_COMPRESSION + , + comp_info +#endif // USE_TFLM_COMPRESSION + ); if (status != kTfLiteOk) { return status; } + + if (output_quantized != nullptr) { + // TODO(ddavis-2015): account for optional bias tensor + const float scale = tensors[kOutputTensor].params.scale; + const int zero_point = tensors[kOutputTensor].params.zero_point; + Dequantize(output_quantized, output_length, scale, zero_point, output_data); + MicroPrintf("Dequantize: scale %f zero_point %d length %d", (double)scale, + zero_point, output_length); + } for (int i = 0; i < output_length; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], tolerance); } + return kTfLiteOk; } +template TfLiteStatus TestTransposeConvFloat( int* input_dims_data, const float* input_data, int* filter_dims_data, const float* filter_data, int* bias_dims_data, const float* bias_data, int* output_dims_data, const float* 
expected_output_data, - TfLiteConvParams* conv_params, float* output_data) { + TfLiteConvParams* conv_params, float* output_data +#ifdef USE_TFLM_COMPRESSION + , + const TestCompressionInfo* comp_info = nullptr +#endif // USE_TFLM_COMPRESSION +) { + // TODO(ddavis-2015): account for optional bias tensor + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); int output_shape_dims_data[] = {1, 0}; int32_t* output_shape = nullptr; TfLiteIntArray* output_shape_dims = IntArrayFromInts(output_shape_dims_data); - constexpr int inputs_size = 4; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; + constexpr int tensors_size = kMaxTensors; TfLiteTensor tensors[tensors_size] = { CreateTensor(output_shape, output_shape_dims), CreateTensor(filter_data, filter_dims), @@ -123,110 +302,205 @@ TfLiteStatus TestTransposeConvFloat( CreateTensor(output_data, output_dims), }; - return ValidateTransposeConvGoldens(tensors, tensors_size, - expected_output_data, output_dims_count, - conv_params, output_data, 0.001f); + const int output_dims_count = ElementCount(*output_dims); + return ValidateTransposeConvGoldens( + tensors, tensors_size, expected_output_data, output_dims_count, + output_data, nullptr, conv_params, kTolerance +#ifdef USE_TFLM_COMPRESSION + , + comp_info +#endif // USE_TFLM_COMPRESSION + ); } +template TfLiteStatus TestTransposeConvQuantized( - int* input_dims_data, const float* input_data, int8_t* input_quantized, + int* input_dims_data, const float* input_data, TIO* input_quantized, float input_scale, int input_zero_point, int* filter_dims_data, - const float* filter_data, int8_t* filter_quantized, float filter_scale, - int* bias_dims_data, const float* bias_data, int32_t* bias_quantized, - float* bias_scales, int* bias_zero_points, int* output_dims_data, - const float* expected_output_data, int8_t* expected_output_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - int8_t* output_data) { + const float* filter_data, int8_t* filter_quantized, int* bias_dims_data, + const float* bias_data, TBIAS* bias_quantized, int* output_dims_data, + const float* expected_output_data, float* output_data, + TIO* output_quantized, float output_scale, int output_zero_point, + TfLiteConvParams* conv_params) { + // TODO(ddavis-2015): account for optional bias tensor + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); int filter_zero_points[5]; - float filter_scales[5]; + float filter_scales[std::extent::value]; TfLiteAffineQuantization filter_quant; + TF_LITE_MICRO_EXPECT_LE(static_cast(filter_dims->data[0]), + std::extent::value - 1); + TF_LITE_MICRO_CHECK_FAIL(); TfLiteTensor filter_tensor = CreateSymmetricPerChannelQuantizedTensor( filter_data, filter_quantized, filter_dims, filter_scales, filter_zero_points, &filter_quant, 0 /* quantized dimension */); - tflite::Quantize(expected_output_data, expected_output_quantized, - output_dims_count, output_scale, 0); + MicroPrintf( + "input scale %f filter scale %f filter 
zero_point %d filter size %d %d" + " filter qp %p %p filter data %f filter data quantized %d", + (double)input_scale, (double)filter_quant.scale->data[0], + filter_quant.zero_point->data[0], filter_quant.scale->size, + filter_quant.zero_point->size, &filter_quant, + filter_tensor.quantization.params, (double)filter_data[0], + filter_quantized[0]); + + int bias_zero_points[std::extent::value]; + float bias_scales[std::extent::value]; + TfLiteAffineQuantization bias_quant; + TfLiteTensor bias_tensor = {}; + // TODO(ddavis-2015): cleanup + if (filter_quant.scale->size > 0) { + bias_tensor = CreatePerChannelQuantizedBiasTensor( + bias_data, bias_quantized, bias_dims, input_scale, + filter_quant.scale->data, bias_scales, bias_zero_points, &bias_quant, + 0 /* quantized dimension */); + int64_t bias_data0 = bias_quantized[0]; + MicroPrintf( + "bias scale %f bias zero_point %d bias size %d %d bias qp %p %p" + " bias data %f bias data quantized %lld", + (double)bias_quant.scale->data[0], bias_quant.zero_point->data[0], + bias_quant.scale->size, bias_quant.zero_point->size, &bias_quant, + bias_tensor.quantization.params, (double)bias_data[0], bias_data0); + } else { + bias_tensor = + CreateQuantizedBiasTensor(bias_data, bias_quantized, bias_dims, + input_scale, filter_quant.scale->data[0]); + + int64_t bias_data0 = bias_quantized[0]; + MicroPrintf( + "bias scale %f bias zero_point %d bias qp %p bias data %f bias data " + "quantized %lld", + (double)bias_tensor.params.scale, bias_tensor.params.zero_point, + bias_tensor.quantization.params, (double)bias_data[0], bias_data0); + } int output_shape_dims_data[] = {1, 0}; int32_t* output_shape = nullptr; TfLiteIntArray* output_shape_dims = IntArrayFromInts(output_shape_dims_data); - constexpr int inputs_size = 4; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; + constexpr int tensors_size = kMaxTensors; TfLiteTensor tensors[tensors_size] = { - CreateTensor(output_shape, output_shape_dims), filter_tensor, + CreateTensor(output_shape, output_shape_dims), + filter_tensor, CreateQuantizedTensor(input_data, input_quantized, input_dims, input_scale, input_zero_point), - CreateQuantizedBiasTensor(bias_data, bias_quantized, bias_dims, - input_scale, filter_scale), - CreateQuantizedTensor(output_data, output_dims, output_scale, - output_zero_point)}; + bias_tensor, + CreateQuantizedTensor(output_quantized, output_dims, output_scale, + output_zero_point), + }; + // TODO(ddavis-2015): investigate why the tolerance differs from the TfLite + // tests which use 1e-5 + // + // Tolerance is slightly looser for 8x16 compared with float, since quant + // error is more pronounced on the finer-grained 16-bit output. + constexpr float tolerance = std::is_same::value ? 
2.0f : 4.0f; + const int output_dims_count = ElementCount(*output_dims); return ValidateTransposeConvGoldens( - tensors, tensors_size, expected_output_quantized, output_dims_count, - conv_params, output_data, 1.0f); + tensors, tensors_size, expected_output_data, output_dims_count, + output_data, output_quantized, conv_params, tolerance); } -template -TfLiteStatus TestTransposeConvQuantized( - int* input_dims_data, const float* input_data, int16_t* input_quantized, - float input_scale, int input_zero_point, int* filter_dims_data, - const float* filter_data, int8_t* filter_quantized, float filter_scale, - int* bias_dims_data, const float* bias_data, T* bias_quantized, - float* bias_scales, int* bias_zero_points, int* output_dims_data, - const float* expected_output_data, int16_t* expected_output_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - int16_t* output_data) { +#ifdef USE_TFLM_COMPRESSION + +template +TfLiteStatus TestTransposeConvQuantizedCompressed( + int* input_dims_data, const float* input_data, TIO* input_quantized, + float input_scale, int input_zero_point, int* output_dims_data, + const float* expected_output_data, float* output_data, + TIO* output_quantized, float output_scale, int output_zero_point, + TfLiteConvParams* conv_params, + const TestCompressionQuantizedInfo* comp_info) { + // TODO(ddavis-2015): account for optional bias tensor + MicroPrintf("%s", __PRETTY_FUNCTION__); + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(comp_info->filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(comp_info->bias_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - int filter_zero_points[5]; - float filter_scales[5]; - TfLiteAffineQuantization filter_quant; - TfLiteTensor filter_tensor = CreateSymmetricPerChannelQuantizedTensor( - filter_data, filter_quantized, filter_dims, filter_scales, - filter_zero_points, &filter_quant, 0 /* quantized dimension */); - tflite::Quantize(expected_output_data, expected_output_quantized, - output_dims_count, output_scale, 0); + TfLiteFloatArray* filter_scales = + FloatArrayFromFloats(comp_info->filter_scales); + TfLiteIntArray* filter_zero_points = + IntArrayFromInts(comp_info->filter_zero_points); + TfLiteFloatArray* bias_scales = FloatArrayFromFloats(comp_info->bias_scales); + TfLiteIntArray* bias_zero_points = + IntArrayFromInts(comp_info->bias_zero_points); + + size_t quantized_axis; + + TfLiteAffineQuantization filter_quant_params; + quantized_axis = comp_info->use_filter_alt_axis ? 3 : 0; + TfLiteTensor filter_tensor = CreatePerChannelQuantizedTensor( + comp_info->filter_compressed, filter_dims, filter_scales, + filter_zero_points, &filter_quant_params, quantized_axis, false, + kTfLiteInt8); + SymmetricPerChannelQuantize( + comp_info->filter_data, comp_info->filter_value_table, + ElementCount(*filter_dims), filter_dims->data[quantized_axis], + filter_scales->data); + + TfLiteAffineQuantization bias_quant_params; + quantized_axis = comp_info->use_bias_alt_axis ? 
3 : 0; + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( + comp_info->bias_compressed, bias_dims, input_scale, filter_scales, + bias_scales, bias_zero_points, &bias_quant_params, quantized_axis, false, + typeToTfLiteType()); + SymmetricPerChannelQuantize(comp_info->bias_data, comp_info->bias_value_table, + ElementCount(*bias_dims), + bias_dims->data[quantized_axis], + bias_scales->data); + for (int i = 0; i < bias_scales->size; i++) { + int64_t bias_data0 = comp_info->bias_value_table[i]; + MicroPrintf( + "bias scale %f bias zero_point %d bias size %d %d bias qp %p %p" + " bias data %f bias data quantized %lld", + (double)bias_quant_params.scale->data[i], + bias_quant_params.zero_point->data[i], bias_quant_params.scale->size, + bias_quant_params.zero_point->size, &bias_quant_params, + bias_tensor.quantization.params, (double)comp_info->bias_data[i], + bias_data0); + } int output_shape_dims_data[] = {1, 0}; int32_t* output_shape = nullptr; TfLiteIntArray* output_shape_dims = IntArrayFromInts(output_shape_dims_data); - constexpr int inputs_size = 4; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; + constexpr int tensors_size = kMaxTensors; TfLiteTensor tensors[tensors_size] = { - CreateTensor(output_shape, output_shape_dims), filter_tensor, + CreateTensor(output_shape, output_shape_dims), + filter_tensor, CreateQuantizedTensor(input_data, input_quantized, input_dims, input_scale, input_zero_point), - CreateQuantizedBiasTensor(bias_data, bias_quantized, bias_dims, - input_scale, filter_scale), - CreateQuantizedTensor(output_data, output_dims, output_scale, - output_zero_point)}; + bias_tensor, + CreateQuantizedTensor(output_quantized, output_dims, output_scale, + output_zero_point), + }; + // TODO(ddavis-2015): why is int8 tolerance so large? + // // Tolerance is slightly looser for 8x16 compared with float, since quant // error is more pronounced on the finer-grained 16-bit output. + constexpr float tolerance = std::is_same::value ? 
2.0f : 0.19f; + const int output_dims_count = ElementCount(*output_dims); return ValidateTransposeConvGoldens( - tensors, tensors_size, expected_output_quantized, output_dims_count, - conv_params, output_data, 4.0f); + tensors, tensors_size, expected_output_data, output_dims_count, + output_data, output_quantized, conv_params, tolerance, comp_info); } +#endif // USE_TFLM_COMPRESSION + } // namespace } // namespace testing } // namespace tflite TF_LITE_MICRO_TESTS_BEGIN +// TODO(ddavis-2015): add tests with no bias tensor + TF_LITE_MICRO_TEST(SimpleTestFloat) { float output_data[tflite::testing::kOutputElements]; @@ -240,6 +514,44 @@ TF_LITE_MICRO_TEST(SimpleTestFloat) { &tflite::testing::common_conv_params, output_data)); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestFloatCompressed) { + float output_data[tflite::testing::kOutputElements]; + + // compressed filter data for kBinQuant scheme + constexpr uint8_t kBinQuantFilterData[] = { + 0x00, 0x44, 0x32, 0x14, 0xC7, 0x42, 0x54, 0xB6, 0x35, 0xCF, 0x84, 0x40}; + constexpr int kBinQuantFilterBitWidth = 5; + // compressed bias data for kBinQuant scheme + constexpr uint8_t kBinQuantBiasData[] = {0x00}; + constexpr int kBinQuantBiasBitWidth = 1; + + tflite::testing::TestCompressionInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = tflite::testing::kFilterData; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = kBinQuantFilterBitWidth; + comp_info.bias_value_table = tflite::testing::kBiasData; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = kBinQuantBiasBitWidth; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvFloat( + tflite::testing::kInputShape, tflite::testing::kInputData, + tflite::testing::kFilterShape, + reinterpret_cast(kBinQuantFilterData), + tflite::testing::kBiasShape, + reinterpret_cast(kBinQuantBiasData), + tflite::testing::kOutputShape, tflite::testing::kGoldenData, + &tflite::testing::common_conv_params, output_data, &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(fusedRELUTest) { float output_data[tflite::testing::kOutputElements]; float golden_data[] = {29, 24, 0, 0, 99, 72, 0, 0, @@ -317,21 +629,27 @@ TF_LITE_MICRO_TEST(MultiChannelBiasWithFusedActivationTest) { bias_data, output_shape, golden_data, &conv_params, output_data)); } +#ifdef notdef +// TODO(ddavis-2015): remove TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { - int8_t output_data[tflite::testing::kOutputElements]; - - const float input_scale = 0.5f; - const float output_scale = 30.0f; - const float filter_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - int8_t input_quantized[tflite::testing::kInputElements]; int8_t filter_quantized[tflite::testing::kFilterElements]; int32_t bias_quantized[tflite::testing::kBiasElements]; - int8_t golden_quantized[tflite::testing::kOutputElements]; - int zero_points[tflite::testing::kBiasElements + 1]; - float scales[tflite::testing::kBiasElements + 1]; + int8_t output_quantized[tflite::testing::kOutputElements]; + float output_data[tflite::testing::kOutputElements]; + + auto mm = std::minmax_element(std::begin(tflite::testing::kInputData), + std::end(tflite::testing::kInputData)); + const float input_scale = + tflite::testing::ScaleFromMinMax(*mm.first, *mm.second); + const int input_zero_point = + tflite::testing::ZeroPointFromMinMax(*mm.first, *mm.second); + mm = 
std::minmax_element(std::begin(tflite::testing::kGoldenData), + std::end(tflite::testing::kGoldenData)); + const float output_scale = + tflite::testing::ScaleFromMinMax(*mm.first, *mm.second); + const int output_zero_point = + tflite::testing::ZeroPointFromMinMax(*mm.first, *mm.second); TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, @@ -339,28 +657,303 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { tflite::testing::kInputShape, tflite::testing::kInputData, input_quantized, input_scale, input_zero_point, tflite::testing::kFilterShape, tflite::testing::kFilterData, - filter_quantized, filter_scale, tflite::testing::kBiasShape, - tflite::testing::kBiasData, bias_quantized, scales, zero_points, + filter_quantized, tflite::testing::kBiasShape, + tflite::testing::kBiasData, bias_quantized, tflite::testing::kOutputShape, tflite::testing::kGoldenData, - golden_quantized, output_scale, output_zero_point, - &tflite::testing::common_conv_params, output_data)); + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params)); } +#endif + +TF_LITE_MICRO_TEST(SimpleBiasTestQuantizedPerChannelSingleChannel) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannelSingleChannel + const float input_scale = 16.0f / 255.0f; + const float output_scale = 2.0f; + const int input_zero_point = -128; + const int output_zero_point = -128; + + int8_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int8_t output_quantized[tflite::testing::kOutputElementsQ1]; + float output_data[tflite::testing::kOutputElementsQ1]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantized( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilterShapeQ1, tflite::testing::kFilterDataQ1, + filter_quantized, tflite::testing::kBiasShapeQ1, + tflite::testing::kBiasDataQ1, bias_quantized, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1, + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params)); +} + +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelSingleChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannelSingleChannel + const float input_scale = 16.0f / 255.0f; + const float output_scale = 2.0f; + const int input_zero_point = -128; + const int output_zero_point = -128; + + constexpr float kFilterScales[] = {1, 9.0f / 127.0f}; + constexpr int kFilterZeroPoints[] = {1, 0}; + // all values will be computed + float kBiasScales[std::extent::value] = {}; + // all values will be computed + int kBiasZeroPoints[std::extent::value] = {}; + + int8_t input_quantized[tflite::testing::kInputElementsQ1]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ1]; + int32_t bias_quantized[tflite::testing::kBiasElementsQ1]; + int8_t output_quantized[tflite::testing::kOutputElementsQ1]; + float output_data[tflite::testing::kOutputElementsQ1]; + + // compressed filter data for kBinQuant scheme + constexpr uint8_t kBinQuantFilterData[] = {0x01, 0x23, 0x45, 0x67, 0x80}; + constexpr int kBinQuantFilterBitWidth = 4; + // compressed bias data for kBinQuant scheme + constexpr uint8_t kBinQuantBiasData[] = {0x00}; + constexpr int kBinQuantBiasBitWidth = 1; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = 
tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent::value; + comp_info.filter_bit_width = kBinQuantFilterBitWidth; + comp_info.filter_compressed = kBinQuantFilterData; + comp_info.filter_data = tflite::testing::kFilterDataQ1; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ1; + comp_info.filter_scales = kFilterScales; + comp_info.filter_zero_points = kFilterZeroPoints; + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value; + comp_info.bias_bit_width = kBinQuantBiasBitWidth; + comp_info.bias_compressed = kBinQuantBiasData; + comp_info.bias_data = tflite::testing::kBiasDataQ1; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ1; + comp_info.bias_scales = kBiasScales; + comp_info.bias_zero_points = kBiasZeroPoints; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ1, tflite::testing::kInputDataQ1, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ1, tflite::testing::kGoldenDataQ1, + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleBiasTestQuantizedPerChannelBias16MultiChannel) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ2]; + int16_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + float output_data[tflite::testing::kOutputElementsQ2]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantized( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilterShapeQ2, tflite::testing::kFilterDataQ2, + filter_quantized, tflite::testing::kBiasShapeQ2, + tflite::testing::kBiasDataQ2, bias_quantized, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params)); +} + +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST( + SimpleBiasTestQuantizedPerChannelBias16MultiChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; -TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel) { - int16_t output_data[tflite::testing::kOutputElements]; + constexpr int kNumChannels = 2; + constexpr float kFilterScales[] = {kNumChannels, 7.0f / 127.0f, + 8.0f / 127.0f}; + constexpr int kFilterZeroPoints[] = {kNumChannels, 0, 0}; + // all values will be computed + float kBiasScales[std::extent::value] = {}; + // all values will be computed + int kBiasZeroPoints[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ2]; + int16_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + float 
output_data[tflite::testing::kOutputElementsQ2]; + + // compressed filter data for kBinQuant scheme + constexpr uint8_t kBinQuantFilterData[] = {0x05, 0x34, 0xE5, + 0xDE, 0x54, 0xC1}; + constexpr float kBinQuantFilterValueTable[] = {1, 2, 3, 4, 5, 6, 0, 0, + 1, 2, 3, 4, 5, 6, 7, 8}; + constexpr int kBinQuantFilterBitWidth = 3; + // compressed bias data for kBinQuant scheme + constexpr uint8_t kBinQuantBiasData[] = {0x00}; + constexpr int kBinQuantBiasBitWidth = 1; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent::value / kNumChannels; + comp_info.filter_bit_width = kBinQuantFilterBitWidth; + comp_info.filter_compressed = kBinQuantFilterData; + comp_info.filter_data = kBinQuantFilterValueTable; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ2; + comp_info.filter_scales = kFilterScales; + comp_info.filter_zero_points = kFilterZeroPoints; + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / kNumChannels; + comp_info.bias_bit_width = kBinQuantBiasBitWidth; + comp_info.bias_compressed = kBinQuantBiasData; + comp_info.bias_data = tflite::testing::kBiasDataQ2; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ2; + comp_info.bias_scales = kBiasScales; + comp_info.bias_zero_points = kBiasZeroPoints; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleBiasTestQuantizedPerChannelBias64MultiChannel) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ2]; + int64_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + float output_data[tflite::testing::kOutputElementsQ2]; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantized( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilterShapeQ2, tflite::testing::kFilterDataQ2, + filter_quantized, tflite::testing::kBiasShapeQ2, + tflite::testing::kBiasDataQ2, bias_quantized, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params)); +} + +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST( + SimpleBiasTestQuantizedPerChannelBias64MultiChannelCompressed) { + // data from TfLite test: SimpleBiasTestQuantizedPerChannel16x8Bias64 + const float input_scale = 4.0f / 127.0f; + const float output_scale = 128.0f / 65536.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + constexpr int kNumChannels = 2; + + constexpr float kFilterScales[] = {kNumChannels, 7.0f / 127.0f, + 8.0f / 127.0f}; + 
constexpr int kFilterZeroPoints[] = {kNumChannels, 0, 0}; + // all values will be computed + float kBiasScales[std::extent::value] = {}; + // all values will be computed + int kBiasZeroPoints[std::extent::value] = {}; + + int16_t input_quantized[tflite::testing::kInputElementsQ2]; + int8_t filter_quantized[tflite::testing::kFilterElementsQ2]; + int64_t bias_quantized[tflite::testing::kBiasElementsQ2]; + int16_t output_quantized[tflite::testing::kOutputElementsQ2]; + float output_data[tflite::testing::kOutputElementsQ2]; + + // compressed filter data for kBinQuant scheme + constexpr uint8_t kBinQuantFilterData[] = {0x05, 0x34, 0xE5, + 0xDE, 0x54, 0xC1}; + constexpr float kBinQuantFilterValueTable[] = {1, 2, 3, 4, 5, 6, 0, 0, + 1, 2, 3, 4, 5, 6, 7, 8}; + constexpr int kBinQuantFilterBitWidth = 3; + // compressed bias data for kBinQuant scheme + constexpr uint8_t kBinQuantBiasData[] = {0x00}; + constexpr int kBinQuantBiasBitWidth = 2; + + tflite::testing::TestCompressionQuantizedInfo comp_info = {}; + comp_info.scheme = tflite::CompressionScheme::kBinQuant; + comp_info.filter_value_table = filter_quantized; + comp_info.filter_value_table_stride = + std::extent::value / kNumChannels; + comp_info.filter_bit_width = kBinQuantFilterBitWidth; + comp_info.filter_compressed = kBinQuantFilterData; + comp_info.filter_data = kBinQuantFilterValueTable; + comp_info.filter_dims_data = tflite::testing::kFilterShapeQ2; + comp_info.filter_scales = kFilterScales; + comp_info.filter_zero_points = kFilterZeroPoints; + comp_info.bias_value_table = bias_quantized; + comp_info.bias_value_table_stride = + std::extent::value / kNumChannels; + comp_info.bias_bit_width = kBinQuantBiasBitWidth; + comp_info.bias_compressed = kBinQuantBiasData; + comp_info.bias_data = tflite::testing::kBiasDataQ2; + comp_info.bias_dims_data = tflite::testing::kBiasShapeQ2; + comp_info.bias_scales = kBiasScales; + comp_info.bias_zero_points = kBiasZeroPoints; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + tflite::testing::TestTransposeConvQuantizedCompressed( + tflite::testing::kInputShapeQ2, tflite::testing::kInputDataQ2, + input_quantized, input_scale, input_zero_point, + tflite::testing::kOutputShapeQ2, tflite::testing::kGoldenDataQ2, + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params, &comp_info)); +} + +#endif // USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannelSingleChannel) { const float input_scale = 1.0f; const float output_scale = 1.0f; - const float filter_scale = 1.0f; const int input_zero_point = 0; const int output_zero_point = 0; int16_t input_quantized[tflite::testing::kInputElements]; int8_t filter_quantized[tflite::testing::kFilterElements]; - std::int64_t bias_quantized[tflite::testing::kBiasElements]; - int16_t golden_quantized[tflite::testing::kOutputElements]; - int zero_points[tflite::testing::kBiasElements + 1]; - float scales[tflite::testing::kBiasElements + 1]; + int64_t bias_quantized[tflite::testing::kBiasElements]; + int16_t output_quantized[tflite::testing::kOutputElements]; + float output_data[tflite::testing::kOutputElements]; TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, @@ -368,28 +961,25 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel) { tflite::testing::kInputShape, tflite::testing::kInputData, input_quantized, input_scale, input_zero_point, tflite::testing::kFilterShape, tflite::testing::kFilterData, - filter_quantized, filter_scale, tflite::testing::kBiasShape, - tflite::testing::kBiasData, bias_quantized, scales, zero_points, 
+ filter_quantized, tflite::testing::kBiasShape, + tflite::testing::kBiasData, bias_quantized, tflite::testing::kOutputShape, tflite::testing::kGoldenData, - golden_quantized, output_scale, output_zero_point, - &tflite::testing::common_conv_params, output_data)); + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params)); } -TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannelWithInt16Bias) { - int16_t output_data[tflite::testing::kOutputElements]; - +TF_LITE_MICRO_TEST( + SimpleTestQuantized16x8PerChannelWithInt16BiasSingleChannel) { const float input_scale = 1.0f; const float output_scale = 1.0f; - const float filter_scale = 1.0f; const int input_zero_point = 0; const int output_zero_point = 0; int16_t input_quantized[tflite::testing::kInputElements]; int8_t filter_quantized[tflite::testing::kFilterElements]; int16_t bias_quantized[tflite::testing::kBiasElements]; - int16_t golden_quantized[tflite::testing::kOutputElements]; - int zero_points[tflite::testing::kBiasElements + 1]; - float scales[tflite::testing::kBiasElements + 1]; + int16_t output_quantized[tflite::testing::kOutputElements]; + float output_data[tflite::testing::kOutputElements]; TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, @@ -397,11 +987,11 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannelWithInt16Bias) { tflite::testing::kInputShape, tflite::testing::kInputData, input_quantized, input_scale, input_zero_point, tflite::testing::kFilterShape, tflite::testing::kFilterData, - filter_quantized, filter_scale, tflite::testing::kBiasShape, - tflite::testing::kBiasData, bias_quantized, scales, zero_points, + filter_quantized, tflite::testing::kBiasShape, + tflite::testing::kBiasData, bias_quantized, tflite::testing::kOutputShape, tflite::testing::kGoldenData, - golden_quantized, output_scale, output_zero_point, - &tflite::testing::common_conv_params, output_data)); + output_data, output_quantized, output_scale, output_zero_point, + &tflite::testing::common_conv_params)); } TF_LITE_MICRO_TEST(InputOutputDifferentTypeIsError) { @@ -413,7 +1003,6 @@ TF_LITE_MICRO_TEST(InputOutputDifferentTypeIsError) { TfLiteIntArray* filter_dims = IntArrayFromInts(tflite::testing::kFilterShape); TfLiteIntArray* bias_dims = IntArrayFromInts(tflite::testing::kBiasShape); TfLiteIntArray* output_dims = IntArrayFromInts(tflite::testing::kOutputShape); - const int output_dims_count = tflite::ElementCount(*output_dims); constexpr int inputs_size = 4; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; @@ -433,9 +1022,9 @@ TF_LITE_MICRO_TEST(InputOutputDifferentTypeIsError) { /*zero_point=*/0), }; TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, tflite::testing::InvokeTransposeConv( - tensors, tensors_size, output_dims_count, - &tflite::testing::common_conv_params, output_data)); + kTfLiteError, + tflite::testing::InvokeTransposeConv( + tensors, tensors_size, &tflite::testing::common_conv_params)); } TF_LITE_MICRO_TEST(HybridModeIsError) { @@ -447,7 +1036,6 @@ TF_LITE_MICRO_TEST(HybridModeIsError) { TfLiteIntArray* filter_dims = IntArrayFromInts(tflite::testing::kFilterShape); TfLiteIntArray* bias_dims = IntArrayFromInts(tflite::testing::kBiasShape); TfLiteIntArray* output_dims = IntArrayFromInts(tflite::testing::kOutputShape); - const int output_dims_count = tflite::ElementCount(*output_dims); constexpr int inputs_size = 4; constexpr int outputs_size = 1; @@ -471,9 +1059,9 @@ TF_LITE_MICRO_TEST(HybridModeIsError) { }; TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, 
tflite::testing::InvokeTransposeConv( - tensors, tensors_size, output_dims_count, - &tflite::testing::common_conv_params, output_data)); + kTfLiteError, + tflite::testing::InvokeTransposeConv( + tensors, tensors_size, &tflite::testing::common_conv_params)); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 930da754bb5..f90bb2d62c0 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,6 +36,15 @@ limitations under the License. #include "tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h" #include "tensorflow/lite/schema/schema_generated.h" +#ifdef USE_TFLM_COMPRESSION + +#include +#include + +#include "tensorflow/lite/micro/compression/metadata_generated.h" + +#endif // USE_TFLM_COMPRESSION + namespace tflite { namespace { @@ -355,6 +364,149 @@ TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer( return kTfLiteOk; } +#ifdef USE_TFLM_COMPRESSION + +const tflite::micro::compression::Metadata* GetCompressionMetadata( + const Model& model) { + const auto metadata_vector = model.metadata(); + if (metadata_vector == nullptr) { + return nullptr; + } + auto buffers = model.buffers(); + if (buffers == nullptr) { + return nullptr; + } + const size_t metadata_string_length = std::strlen(kCompressionMetadataString); + for (size_t metadata_index = 0; metadata_index < metadata_vector->size(); + metadata_index++) { + auto metadata = metadata_vector->Get(metadata_index); + if (metadata->name() == nullptr || metadata->name()->size() == 0) { + continue; + } + const char* s = metadata->name()->c_str(); + if ((metadata->name()->size() == metadata_string_length) && + (std::strncmp(s, kCompressionMetadataString, metadata_string_length) == + 0)) { + auto buffer_index = metadata->buffer(); + if (buffer_index == 0 || buffer_index >= buffers->size()) { + MicroPrintf("Compression: Invalid buffer index %u", buffer_index); + continue; + } + auto vp = buffers->Get(buffer_index)->data(); + if (vp == nullptr || vp->data() == nullptr) { + MicroPrintf("Compression: Invalid data for buffer index %u", + buffer_index); + continue; + } + // TODO(ddavis-2015): support multiple compression methods + auto compression_metadata = + tflite::micro::compression::GetSizePrefixedMetadata(vp); + flatbuffers::Verifier verifier(vp->data(), vp->size(), + flatbuffers::Verifier::Options()); + if (!tflite::micro::compression::VerifyMetadataBuffer(verifier)) { + MicroPrintf("Compression: verification failure"); + return nullptr; + } else { + return compression_metadata; + } + } + } + + return nullptr; +} + +TfLiteStatus InitializeCompressionTensorDataFromFlatbuffer( + const Model& model, const tflite::micro::compression::LutTensor& lut_tensor, + CompressionTensorData* ctd) { + ctd->scheme = CompressionScheme::kBinQuant; + + const size_t subgraph_index = lut_tensor.subgraph(); + if (subgraph_index >= model.subgraphs()->size()) { + MicroPrintf("Compression: invalid subgraph index %u in LutTensor", + subgraph_index); + return kTfLiteError; + } + const size_t tensor_index = lut_tensor.tensor(); + auto tensors = model.subgraphs()->Get(subgraph_index)->tensors(); + if (tensor_index >= tensors->size()) { + MicroPrintf("Compression: 
invalid tensor index %u in LutTensor", + tensor_index); + return kTfLiteError; + } + const size_t index_bit_width = lut_tensor.index_bitwidth(); + if (index_bit_width > BinQuantData::kMaxBitWidth) { + MicroPrintf("Compression: invalid bit width %u in LutTensor", + index_bit_width); + return kTfLiteError; + } + ctd->data.bin_quant.compressed_bit_width = index_bit_width; + const size_t value_buffer_index = lut_tensor.value_buffer(); + if (value_buffer_index >= model.buffers()->size()) { + MicroPrintf("Compression: invalid value_buffer %u in LutTensor", + value_buffer_index); + return kTfLiteError; + } + auto value_buffer = model.buffers()->Get(value_buffer_index)->data(); + if (value_buffer == nullptr || value_buffer->data() == nullptr) { + MicroPrintf("Compression: invalid value table for value_buffer %u", + value_buffer_index); + return kTfLiteError; + } + ctd->data.bin_quant.value_table = value_buffer->data(); + auto tensor = + model.subgraphs()->Get(subgraph_index)->tensors()->Get(tensor_index); + if (tensor->shape() == nullptr) { + MicroPrintf("Compression: scalar tensors not supported"); + return kTfLiteError; + } + if (tensor->buffer() != lut_tensor.index_buffer()) { + MicroPrintf("Compression: mismatched index_buffer %u != %u in LutTensor", + lut_tensor.index_buffer(), tensor->buffer()); + return kTfLiteError; + } + TfLiteType tensor_type = kTfLiteNoType; + TfLiteStatus status = ConvertTensorType(tensor->type(), &tensor_type); + if (status != kTfLiteOk) { + MicroPrintf("Compression: failed to convert tensor type"); + return kTfLiteError; + } + size_t tensor_type_size = 0; + status = TfLiteTypeSizeOf(tensor_type, &tensor_type_size); + if (status != kTfLiteOk) { + MicroPrintf("Compression: failed to get tensor type size"); + return kTfLiteError; + } + if (tensor->quantization() != nullptr && + tensor->quantization()->scale() != nullptr && + tensor->quantization()->scale()->size() > 1) { + const size_t num_channels = tensor->quantization()->scale()->size(); + ctd->data.bin_quant.is_per_channel_quantized = true; + const TfLiteIntArray* dims = + FlatBufferVectorToTfLiteTypeArray(tensor->shape()); + int32_t quantized_axis = tensor->quantization()->quantized_dimension(); + if (quantized_axis == 0) { + ctd->data.bin_quant.use_alternate_axis = false; + } else if (quantized_axis == (dims->size - 1)) { + ctd->data.bin_quant.use_alternate_axis = true; + } else { + MicroPrintf("Compression: unsupported quantization axis %u", + quantized_axis); + return kTfLiteError; + } + ctd->data.bin_quant.value_table_channel_stride = + (value_buffer->size() / tensor_type_size) / num_channels; + } else { + ctd->data.bin_quant.is_per_channel_quantized = false; + ctd->data.bin_quant.use_alternate_axis = false; + ctd->data.bin_quant.value_table_channel_stride = + value_buffer->size() / tensor_type_size; + } + + return kTfLiteOk; +} + +#endif // USE_TFLM_COMPRESSION + } // namespace internal size_t MicroAllocator::GetDefaultTailUsage(bool is_memory_planner_given) { @@ -502,7 +654,11 @@ SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) { return nullptr; } - if (AllocateTfLiteEvalTensors(model, output) != kTfLiteOk || + if ( +#ifdef USE_TFLM_COMPRESSION + AllocateCompressedTensorsList(model, output) != kTfLiteOk || +#endif // USE_TFLM_COMPRESSION + AllocateTfLiteEvalTensors(model, output) != kTfLiteOk || AllocateNodeAndRegistrations(model, output) != kTfLiteOk) { return nullptr; } @@ -757,6 +913,96 @@ bool MicroAllocator::IsAllTempDeallocated() { return 
non_persistent_buffer_allocator_->IsAllTempDeallocated(); } +#ifdef USE_TFLM_COMPRESSION + +TfLiteStatus MicroAllocator::AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations) { + TFLITE_DCHECK(subgraph_allocations != nullptr); + + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + subgraph_allocations[subgraph_idx].compressed.tensors = nullptr; + } + + const tflite::micro::compression::Metadata* compression_metadata = + internal::GetCompressionMetadata(*model); + if (compression_metadata == nullptr) { + // no compression metadata is available + return kTfLiteOk; + } + if (compression_metadata->lut_tensors() == nullptr) { + MicroPrintf("Compression: invalid LutTensor vector"); + return kTfLiteError; + } + if (compression_metadata->lut_tensors()->size() == 0) { + MicroPrintf("Compression: zero length LutTensor vector"); + return kTfLiteError; + } + + for (size_t lut_tensors_index = 0; + lut_tensors_index < compression_metadata->lut_tensors()->size(); + lut_tensors_index++) { + auto lut_tensor = + compression_metadata->lut_tensors()->Get(lut_tensors_index); + + CompressionTensorData* ctd = reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(CompressionTensorData), alignof(CompressionTensorData))); + if (ctd == nullptr) { + MicroPrintf( + "Compressions: failed to allocate memory for CompressionTensorData, " + "%d bytes required", + sizeof(CompressionTensorData)); + return kTfLiteError; + } + + TfLiteStatus status = + internal::InitializeCompressionTensorDataFromFlatbuffer( + *model, *lut_tensor, ctd); + if (status != kTfLiteOk) { + MicroPrintf("Compression: failed to initialize data for LutTensor %u", + lut_tensors_index); + return kTfLiteError; + } + + const size_t subgraph_index = lut_tensor->subgraph(); + if (subgraph_allocations[subgraph_index].compressed.tensors == nullptr) { + size_t alloc_count = + model->subgraphs()->Get(subgraph_index)->tensors()->size(); + CompressionTensorData** tensors = + reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(CompressionTensorData*) * alloc_count, + alignof(CompressionTensorData*))); + if (tensors == nullptr) { + MicroPrintf( + "Compression: failed to allocate memory for compression tensor " + "list, %d bytes required", + sizeof(CompressionTensorData*) * alloc_count); + return kTfLiteError; + } + + subgraph_allocations[subgraph_index].compressed.tensors = tensors; + std::fill(tensors, tensors + alloc_count, nullptr); + } + + const size_t tensor_index = lut_tensor->tensor(); + if (subgraph_allocations[subgraph_index].compressed.tensors[tensor_index] != + nullptr) { + MicroPrintf("Compression: duplicate LutTensor subgraph %u tensor %u", + subgraph_index, tensor_index); + return kTfLiteError; + } else { + subgraph_allocations[subgraph_index].compressed.tensors[tensor_index] = + ctd; + } + } + + return kTfLiteOk; +} + +#endif // USE_TFLM_COMPRESSION + TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors( const Model* model, SubgraphAllocations* subgraph_allocations) { TFLITE_DCHECK(subgraph_allocations != nullptr); diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index 4eff167d67f..7a52c44bccf 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,6 +26,12 @@ limitations under the License. #include "tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h" #include "tensorflow/lite/schema/schema_generated.h" +#ifdef USE_TFLM_COMPRESSION + +#include "tensorflow/lite/micro/compression.h" + +#endif // USE_TFLM_COMPRESSION + namespace tflite { // TODO(b/199402574): rename to tflite_internal or just remove internal @@ -91,6 +97,9 @@ struct ScratchBufferHandle { struct SubgraphAllocations { NodeAndRegistration* node_and_registrations; TfLiteEvalTensor* tensors; +#ifdef USE_TFLM_COMPRESSION + CompressedTensorList compressed; +#endif // USE_TFLM_COMPRESSION }; // Allocator responsible for allocating memory for all intermediate tensors @@ -258,6 +267,15 @@ class MicroAllocator { MicroMemoryPlanner* memory_planner); virtual ~MicroAllocator(); +#ifdef USE_TFLM_COMPRESSION + + // Allocates an array in the arena of pointers to the compressions data + // required to decompress tensors for each subgraph within the model. + virtual TfLiteStatus AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations); + +#endif // USE_TFLM_COMPRESSION + // Allocates an array in the arena to hold pointers to the node and // registration pointers required to represent the inference graph of the // model. diff --git a/tensorflow/lite/micro/micro_context.cc b/tensorflow/lite/micro/micro_context.cc index 295b3c34463..55af3e39021 100644 --- a/tensorflow/lite/micro/micro_context.cc +++ b/tensorflow/lite/micro/micro_context.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ limitations under the License. #include #include +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/micro_common.h" #include "tensorflow/lite/micro/micro_log.h" +#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { namespace { @@ -34,6 +36,76 @@ int GetTensorIndex(int index, int max_size, const int* tensor_indices) { return -1; } +#ifdef USE_TFLM_COMPRESSION + +// TODO(ddavis-2015): break this up such that template expansion is decreased +template +T* DecompressToBuffer(const uint8_t* compressed_indices, + const size_t count_indices, void* buffer, + const CompressionTensorData& comp_data, + const size_t num_channels) { + const size_t compressed_bit_width = + comp_data.data.bin_quant.compressed_bit_width; + TFLITE_DCHECK(compressed_bit_width <= BinQuantData::kMaxBitWidth); + TFLITE_DCHECK(compressed_bit_width > 0); + + size_t channel = 0; + size_t index_in_channel = 0; + const size_t elements_per_channel = + comp_data.data.bin_quant.use_alternate_axis + ? 
1 + : count_indices / num_channels; + size_t buffer_index = 0; + size_t table_index = 0; + size_t table_index_bits_to_fill = compressed_bit_width; + size_t current_offset = 0; + size_t current_bits_remaining = 8; + uint8_t current_byte = compressed_indices[current_offset]; + + // no division (other than power of 2) inside loop + while (buffer_index < count_indices) { + while (table_index_bits_to_fill > 0) { + if (current_bits_remaining == 0) { + current_offset++; + current_byte = compressed_indices[current_offset]; + current_bits_remaining = 8; + } + + const uint8_t mask_bit_count = + std::min(table_index_bits_to_fill, + std::min(compressed_bit_width, current_bits_remaining)); + const uint8_t current_byte_mask = (1 << mask_bit_count) - 1; + table_index <<= mask_bit_count; + table_index |= + (current_byte >> (current_bits_remaining - mask_bit_count)) & + current_byte_mask; + + table_index_bits_to_fill -= mask_bit_count; + current_bits_remaining -= mask_bit_count; + } + + static_cast(buffer)[buffer_index] = + static_cast(comp_data.data.bin_quant.value_table) + [table_index + + (channel * comp_data.data.bin_quant.value_table_channel_stride)]; + buffer_index++; + table_index_bits_to_fill = compressed_bit_width; + table_index = 0; + index_in_channel++; + if (index_in_channel == elements_per_channel) { + index_in_channel = 0; + channel++; + if (channel == num_channels) { + channel = 0; + } + } + } + + return static_cast(buffer); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace TfLiteTensor* MicroContext::AllocateTempInputTensor(const TfLiteNode* node, @@ -74,4 +146,65 @@ void MicroContextReportOpError(struct TfLiteContext* context, va_end(args); } +#ifdef USE_TFLM_COMPRESSION + +void* MicroContext::DecompressTensorToScratchBuffer( + const TfLiteEvalTensor& tensor, + const CompressionTensorData& compression_data, int scratch_buffer_handle) { + TFLITE_DCHECK(compression_data.scheme == CompressionScheme::kBinQuant); + TFLITE_DCHECK(scratch_buffer_handle != -1); + void* scratch_buffer = GetScratchBuffer(scratch_buffer_handle); + TFLITE_DCHECK(scratch_buffer != nullptr); + size_t count = ElementCount(*tensor.dims); + size_t num_channels = 1; + + if (compression_data.data.bin_quant.is_per_channel_quantized) { + const size_t channel_axis = + compression_data.data.bin_quant.use_alternate_axis + ? 
tensor.dims->size - 1 + : 0; + num_channels = tensor.dims->data[channel_axis]; + } + + switch (tensor.type) { + case kTfLiteBool: { + return DecompressToBuffer(static_cast(tensor.data.data), + count, scratch_buffer, compression_data, + num_channels); + } break; + case kTfLiteInt8: { + return DecompressToBuffer(static_cast(tensor.data.data), + count, scratch_buffer, compression_data, + num_channels); + } break; + case kTfLiteInt16: { + return DecompressToBuffer( + static_cast(tensor.data.data), count, scratch_buffer, + compression_data, num_channels); + } break; + case kTfLiteInt32: { + return DecompressToBuffer( + static_cast(tensor.data.data), count, scratch_buffer, + compression_data, num_channels); + } break; + case kTfLiteInt64: { + return DecompressToBuffer( + static_cast(tensor.data.data), count, scratch_buffer, + compression_data, num_channels); + } break; + case kTfLiteFloat32: { + return DecompressToBuffer(static_cast(tensor.data.data), + count, scratch_buffer, compression_data, + num_channels); + } break; + default: { + MicroPrintf("Unsupported decompression tensor type %d", tensor.type); + } break; + } + + return nullptr; +} + +#endif // USE_TFLM_COMPRESSION + } // namespace tflite diff --git a/tensorflow/lite/micro/micro_context.h b/tensorflow/lite/micro/micro_context.h index 2dd3233a159..33cad89143c 100644 --- a/tensorflow/lite/micro/micro_context.h +++ b/tensorflow/lite/micro/micro_context.h @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,6 +19,12 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/micro_graph.h" +#ifdef USE_TFLM_COMPRESSION + +#include "tensorflow/lite/micro/compression.h" + +#endif // USE_TFLM_COMPRESSION + namespace tflite { // TODO(b/149795762): kTfLiteAbort cannot be part of the tflite TfLiteStatus. const TfLiteStatus kTfLiteAbort = static_cast(15); @@ -95,6 +101,30 @@ class MicroContext { virtual MicroGraph& graph() = 0; +#ifdef USE_TFLM_COMPRESSION + + // Available during Prepare & Eval. Returns false if tensor is not + // compressed. + virtual bool IsTensorCompressed(const TfLiteNode* node, int tensor_idx) = 0; + + // Only available during Prepare. The kernel is responsible for storing the + // scratch buffer handle. + virtual int AllocateDecompressionScratchBuffer(const TfLiteNode* node, + int tensor_idx) = 0; + + // Available during Prepare & Eval. Returns nullptr if tensor is not + // compressed. + virtual const CompressionTensorData* GetTensorCompressionData( + const TfLiteNode* node, int tensor_idx) = 0; + + // Only available during Eval. Returns nullptr on failure, otherwise returns a + // pointer to the scratch buffer. + virtual void* DecompressTensorToScratchBuffer( + const TfLiteEvalTensor& tensor, + const CompressionTensorData& compression_data, int scratch_buffer_handle); + +#endif // USE_TFLM_COMPRESSION + private: TF_LITE_REMOVE_VIRTUAL_DELETE }; diff --git a/tensorflow/lite/micro/micro_interpreter_context.cc b/tensorflow/lite/micro/micro_interpreter_context.cc index 098df15d522..2d6341d1894 100644 --- a/tensorflow/lite/micro/micro_interpreter_context.cc +++ b/tensorflow/lite/micro/micro_interpreter_context.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,8 +18,29 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/micro/memory_helpers.h" +#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { + +namespace { + +#ifdef USE_TFLM_COMPRESSION + +int GetInputTensorIndex(const TfLiteNode* node, const int index) { + if (index >= 0 && index < node->inputs->size) { + const int tensor_index = node->inputs->data[index]; + if (tensor_index != kTfLiteOptionalTensor) { + return tensor_index; + } + } + return -1; +} + +#endif // USE_TFLM_COMPRESSION + +} // namespace + MicroInterpreterContext::MicroInterpreterContext(MicroAllocator* allocator, const Model* model, MicroInterpreterGraph* graph) @@ -106,4 +127,83 @@ MicroInterpreterContext::GetInterpreterState() const { return state_; } +#ifdef USE_TFLM_COMPRESSION + +// Available during Prepare & Eval. Returns false if tensor is not +// compressed. +bool MicroInterpreterContext::IsTensorCompressed(const TfLiteNode* node, + int tensor_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare || + state_ == InterpreterState::kInvoke); + + const SubgraphAllocations* allocations = + &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]; + if (allocations->compressed.tensors == nullptr) { + return false; + } + int index = GetInputTensorIndex(node, tensor_idx); + if (index == -1) { + return false; + } + return allocations->compressed.tensors[index] != nullptr; +} + +// Only available during Prepare. The kernel is responsible for storing the +// scratch buffer handle. +int MicroInterpreterContext::AllocateDecompressionScratchBuffer( + const TfLiteNode* node, int tensor_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare); + + const SubgraphAllocations* allocations = + &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]; + if (allocations->compressed.tensors == nullptr) { + return -1; + } + int index = GetInputTensorIndex(node, tensor_idx); + if (index == -1 || allocations->compressed.tensors[index] == nullptr) { + return -1; + } + const TfLiteEvalTensor* tensor = &allocations->tensors[index]; + const size_t byte_count = EvalTensorBytes(tensor); + int scratch_index = -1; + TfLiteStatus result = RequestScratchBufferInArena(byte_count, &scratch_index); + if (result != kTfLiteOk) { + return -1; + } + + return scratch_index; +} + +// Available during Prepare & Eval. Returns nullptr if tensor is not +// compressed. +const CompressionTensorData* MicroInterpreterContext::GetTensorCompressionData( + const TfLiteNode* node, int tensor_idx) { + TFLITE_DCHECK(state_ == InterpreterState::kPrepare || + state_ == InterpreterState::kInvoke); + + const SubgraphAllocations* allocations = + &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]; + if (allocations->compressed.tensors == nullptr) { + return nullptr; + } + int index = GetInputTensorIndex(node, tensor_idx); + if (index == -1) { + return nullptr; + } + return allocations->compressed.tensors[index]; +} + +// Only available during Eval. Returns nullptr on failure, otherwise returns a +// pointer to the scratch buffer. 
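+//
+// A typical Eval-time call from a kernel is sketched below. This is
+// illustrative only: micro_context, weights, weights_comp_td and
+// scratch_handle are assumed to come from the kernel's context lookup and
+// its Prepare-time state, and int8 weights are just an example type.
+//
+//   const int8_t* weights_data = static_cast<const int8_t*>(
+//       micro_context->DecompressTensorToScratchBuffer(
+//           *weights, *weights_comp_td, scratch_handle));
+//   TF_LITE_ENSURE(context, weights_data != nullptr);
+//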
+void* MicroInterpreterContext::DecompressTensorToScratchBuffer( + const TfLiteEvalTensor& tensor, + const CompressionTensorData& compression_data, int scratch_buffer_handle) { + TFLITE_DCHECK(state_ == InterpreterState::kInvoke); + + return MicroContext::DecompressTensorToScratchBuffer(tensor, compression_data, + scratch_buffer_handle); +} + +#endif // USE_TFLM_COMPRESSION + } // namespace tflite diff --git a/tensorflow/lite/micro/micro_interpreter_context.h b/tensorflow/lite/micro/micro_interpreter_context.h index 5986dc37fd2..7b336aacea9 100644 --- a/tensorflow/lite/micro/micro_interpreter_context.h +++ b/tensorflow/lite/micro/micro_interpreter_context.h @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -106,6 +106,31 @@ class MicroInterpreterContext : public MicroContext { // housekeeping in MicroInterpreterContext. void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles); +#ifdef USE_TFLM_COMPRESSION + + // Available during Prepare & Eval. Returns false if tensor is not + // compressed. + bool IsTensorCompressed(const TfLiteNode* node, int tensor_idx) override; + + // Only available during Prepare. The kernel is responsible for storing the + // scratch buffer handle. + int AllocateDecompressionScratchBuffer(const TfLiteNode* node, + int tensor_idx) override; + + // Available during Prepare & Eval. Returns nullptr if tensor is not + // compressed. + const CompressionTensorData* GetTensorCompressionData( + const TfLiteNode* node, int tensor_idx) override; + + // Only available during Eval. Returns nullptr on failure, otherwise returns a + // pointer to the scratch buffer. + void* DecompressTensorToScratchBuffer( + const TfLiteEvalTensor& tensor, + const CompressionTensorData& compression_data, + int scratch_buffer_handle) override; + +#endif // USE_TFLM_COMPRESSION + private: MicroAllocator& allocator_; MicroInterpreterGraph& graph_; diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index e44de6b09aa..873ea96ac1e 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/micro/micro_interpreter.h" #include +#include #include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h" #include "tensorflow/lite/micro/compatibility.h" @@ -108,6 +109,58 @@ TF_LITE_MICRO_TEST(TestInterpreter) { TF_LITE_MICRO_EXPECT_EQ(tflite::testing::MockCustom::freed_, true); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(TestInterpreterCompression) { + const tflite::Model* model = tflite::testing::GetSimpleMockModelCompressed(); + TF_LITE_MICRO_EXPECT(nullptr != model); + tflite::testing::TestingOpResolver op_resolver; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + tflite::testing::GetTestingOpResolver(op_resolver)); + + constexpr size_t allocator_buffer_size = 2000; + uint8_t allocator_buffer[allocator_buffer_size]; + + // Create a new scope so that we can test the destructor. 
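+  // Per the mock model built in test_helpers.cc, the compressed weights
+  // decode (4-bit indices into the int16 value table) to
+  //   {1, 2, 3, 4, 5, -1, -2, -3, -4, -5, 1, 2, 3, 4, 5},
+  // and BroadcastAddOp adds the scalar input (42 below) to every element,
+  // which is how kGolden is derived.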
+ { + tflite::MicroInterpreter interpreter(model, op_resolver, allocator_buffer, + allocator_buffer_size); + TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.inputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.outputs_size()); + + TfLiteTensor* input = interpreter.input(0); + TF_LITE_MICRO_EXPECT(nullptr != input); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt16, input->type); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(static_cast(2), input->bytes); + TF_LITE_MICRO_EXPECT(nullptr != input->data.data); + static_cast(input->data.data)[0] = 42; + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter.Invoke()); + + const std::initializer_list kGolden = { + 43, 44, 45, 46, 47, 41, 40, 39, 38, 37, 43, 44, 45, 46, 47}; + const int kGoldenCount = kGolden.size(); + TfLiteTensor* output = interpreter.output(0); + TF_LITE_MICRO_EXPECT(nullptr != output); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt16, output->type); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(kGoldenCount, output->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ( + static_cast(kGoldenCount * sizeof(*kGolden.begin())), + output->bytes); + TF_LITE_MICRO_EXPECT(nullptr != output->data.data); + for (int i = 0; i < kGoldenCount; i++) { + TF_LITE_MICRO_EXPECT_EQ(static_cast(output->data.data)[i], + kGolden.begin()[i]); + } + } +} + +#endif // USE_TFLM_COMPRESSION + TF_LITE_MICRO_TEST(TestMultiTenantInterpreter) { tflite::testing::TestingOpResolver op_resolver; TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc index f41dba61d7d..18addaee5f7 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/tensorflow/lite/micro/recording_micro_allocator.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
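The hunks below add a kCompressionData bucket to the recording allocator so
compression-related arena usage can be audited on its own. A minimal sketch of
reading that bucket after model allocation has finished (illustrative;
allocator is assumed to be a RecordingMicroAllocator that has already run
StartModelAllocation and FinishModelAllocation):

    tflite::RecordedAllocation rec = allocator->GetRecordedAllocation(
        tflite::RecordedAllocationType::kCompressionData);
    MicroPrintf("compression data: %d allocations, %d bytes requested",
                static_cast<int>(rec.count),
                static_cast<int>(rec.requested_bytes));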
@@ -78,6 +78,12 @@ RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation( return recorded_node_and_registration_array_data_; case RecordedAllocationType::kOpData: return recorded_op_data_; +#ifdef USE_TFLM_COMPRESSION + case RecordedAllocationType::kCompressionData: + return recorded_compression_data_; +#endif // USE_TFLM_COMPRESSION + default: + break; } MicroPrintf("Invalid allocation type supplied: %d", allocation_type); return RecordedAllocation(); @@ -112,6 +118,13 @@ void RecordingMicroAllocator::PrintAllocations() const { "NodeAndRegistration structs"); PrintRecordedAllocation(RecordedAllocationType::kOpData, "Operator runtime data", "OpData structs"); + +#ifdef USE_TFLM_COMPRESSION + + PrintRecordedAllocation(RecordedAllocationType::kCompressionData, + "Persistent compression data", "allocations"); + +#endif // USE_TFLM_COMPRESSION } void* RecordingMicroAllocator::AllocatePersistentBuffer(size_t bytes) { @@ -228,6 +241,21 @@ TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer( return status; } +#ifdef USE_TFLM_COMPRESSION + +TfLiteStatus RecordingMicroAllocator::AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = MicroAllocator::AllocateCompressedTensorsList( + model, subgraph_allocations); + + RecordAllocationUsage(allocations, recorded_compression_data_); + return status; +} + +#endif // USE_TFLM_COMPRESSION + RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const { return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(), /*used_bytes=*/recording_memory_allocator_->GetUsedBytes(), diff --git a/tensorflow/lite/micro/recording_micro_allocator.h b/tensorflow/lite/micro/recording_micro_allocator.h index b6f69264dc0..80f163240d3 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.h +++ b/tensorflow/lite/micro/recording_micro_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,6 +33,11 @@ enum class RecordedAllocationType { kTfLiteTensorVariableBufferData, kNodeAndRegistrationArray, kOpData, +#ifdef USE_TFLM_COMPRESSION + kCompressionData, +#endif // USE_TFLM_COMPRESSION + + kNumAllocationTypes, // must be last }; // Container for holding information about allocation recordings by a given @@ -93,6 +98,13 @@ class RecordingMicroAllocator : public MicroAllocator { int subgraph_index, bool allocate_temp) override; +#ifdef USE_TFLM_COMPRESSION + + TfLiteStatus AllocateCompressedTensorsList( + const Model* model, SubgraphAllocations* subgraph_allocations) override; + +#endif // USE_TFLM_COMPRESSION + private: RecordingMicroAllocator(RecordingSingleArenaBufferAllocator* memory_allocator, MicroMemoryPlanner* memory_planner); @@ -113,6 +125,9 @@ class RecordingMicroAllocator : public MicroAllocator { RecordedAllocation recorded_persistent_buffer_data_ = {}; RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {}; RecordedAllocation recorded_node_and_registration_array_data_ = {}; +#ifdef USE_TFLM_COMPRESSION + RecordedAllocation recorded_compression_data_ = {}; +#endif // USE_TFLM_COMPRESSION // TODO(b/187993291): Re-enable OpData allocating tracking. 
RecordedAllocation recorded_op_data_ = {}; diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc index 9d3a5965de4..1c4df8862a4 100644 --- a/tensorflow/lite/micro/recording_micro_allocator_test.cc +++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -317,6 +317,70 @@ TF_LITE_MICRO_TEST(TestMultiSubgraphModel) { num_tensors * TF_LITE_EVAL_TENSOR_STRUCT_SIZE); } +#ifdef USE_TFLM_COMPRESSION + +TF_LITE_MICRO_TEST(TestCompressedModel) { + tflite::ScratchBufferHandle* scratch_buffer_handles = nullptr; + tflite::testing::TestingOpResolver ops_resolver; + const tflite::Model* model = tflite::testing::GetSimpleMockModelCompressed(); + const int arena_size = 2048; + + uint8_t arena[arena_size]; + + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create(arena, arena_size); + TF_LITE_MICRO_EXPECT(micro_allocator != nullptr); + TF_LITE_MICRO_CHECK_FAIL(); + + tflite::SubgraphAllocations* subgraph_allocations = + micro_allocator->StartModelAllocation(model); + TF_LITE_MICRO_EXPECT(nullptr != subgraph_allocations); + TF_LITE_MICRO_CHECK_FAIL(); + + TfLiteStatus status = micro_allocator->FinishModelAllocation( + model, subgraph_allocations, &scratch_buffer_handles); + TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); + TF_LITE_MICRO_CHECK_FAIL(); + + micro_allocator->PrintAllocations(); + + size_t count_compression_allocations = 0; + size_t size_compression_allocations = 0; + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + tflite::CompressionTensorData** ctl = + subgraph_allocations[subgraph_idx].compressed.tensors; + if (ctl == nullptr) { + continue; + } + const tflite::SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + const size_t num_tensors = subgraph->tensors()->size(); + for (size_t i = 0; i < num_tensors; i++) { + if (ctl[i] != nullptr) { + count_compression_allocations++; + size_compression_allocations += sizeof(tflite::CompressionTensorData); + } + } + // Add the CompressionTensorData array + count_compression_allocations++; + size_compression_allocations += + num_tensors * sizeof(tflite::CompressionTensorData*); + } + + tflite::RecordedAllocation recorded_allocation = + micro_allocator->GetRecordedAllocation( + tflite::RecordedAllocationType::kCompressionData); + + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, + count_compression_allocations); + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, + size_compression_allocations); + TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes, + size_compression_allocations); +} + +#endif // USE_TFLM_COMPRESSION + // TODO(b/158124094): Find a way to audit OpData allocations on // cross-architectures. diff --git a/tensorflow/lite/micro/test_helper_custom_ops.cc b/tensorflow/lite/micro/test_helper_custom_ops.cc index 374aabcc9df..97577699961 100644 --- a/tensorflow/lite/micro/test_helper_custom_ops.cc +++ b/tensorflow/lite/micro/test_helper_custom_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -35,6 +35,18 @@ limitations under the License. namespace tflite { namespace testing { +namespace { + +template +void BroadcastAdd(const T input_scalar, const T* weights, T* output, + const size_t count) { + for (size_t i = 0; i < count; i++) { + output[i] = input_scalar + weights[i]; + } +} + +} // namespace + const TFLMRegistration* PackerOp::getRegistration() { return GetMutableRegistration(); } @@ -107,5 +119,180 @@ TfLiteStatus PackerOp::Invoke(TfLiteContext* context, TfLiteNode* node) { bool PackerOp::freed_ = false; +const TFLMRegistration* BroadcastAddOp::getRegistration() { + return GetMutableRegistration(); +} + +TFLMRegistration* BroadcastAddOp::GetMutableRegistration() { + static TFLMRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + return &r; +} + +void* BroadcastAddOp::Init(TfLiteContext* context, const char* buffer, + size_t length) { +#ifdef USE_TFLM_COMPRESSION + + weight_scratch_index_ = -1; + +#endif // USE_TFLM_COMPRESSION + + // Do nothing. + return nullptr; +} + +TfLiteStatus BroadcastAddOp::Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* weights = micro_context->AllocateTempInputTensor(node, 1); + TF_LITE_ENSURE(context, weights != nullptr); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, weights->type); + TF_LITE_ENSURE( + context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt8 || + input->type == kTfLiteInt16 || input->type == kTfLiteInt32 || + input->type == kTfLiteInt64); + TF_LITE_ENSURE(context, input->quantization.type == kTfLiteNoQuantization); + TF_LITE_ENSURE(context, weights->quantization.type == kTfLiteNoQuantization); + TF_LITE_ENSURE(context, output->quantization.type == kTfLiteNoQuantization); + TF_LITE_ENSURE(context, + ElementCount(*weights->dims) == ElementCount(*output->dims)); + TF_LITE_ENSURE(context, ElementCount(*input->dims) == 1); + TF_LITE_ENSURE(context, input->dims->size == 1); + TF_LITE_ENSURE(context, weights->dims->size == 1); + +#ifdef USE_TFLM_COMPRESSION + + // Compression scratch buffers. + // These will only be allocated if the tensor is compressed. 
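+  // AllocateDecompressionScratchBuffer() returns -1 when the tensor is not
+  // compressed; the returned handle is kept in static state so Invoke() can
+  // locate the decompression scratch buffer later.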
+ weight_scratch_index_ = + micro_context->AllocateDecompressionScratchBuffer(node, 1); + if (micro_context->IsTensorCompressed(node, 1)) { + TF_LITE_ENSURE(context, weight_scratch_index_ != -1); + } else { + TF_LITE_ENSURE(context, weight_scratch_index_ == -1); + } + +#endif // USE_TFLM_COMPRESSION + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(weights); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus BroadcastAddOp::Invoke(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); + const TfLiteEvalTensor* weights = + tflite::micro::GetEvalInput(context, node, 1); + TF_LITE_ENSURE(context, weights != nullptr); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); + +#ifdef USE_TFLM_COMPRESSION + + MicroContext* micro_context = GetMicroContext(context); + + const CompressionTensorData* weights_comp_td = + micro_context->GetTensorCompressionData(node, 1); + if (micro_context->IsTensorCompressed(node, 1)) { + TF_LITE_ENSURE(context, weights_comp_td != nullptr); + } else { + TF_LITE_ENSURE(context, weights_comp_td == nullptr); + } + +#endif // USE_TFLM_COMPRESSION + + switch (input->type) { + case kTfLiteFloat32: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt8: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt16: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt32: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + case kTfLiteInt64: { + BroadcastAdd( + tflite::micro::GetTensorData(input)[0], +#ifdef USE_TFLM_COMPRESSION + tflite::micro::GetTensorData( + micro_context, weights, weights_comp_td, weight_scratch_index_), +#else // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(weights), +#endif // USE_TFLM_COMPRESSION + tflite::micro::GetTensorData(output), + ElementCount(*output->dims)); + } break; + + default: { + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + } + + return kTfLiteOk; +} + +#ifdef 
USE_TFLM_COMPRESSION + +int BroadcastAddOp::weight_scratch_index_ = -1; + +#endif // USE_TFLM_COMPRESSION + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/test_helper_custom_ops.h b/tensorflow/lite/micro/test_helper_custom_ops.h index d28bb4038f1..53a8cc3bdd4 100644 --- a/tensorflow/lite/micro/test_helper_custom_ops.h +++ b/tensorflow/lite/micro/test_helper_custom_ops.h @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,6 +43,23 @@ class PackerOp { static bool freed_; }; +// This op optionally supports compressed weights +class BroadcastAddOp { + public: + static const TFLMRegistration* getRegistration(); + static TFLMRegistration* GetMutableRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); + + private: +#ifdef USE_TFLM_COMPRESSION + + static int weight_scratch_index_; // decompression scratch buffer index + +#endif // USE_TFLM_COMPRESSION +}; + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index 3f0f5ec0826..d5f50773f69 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/lite/micro/test_helpers.h" +#include #include #include #include @@ -33,6 +34,12 @@ limitations under the License. #include "tensorflow/lite/micro/test_helper_custom_ops.h" #include "tensorflow/lite/schema/schema_generated.h" +#ifdef USE_TFLM_COMPRESSION + +#include "tensorflow/lite/micro/compression/metadata_generated.h" + +#endif // USE_TFLM_COMPRESSION + // TODO(b/170464050): Use TFLM test only version of schema_utils. 
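+// The generated header included above provides the
+// tflite::micro::compression API (LutTensor, CreateMetadata,
+// FinishMetadataBuffer) used by BuildSimpleMockModelCompressed() below.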
namespace tflite { @@ -236,7 +243,7 @@ const Model* ModelBuilder::BuildModel( *builder_, 0, builder_->CreateVector(operator_codes_, next_operator_code_id_), builder_->CreateVector(subgraphs, subgraphs_size), - builder_->CreateString("teset_model"), + builder_->CreateString("test_model"), builder_->CreateVector(buffers, buffer_size), 0, builder_->CreateVector(metadata_, ModelBuilder::nbr_of_metadata_buffers_)); @@ -245,7 +252,7 @@ const Model* ModelBuilder::BuildModel( *builder_, 0, builder_->CreateVector(operator_codes_, next_operator_code_id_), builder_->CreateVector(subgraphs, subgraphs_size), - builder_->CreateString("teset_model"), + builder_->CreateString("test_model"), builder_->CreateVector(buffers, buffer_size)); } @@ -578,6 +585,116 @@ const Model* BuildSimpleMockModel() { return model; } +#ifdef USE_TFLM_COMPRESSION + +const flatbuffers::span BuildLutMetadata( + const std::initializer_list& + lut_tensor_structs) { + using flatbuffers::Offset; + namespace compression = tflite::micro::compression; + + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + auto lut_tensors = builder->CreateVectorOfStructs(lut_tensor_structs.begin(), + lut_tensor_structs.size()); + auto metadata = compression::CreateMetadata(*builder, lut_tensors); + compression::FinishMetadataBuffer(*builder, metadata); + return builder->GetBufferSpan(); +} + +const Model* BuildSimpleMockModelCompressed() { + using flatbuffers::Offset; + using flatbuffers::Vector; + using tflite::micro::compression::LutTensor; + constexpr uint kEmptyBuffer = 0; + constexpr uint kMetadataBuffer = 1; + constexpr uint kWeightsBuffer = 2; + constexpr uint kValueTableBuffer = 3; + // constexpr uint kInputTensor = 0; + constexpr uint kWeightsTensor = 1; + // constexpr uint kOutputTensor = 2; + constexpr uint kSubgraphIndex = 0; + constexpr uint kCompressedBitWidth = 4; + + const std::initializer_list lut_tensors = { + LutTensor(kSubgraphIndex, kWeightsTensor, kCompressedBitWidth, + kWeightsBuffer, kValueTableBuffer), + }; + auto lut_tensors_span = BuildLutMetadata(lut_tensors); + + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + // [1, 2, 3, 4, 5, -1, -2, -3, -4, -5, 1, 2, 3, 4, 5] + const std::initializer_list weights_data = {0x01, 0x23, 0x45, 0x98, + 0x76, 0x01, 0x23, 0x40}; + const std::initializer_list value_table_data = {1, 2, 3, 4, 5, + -1, -5, -4, -3, -2}; + auto value_table_offset = builder->CreateVector(value_table_data).o; + const std::initializer_list> buffers = { + CreateBuffer(*builder), + CreateBuffer(*builder, builder->CreateVector(lut_tensors_span)), + CreateBuffer(*builder, builder->CreateVector(weights_data)), + CreateBuffer(*builder, Offset>(value_table_offset)), + }; + + const std::initializer_list input_shape = {1}; + const std::initializer_list weights_shape = {15}; + const std::initializer_list output_shape = weights_shape; + const std::initializer_list> tensors = { + CreateTensor(*builder, builder->CreateVector(input_shape), + TensorType_INT16, kEmptyBuffer, + builder->CreateString("test_input_tensor"), 0, false), + CreateTensor(*builder, builder->CreateVector(weights_shape), + TensorType_INT16, kWeightsBuffer, + builder->CreateString("test_weight_tensor"), 0, false), + CreateTensor(*builder, builder->CreateVector(output_shape), + TensorType_INT16, kEmptyBuffer, + builder->CreateString("test_output_tensor"), 0, false), + }; + + const std::initializer_list subgraph_inputs = {0}; + const std::initializer_list subgraph_outputs = {2}; + const std::initializer_list operator_inputs = {0, 1}; + 
const std::initializer_list operator_outputs = {2}; + const std::initializer_list> operators = { + CreateOperator(*builder, 0, builder->CreateVector(operator_inputs), + builder->CreateVector(operator_outputs), + BuiltinOptions_NONE), + }; + + const std::initializer_list> subgraphs = { + CreateSubGraph(*builder, builder->CreateVector(tensors), + builder->CreateVector(subgraph_inputs), + builder->CreateVector(subgraph_outputs), + builder->CreateVector(operators), + builder->CreateString("test_subgraph")), + }; + + const std::initializer_list> operator_codes = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "broadcast_add_op", + /*version=*/0, BuiltinOperator_CUSTOM), + }; + + const std::initializer_list> metadata = { + CreateMetadata(*builder, + builder->CreateString(kCompressionMetadataString), + kMetadataBuffer), + }; + + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes), + builder->CreateVector(subgraphs), builder->CreateString("test_model"), + builder->CreateVector(buffers), 0, builder->CreateVector(metadata)); + + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + + return model; +} + +#endif // USE_TFLM_COMPRESSION + const Model* BuildComplexMockModel() { using flatbuffers::Offset; flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); @@ -1665,6 +1782,8 @@ TfLiteStatus GetTestingOpResolver( op_resolver.AddCustom("no_op", NoOp::GetMutableRegistration())); TF_LITE_ENSURE_STATUS(op_resolver.AddCustom( "custom_packer_op", PackerOp::GetMutableRegistration())); + TF_LITE_ENSURE_STATUS(op_resolver.AddCustom( + "broadcast_add_op", BroadcastAddOp::GetMutableRegistration())); TF_LITE_ENSURE_STATUS(op_resolver.AddIf()); return kTfLiteOk; } @@ -1698,6 +1817,18 @@ const Model* GetSimpleMockModel() { return model; } +#ifdef USE_TFLM_COMPRESSION + +const Model* GetSimpleMockModelCompressed() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleMockModelCompressed()); + } + return model; +} + +#endif // USE_TFLM_COMPRESSION + const Model* GetSimpleMultipleInputsModel() { static Model* model = nullptr; if (!model) { @@ -1890,6 +2021,7 @@ TfLiteFloatArray* FloatArrayFromFloats(const float* floats) { return reinterpret_cast(const_cast(floats)); } +// TODO(ddavis-2015): make template TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, TfLiteIntArray* dims, float input_scale, float weights_scale, bool is_variable) { @@ -1904,6 +2036,7 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, return result; } +// TODO(ddavis-2015): make template TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, TfLiteIntArray* dims, float input_scale, float weights_scale, bool is_variable) { @@ -1918,6 +2051,7 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, return result; } +// TODO(ddavis-2015): make template TfLiteTensor CreateQuantizedBiasTensor(const float* data, std::int64_t* quantized, TfLiteIntArray* dims, float input_scale, @@ -1933,37 +2067,7 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, return result; } -// Quantizes int32_t bias tensor with per-channel weights determined by input -// scale multiplied by weight scale for each channel. 
-template -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, T* quantized, TfLiteIntArray* dims, float input_scale, - float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable) { - int input_size = ElementCount(*dims); - int num_channels = dims->data[quantized_dimension]; - // First element is reserved for array length - zero_points[0] = num_channels; - scales[0] = static_cast(num_channels); - float* scales_array = &scales[1]; - for (int i = 0; i < num_channels; i++) { - scales_array[i] = input_scale * weight_scales[i]; - zero_points[i + 1] = 0; - } - - SymmetricPerChannelQuantize(input, quantized, input_size, num_channels, - scales_array); - - affine_quant->scale = FloatArrayFromFloats(scales); - affine_quant->zero_point = IntArrayFromInts(zero_points); - affine_quant->quantized_dimension = quantized_dimension; - - TfLiteTensor result = CreateTensor(quantized, dims, is_variable); - result.quantization = {kTfLiteAffineQuantization, affine_quant}; - return result; -} - +// TODO(ddavis-2015): remove TfLiteTensor CreatePerChannelQuantizedBiasTensor( const float* input, int32_t* quantized, TfLiteIntArray* dims, float input_scale, float* weight_scales, float* scales, int* zero_points, @@ -1974,6 +2078,7 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor( affine_quant, quantized_dimension, is_variable); } +// TODO(ddavis-2015): remove TfLiteTensor CreatePerChannelQuantizedBiasTensor( const float* input, std::int64_t* quantized, TfLiteIntArray* dims, float input_scale, float* weight_scales, float* scales, int* zero_points, diff --git a/tensorflow/lite/micro/test_helpers.h b/tensorflow/lite/micro/test_helpers.h index 6315b9fecdc..bad6e47d672 100644 --- a/tensorflow/lite/micro/test_helpers.h +++ b/tensorflow/lite/micro/test_helpers.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -112,6 +112,15 @@ TfLiteStatus GetTestingOpResolver(TestingOpResolver& op_resolver); // 1 layer of weights, 1 output Tensor, and 1 operator. const Model* GetSimpleMockModel(); +#ifdef USE_TFLM_COMPRESSION + +// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input, +// 1 layer of weights, 1 output Tensor, and 1 operator (BroadcastAddOp). The +// weights tensor is compressed. +const Model* GetSimpleMockModelCompressed(); + +#endif // USE_TFLM_COMPRESSION + // Returns a flatbuffer TensorFlow Lite model with more inputs, variable // tensors, and operators. const Model* GetComplexMockModel(); @@ -220,8 +229,6 @@ TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims, result.is_variable = is_variable; result.allocation_type = kTfLiteMemNone; result.data.data = const_cast(data); - result.bytes = ElementCount(*dims) * sizeof(T); - result.data.data = const_cast(data); if (type == kTfLiteInt4) { result.type = kTfLiteInt4; @@ -233,7 +240,13 @@ TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims, // a single CreateTensor method. A Const array should be used for immutable // input tensors and non-const array should be used for mutable and output // tensors. 
- result.type = typeToTfLiteType(); + if (type == kTfLiteNoType) { + result.type = typeToTfLiteType(); + } else { + result.type = type; + } + + result.bytes = ElementCount(*dims) * TfLiteTypeGetSize(result.type); } return result; } @@ -260,37 +273,106 @@ TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized, type); } +// TODO(ddavis-2015): remove TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, TfLiteIntArray* dims, float input_scale, float weights_scale, bool is_variable = false); +// TODO(ddavis-2015): remove TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, TfLiteIntArray* dims, float input_scale, float weights_scale, bool is_variable = false); +// TODO(ddavis-2015): remove TfLiteTensor CreateQuantizedBiasTensor(const float* data, std::int64_t* quantized, TfLiteIntArray* dims, float input_scale, float weights_scale, bool is_variable = false); -// Quantizes int32_t bias tensor with per-channel weights determined by input -// scale multiplied by weight scale for each channel. +// Creates bias tensor with pre-calculated compressed input data and per-channel +// weights determined by input scale multiplied by weight scale for each +// channel. +template TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, int32_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable = false); + const T* input_data, TfLiteIntArray* dims, float input_scale, + const TfLiteFloatArray* weight_scales, TfLiteFloatArray* scales, + TfLiteIntArray* zero_points, TfLiteAffineQuantization* affine_quant, + int quantized_dimension, bool is_variable = false, + TfLiteType type = kTfLiteNoType) { + int num_channels = dims->data[quantized_dimension]; + zero_points->size = num_channels; + scales->size = num_channels; + for (int i = 0; i < num_channels; i++) { + scales->data[i] = input_scale * weight_scales->data[i]; + zero_points->data[i] = 0; + MicroPrintf("index %d scales %f zero_point %d input scale %f weight %f", i, + (double)scales->data[i], zero_points->data[i], + (double)input_scale, (double)weight_scales->data[i]); + } + + affine_quant->scale = scales; + affine_quant->zero_point = zero_points; + affine_quant->quantized_dimension = quantized_dimension; + + TfLiteTensor result = CreateTensor(input_data, dims, is_variable, type); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + return result; +} -// Quantizes int64_t bias tensor with per-channel weights determined by input +// Quantizes bias tensor with per-channel weights determined by input // scale multiplied by weight scale for each channel. 
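+//
+// Illustrative use (the names below are hypothetical test values, not part
+// of this header):
+//
+//   constexpr int kChannels = 2;
+//   const float weight_scales[kChannels] = {0.5f, 0.25f};
+//   float scales[kChannels + 1];     // element 0 is set to the channel count
+//   int zero_points[kChannels + 1];  // element 0 is set to the channel count
+//   TfLiteAffineQuantization affine_quant;
+//   int32_t quantized[kChannels];
+//   TfLiteTensor bias = CreatePerChannelQuantizedBiasTensor(
+//       bias_data, quantized, bias_dims, /*input_scale=*/0.1f, weight_scales,
+//       scales, zero_points, &affine_quant, /*quantized_dimension=*/0);
+//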
+template TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, std::int64_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, + const float* input, T* quantized, TfLiteIntArray* dims, float input_scale, + const float* weight_scales, float* scales, int* zero_points, TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable = false); + bool is_variable = false) { + int input_size = ElementCount(*dims); + int num_channels = dims->data[quantized_dimension]; + // First element is reserved for array length + zero_points[0] = num_channels; + scales[0] = static_cast(num_channels); + float* scales_array = &scales[1]; + for (int i = 0; i < num_channels; i++) { + scales_array[i] = input_scale * weight_scales[i]; + zero_points[i + 1] = 0; + MicroPrintf("index %d scales %f zero_point %d input scale %f weight %f", i, + (double)scales_array[i], zero_points[i + 1], + (double)input_scale, (double)weight_scales[i]); + } + + SymmetricPerChannelQuantize(input, quantized, input_size, num_channels, + scales_array); + + affine_quant->scale = FloatArrayFromFloats(scales); + affine_quant->zero_point = IntArrayFromInts(zero_points); + affine_quant->quantized_dimension = quantized_dimension; + + TfLiteTensor result = CreateTensor(quantized, dims, is_variable); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + int64_t data0 = quantized[0]; + MicroPrintf("quantp %p data %f data quantized %lld", affine_quant, + (double)input[0], data0); + return result; +} + +template +TfLiteTensor CreatePerChannelQuantizedTensor( + const T* quantized, TfLiteIntArray* dims, TfLiteFloatArray* scales, + TfLiteIntArray* zero_points, TfLiteAffineQuantization* affine_quant, + int quantized_dimension, bool is_variable = false, + TfLiteType type = kTfLiteNoType) { + affine_quant->scale = scales; + affine_quant->zero_point = zero_points; + affine_quant->quantized_dimension = quantized_dimension; + + TfLiteTensor result = CreateTensor(quantized, dims, is_variable, type); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + return result; +} TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales, diff --git a/tensorflow/lite/micro/testing/micro_test.h b/tensorflow/lite/micro/testing/micro_test.h index a28f4b6d8e4..1e17531efea 100644 --- a/tensorflow/lite/micro/testing/micro_test.h +++ b/tensorflow/lite/micro/testing/micro_test.h @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -264,4 +264,11 @@ inline void InitializeTest() { InitializeTarget(); } } \ } while (false) +#define TF_LITE_MICRO_CHECK_FAIL() \ + do { \ + if (micro_test::did_test_fail) { \ + return kTfLiteError; \ + } \ + } while (false) + #endif // TENSORFLOW_LITE_MICRO_TESTING_MICRO_TEST_H_ diff --git a/tensorflow/lite/micro/tools/benchmarking/metrics.cc b/tensorflow/lite/micro/tools/benchmarking/metrics.cc index 3a4bf7e4917..f71a4cd139e 100644 --- a/tensorflow/lite/micro/tools/benchmarking/metrics.cc +++ b/tensorflow/lite/micro/tools/benchmarking/metrics.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -46,7 +46,8 @@ struct LogAllocationRecord { constexpr int kArenaRows = 3; constexpr int kArenaColumns = 3; -constexpr int kAllocationTypes = 7; +constexpr int kAllocationTypes = + static_cast(tflite::RecordedAllocationType::kNumAllocationTypes); constexpr int kAllocationColumns = 6; constexpr int kMaxBufSize = 100; @@ -85,16 +86,25 @@ LogAllocationRecord GetLogAllocationRecord( tflite::RecordedAllocationType::kPersistentBufferData, tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData, tflite::RecordedAllocationType::kNodeAndRegistrationArray, - tflite::RecordedAllocationType::kOpData}; + tflite::RecordedAllocationType::kOpData, +#ifdef USE_TFLM_COMPRESSION + tflite::RecordedAllocationType::kCompressionData, +#endif // USE_TFLM_COMPRESSION + }; static_assert(std::extent::value == kAllocationTypes, "kAllocationTypes mismatch"); - const char* titles[] = {"Eval tensor data", - "Persistent tensor data", - "Persistent quantization data", - "Persistent buffer data", - "Tensor variable buffer data", - "Node and registration array", - "Operation data"}; + const char* titles[] = { + "Eval tensor data", + "Persistent tensor data", + "Persistent quantization data", + "Persistent buffer data", + "Tensor variable buffer data", + "Node and registration array", + "Operation data", +#ifdef USE_TFLM_COMPRESSION + "Compression data", +#endif // USE_TFLM_COMPRESSION + }; static_assert(std::extent::value == kAllocationTypes, "kAllocationTypes mismatch"); const size_t total_bytes = diff --git a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh index 998827f24de..f5392dddeec 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh @@ -41,6 +41,12 @@ readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/M readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR} +# optional TFLM tensor compression - execute the unit tests +readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test \ + TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + EXTERNAL_DIR=${EXTERNAL_DIR} \ + USE_TFLM_COMPRESSION=yes + # run generic benchmark readable_run make -j$(nproc) -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \ TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index 90b0c2945ff..45a7af96382 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -270,6 +270,17 @@ endif # runtime that can be linked in to other programs. MICROLITE_LIB_NAME := libtensorflow-microlite.a +# TFLM optional compression support (default disabled) +ENABLE_COMPRESSION := no +ifneq ($(USE_TFLM_COMPRESSION),) + # currently only Linux targets supported + ifeq ($(TARGET), $(filter $(TARGET), linux)) + CXXFLAGS += -DUSE_TFLM_COMPRESSION + CCFLAGS += -DUSE_TFLM_COMPRESSION + ENABLE_COMPRESSION := yes + endif +endif + # Where compiled objects are stored. 
BASE_GENDIR := gen GENDIR := $(BASE_GENDIR)/$(TARGET)_$(TARGET_ARCH)_$(BUILD_TYPE) @@ -279,6 +290,9 @@ endif ifneq ($(CO_PROCESSOR),) GENDIR := $(GENDIR)_$(CO_PROCESSOR) endif +ifeq ($(ENABLE_COMPRESSION), yes) + GENDIR := $(GENDIR)_compression +endif GENDIR := $(GENDIR)_$(TOOLCHAIN)/ CORE_OBJDIR := $(GENDIR)obj/core/
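# Example usage (assumes a Linux host, matching the TARGET filter above):
# build and run the unit tests with tensor compression enabled; output is
# kept in a separate "..._compression" GENDIR so it does not collide with a
# default build.
#
#   make -f tensorflow/lite/micro/tools/make/Makefile test USE_TFLM_COMPRESSION=yes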