Create codegen_preprocessor (#2219)
rascani authored Sep 14, 2023
1 parent 3323a41 commit 77e2cdb
Showing 6 changed files with 181 additions and 3 deletions.
1 change: 1 addition & 0 deletions codegen/BUILD
@@ -64,6 +64,7 @@ py_binary(
deps = [
":graph",
":inference_generator",
"//codegen/preprocessor:preprocessor_schema_py",
"//tensorflow/lite/tools:flatbuffer_utils",
"@absl_py//absl:app",
"@absl_py//absl/flags",
26 changes: 25 additions & 1 deletion codegen/code_generator.py
@@ -22,18 +22,27 @@

from tflite_micro.codegen import inference_generator
from tflite_micro.codegen import graph
from tflite_micro.codegen.preprocessor import preprocessor_schema_py_generated as preprocessor_fb
from tflite_micro.tensorflow.lite.tools import flatbuffer_utils

# Usage information:
# Default:
# `bazel run codegen:code_generator -- --model=</path/to/my_model.tflite>`
# `bazel run codegen:code_generator -- \
# --model=</path/to/my_model.tflite> \
# --preprocessed_data=</path/to/preprocessor_output>`
# Output will be located at: /path/to/my_model.h|cc

_MODEL_PATH = flags.DEFINE_string(name="model",
                                  default=None,
                                  help="Path to the TFLite model file.",
                                  required=True)

_PREPROCESSED_DATA_PATH = flags.DEFINE_string(
    name="preprocessed_data",
    default=None,
    help="Path to output of codegen_preprocessor.",
    required=True)

_OUTPUT_DIR = flags.DEFINE_string(
    name="output_dir",
    default=None,
@@ -48,12 +57,27 @@
    required=False)


def _read_preprocessed_data(
    preprocessed_data_file: str) -> preprocessor_fb.DataT:
  with open(preprocessed_data_file, 'rb') as file:
    data_byte_array = bytearray(file.read())
  return preprocessor_fb.DataT.InitFromObj(
      preprocessor_fb.Data.GetRootAs(data_byte_array, 0))


def main(argv: Sequence[str]) -> None:
  output_dir = _OUTPUT_DIR.value or os.path.dirname(_MODEL_PATH.value)
  output_name = _OUTPUT_NAME.value or os.path.splitext(
      os.path.basename(_MODEL_PATH.value))[0]

  model = flatbuffer_utils.read_model(_MODEL_PATH.value)
  preprocessed_data = _read_preprocessed_data(_PREPROCESSED_DATA_PATH.value)

  print("Generating inference code for model:\n"
        " model: {}\n"
        " preprocessed_model: {}\n".format(
            _MODEL_PATH.value,
            preprocessed_data.inputModelPath.decode('utf-8')))

  inference_generator.generate(output_dir, output_name,
                               graph.OpCodeTable([model]), graph.Graph(model))
37 changes: 35 additions & 2 deletions codegen/examples/hello_world/README.md
@@ -1,18 +1,51 @@
# Codegen Hello World Example

This is a code-generated example of the hello world model. The process is
currently somewhat involved:

## Build the preprocessor for your target

This creates a target-specific preprocessor binary capable of performing the
init and prepare stages of the Interpreter and serializing the output. This
binary can be re-used for multiple models.

### x86
```
make -f tensorflow/lite/micro/tools/make/Makefile codegen_preprocessor
```

## Run the preprocessor

The preprocessor will take the provided model, create a TFLM Interpreter, and
allocate tensors. It will then capture and serialize the resulting data
structures needed for inference. For embedded targets, this should be run under
simulation.

### x86
```
./gen/linux_x86_64_default/bin/codegen_preprocessor \
$(pwd)/tensorflow/lite/micro/examples/hello_world/models/hello_world_int8.tflite \
$(pwd)/gen/linux_x86_64_default/genfiles/hello_world_int8.ppd
```

## Generate the inference code

To generate the inference code at `codegen/examples/hello_world/hello_world_model.h/.cc`:

### x86
```
bazel run codegen:code_generator -- \
--model $(pwd)/tensorflow/lite/micro/examples/hello_world/models/hello_world_int8.tflite \
--preprocessed_data $(pwd)/gen/linux_x86_64_default/genfiles/hello_world_int8.ppd \
--output_dir $(pwd)/codegen/examples/hello_world \
--output_name hello_world_model
```

## Compile the generated inference code

To compile the generated source, you can use the Makefile:

### x86
```
make -f tensorflow/lite/micro/tools/make/Makefile codegen_hello_world
```
17 changes: 17 additions & 0 deletions codegen/preprocessor/Makefile.inc
@@ -0,0 +1,17 @@
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

CODEGEN_PREPROCESSOR_SRCS := \
$(TENSORFLOW_ROOT)codegen/preprocessor/main.cc
88 changes: 88 additions & 0 deletions codegen/preprocessor/main.cc
@@ -0,0 +1,88 @@
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>

#include "codegen/preprocessor/preprocessor_schema_generated.h"
#include "flatbuffers/flatbuffers.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace {

std::unique_ptr<char[]> ReadModelFile(const char* model_file_name) {
  std::ifstream model_file(model_file_name, std::ios::binary);
  if (!model_file.is_open()) {
    std::cerr << "codegen_preprocessor: could not open model file: "
              << model_file_name << std::endl;
    return nullptr;
  }

  model_file.seekg(0, std::ios::end);
  size_t num_bytes = model_file.tellg();
  model_file.seekg(0, std::ios::beg);
  std::unique_ptr<char[]> model_data(new char[num_bytes]);
  model_file.read(model_data.get(), num_bytes);

  return model_data;
}

int WriteOutputFile(const char* output_file_name,
                    flatbuffers::span<uint8_t> output) {
  std::ofstream output_file(output_file_name,
                            std::ios::binary | std::ios::trunc);
  if (!output_file.is_open()) {
    std::cerr << "codegen_preprocessor: could not open output file: "
              << output_file_name << std::endl;
    return EXIT_FAILURE;
  }

  output_file.write(reinterpret_cast<char*>(output.data()), output.size());
  return 0;
}

} // namespace

int main(int argc, char* argv[]) {
  if (argc < 3) {
    std::cerr << "codegen_preprocessor: invalid usage!" << std::endl;
    std::cerr << "usage: codegen_preprocessor <tflite_model> <output_file>"
              << std::endl;
    return EXIT_FAILURE;
  }

  const char* model_file_name = argv[1];
  const char* output_file_name = argv[2];

  const auto model_data = ReadModelFile(model_file_name);
  if (!model_data) {
    return EXIT_FAILURE;
  }

  // We have to create our own allocator, as the typical TFLM runtime disables
  // its use (to avoid dynamic allocation).
  flatbuffers::DefaultAllocator allocator;
  flatbuffers::FlatBufferBuilder builder{2048, &allocator};
  const auto input_model_path = builder.CreateString(model_file_name);

  // Do the preprocessing work.

  tflm::codegen::preprocessor::DataBuilder data_builder(builder);
  data_builder.add_input_model_path(input_model_path);
  builder.Finish(data_builder.Finish());

  return WriteOutputFile(output_file_name, builder.GetBufferSpan());
}
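
The preprocessing placeholder in `main()` above is where the init/prepare capture described in the hello_world README would happen. A minimal sketch of what that step might look like, assuming the standard TFLM `MicroInterpreter` API; the registered ops, arena size, and the `PreprocessModel` helper name are illustrative assumptions, not part of this commit:

```
// Sketch only: the op list and arena size are assumptions for hello_world_int8.
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace {

constexpr size_t kArenaSize = 16 * 1024;
alignas(16) uint8_t g_arena[kArenaSize];

TfLiteStatus PreprocessModel(const char* model_data) {
  const tflite::Model* model = tflite::GetModel(model_data);

  // Register only the ops the model uses; hello_world_int8 needs just
  // FullyConnected. Other models would add their own ops here.
  tflite::MicroMutableOpResolver<1> op_resolver;
  if (op_resolver.AddFullyConnected() != kTfLiteOk) {
    return kTfLiteError;
  }

  // Constructing the interpreter and allocating tensors runs each kernel's
  // init and prepare stages and fixes the memory plan.
  tflite::MicroInterpreter interpreter(model, op_resolver, g_arena,
                                       kArenaSize);
  if (interpreter.AllocateTensors() != kTfLiteOk) {
    return kTfLiteError;
  }

  // A complete preprocessor would now capture the resulting allocations and
  // serialize them into the Data flatbuffer alongside input_model_path.
  return kTfLiteOk;
}

}  // namespace
```

The captured data would then extend the `Data` table, which today records only `input_model_path`.
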
15 changes: 15 additions & 0 deletions tensorflow/lite/micro/tools/make/Makefile
@@ -294,6 +294,8 @@ MICRO_LITE_BENCHMARKS := $(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tool
MICROLITE_BENCHMARK_SRCS := \
$(wildcard $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/benchmarking/*benchmark.cc)

MICRO_LITE_CODEGEN_PREPROCESSOR := $(TENSORFLOW_ROOT)codegen/preprocessor/Makefile.inc

MICRO_LITE_CODEGEN_EXAMPLES := $(shell find $(TENSORFLOW_ROOT)codegen/examples/ -name Makefile.inc)

MICROLITE_TEST_SRCS := \
@@ -704,6 +706,9 @@ include $(MICRO_LITE_BENCHMARKS)
# Load custom kernel tests.
include $(MAKEFILE_DIR)/additional_tests.inc

# Load codegen preprocessor rules
include $(MICRO_LITE_CODEGEN_PREPROCESSOR)

# Create rules for downloading third-party dependencies.
THIRD_PARTY_TARGETS :=
$(foreach DOWNLOAD,$(THIRD_PARTY_DOWNLOADS),$(eval $(call create_download_rule,$(DOWNLOAD))))
@@ -863,6 +868,16 @@ integration_tests: $(MICROLITE_INTEGRATION_TEST_TARGETS)
generated_micro_mutable_op_resolver: $(MICROLITE_GEN_OP_RESOLVER_TEST_TARGETS)
endif

CODEGEN_PREPROCESSOR_PATH := $(BINDIR)codegen_preprocessor

codegen_preprocessor: $(CODEGEN_PREPROCESSOR_PATH)

$(CODEGEN_PREPROCESSOR_PATH): $(CODEGEN_PREPROCESSOR_SRCS) $(MICROLITE_LIB_PATH)
	@mkdir -p $(dir $@)
	$(CXX) $(CXXFLAGS) $(INCLUDES) \
	  -o $@ $< \
	  $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)

# Just build the test targets
build: $(MICROLITE_BUILD_TARGETS)
