Recently, we created a new custom operation in TensorFlow:

// NOTE(review): as registered here the op is stateless — TensorFlow assumes it is a
// pure function of its inputs and may fold/cache the result (the behavior this post
// goes on to diagnose). The corrected registration appears later in the post.
REGISTER_OP("GetImageID")                                                
    .Input("count: int32")                                                 
    .Output("image_id: string");
                                                                           
using namespace tensorflow;                                                
using namespace std;                                                       
                                                                           
class GetImageIDOp : public OpKernel {                                   
 public:                                                                   
  explicit GetImageIDOp(OpKernelConstruction* ctx) : OpKernel(ctx) {     
  }                                                                        
                                                                           
  void Compute(OpKernelContext* ctx) override {                            
    const Tensor* cnt;                                             
    OP_REQUIRES_OK(ctx, ctx->input("count", &cnt));  // This is how we should get 'input' of Op
                                                                           
    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(cnt->shape()),     
                errors::InvalidArgument("cnt is not a scalar: ", cnt->shape().DebugString()));

    int32 count = cnt->scalar()();                          
    
    vector image_ids;
    int num = get_image_ids(&image_ids);  // Get image_ids from somewhere, such as network, or disk

    Tensor* image_id;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("image_id", TensorShape({static_cast(num)}), &image_id));

    auto imageid_flat = image_id->flat();
    for (int i = 0; i < num; i++) {
      imageid_flat(i) = image_ids[i];
    }
  }

 private:
};

// Register the GetImageIDOp kernel for CPU execution.
REGISTER_KERNEL_BUILDER(Name("GetImageID").Device(DEVICE_CPU), GetImageIDOp);

It's as simple as the example in TensorFlow's documentation. But when we run this Op in a session:

# Build the op once, then fetch it repeatedly in a session loop.
get_image_id_op = get_image_id(32)

with tf.Session() as sess:
  while True:
    sess.run(get_image_id_op)

It only gets image_ids from the network once, and then reuses the result of the first 'run' forever, without even calling the 'Compute()' function in the C++ code again!

It seems TensorFlow optimized the new Op and never runs it twice. My colleague suggested working around this problem by using tf.placeholder:

# Workaround: feed a fresh placeholder value each step so the graph cannot
# treat the op's output as a constant.
counter = tf.placeholder(tf.int32)
get_image_id_op = get_image_id(counter)

with tf.Session() as sess:
  while True:
    sess.run(get_image_id_op, feed_dict={counter: count})

That looks a little tricky. The final solution is to add a flag in the C++ code to mark the new Op as stateful, which prevents CSE (Common Subexpression Elimination) and constant folding:

REGISTER_OP("GetImageID")                                                
    // Stateful ops are never CSE'd, constant-folded, or cached by the runtime,
    // so Compute() runs on every session.run() call.
    .SetIsStateful()
    .Input("count: int32")                                                 
    .Output("image_id: string");
......

Attached is the 'CMakeLists.txt':

cmake_minimum_required(VERSION 2.8)
project(my_proj)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -std=c++11 -O2 -g2")
#set(CMAKE_MACOSX_RPATH 0)
set(CMAKE_SKIP_RPATH TRUE)
if (APPLE)
  # Leave TensorFlow symbols undefined; they resolve at load time inside the
  # Python process that imports the .so.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -undefined dynamic_lookup")
elseif (UNIX)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
# Ask the installed TensorFlow package for its header directory.
# FIX(review): OUTPUT_STRIP_TRAILING_WHITESPACE is required — python's print()
# appends a newline, which would otherwise be embedded in tf_inc and corrupt
# the include path passed to the compiler.
execute_process(COMMAND python -c "import tensorflow as tf; print(tf.sysconfig.get_include())"
                OUTPUT_VARIABLE tf_inc
                OUTPUT_STRIP_TRAILING_WHITESPACE)
include_directories(${tf_inc} "my/")
link_directories("/usr/lib64/" "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow-core/third_party/erpc_lib/lib/")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib/)
add_library(my_op SHARED my_op.cc)
target_link_libraries(my_op tbb protobuf)