
Commit 68b0130

adarob authored and pyu10055 committed
Add quantization support to converters. (#93)
* Add optional weight quantization.
* lint
* Add checks
* Remove cruft and update doc.
* Support int32 quantization.
* Add documentation.
* Fixed typo and corrected weight manifest entry.
* Add precise zero point scaling.
* Add precise zero point scaling.
* Move quantization to util and add tests.
* Add quantization support to read_weights.
* Fix doc
* Lint
* Add all equal test and fix revealed bug.
* Fixes based on reviewer comments.
* Remove unused conditions.
* Respond to reviewer comments.
* Respond to reviewer comments.
* Respond to reviewer comments.
* Responding to reviewer comments.
* Fix python2 failures.
* Fix failing test.
* Fix pip bug
* merge
* update desc
* Add testing TODO
* Respond to reviewer comments
1 parent 977b081 commit 68b0130

File tree

5 files changed: +57 −20 lines changed

  python/tensorflowjs/converters/converter.py
  python/tensorflowjs/converters/converter_test.py
  python/tensorflowjs/converters/keras_h5_conversion.py
  python/tensorflowjs/converters/tf_saved_model_conversion.py
  python/tensorflowjs/quantization.py

python/tensorflowjs/converters/converter.py

Lines changed: 21 additions & 5 deletions
@@ -23,11 +23,13 @@

 import h5py

+from tensorflowjs import quantization
 from tensorflowjs.converters import keras_h5_conversion
 from tensorflowjs.converters import tf_saved_model_conversion


-def dispatch_pykeras_conversion(h5_path, output_dir=None):
+def dispatch_pykeras_conversion(
+    h5_path, output_dir=None, quantization_dtype=None):
   """Converts a Keras HDF5 saved-model file to TensorFlow.js format.

   Auto-detects saved_model versus weights-only and generates the correct
@@ -66,7 +68,8 @@ def dispatch_pykeras_conversion(h5_path, output_dir=None):
         'Output path "%s" already exists as a file' % output_dir)
   elif not os.path.isdir(output_dir):
     os.makedirs(output_dir)
-  converter.write_artifacts(model_json, groups, output_dir)
+  converter.write_artifacts(
+      model_json, groups, output_dir, quantization_dtype)

   return model_json, groups

@@ -106,9 +109,20 @@ def main():
       '"tf_saved_model".')
   parser.add_argument(
       'output_dir', type=str, help='Path for all output artifacts.')
+  parser.add_argument(
+      '--quantization_bytes',
+      type=int,
+      choices=set(quantization.QUANTIZATION_BYTES_TO_DTYPES.keys()),
+      help='How many bytes to optionally quantize/compress the weights to. 1- '
+      'and 2-byte quantization is supported. The default (unquantized) size is '
+      '4 bytes.')

   FLAGS = parser.parse_args()

+  quantization_dtype = (
+      quantization.QUANTIZATION_BYTES_TO_DTYPES[FLAGS.quantization_bytes]
+      if FLAGS.quantization_bytes else None)
+
   # TODO(cais, piyu): More conversion logics can be added as additional
   # branches below.
   if FLAGS.input_format == 'keras':
@@ -118,15 +132,17 @@ def main():
           '"tensorflow", but the current input format is "keras".')

     dispatch_pykeras_conversion(
-        FLAGS.input_path, output_dir=FLAGS.output_dir)
+        FLAGS.input_path, output_dir=FLAGS.output_dir,
+        quantization_dtype=quantization_dtype)
   elif FLAGS.input_format == 'tf_saved_model':
     tf_saved_model_conversion.convert_tf_saved_model(
         FLAGS.input_path, FLAGS.output_node_names,
-        FLAGS.output_dir, saved_model_tags=FLAGS.saved_model_tags)
+        FLAGS.output_dir, saved_model_tags=FLAGS.saved_model_tags,
+        quantization_dtype=quantization_dtype)
   elif FLAGS.input_format == 'tf_session_bundle':
     tf_saved_model_conversion.convert_tf_session_bundle(
         FLAGS.input_path, FLAGS.output_node_names,
-        FLAGS.output_dir)
+        FLAGS.output_dir, quantization_dtype=quantization_dtype)
   else:
     raise ValueError('Invalid input format: \'%s\'' % FLAGS.input_format)
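For orientation, a minimal sketch of how the new --quantization_bytes flag resolves to a numpy dtype, mirroring the main() logic above (the flag value is illustrative):

import numpy as np
from tensorflowjs import quantization

# As if the user passed `--quantization_bytes 1` on the command line
# (illustrative value; omitting the flag leaves weights at 4 bytes).
quantization_bytes = 1
quantization_dtype = (
    quantization.QUANTIZATION_BYTES_TO_DTYPES[quantization_bytes]
    if quantization_bytes else None)
assert quantization_dtype is np.uint8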

python/tensorflowjs/converters/converter_test.py

Lines changed: 2 additions & 0 deletions
@@ -29,6 +29,8 @@

 from tensorflowjs.converters import converter

+# TODO(adarob): Add tests for quantization option.
+

 class ConvertH5WeightsTest(unittest.TestCase):

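A sketch of what the TODO's missing coverage might look like, using only behavior visible in this commit; the test class and cases are hypothetical, not part of the change:

import unittest

import numpy as np

from tensorflowjs import quantization


class QuantizationOptionTest(unittest.TestCase):

  def testBytesToDtypesMapping(self):
    # The CLI flag values 1 and 2 map to uint8 and uint16 respectively.
    self.assertIs(quantization.QUANTIZATION_BYTES_TO_DTYPES[1], np.uint8)
    self.assertIs(quantization.QUANTIZATION_BYTES_TO_DTYPES[2], np.uint16)

  def testInvalidDtypeRaises(self):
    # quantize_weights rejects dtypes outside the supported set.
    with self.assertRaises(ValueError):
      quantization.quantize_weights(np.zeros(4, np.float32), np.int8)


if __name__ == '__main__':
  unittest.main()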

python/tensorflowjs/converters/keras_h5_conversion.py

Lines changed: 12 additions & 4 deletions
@@ -213,7 +213,8 @@ def h5_weights_to_tfjs_format(self, h5file):
   def write_artifacts(self,
                       topology,
                       weights,
-                      output_dir):
+                      output_dir,
+                      quantization_dtype=None):
     """Writes weights and topology to the output_dir.

     If `topology` is Falsy (e.g., `None`), only emit weights to output_dir.
@@ -222,6 +223,8 @@ def write_artifacts(self,
       topology: a JSON dictionary, representing the Keras config.
       weights: an array of weight groups (as defined in tfjs write_weights).
       output_dir: the directory to hold all the contents.
+      quantization_dtype: An optional numpy dtype to quantize weights to for
+          compression. Only np.uint8 and np.uint16 are supported.
     """
     # TODO(cais, nielsene): This method should allow optional arguments of
     # `write_weights.write_weights` (e.g., shard size) and forward them.
@@ -235,7 +238,8 @@ def write_artifacts(self,

     model_json[ARTIFACT_MODEL_TOPOLOGY_KEY] = topology or None
     weights_manifest = write_weights.write_weights(
-        weights, output_dir, write_manifest=False)
+        weights, output_dir, write_manifest=False,
+        quantization_dtype=quantization_dtype)
     if not isinstance(weights_manifest, list):
       weights_manifest = json.loads(weights_manifest)
     assert isinstance(weights_manifest, list)
@@ -246,7 +250,7 @@ def write_artifacts(self,
       json.dump(model_json, f)


-def save_keras_model(model, artifacts_dir):
+def save_keras_model(model, artifacts_dir, quantization_dtype=None):
   r"""Save a Keras model and its weights in TensorFlow.js format.

   Args:
@@ -263,6 +267,8 @@ def save_keras_model(model, artifacts_dir):
       - files containing weight values in groups, with the file name pattern
         group(\d+)-shard(\d+)of(\d+).
       If the directory does not exist, this function will attempt to create it.
+    quantization_dtype: An optional numpy dtype to quantize weights to for
+        compression. Only np.uint8 and np.uint16 are supported.

   Raises:
     ValueError: If `artifacts_dir` already exists as a file (not a directory).
@@ -277,5 +283,7 @@ def save_keras_model(model, artifacts_dir):
     raise ValueError('Path "%s" already exists as a file.' % artifacts_dir)
   elif not os.path.isdir(artifacts_dir):
     os.makedirs(artifacts_dir)
-  converter.write_artifacts(topology_json, weights_group, artifacts_dir)
+  converter.write_artifacts(
+      topology_json, weights_group, artifacts_dir,
+      quantization_dtype=quantization_dtype)
   os.remove(temp_h5_path)
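Taken together, the new keyword threads through to save_keras_model. A minimal usage sketch, assuming the keras package is installed; the toy model and output path are illustrative:

import numpy as np
import keras

from tensorflowjs.converters import keras_h5_conversion

# A toy model; any Keras model works the same way.
model = keras.models.Sequential(
    [keras.layers.Dense(1, input_shape=(4,))])

# Writes the model JSON plus weight shards, with weights quantized to
# 1 byte each (np.uint8); np.uint16 gives 2-byte quantization.
keras_h5_conversion.save_keras_model(
    model, '/tmp/tfjs_artifacts', quantization_dtype=np.uint8)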

python/tensorflowjs/converters/tf_saved_model_conversion.py

Lines changed: 19 additions & 8 deletions
@@ -86,7 +86,7 @@ def validate(nodes):
   return not_supported


-def optimize_graph(graph, output_graph):
+def optimize_graph(graph, output_graph, quantization_dtype=None):
   """Takes a Python Graph object and optimizes the graph.

   Args:
@@ -102,16 +102,20 @@ def optimize_graph(graph, output_graph):
   optimized_graph = tf_optimizer.OptimizeGraph(
       rewriter_config, meta_graph, cluster=get_cluster())

-  extract_weights(optimized_graph, output_graph)
+  extract_weights(optimized_graph, output_graph, quantization_dtype)
   return optimize_graph


-def extract_weights(graph_def, output_graph):
+def extract_weights(graph_def,
+                    output_graph,
+                    quantization_dtype=None):
   """Takes a Python GraphDef object and extract the weights.

   Args:
     graph_def: tf.GraphDef tensorflow GraphDef proto object, which represents
       the model topology
+    quantization_dtype: An optional numpy dtype to quantize weights to for
+        compression. Only np.uint8 and np.uint16 are supported.
   """
   constants = [node for node in graph_def.node if node.op == 'Const']
   constInputs = {}
@@ -140,15 +144,17 @@ def extract_weights(graph_def, output_graph):
     # Remove the binary array from tensor and save it to the external file.
     const.attr["value"].tensor.ClearField('tensor_content')

-  write_weights.write_weights([const_manifest], path)
+  write_weights.write_weights(
+      [const_manifest], path, quantization_dtype=quantization_dtype)

   file_io.atomic_write_string_to_file(
       os.path.abspath(output_graph), graph_def.SerializeToString())


 def convert_tf_session_bundle(session_bundle_dir,
                               output_node_names,
-                              output_dir):
+                              output_dir,
+                              quantization_dtype=None):
   """Freeze the Session Bundle model and check the model compatibility with
   Tensorflow.js.

@@ -163,6 +169,8 @@ def convert_tf_session_bundle(session_bundle_dir,
       - a file named 'tensorflowjs_model.pb'
       - a JSON weights manifest file named 'weights_manifest.json'
       - possibly sharded binary weight files.
+    quantization_dtype: An optional numpy dtype to quantize weights to for
+        compression. Only np.uint8 and np.uint16 are supported.
   """

   print("Tensorflow has deprecated the Session Bundle format, ",
@@ -191,15 +199,16 @@ def convert_tf_session_bundle(session_bundle_dir,
   if unsupported:
     print('Unsupported Ops in the model\n' + ', '.join(unsupported))
   else:
-    optimize_graph(graph, output_graph)
+    optimize_graph(graph, output_graph, quantization_dtype)

   # Clean up the temp files.
   if os.path.exists(frozen_file):
     os.remove(frozen_file)


 def convert_tf_saved_model(saved_model_dir, output_node_names,
-                           output_dir, saved_model_tags='serve'):
+                           output_dir, saved_model_tags='serve',
+                           quantization_dtype=None):
   """Freeze the SavedModel and check the model compatibility with Tensorflow.js.

   Optimize and convert the model to Tensorflow.js format, when the model passes
@@ -215,6 +224,8 @@ def convert_tf_saved_model(saved_model_dir, output_node_names,
       - possibly sharded binary weight files.
     saved_model_tags: string Tagset of the MetaGraphDef to load, in comma
       separated string format. Defaulted to 'serve'
+    quantization_dtype: An optional numpy dtype to quantize weights to for
+        compression. Only np.uint8 and np.uint16 are supported.
   """

   if not os.path.exists(output_dir):
@@ -240,7 +251,7 @@ def convert_tf_saved_model(saved_model_dir, output_node_names,
   if unsupported:
     print('Unsupported Ops in the model\n' + ', '.join(unsupported))
   else:
-    optimize_graph(graph, output_graph)
+    optimize_graph(graph, output_graph, quantization_dtype)

   # Clean up the temp files.
   if os.path.exists(frozen_file):
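The equivalent sketch for the SavedModel path, with the new keyword forwarded down through optimize_graph to write_weights; all paths and the node name are illustrative:

import numpy as np

from tensorflowjs.converters import tf_saved_model_conversion

# Convert a SavedModel, compressing weights to 2 bytes (uint16) each.
tf_saved_model_conversion.convert_tf_saved_model(
    '/tmp/my_saved_model',        # saved_model_dir
    'Softmax',                    # output_node_names
    '/tmp/tfjs_graph_artifacts',  # output_dir
    saved_model_tags='serve',
    quantization_dtype=np.uint16)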

python/tensorflowjs/quantization.py

Lines changed: 3 additions & 3 deletions
@@ -18,7 +18,7 @@

 import numpy as np

-_QUANTIZATION_DTYPES = [np.uint8, np.uint16]
+QUANTIZATION_BYTES_TO_DTYPES = {1: np.uint8, 2: np.uint16}


 def quantize_weights(data, quantization_dtype):
@@ -47,7 +47,7 @@ def quantize_weights(data, quantization_dtype):
   Raises:
     ValueError: if `quantization_dtype` is not a valid type.
   """
-  if quantization_dtype not in _QUANTIZATION_DTYPES:
+  if quantization_dtype not in QUANTIZATION_BYTES_TO_DTYPES.values():
     raise ValueError('Invalid `quantization_dtype`: %r' % quantization_dtype)

   # Compute the min and max for the group.
@@ -97,7 +97,7 @@ def _get_quantization_range(min_val, max_val, quantization_dtype):
   Raises:
     ValueError: if `quantization_dtype` is not a valid type.
   """
-  if quantization_dtype not in _QUANTIZATION_DTYPES:
+  if quantization_dtype not in QUANTIZATION_BYTES_TO_DTYPES.values():
     raise ValueError('Invalid `quantization_dtype`: %r' % quantization_dtype)

   quant_max = np.iinfo(quantization_dtype).max
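For intuition, a sketch of the affine min/max scheme that quant_max participates in. The helper below is illustrative, following the commit's description (a linear scale derived from the value range; the commit's "precise zero point scaling" refinement is not modeled here), not the module's exact implementation:

import numpy as np

def affine_quantize(data, quantization_dtype):
  # Map the range [min, max] linearly onto [0, np.iinfo(dtype).max].
  min_val, max_val = data.min(), data.max()
  quant_max = np.iinfo(quantization_dtype).max
  scale = (max_val - min_val) / quant_max if max_val != min_val else 1.0
  quantized = np.round((data - min_val) / scale).astype(quantization_dtype)
  return quantized, scale, min_val

weights = np.array([-1.0, 0.0, 0.5, 1.0], dtype=np.float32)
q, scale, min_val = affine_quantize(weights, np.uint8)
# Dequantization is q * scale + min_val; each value is recovered to within
# scale / 2 (about 0.004 here), at a quarter of the original storage.
recovered = q.astype(np.float32) * scale + min_val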
