diff --git a/site/en/tutorials/load_data/pandas_dataframe.ipynb b/site/en/tutorials/load_data/pandas_dataframe.ipynb index c35cc75fc7..b9d0763e06 100644 --- a/site/en/tutorials/load_data/pandas_dataframe.ipynb +++ b/site/en/tutorials/load_data/pandas_dataframe.ipynb @@ -447,79 +447,77 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "U3QDo-jwHYXc" + "id": "voDoA447GBC3" }, "outputs": [], "source": [ - "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(numeric_features), target))" + "numeric_features_dict = {key: value.to_numpy()[:, tf.newaxis] for key, value in dict(numeric_features).items()}\n", + "target_array = target.to_numpy()[:, tf.newaxis]" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "yyEERK9ldIi_" + "id": "U3QDo-jwHYXc" }, + "outputs": [], "source": [ - "Here are the first three examples from that dataset:" + "numeric_dict_ds = tf.data.Dataset.from_tensor_slices((numeric_features_dict , target_array))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "q0tDwk0VdH6D" + "id": "HL4Bf1b7M7DT" }, "outputs": [], "source": [ - "for row in numeric_dict_ds.take(3):\n", - " print(row)" + "len(numeric_features_dict)" ] }, { "cell_type": "markdown", "metadata": { - "id": "DEAM6HAFxlMy" + "id": "yyEERK9ldIi_" }, "source": [ - "### Dictionaries with Keras" + "Here are the first three examples from that dataset:" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "dnoyoWLWx07i" + "id": "q0tDwk0VdH6D" }, + "outputs": [], "source": [ - "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", - "\n", - "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." 
+ "for row in numeric_dict_ds.take(3):\n", + " print(row)" ] }, { "cell_type": "markdown", "metadata": { - "id": "5xUTrm0apDTr" + "id": "dnoyoWLWx07i" }, "source": [ - "#### 1. The Model-subclass style\n", + "Typically, Keras models and layers expect a single input tensor, but these classes can accept and return nested structures of dictionaries, tuples and tensors. These structures are known as \"nests\" (refer to the `tf.nest` module for details).\n", "\n", - "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). You directly handle the inputs, and create the outputs:" + "There are two equivalent ways you can write a Keras model that accepts a dictionary as input." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "Zc3HV99CFRWL" + "id": "5xUTrm0apDTr" }, - "outputs": [], "source": [ - " def stack_dict(inputs, fun=tf.stack):\n", - " values = []\n", - " for key in sorted(inputs.keys()):\n", - " values.append(tf.cast(inputs[key], tf.float32))\n", + "### 1. The Model-subclass style\n", "\n", - " return fun(values, axis=-1)" + "You write a subclass of `tf.keras.Model` (or `tf.keras.Layer`). 
You directly handle the inputs, and create the outputs:" ] }, { @@ -545,14 +543,23 @@ " tf.keras.layers.Dense(1)\n", " ])\n", "\n", + " self.concat = tf.keras.layers.Concatenate(axis=1)\n", + "\n", + " def _stack(self, input_dict):\n", + " values = []\n", + " for key, value in sorted(input_dict.items()):\n", + " values.append(value)\n", + "\n", + " return self.concat(values)\n", + "\n", " def adapt(self, inputs):\n", " # Stack the inputs and `adapt` the normalization layer.\n", - " inputs = stack_dict(inputs)\n", + " inputs = self._stack(inputs)\n", " self.normalizer.adapt(inputs)\n", "\n", " def call(self, inputs):\n", " # Stack the inputs\n", - " inputs = stack_dict(inputs)\n", + " inputs = self._stack(inputs)\n", " # Run them through all the layers.\n", " result = self.seq(inputs)\n", "\n", @@ -560,7 +567,7 @@ "\n", "model = MyModel()\n", "\n", - "model.adapt(dict(numeric_features))\n", + "model.adapt(numeric_features_dict)\n", "\n", "model.compile(optimizer='adam',\n", " loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n", @@ -585,7 +592,7 @@ }, "outputs": [], "source": [ - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" ] }, { @@ -626,7 +633,7 @@ "id": "QIIdxIYm13Ik" }, "source": [ - "#### 2. The Keras functional style" + "### 2. 
The Keras functional style" ] }, { @@ -653,10 +660,13 @@ }, "outputs": [], "source": [ - "x = stack_dict(inputs, fun=tf.concat)\n", + "xs = [value for key, value in sorted(inputs.items())]\n", + "\n", + "concat = tf.keras.layers.Concatenate(axis=1)\n", + "x = concat(xs)\n", "\n", "normalizer = tf.keras.layers.Normalization(axis=-1)\n", - "normalizer.adapt(stack_dict(dict(numeric_features)))\n", + "normalizer.adapt(np.concatenate([value for key, value in sorted(numeric_features_dict.items())], axis=1))  # sorted by key, the same way `xs` is built\n", "\n", "x = normalizer(x)\n", "x = tf.keras.layers.Dense(10, activation='relu')(x)\n", @@ -679,7 +689,7 @@ }, "outputs": [], "source": [ - "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True)" + "tf.keras.utils.plot_model(model, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" ] }, { @@ -699,7 +709,7 @@ }, "outputs": [], "source": [ - "model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)" + "model.fit(numeric_features_dict, target_array, epochs=5, batch_size=BATCH_SIZE)" ] }, { @@ -807,7 +817,7 @@ " else:\n", " dtype = tf.float32\n", "\n", - " inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)" + " inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)" ] }, { @@ -853,9 +863,7 @@ "\n", "for name in binary_feature_names:\n", " inp = inputs[name]\n", - " inp = inp[:, tf.newaxis]\n", - " float_value = tf.cast(inp, tf.float32)\n", - " preprocessed.append(float_value)\n", + " preprocessed.append(inp)\n", "\n", "preprocessed" ] @@ -880,7 +888,7 @@ "outputs": [], "source": [ "normalizer = tf.keras.layers.Normalization(axis=-1)\n", - "normalizer.adapt(stack_dict(dict(numeric_features)))" + "normalizer.adapt(np.concatenate([numeric_features_dict[name] for name in numeric_feature_names], axis=1))  # column order must match the numeric_feature_names order the inputs are concatenated in" ] }, { @@ -900,11 +908,11 @@ }, "outputs": [], "source": [ - "numeric_inputs = {}\n", + "numeric_inputs = []\n", "for name in numeric_feature_names:\n", - " numeric_inputs[name]=inputs[name]\n", + " 
numeric_inputs.append(inputs[name])\n", "\n", - "numeric_inputs = stack_dict(numeric_inputs)\n", + "numeric_inputs = tf.keras.layers.Concatenate(axis=-1)(numeric_inputs)\n", "numeric_normalized = normalizer(numeric_inputs)\n", "\n", "preprocessed.append(numeric_normalized)\n", @@ -986,7 +994,7 @@ " else:\n", " lookup = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode='one_hot')\n", "\n", - " x = inputs[name][:, tf.newaxis]\n", + " x = inputs[name]\n", " x = lookup(x)\n", " preprocessed.append(x)" ] @@ -1037,7 +1045,7 @@ }, "outputs": [], "source": [ - "preprocessed_result = tf.concat(preprocessed, axis=-1)\n", + "preprocessed_result = tf.keras.layers.Concatenate(axis=1)(preprocessed)\n", "preprocessed_result" ] }, @@ -1069,7 +1077,7 @@ }, "outputs": [], "source": [ - "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True)" + "tf.keras.utils.plot_model(preprocessor, rankdir=\"LR\", show_shapes=True, show_layer_names=True)" ] }, { @@ -1184,6 +1192,17 @@ " metrics=['accuracy'])" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i_Z2C2ZcZ3oC" + }, + "outputs": [], + "source": [ + "tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1259,7 +1278,6 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "pandas_dataframe.ipynb", "toc_visible": true },