diff --git a/NEWS.md b/NEWS.md
index f278570c4..2567b3077 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,7 +2,9 @@
 
 - Default TF version installed by `install_keras()` is now 2.13.
 
-- `layer_batch_normalization()` updated signature, with changes to options for distributed training.
+- Updated layers:
+  - `layer_batch_normalization()` updated signature, with changes to options for distributed training.
+  - `layer_embedding()` gains a `sparse` argument.
 
 - Fixed deadlock when an R generator was passed to `fit()`, `predict()`, and other endpoints.
 
diff --git a/R/layers-embedding.R b/R/layers-embedding.R
index 3bcccbab7..db366dd2d 100644
--- a/R/layers-embedding.R
+++ b/R/layers-embedding.R
@@ -1,56 +1,72 @@
-#' Turns positive integers (indexes) into dense vectors of fixed size.
-#'
-#' For example, `list(4L, 20L) -> list(c(0.25, 0.1), c(0.6, -0.2))` This layer
-#' can only be used as the first layer in a model.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param input_dim int > 0. Size of the vocabulary, i.e. maximum integer
-#' index + 1.
-#' @param output_dim int >= 0. Dimension of the dense embedding.
-#' @param embeddings_initializer Initializer for the `embeddings` matrix.
-#' @param embeddings_regularizer Regularizer function applied to the
-#' `embeddings` matrix.
-#' @param activity_regularizer activity_regularizer
-#' @param embeddings_constraint Constraint function applied to the `embeddings`
-#' matrix.
-#' @param mask_zero Whether or not the input value 0 is a special "padding"
-#' value that should be masked out. This is useful when using recurrent
-#' layers, which may take variable length inputs. If this is `TRUE` then all
-#' subsequent layers in the model need to support masking or an exception will
-#' be raised. If mask_zero is set to TRUE, as a consequence, index 0 cannot be
-#' used in the vocabulary (input_dim should equal size of vocabulary + 1).
-#' @param input_length Length of input sequences, when it is constant. This
-#' argument is required if you are going to connect `Flatten` then `Dense`
-#' layers upstream (without it, the shape of the dense outputs cannot be
-#' computed).
+#' Turns positive integers (indexes) into dense vectors of fixed size
+#'
+#' @details
+#' For example, `list(4L, 20L) -> list(c(0.25, 0.1), c(0.6, -0.2))`.
+#'
+#' This layer can only be used on positive integer inputs of a fixed range. The
+#' `layer_text_vectorization()`, `layer_string_lookup()`,
+#' and `layer_integer_lookup()` preprocessing layers can help prepare
+#' inputs for an `Embedding` layer.
+#'
+#' This layer accepts `tf.Tensor`, `tf.RaggedTensor` and `tf.SparseTensor`
+#' input.
+#'
+#' @inheritParams layer_dense
+#'
+#' @param input_dim Integer. Size of the vocabulary,
+#' i.e. maximum integer index + 1.
+#'
+#' @param output_dim Integer. Dimension of the dense embedding.
+#'
+#' @param embeddings_initializer Initializer for the `embeddings`
+#' matrix (see `keras.initializers`).
+#'
+#' @param embeddings_regularizer Regularizer function applied to
+#' the `embeddings` matrix (see `keras.regularizers`).
+#'
+#' @param activity_regularizer Regularizer function applied to
+#' the output of the layer (its "activation").
+#'
+#' @param embeddings_constraint Constraint function applied to
+#' the `embeddings` matrix (see `keras.constraints`).
+#'
+#' @param mask_zero Boolean, whether or not the input value 0 is a special
+#' "padding" value that should be masked out. This is useful when using
+#' recurrent layers which may take variable length input. If this is
+#' `TRUE`, then all subsequent layers in the model need to support masking
+#' or an exception will be raised. If `mask_zero` is set to `TRUE`, as a
+#' consequence, index 0 cannot be used in the vocabulary (`input_dim` should
+#' equal size of vocabulary + 1).
+#'
+#' @param input_length Length of input sequences, when it is constant.
+#' This argument is required if you are going to connect
+#' `Flatten` then `Dense` layers upstream
+#' (without it, the shape of the dense outputs cannot be computed).
+#'
+#' @param sparse If `TRUE`, calling this layer returns a `tf.SparseTensor`. If `FALSE`,
+#' the layer returns a dense `tf.Tensor`. For an entry with no features in
+#' a sparse tensor (entry with value 0), the embedding vector of index 0 is
+#' returned by default.
+#' @param ... standard layer arguments.
 #'
 #' @section Input shape: 2D tensor with shape: `(batch_size, sequence_length)`.
 #'
 #' @section Output shape: 3D tensor with shape: `(batch_size, sequence_length,
 #' output_dim)`.
 #'
-#' @section References:
-#' - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](https://arxiv.org/abs/1512.05287)
-#'
+#' @seealso
+#'   +  <https://www.tensorflow.org/versions/r2.13/api_docs/python/keras/src/layers/core/embedding/Embedding>
+#'   +  <https://keras.io/api/layers>
+#'
 #' @export
-layer_embedding <- function(object, input_dim, output_dim, embeddings_initializer = "uniform", embeddings_regularizer = NULL,
-                            activity_regularizer = NULL, embeddings_constraint = NULL, mask_zero = FALSE, input_length = NULL,
-                            batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-  create_layer(keras$layers$Embedding, object, list(
-    input_dim = as.integer(input_dim),
-    output_dim = as.integer(output_dim),
-    embeddings_initializer = embeddings_initializer,
-    embeddings_regularizer = embeddings_regularizer,
-    activity_regularizer = activity_regularizer,
-    embeddings_constraint = embeddings_constraint,
-    mask_zero = mask_zero,
-    input_length = if (!is.null(input_length)) as.integer(input_length) else NULL,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-}
+layer_embedding <-
+  function(object, input_dim, output_dim, embeddings_initializer = "uniform",
+           embeddings_regularizer = NULL, activity_regularizer = NULL,
+           embeddings_constraint = NULL, mask_zero = FALSE, input_length = NULL,
+           sparse = FALSE, ...)
+  {
+    args <- capture_args(match.call(), list(
+      input_dim = as.integer,
+      output_dim = as.integer,
+      input_length = as_nullable_integer,
+      batch_size = as_nullable_integer
+    ), ignore = "object")
+    create_layer(keras$layers$Embedding, object, args)
+  }
diff --git a/man/layer_embedding.Rd b/man/layer_embedding.Rd
index 441b4d7db..7b362eb99 100644
--- a/man/layer_embedding.Rd
+++ b/man/layer_embedding.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/layers-embedding.R
 \name{layer_embedding}
 \alias{layer_embedding}
-\title{Turns positive integers (indexes) into dense vectors of fixed size.}
+\title{Turns positive integers (indexes) into dense vectors of fixed size}
 \usage{
 layer_embedding(
   object,
@@ -14,62 +14,58 @@ layer_embedding(
   embeddings_constraint = NULL,
   mask_zero = FALSE,
   input_length = NULL,
-  batch_size = NULL,
-  name = NULL,
-  trainable = NULL,
-  weights = NULL
+  sparse = FALSE,
+  ...
 )
 }
 \arguments{
 \item{object}{What to compose the new \code{Layer} instance with. Typically a
 Sequential model or a Tensor (e.g., as returned by \code{layer_input()}).
 The return value depends on \code{object}. If \code{object} is:
 \itemize{
 \item missing or \code{NULL}, the \code{Layer} instance is returned.
 \item a \code{Sequential} model, the model with an additional layer is returned.
 \item a Tensor, the output tensor from \code{layer_instance(object)} is returned.
 }}
 
-\item{input_dim}{int > 0. Size of the vocabulary, i.e. maximum integer
-index + 1.}
-
-\item{output_dim}{int >= 0. Dimension of the dense embedding.}
-
-\item{embeddings_initializer}{Initializer for the \code{embeddings} matrix.}
-
-\item{embeddings_regularizer}{Regularizer function applied to the
-\code{embeddings} matrix.}
+\item{input_dim}{Integer. Size of the vocabulary,
+i.e. maximum integer index + 1.}
 
-\item{activity_regularizer}{activity_regularizer}
+\item{output_dim}{Integer. Dimension of the dense embedding.}
 
-\item{embeddings_constraint}{Constraint function applied to the \code{embeddings}
-matrix.}
+\item{embeddings_initializer}{Initializer for the \code{embeddings}
+matrix (see \code{keras.initializers}).}
 
-\item{mask_zero}{Whether or not the input value 0 is a special "padding"
-value that should be masked out. This is useful when using recurrent
-layers, which may take variable length inputs. If this is \code{TRUE} then all
-subsequent layers in the model need to support masking or an exception will
-be raised. If mask_zero is set to TRUE, as a consequence, index 0 cannot be
-used in the vocabulary (input_dim should equal size of vocabulary + 1).}
+\item{embeddings_regularizer}{Regularizer function applied to
+the \code{embeddings} matrix (see \code{keras.regularizers}).}
+
+\item{activity_regularizer}{Regularizer function applied to
+the output of the layer (its "activation").}
 
-\item{input_length}{Length of input sequences, when it is constant. This
-argument is required if you are going to connect \code{Flatten} then \code{Dense}
-layers upstream (without it, the shape of the dense outputs cannot be
-computed).}
+\item{embeddings_constraint}{Constraint function applied to
+the \code{embeddings} matrix (see \code{keras.constraints}).}
 
-\item{batch_size}{Fixed batch size for layer}
+\item{mask_zero}{Boolean, whether or not the input value 0 is a special
+"padding" value that should be masked out. This is useful when using
+recurrent layers which may take variable length input. If this is
+\code{TRUE}, then all subsequent layers in the model need to support masking
+or an exception will be raised. If \code{mask_zero} is set to \code{TRUE}, as a
+consequence, index 0 cannot be used in the vocabulary (\code{input_dim} should
+equal size of vocabulary + 1).}
 
-\item{name}{An optional name string for the layer. Should be unique in a
-model (do not reuse the same name twice). It will be autogenerated if it
-isn't provided.}
+\item{input_length}{Length of input sequences, when it is constant.
+This argument is required if you are going to connect
+\code{Flatten} then \code{Dense} layers upstream
+(without it, the shape of the dense outputs cannot be computed).}
 
-\item{trainable}{Whether the layer weights will be updated during training.}
+\item{sparse}{If \code{TRUE}, calling this layer returns a \code{tf.SparseTensor}. If \code{FALSE},
+the layer returns a dense \code{tf.Tensor}. For an entry with no features in
+a sparse tensor (entry with value 0), the embedding vector of index 0 is
+returned by default.}
 
-\item{weights}{Initial weights for layer.}
+\item{...}{standard layer arguments.}
 }
 \description{
-For example, \code{list(4L, 20L) -> list(c(0.25, 0.1), c(0.6, -0.2))} This layer
-can only be used as the first layer in a model.
+Turns positive integers (indexes) into dense vectors of fixed size
+}
+\details{
+For example, \code{list(4L, 20L) -> list(c(0.25, 0.1), c(0.6, -0.2))}.
+
+This layer can only be used on positive integer inputs of a fixed range. The
+\code{layer_text_vectorization()}, \code{layer_string_lookup()},
+and \code{layer_integer_lookup()} preprocessing layers can help prepare
+inputs for an \code{Embedding} layer.
+
+This layer accepts \code{tf.Tensor}, \code{tf.RaggedTensor} and \code{tf.SparseTensor}
+input.
 }
 \section{Input shape}{
 2D tensor with shape: \verb{(batch_size, sequence_length)}.
@@ -79,10 +75,9 @@ can only be used as the first layer in a model.
 3D tensor with shape: \verb{(batch_size, sequence_length, output_dim)}.
 }
-\section{References}{
-
+\seealso{
 \itemize{
-\item \href{https://arxiv.org/abs/1512.05287}{A Theoretically Grounded Application of Dropout in Recurrent Neural Networks}
+\item \url{https://www.tensorflow.org/versions/r2.13/api_docs/python/keras/src/layers/core/embedding/Embedding}
+\item \url{https://keras.io/api/layers}
 }
 }
-
diff --git a/tools/find-api-diffs.R b/tools/find-api-diffs.R
index 8969d6323..b33c9cbb6 100644
--- a/tools/find-api-diffs.R
+++ b/tools/find-api-diffs.R
@@ -4,6 +4,7 @@
 library(dplyr, warn.conflicts = FALSE)
 library(reticulate)
 library(envir)
+use_virtualenv("r-keras")
 
 # keras::install_keras(envname = "tf-2.6-cpu") # tools/setup-test-envs.R
 # use_miniconda("tf-2.6-cpu", required=TRUE)
@@ -20,7 +21,7 @@ py_to_r_python.builtin.dict_items <- function(x) {
 
 attach_eval({
   inspect <- reticulate::import("inspect")
-  # import_from(magrittr, `%<>%`)
+  import_from(magrittr, `%<>%`)
 
   `%error%` <- function(x, y) tryCatch(x, error = function(e) y)
 
diff --git a/tools/make-layer-wrapper.R b/tools/make-layer-wrapper.R
index 06f4413b8..d066f5f07 100755
--- a/tools/make-layer-wrapper.R
+++ b/tools/make-layer-wrapper.R
@@ -2,7 +2,7 @@
 library(tidyverse)
 library(tensorflow)
 library(keras)
-
+reticulate::use_virtualenv("r-keras")
 stopifnot(interactive())
 
 inspect <- reticulate::import("inspect")
@@ -161,6 +161,8 @@
 
+new_layer_wrapper(keras$layers$Embedding)
+
 new_layer_wrapper(keras$layers$BatchNormalization)
 
 ## example usage:
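
Not part of the diff: a quick smoke test for the regenerated wrapper. This is a minimal sketch, assuming keras with TF 2.13 is installed in the `r-keras` virtualenv that the tooling above activates (e.g. via `keras::install_keras()`); the model and data below are made up for illustration.

```r
library(keras)
reticulate::use_virtualenv("r-keras")

# Embed a vocabulary of 1000 token ids into 8-d vectors; with mask_zero = TRUE,
# index 0 is reserved as the padding value (so input_dim counts vocabulary + 1).
model <- keras_model_sequential() %>%
  layer_embedding(input_dim = 1000, output_dim = 8, mask_zero = TRUE)

# Two integer sequences of length 4, zero-padded at the end.
x <- rbind(c(4L, 20L, 7L, 0L),
           c(9L, 2L, 0L, 0L))

out <- model(x)
dim(out)  # 2 4 8 -- (batch_size, sequence_length, output_dim)

# New in this PR: sparse = TRUE makes the layer return a tf.SparseTensor
# instead of a dense tf.Tensor (called without an object, this returns
# the Layer instance itself).
emb_sparse <- layer_embedding(input_dim = 1000, output_dim = 8, sparse = TRUE)
```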