From ea38542a2e544489d9619590d3f9516571832332 Mon Sep 17 00:00:00 2001
From: christianknoepfle <98997149+christianknoepfle@users.noreply.github.com>
Date: Tue, 1 Aug 2023 23:42:10 +0200
Subject: [PATCH] Support Spark 3.4 (#754)

* first try adding 3.4 support
* moved main logic of ExcelOptions to ExcelOptionsTrait
* cleanup imports
* removed accidental changes
---
 .github/workflows/ci.yml                      |   2 +-
 README.md                                     |   2 +-
 build.sc                                      |  47 ++++++--
 .../spark/excel/v2/ExcelOptions.scala         |  38 +++++++
 .../spark/excel/v2/ExcelOptions.scala         |  38 +++++++
 .../v2/ExcelPartitionReaderFactory.scala      |   0
 .../crealytics/spark/excel/v2/ExcelScan.scala |   0
 .../spark/excel/v2/ExcelScanBuilder.scala     |   0
 .../spark/excel/v2/ExcelOptions.scala         |  41 +++++++
 .../v2/ExcelPartitionReaderFactory.scala      | 100 ++++++++++++++++++
 ...lOptions.scala => ExcelOptionsTrait.scala} |  53 +++++-----
 11 files changed, 282 insertions(+), 39 deletions(-)
 create mode 100644 src/main/2.4/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala
 create mode 100644 src/main/3.0_3.1_3.2_3.3/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala
 rename src/main/{3.0_and_up => 3.0_3.1_3.2_3.3}/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala (100%)
 rename src/main/{3.3 => 3.3_and_up}/scala/com/crealytics/spark/excel/v2/ExcelScan.scala (100%)
 rename src/main/{3.3 => 3.3_and_up}/scala/com/crealytics/spark/excel/v2/ExcelScanBuilder.scala (100%)
 create mode 100644 src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala
 create mode 100644 src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala
 rename src/main/scala/com/crealytics/spark/excel/v2/{ExcelOptions.scala => ExcelOptionsTrait.scala} (81%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b4525ef5..c1e444f1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,7 +31,7 @@ jobs:
         os: [ubuntu-latest]
         scala: [2.12.18, 2.13.11]
         java: [temurin@8]
-        spark: [2.4.1, 2.4.7, 2.4.8, 3.0.1, 3.0.3, 3.1.1, 3.1.2, 3.1.3, 3.2.4, 3.3.1]
+        spark: [2.4.1, 2.4.7, 2.4.8, 3.0.1, 3.0.3, 3.1.1, 3.1.2, 3.1.3, 3.2.4, 3.3.2, 3.4.1]
       exclude:
         - spark: 2.4.1
           scala: 2.13.11
diff --git a/README.md b/README.md
index 8bcd845e..7ad050f7 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ This library requires Spark 2.0+.
List of spark versions, those are automatically tested: ``` -spark: ["2.4.1", "2.4.7", "2.4.8", "3.0.1", "3.0.3", "3.1.1", "3.1.2", "3.2.1"] +spark: ["2.4.1", "2.4.7", "2.4.8", "3.0.1", "3.0.3", "3.1.1", "3.1.2", "3.2.4", "3.3.2", "3.4.1"] ``` For more detail, please refer to project CI: [ci.yml](https://github.com/crealytics/spark-excel/blob/main/.github/workflows/ci.yml#L10) diff --git a/build.sc b/build.sc index 77a21163..eb179b50 100644 --- a/build.sc +++ b/build.sc @@ -4,25 +4,30 @@ import coursier.maven.MavenRepository import mill._, scalalib._, publish._ import mill.modules.Assembly._ -class SparkModule(_scalaVersion: String, sparkVersion: String) extends SbtModule with CiReleaseModule { outer => +class SparkModule(_scalaVersion: String, sparkVersion: String) extends SbtModule with CiReleaseModule { + outer => override def scalaVersion = _scalaVersion + override def millSourcePath = super.millSourcePath / os.up / os.up / os.up // Custom source layout for Spark Data Source API 2 - val sparkVersionSpecificSources = if (sparkVersion >= "3.3.0") { - Seq("scala", "3.3/scala", "3.0_and_up/scala", "3.1_and_up/scala", "3.2_and_up/scala") + val sparkVersionSpecificSources = if (sparkVersion >= "3.4.0") { + Seq("scala", "3.0_and_up/scala", "3.1_and_up/scala", "3.2_and_up/scala", "3.3_and_up/scala", "3.4_and_up/scala") + } else if (sparkVersion >= "3.3.0") { + Seq("scala", "3.0_3.1_3.2_3.3/scala", "3.0_and_up/scala", "3.1_and_up/scala", "3.2_and_up/scala", "3.3_and_up/scala") } else if (sparkVersion >= "3.2.0") { - Seq("scala", "3.0_3.1_3.2/scala", "3.0_and_up/scala", "3.1_and_up/scala", "3.2_and_up/scala") + Seq("scala", "3.0_3.1_3.2/scala", "3.0_3.1_3.2_3.3/scala", "3.0_and_up/scala", "3.1_and_up/scala", "3.2_and_up/scala") } else if (sparkVersion >= "3.1.0") { - Seq("scala", "3.1/scala", "3.0_3.1/scala", "3.0_3.1_3.2/scala", "3.0_and_up/scala", "3.1_and_up/scala") + Seq("scala", "3.1/scala", "3.0_3.1/scala", "3.0_3.1_3.2_3.3/scala", "3.0_3.1_3.2/scala", "3.0_and_up/scala", "3.1_and_up/scala") } else if (sparkVersion >= "3.0.0") { - Seq("scala", "3.0/scala", "3.0_3.1/scala", "3.0_3.1_3.2/scala", "3.0_and_up/scala") + Seq("scala", "3.0/scala", "3.0_3.1/scala", "3.0_3.1_3.2_3.3/scala", "3.0_3.1_3.2/scala", "3.0_and_up/scala") } else if (sparkVersion >= "2.4.0") { Seq("scala", "2.4/scala") } else { throw new UnsupportedOperationException(s"sparkVersion ${sparkVersion} is not supported") } + override def sources = T.sources { super.sources() ++ sparkVersionSpecificSources.map(s => PathRef(millSourcePath / "src" / "main" / os.RelPath(s))) } @@ -30,7 +35,9 @@ class SparkModule(_scalaVersion: String, sparkVersion: String) extends SbtModule override def docSources = T.sources(Seq[PathRef]()) override def artifactName = "spark-excel" + override def publishVersion = s"${sparkVersion}_${super.publishVersion()}" + def pomSettings = PomSettings( description = "A Spark plugin for reading and writing Excel files", organization = "com.crealytics", @@ -45,6 +52,7 @@ class SparkModule(_scalaVersion: String, sparkVersion: String) extends SbtModule Rule.Relocate("org.apache.commons.io.**", "shadeio.commons.io.@1"), Rule.Relocate("org.apache.commons.compress.**", "shadeio.commons.compress.@1") ) + override def extraPublish = Seq(PublishInfo(assembly(), classifier = None, ivyConfig = "compile")) val sparkDeps = Agg( @@ -52,6 +60,7 @@ class SparkModule(_scalaVersion: String, sparkVersion: String) extends SbtModule ivy"org.apache.spark::spark-sql:$sparkVersion", ivy"org.apache.spark::spark-hive:$sparkVersion" ) + 
override def compileIvyDeps = if (sparkVersion < "3.3.0") { sparkDeps ++ Agg(ivy"org.slf4j:slf4j-api:1.7.36".excludeOrg("stax")) } else { @@ -59,6 +68,7 @@ class SparkModule(_scalaVersion: String, sparkVersion: String) extends SbtModule } val poiVersion = "5.2.3" + override def ivyDeps = { val base = Agg( ivy"org.apache.poi:poi:$poiVersion", @@ -84,15 +94,25 @@ class SparkModule(_scalaVersion: String, sparkVersion: String) extends SbtModule base } } + object test extends Tests with SbtModule with TestModule.ScalaTest { override def millSourcePath = super.millSourcePath + override def sources = T.sources { Seq(PathRef(millSourcePath / "src" / "test" / "scala")) } - override def resources = T.sources { Seq(PathRef(millSourcePath / "src" / "test" / "resources")) } + + override def resources = T.sources { + Seq(PathRef(millSourcePath / "src" / "test" / "resources")) + } + def scalaVersion = outer.scalaVersion() - def repositoriesTask = T.task { super.repositoriesTask() ++ Seq(MavenRepository("https://jitpack.io")) } + + def repositoriesTask = T.task { + super.repositoriesTask() ++ Seq(MavenRepository("https://jitpack.io")) + } + def ivyDeps = sparkDeps ++ Agg( ivy"org.typelevel::cats-core:2.9.0", ivy"org.scalatest::scalatest:3.2.16", @@ -111,11 +131,16 @@ val spark24 = List("2.4.1", "2.4.7", "2.4.8") val spark30 = List("3.0.1", "3.0.3") val spark31 = List("3.1.1", "3.1.2", "3.1.3") val spark32 = List("3.2.4") -val spark33 = List("3.3.1") +val spark33 = List("3.3.2") +val spark34 = List("3.4.1") + +val crossMatrix = { -val crossMatrix = - (spark24 ++ spark30 ++ spark31 ++ spark32 ++ spark33).map(spark => (scala212, spark)) ++ (spark32 ++ spark33).map( + (spark24 ++ spark30 ++ spark31 ++ spark32 ++ spark33 ++ spark34).map(spark => (scala212, spark)) ++ (spark32 ++ spark33 ++ spark34).map( spark => (scala213, spark) ) + // (spark34).map(spark => (scala212, spark)) +} + object `spark-excel` extends Cross[SparkModule](crossMatrix: _*) {} diff --git a/src/main/2.4/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala b/src/main/2.4/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala new file mode 100644 index 00000000..9c76d2ca --- /dev/null +++ b/src/main/2.4/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala @@ -0,0 +1,38 @@ +/* + * Copyright 2022 Martin Mauch (@nightscape) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.crealytics.spark.excel.v2 + +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.internal.SQLConf + +class ExcelOptions( + @transient + val parameters: CaseInsensitiveMap[String], + val defaultTimeZoneId: String, + val defaultColumnNameOfCorruptRecord: String + ) extends ExcelOptionsTrait with Serializable { + // all parameter handling is implemented in ExcelOptionsTrait + + def this(parameters: Map[String, String], defaultTimeZoneId: String) = { + this(CaseInsensitiveMap(parameters), defaultTimeZoneId, SQLConf.get.columnNameOfCorruptRecord) + } + + def this(parameters: Map[String, String], defaultTimeZoneId: String, defaultColumnNameOfCorruptRecord: String) = { + this(CaseInsensitiveMap(parameters), defaultTimeZoneId, defaultColumnNameOfCorruptRecord) + } + +} \ No newline at end of file diff --git a/src/main/3.0_3.1_3.2_3.3/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala b/src/main/3.0_3.1_3.2_3.3/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala new file mode 100644 index 00000000..9c76d2ca --- /dev/null +++ b/src/main/3.0_3.1_3.2_3.3/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala @@ -0,0 +1,38 @@ +/* + * Copyright 2022 Martin Mauch (@nightscape) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.crealytics.spark.excel.v2 + +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.internal.SQLConf + +class ExcelOptions( + @transient + val parameters: CaseInsensitiveMap[String], + val defaultTimeZoneId: String, + val defaultColumnNameOfCorruptRecord: String + ) extends ExcelOptionsTrait with Serializable { + // all parameter handling is implemented in ExcelOptionsTrait + + def this(parameters: Map[String, String], defaultTimeZoneId: String) = { + this(CaseInsensitiveMap(parameters), defaultTimeZoneId, SQLConf.get.columnNameOfCorruptRecord) + } + + def this(parameters: Map[String, String], defaultTimeZoneId: String, defaultColumnNameOfCorruptRecord: String) = { + this(CaseInsensitiveMap(parameters), defaultTimeZoneId, defaultColumnNameOfCorruptRecord) + } + +} \ No newline at end of file diff --git a/src/main/3.0_and_up/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala b/src/main/3.0_3.1_3.2_3.3/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala similarity index 100% rename from src/main/3.0_and_up/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala rename to src/main/3.0_3.1_3.2_3.3/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala diff --git a/src/main/3.3/scala/com/crealytics/spark/excel/v2/ExcelScan.scala b/src/main/3.3_and_up/scala/com/crealytics/spark/excel/v2/ExcelScan.scala similarity index 100% rename from src/main/3.3/scala/com/crealytics/spark/excel/v2/ExcelScan.scala rename to src/main/3.3_and_up/scala/com/crealytics/spark/excel/v2/ExcelScan.scala diff --git a/src/main/3.3/scala/com/crealytics/spark/excel/v2/ExcelScanBuilder.scala b/src/main/3.3_and_up/scala/com/crealytics/spark/excel/v2/ExcelScanBuilder.scala similarity index 100% rename from src/main/3.3/scala/com/crealytics/spark/excel/v2/ExcelScanBuilder.scala rename to src/main/3.3_and_up/scala/com/crealytics/spark/excel/v2/ExcelScanBuilder.scala diff --git a/src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala b/src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala new file mode 100644 index 00000000..6a777d33 --- /dev/null +++ b/src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala @@ -0,0 +1,41 @@ +/* + * Copyright 2022 Martin Mauch (@nightscape) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.crealytics.spark.excel.v2 + +import org.apache.spark.sql.catalyst.FileSourceOptions +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.internal.SQLConf + + +class ExcelOptions( + @transient + val parameters: CaseInsensitiveMap[String], + val defaultTimeZoneId: String, + val defaultColumnNameOfCorruptRecord: String + ) extends FileSourceOptions(parameters) + with ExcelOptionsTrait { + + + def this(parameters: Map[String, String], defaultTimeZoneId: String) = { + this(CaseInsensitiveMap(parameters), defaultTimeZoneId, SQLConf.get.columnNameOfCorruptRecord) + } + + def this(parameters: Map[String, String], defaultTimeZoneId: String, defaultColumnNameOfCorruptRecord: String) = { + this(CaseInsensitiveMap(parameters), defaultTimeZoneId, defaultColumnNameOfCorruptRecord) + } + +} diff --git a/src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala b/src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala new file mode 100644 index 00000000..5d9130e6 --- /dev/null +++ b/src/main/3.4_and_up/scala/com/crealytics/spark/excel/v2/ExcelPartitionReaderFactory.scala @@ -0,0 +1,100 @@ +/* + * Copyright 2022 Martin Mauch (@nightscape) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.crealytics.spark.excel.v2 + +import org.apache.hadoop.conf.Configuration +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.read.PartitionReader +import org.apache.spark.sql.execution.datasources.PartitionedFile +import org.apache.spark.sql.execution.datasources.v2._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.SerializableConfiguration +import scala.util.control.NonFatal + +/** A factory used to create Excel readers. + * + * @param sqlConf + * SQL configuration. + * @param broadcastedConf + * Broadcasted serializable Hadoop Configuration. + * @param dataSchema + * Schema of Excel files. + * @param readDataSchema + * Required data schema in the batch scan. + * @param partitionSchema + * Schema of partitions. + * @param options + * Options for parsing Excel files. 
+ */ +case class ExcelPartitionReaderFactory( + sqlConf: SQLConf, + broadcastedConf: Broadcast[SerializableConfiguration], + dataSchema: StructType, + readDataSchema: StructType, + partitionSchema: StructType, + options: ExcelOptions, + filters: Seq[Filter] + ) extends FilePartitionReaderFactory { + + override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = { + val conf = broadcastedConf.value.value + val actualDataSchema = + StructType(dataSchema.filterNot(_.name == options.columnNameOfCorruptRecord)) + val actualReadDataSchema = + StructType(readDataSchema.filterNot(_.name == options.columnNameOfCorruptRecord)) + val parser = new ExcelParser(actualDataSchema, actualReadDataSchema, options, filters) + val headerChecker = + new ExcelHeaderChecker(actualReadDataSchema, options, source = s"Excel file: ${file.filePath}") + val iter = readFile(conf, file, parser, headerChecker, readDataSchema) + val partitionReader = new SparkExcelPartitionReaderFromIterator(iter) + new PartitionReaderWithPartitionValues(partitionReader, readDataSchema, partitionSchema, file.partitionValues) + } + + private def readFile( + conf: Configuration, + file: PartitionedFile, + parser: ExcelParser, + headerChecker: ExcelHeaderChecker, + requiredSchema: StructType + ): SheetData[InternalRow] = { + val excelHelper = ExcelHelper(options) + val sheetData = excelHelper.getSheetData(conf, file.filePath.toUri) + try { + SheetData( + ExcelParser.parseIterator(sheetData.rowIterator, parser, headerChecker, requiredSchema), + sheetData.resourcesToClose + ) + } catch { + case NonFatal(t) => { + sheetData.close() + throw t + } + } + } + +} + +private class SparkExcelPartitionReaderFromIterator(sheetData: SheetData[InternalRow]) + extends PartitionReaderFromIterator[InternalRow](sheetData.rowIterator) { + override def close(): Unit = { + super.close() + sheetData.close() + } +} diff --git a/src/main/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala b/src/main/scala/com/crealytics/spark/excel/v2/ExcelOptionsTrait.scala similarity index 81% rename from src/main/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala rename to src/main/scala/com/crealytics/spark/excel/v2/ExcelOptionsTrait.scala index 9adafbbb..586eb0c6 100644 --- a/src/main/scala/com/crealytics/spark/excel/v2/ExcelOptions.scala +++ b/src/main/scala/com/crealytics/spark/excel/v2/ExcelOptionsTrait.scala @@ -28,20 +28,11 @@ import java.time.ZoneId import java.util.Locale import scala.annotation.nowarn -class ExcelOptions( - @transient - val parameters: CaseInsensitiveMap[String], - defaultTimeZoneId: String, - defaultColumnNameOfCorruptRecord: String -) extends Serializable { - - def this(parameters: Map[String, String], defaultTimeZoneId: String) = { - this(CaseInsensitiveMap(parameters), defaultTimeZoneId, SQLConf.get.columnNameOfCorruptRecord) - } +trait ExcelOptionsTrait extends Serializable { - def this(parameters: Map[String, String], defaultTimeZoneId: String, defaultColumnNameOfCorruptRecord: String) = { - this(CaseInsensitiveMap(parameters), defaultTimeZoneId, defaultColumnNameOfCorruptRecord) - } + val parameters: CaseInsensitiveMap[String] + val defaultTimeZoneId: String + val defaultColumnNameOfCorruptRecord: String private def getInt(paramName: String): Option[Int] = { val paramValue = parameters.get(paramName) @@ -49,7 +40,9 @@ class ExcelOptions( case None => None case Some(null) => None case Some(value) => - try { Some(value.toInt) } + try { + Some(value.toInt) + } catch { case _: NumberFormatException => throw new 
RuntimeException(s"$paramName should be an integer. Found $value") @@ -59,10 +52,18 @@ class ExcelOptions( private def getBool(paramName: String, default: Boolean): Boolean = { val param = parameters.getOrElse(paramName, default.toString) - if (param == null) { default } - else if (param.toLowerCase(Locale.ROOT) == "true") { true } - else if (param.toLowerCase(Locale.ROOT) == "false") { false } - else { throw new Exception(s"$paramName flag can be true or false") } + if (param == null) { + default + } + else if (param.toLowerCase(Locale.ROOT) == "true") { + true + } + else if (param.toLowerCase(Locale.ROOT) == "false") { + false + } + else { + throw new Exception(s"$paramName flag can be true or false") + } } /* Parsing mode, how to handle corrupted record. Default to permissive */ @@ -92,8 +93,8 @@ class ExcelOptions( val excerptSize = getInt("excerptSize") /** Forcibly apply the specified or inferred schema to data files. If the option is enabled, headers of ABC files will - * be ignored. - */ + * be ignored. + */ val enforceSchema = getBool("enforceSchema", default = true) /* Name for column of corrupted records */ @@ -142,19 +143,19 @@ class ExcelOptions( } /** Optional parameter for using a streaming reader which can help with big files (will fail if used with xls format - * files) - */ + * files) + */ val maxRowsInMemory = getInt("maxRowsInMemory") // scalastyle:off /** Optional parameter for maxByteArraySize - */ + * href="https://poi.apache.org/apidocs/5.0/org/apache/poi/util/IOUtils.html#setByteArrayMaxOverride-int-">maxByteArraySize + */ val maxByteArraySize = getInt("maxByteArraySize") // scalastyle:on /** Optional parameter for specifying the number of bytes at which a zip entry is regarded as too large for holding in - * memory and the data is put in a temp file instead - useful for sheets with a lot of data - */ + * memory and the data is put in a temp file instead - useful for sheets with a lot of data + */ val tempFileThreshold = getInt("tempFileThreshold") }
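
For anyone smoke-testing the new Spark 3.4 build locally, here is a minimal read sketch. It is not part of the patch: the object name and file path are illustrative, and it assumes Spark 3.4.1 with a matching spark-excel assembly on the classpath. The `excel` format name is the library's V2 data source touched by this patch, and `maxRowsInMemory` is one of the options parsed by `ExcelOptionsTrait` above.

```scala
import org.apache.spark.sql.SparkSession

object ExcelReadSmokeTest {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[*]")
      .appName("spark-excel-3.4-smoke-test")
      .getOrCreate()

    // Read an .xlsx file through the V2 data source this patch adapts for 3.4.
    val df = spark.read
      .format("excel")
      .option("header", "true")          // treat the first row as column names
      .option("maxRowsInMemory", "1000") // streaming reader, parsed in ExcelOptionsTrait
      .load("data/sample.xlsx")          // illustrative path

    df.printSchema()
    df.show(5)
    spark.stop()
  }
}
```

The matching artifact can be built first with something like `mill 'spark-excel[2.12.18,3.4.1].assembly'`, resolving against the `(scala, spark)` pairs that the new `spark34` entries add to `crossMatrix` in `build.sc` above.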