diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a8c353f1..d9910c3e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -8,6 +8,18 @@ on:
 jobs:
   publish:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        scala: ["2.12.12", "2.13.7"]
+        spark: ["2.4.8", "3.0.3", "3.1.2", "3.2.0"]
+        exclude:
+          - scala: "2.13.7"
+            spark: "2.4.8"
+          - scala: "2.13.7"
+            spark: "3.0.3"
+          - scala: "2.13.7"
+            spark: "3.1.2"
     steps:
       - uses: actions/checkout@v2.3.4
         with:
@@ -15,8 +27,10 @@ jobs:
       - name: GPG import
         run: openssl aes-256-cbc -pbkdf2 -k "${{ secrets.PGP_PASSPHRASE }}" -in private-key.pem.enc -out private-key.pem -d && gpg --import --no-tty --batch --yes private-key.pem
       - name: Publish
-        run: sbt ci-release
+        run: sbt -Dspark.testVersion=${{ matrix.spark }} ++${{ matrix.scala }} ci-release
         env:
+          CI_RELEASE: publishSigned
+          CI_SNAPSHOT_RELEASE: publish
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }}
           PGP_SECRET: ${{ secrets.PGP_SECRET }}
diff --git a/build.sbt b/build.sbt
index 8f91bcb0..cd71a8fb 100644
--- a/build.sbt
+++ b/build.sbt
@@ -2,7 +2,7 @@ name := "spark-excel"
 
 organization := "com.crealytics"
 
-crossScalaVersions := Seq("2.12.15")
+crossScalaVersions := Seq("2.12.15", "2.13.7")
 
 scalaVersion := crossScalaVersions.value.head
 
@@ -47,10 +47,9 @@ libraryDependencies ++= Seq(
   "org.scalatestplus" %% "scalacheck-1-15" % "3.2.10.0" % Test,
   "org.scalacheck" %% "scalacheck" % "1.15.4" % Test,
   "com.github.alexarchambault" %% "scalacheck-shapeless_1.15" % "1.3.0" % Test,
-  "com.github.nightscape" %% "spark-testing-base" % "c2bc44caf4" % Test,
   // "com.holdenkarau" %% "spark-testing-base" % s"${testSparkVersion.value}_0.7.4" % Test,
-  "org.scalamock" %% "scalamock-scalatest-support" % "3.6.0" % Test
-)
+  "org.scalamock" %% "scalamock" % "5.1.0" % Test
+) ++ (if (scalaVersion.value.startsWith("2.12")) Seq("com.github.nightscape" %% "spark-testing-base" % "9496d55" % Test) else Seq())
 
 // Custom source layout for Spark Data Source API 2
 Compile / unmanagedSourceDirectories := {
@@ -83,7 +82,7 @@ Compile / unmanagedSourceDirectories := {
 Test / fork := true
 Test / parallelExecution := false
 
-javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:MaxPermSize=2048M", "-XX:+CMSClassUnloadingEnabled")
+javaOptions ++= Seq("-Xms512M", "-Xmx2048M")
 
 publishMavenStyle := true
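The build change above gates the jitpack fork of `spark-testing-base` to Scala 2.12 with a string-prefix test on `scalaVersion`. As a sketch of an equivalent formulation (an alternative, not part of this patch), sbt's `CrossVersion.partialVersion` expresses the same branch on the parsed binary version:

```scala
// Sketch only: same effect as the startsWith("2.12") check in the patch,
// but branching on the parsed (major, minor) Scala version instead of a string prefix.
libraryDependencies ++= (CrossVersion.partialVersion(scalaVersion.value) match {
  case Some((2, 12)) =>
    Seq("com.github.nightscape" %% "spark-testing-base" % "9496d55" % Test)
  case _ =>
    Seq.empty // no 2.13-compatible build of this fork to pull in
})
```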
diff --git a/src/main/scala/com/crealytics/spark/excel/DataLocator.scala b/src/main/scala/com/crealytics/spark/excel/DataLocator.scala
index d295957a..1ed53993 100644
--- a/src/main/scala/com/crealytics/spark/excel/DataLocator.scala
+++ b/src/main/scala/com/crealytics/spark/excel/DataLocator.scala
@@ -90,7 +90,7 @@ trait AreaDataLocator extends DataLocator {
     val colInd = columnIndices(workbook)
     sheet.iterator.asScala
       .filter(r => rowInd.contains(r.getRowNum))
-      .map(_.cellIterator().asScala.filter(c => colInd.contains(c.getColumnIndex)).to[Vector])
+      .map(_.cellIterator().asScala.filter(c => colInd.contains(c.getColumnIndex)).toVector)
   }
 
   override def toSheet(
diff --git a/src/main/scala/com/crealytics/spark/excel/ExcelRelation.scala b/src/main/scala/com/crealytics/spark/excel/ExcelRelation.scala
index c3ccd3e8..35fccc25 100644
--- a/src/main/scala/com/crealytics/spark/excel/ExcelRelation.scala
+++ b/src/main/scala/com/crealytics/spark/excel/ExcelRelation.scala
@@ -29,7 +29,7 @@ case class ExcelRelation(
     with PrunedScan {
   type SheetRow = Seq[Cell]
 
-  lazy val excerpt: List[SheetRow] = workbookReader.withWorkbook(dataLocator.readFrom(_).take(excerptSize).to[List])
+  lazy val excerpt: List[SheetRow] = workbookReader.withWorkbook(dataLocator.readFrom(_).take(excerptSize).toList)
 
   lazy val headerColumnForName = headerColumns.map(c => c.name -> c).toMap
 
@@ -74,7 +74,7 @@ case class ExcelRelation(
         None
       }.get
     )
-    val result = rows.to[Vector]
+    val result = rows.toVector
     parallelize(result.map(Row.fromSeq))
   }
 }
diff --git a/src/main/scala/com/crealytics/spark/excel/package.scala b/src/main/scala/com/crealytics/spark/excel/package.scala
index 6885e0a9..339e59fb 100644
--- a/src/main/scala/com/crealytics/spark/excel/package.scala
+++ b/src/main/scala/com/crealytics/spark/excel/package.scala
@@ -51,7 +51,7 @@ package object excel {
       val (startRow, endRow) = table.cellRange.rowRange
       val (startColumn, endColumn) = table.cellRange.columnRange
      val tableRows = sheet.rows.filter(r => r.index.exists((startRow to endRow).contains))
-      tableRows.map(_.cells.filter(_.index.exists((startColumn to endColumn).contains)).map(_.value).to[Seq])
+      tableRows.map(_.cells.filter(_.index.exists((startColumn to endColumn).contains)).map(_.value).toSeq)
     }
   }
 
diff --git a/src/main/scala/com/crealytics/spark/v2/excel/DataLocator.scala b/src/main/scala/com/crealytics/spark/v2/excel/DataLocator.scala
index e32c214f..c55dfc44 100644
--- a/src/main/scala/com/crealytics/spark/v2/excel/DataLocator.scala
+++ b/src/main/scala/com/crealytics/spark/v2/excel/DataLocator.scala
@@ -41,7 +41,7 @@ trait DataLocator {
         colInd
           .filter(_ < r.getLastCellNum())
           .map(r.getCell(_, MissingCellPolicy.CREATE_NULL_AS_BLANK))
-          .to[Vector]
+          .toVector
       }
     })
 
@@ -52,7 +52,7 @@ trait DataLocator {
         colInd
           .filter(_ < r.getLastCellNum())
          .map(r.getCell(_, MissingCellPolicy.CREATE_NULL_AS_BLANK))
-          .to[Vector]
+          .toVector
       )
   }
 }
diff --git a/src/main/scala/com/crealytics/spark/v2/excel/ExcelInferSchema.scala b/src/main/scala/com/crealytics/spark/v2/excel/ExcelInferSchema.scala
index 5ceedf74..302c51a6 100644
--- a/src/main/scala/com/crealytics/spark/v2/excel/ExcelInferSchema.scala
+++ b/src/main/scala/com/crealytics/spark/v2/excel/ExcelInferSchema.scala
@@ -68,7 +68,7 @@ class ExcelInferSchema(val options: ExcelOptions) extends Serializable {
         if (i < next.length) inferField(rowSoFar(i), next(i))
         else compatibleType(rowSoFar(i), NullType).getOrElse(StringType)
       )
-      .to[Vector]
+      .toVector
 
   private def mergeRowTypes(first: Vector[DataType], second: Vector[DataType]): Vector[DataType] = {
     first.zipAll(second, NullType, NullType).map { case (a, b) =>
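Every source hunk above is the same mechanical migration: Scala 2.12's `CanBuildFrom`-based `.to[Coll]` syntax was removed from the 2.13 collections library, while the concrete converters (`.toVector`, `.toList`, `.toSeq`) compile under both versions. A minimal standalone sketch of the difference (illustrative, not taken from the patch):

```scala
// Minimal sketch: why the patch rewrites .to[Vector] as .toVector.
object ToVectorMigration extends App {
  val row: Seq[Int] = Seq(1, 2, 3)

  val cells = row.toVector      // compiles on Scala 2.12 and 2.13
  // val old = row.to[Vector]   // 2.12 only: CanBuildFrom overload is gone in 2.13
  // val neu = row.to(Vector)   // 2.13 style (also on 2.12 via scala-collection-compat)

  println(cells) // Vector(1, 2, 3)
}
```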