Skip to content

Commit

Permalink
Cross-publish for different Spark versions
Browse files Browse the repository at this point in the history
  • Loading branch information
nightscape committed Dec 11, 2021
1 parent 206cda2 commit 71f1c51
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 13 deletions.
16 changes: 15 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,29 @@ on:
jobs:
publish:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
scala: ["2.12.12", "2.13.7"]
spark: ["2.4.8", "3.0.3", "3.1.2", "3.2.0"]
exclude:
- scala: "2.13.7"
spark: "2.4.8"
- scala: "2.13.7"
spark: "3.0.3"
- scala: "2.13.7"
spark: "3.1.2"
steps:
- uses: actions/checkout@v2.3.4
with:
fetch-depth: 0
- name: GPG import
run: openssl aes-256-cbc -pbkdf2 -k "${{ secrets.PGP_PASSPHRASE }}" -in private-key.pem.enc -out private-key.pem -d && gpg --import --no-tty --batch --yes private-key.pem
- name: Publish
- run: sbt ci-release
+ run: sbt -Dspark.testVersion=${{ matrix.spark }} ++${{ matrix.scala }} ci-release
env:
CI_RELEASE: publishSigned
CI_SNAPSHOT_RELEASE: publish
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }}
PGP_SECRET: ${{ secrets.PGP_SECRET }}
Expand Down
9 changes: 4 additions & 5 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name := "spark-excel"

organization := "com.crealytics"

- crossScalaVersions := Seq("2.12.15")
+ crossScalaVersions := Seq("2.12.15", "2.13.7")

scalaVersion := crossScalaVersions.value.head

Expand Down Expand Up @@ -47,10 +47,9 @@ libraryDependencies ++= Seq(
"org.scalatestplus" %% "scalacheck-1-15" % "3.2.10.0" % Test,
"org.scalacheck" %% "scalacheck" % "1.15.4" % Test,
"com.github.alexarchambault" %% "scalacheck-shapeless_1.15" % "1.3.0" % Test,
- "com.github.nightscape" %% "spark-testing-base" % "c2bc44caf4" % Test,
// "com.holdenkarau" %% "spark-testing-base" % s"${testSparkVersion.value}_0.7.4" % Test,
- "org.scalamock" %% "scalamock-scalatest-support" % "3.6.0" % Test
- )
+ "org.scalamock" %% "scalamock" % "5.1.0" % Test
+ ) ++ (if (scalaVersion.value.startsWith("2.12")) Seq("com.github.nightscape" %% "spark-testing-base" % "9496d55" % Test) else Seq())

// Custom source layout for Spark Data Source API 2
Compile / unmanagedSourceDirectories := {
Expand Down Expand Up @@ -83,7 +82,7 @@ Compile / unmanagedSourceDirectories := {

Test / fork := true
Test / parallelExecution := false
- javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:MaxPermSize=2048M", "-XX:+CMSClassUnloadingEnabled")
+ javaOptions ++= Seq("-Xms512M", "-Xmx2048M")

publishMavenStyle := true

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ trait AreaDataLocator extends DataLocator {
val colInd = columnIndices(workbook)
sheet.iterator.asScala
.filter(r => rowInd.contains(r.getRowNum))
- .map(_.cellIterator().asScala.filter(c => colInd.contains(c.getColumnIndex)).to[Vector])
+ .map(_.cellIterator().asScala.filter(c => colInd.contains(c.getColumnIndex)).toVector)
}

override def toSheet(
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/com/crealytics/spark/excel/ExcelRelation.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ case class ExcelRelation(
with PrunedScan {
type SheetRow = Seq[Cell]

- lazy val excerpt: List[SheetRow] = workbookReader.withWorkbook(dataLocator.readFrom(_).take(excerptSize).to[List])
+ lazy val excerpt: List[SheetRow] = workbookReader.withWorkbook(dataLocator.readFrom(_).take(excerptSize).toList)

lazy val headerColumnForName = headerColumns.map(c => c.name -> c).toMap

Expand Down Expand Up @@ -74,7 +74,7 @@ case class ExcelRelation(
None
}.get
)
- val result = rows.to[Vector]
+ val result = rows.toVector
parallelize(result.map(Row.fromSeq))
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/com/crealytics/spark/excel/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ package object excel {
val (startRow, endRow) = table.cellRange.rowRange
val (startColumn, endColumn) = table.cellRange.columnRange
val tableRows = sheet.rows.filter(r => r.index.exists((startRow to endRow).contains))
- tableRows.map(_.cells.filter(_.index.exists((startColumn to endColumn).contains)).map(_.value).to[Seq])
+ tableRows.map(_.cells.filter(_.index.exists((startColumn to endColumn).contains)).map(_.value).toSeq)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ trait DataLocator {
colInd
.filter(_ < r.getLastCellNum())
.map(r.getCell(_, MissingCellPolicy.CREATE_NULL_AS_BLANK))
- .to[Vector]
+ .toVector
}
})

Expand All @@ -52,7 +52,7 @@ trait DataLocator {
colInd
.filter(_ < r.getLastCellNum())
.map(r.getCell(_, MissingCellPolicy.CREATE_NULL_AS_BLANK))
- .to[Vector]
+ .toVector
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class ExcelInferSchema(val options: ExcelOptions) extends Serializable {
if (i < next.length) inferField(rowSoFar(i), next(i))
else compatibleType(rowSoFar(i), NullType).getOrElse(StringType)
)
- .to[Vector]
+ .toVector

private def mergeRowTypes(first: Vector[DataType], second: Vector[DataType]): Vector[DataType] = {
first.zipAll(second, NullType, NullType).map { case (a, b) =>
Expand Down

0 comments on commit 71f1c51

Please sign in to comment.