Skip to content

Commit

Permalink
Add UTF8 abstraction in the TASTy format (#19090)
Browse files Browse the repository at this point in the history
We add a `Utf8` encoding to the grammar. This should not be confused
with the `UTF8` name tag. This mistake was made in the `Comment` format.
We also add corresponding `writeUtf8` and `readUtf8` methods to the
`TastyBuffer`.

This is also useful for #18948
  • Loading branch information
bishabosha authored Nov 27, 2023
2 parents b1b0372 + 486af2f commit 78c3721
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,8 @@ object CommentPickler:

def pickleComment(addr: Addr, comment: Comment): Unit =
if addr != NoAddr then
val bytes = comment.raw.getBytes(StandardCharsets.UTF_8).nn
val length = bytes.length
buf.writeAddr(addr)
buf.writeNat(length)
buf.writeBytes(bytes, length)
buf.writeUtf8(comment.raw)
buf.writeLongInt(comment.span.coords)

def traverse(x: Any): Unit = x match
Expand Down
10 changes: 3 additions & 7 deletions compiler/src/dotty/tools/dotc/core/tasty/CommentUnpickler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,9 @@ class CommentUnpickler(reader: TastyReader) {
val comments = new HashMap[Addr, Comment]
while (!isAtEnd) {
val addr = readAddr()
val length = readNat()
if (length > 0) {
val bytes = readBytes(length)
val position = new Span(readLongInt())
val rawComment = new String(bytes, StandardCharsets.UTF_8)
comments(addr) = Comment(position, rawComment)
}
val rawComment = readUtf8()
val position = new Span(readLongInt())
comments(addr) = Comment(position, rawComment)
}
comments
}
Expand Down
15 changes: 4 additions & 11 deletions compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,8 @@ import collection.mutable
import core.Symbols.ClassSymbol
import Decorators.*

object TastyPickler {

private val versionStringBytes = {
val compilerString = s"Scala ${config.Properties.simpleVersionString}"
compilerString.getBytes(java.nio.charset.StandardCharsets.UTF_8)
}

}
object TastyPickler:
private val versionString = s"Scala ${config.Properties.simpleVersionString}"

class TastyPickler(val rootCls: ClassSymbol) {

Expand Down Expand Up @@ -48,13 +42,12 @@ class TastyPickler(val rootCls: ClassSymbol) {
val uuidHi: Long = otherSectionHashes.fold(0L)(_ ^ _)

val headerBuffer = {
val buf = new TastyBuffer(header.length + TastyPickler.versionStringBytes.length + 32)
val buf = new TastyBuffer(header.length + TastyPickler.versionString.length + 32)
for (ch <- header) buf.writeByte(ch.toByte)
buf.writeNat(MajorVersion)
buf.writeNat(MinorVersion)
buf.writeNat(ExperimentalVersion)
buf.writeNat(TastyPickler.versionStringBytes.length)
buf.writeBytes(TastyPickler.versionStringBytes, TastyPickler.versionStringBytes.length)
buf.writeUtf8(TastyPickler.versionString)
buf.writeUncompressedLong(uuidLow)
buf.writeUncompressedLong(uuidHi)
buf
Expand Down
11 changes: 11 additions & 0 deletions tasty/src/dotty/tools/tasty/TastyBuffer.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package dotty.tools.tasty

import util.Util.dble
import java.nio.charset.StandardCharsets

object TastyBuffer {

Expand Down Expand Up @@ -115,6 +116,16 @@ class TastyBuffer(initialSize: Int) {
writeBytes(bytes, 8)
}

/** Write `x` as a `Utf8` entry, i.e. `Nat UTF8-CodePoint*`:
 *  first a `Nat` holding the number of bytes in the UTF-8 encoding of `x`,
 *  then the encoded bytes themselves.
 */
def writeUtf8(x: String): Unit = {
  val encoded = x.getBytes(StandardCharsets.UTF_8)
  // The prefix counts encoded bytes, not characters of `x`.
  writeNat(encoded.length)
  writeBytes(encoded, encoded.length)
}

// -- Address handling --------------------------------------------

/** Write natural number `x` right-adjusted in a field of `width` bytes
Expand Down
7 changes: 4 additions & 3 deletions tasty/src/dotty/tools/tasty/TastyFormat.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Micro-syntax:
Nat = LongInt -- non-negative value, fits in an Int without overflow
Digit = 0 | ... | 127
StopDigit = 128 | ... | 255 -- value = digit - 128
Utf8 = Nat UTF8-CodePoint*
```
Macro-format:
Expand All @@ -24,12 +25,12 @@ Macro-format:
nameTable_Length Name* Section*
Header = 0x5CA1AB1F
UUID = Byte*16 -- random UUID
VersionString = Length UTF8-CodePoint* -- string that represents the compiler that produced the TASTy
VersionString = Utf8 -- string that represents the compiler that produced the TASTy
Section = NameRef Length Bytes
Length = Nat -- length of rest of entry in bytes
Name = UTF8 Length UTF8-CodePoint*
Name = UTF8 Utf8
QUALIFIED Length qualified_NameRef selector_NameRef -- A.B
EXPANDED Length qualified_NameRef selector_NameRef -- A$$B, semantically a NameKinds.ExpandedName
EXPANDPREFIX Length qualified_NameRef selector_NameRef -- A$B, prefix of expanded name, see NamedKinds.ExpandPrefixName
Expand Down Expand Up @@ -265,7 +266,7 @@ All elements of a position section are serialized as Ints
Standard Section: "Comments" Comment*
```none
Comment = UTF8 LongInt // Raw comment's bytes encoded as UTF-8, followed by the comment's coordinates.
Comment = Utf8 LongInt // Raw comment's bytes encoded as UTF-8, followed by the comment's coordinates.
```
Standard Section: "Attributes" Attribute*
Expand Down
10 changes: 10 additions & 0 deletions tasty/src/dotty/tools/tasty/TastyReader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package dotty.tools.tasty
import collection.mutable

import TastyBuffer._
import java.nio.charset.StandardCharsets

/** A byte array buffer that can be filled with bytes or natural numbers in TASTY format,
* and that supports reading and patching addresses represented as natural numbers.
Expand Down Expand Up @@ -104,6 +105,15 @@ class TastyReader(val bytes: Array[Byte], start: Int, end: Int, val base: Int =
x
}

/** Read a `Utf8` entry, i.e. `Nat UTF8-CodePoint*`:
 *  a `Nat` byte count followed by that many bytes, decoded as UTF-8.
 *  A byte count of zero yields the empty string without reading any further bytes.
 */
def readUtf8(): String =
  readNat() match {
    case 0         => ""
    case byteCount => new String(readBytes(byteCount), StandardCharsets.UTF_8)
  }

/** Read a natural number and return as a NameRef */
def readNameRef(): NameRef = NameRef(readNat())

Expand Down

0 comments on commit 78c3721

Please sign in to comment.