From f5ddd80f4bede64ea523ca2f50cf4befe5f9512e Mon Sep 17 00:00:00 2001 From: Alec Schaefer Date: Thu, 3 Oct 2024 15:01:17 -0400 Subject: [PATCH] Fixes for re-running migration script on same destination db (#246) * add reset flag * add --checksum to rsync options --- op-chain-ops/cmd/celo-migrate/db.go | 18 ++++++++++ op-chain-ops/cmd/celo-migrate/main.go | 34 +++++++++++++------ op-chain-ops/cmd/celo-migrate/non-ancients.go | 13 ++++--- 3 files changed, 50 insertions(+), 15 deletions(-) diff --git a/op-chain-ops/cmd/celo-migrate/db.go b/op-chain-ops/cmd/celo-migrate/db.go index 4bade2af8413..180b9e541dd0 100644 --- a/op-chain-ops/cmd/celo-migrate/db.go +++ b/op-chain-ops/cmd/celo-migrate/db.go @@ -113,3 +113,21 @@ func getHeadHeader(dbpath string) (*types.Header, error) { } return headHeader, nil } + +func cleanupNonAncientDb(dir string) error { + log.Info("Cleaning up non-ancient data in new db") + + files, err := os.ReadDir(dir) + if err != nil { + return fmt.Errorf("failed to read directory: %w", err) + } + for _, file := range files { + if file.Name() != "ancient" { + err := os.RemoveAll(filepath.Join(dir, file.Name())) + if err != nil { + return fmt.Errorf("failed to remove file: %w", err) + } + } + } + return nil +} diff --git a/op-chain-ops/cmd/celo-migrate/main.go b/op-chain-ops/cmd/celo-migrate/main.go index f335c0cc8d99..e8d1b87c7a8b 100644 --- a/op-chain-ops/cmd/celo-migrate/main.go +++ b/op-chain-ops/cmd/celo-migrate/main.go @@ -94,6 +94,11 @@ var ( Usage: "Memory limit in MiB, should be set lower than the available amount of memory in your system to prevent out of memory errors", Value: 7500, } + reset = &cli.BoolFlag{ + Name: "reset", + Usage: "Delete everything in the destination directory aside from /ancients. This is useful if you need to re-run the full migration but do not want to repeat the lengthy ancients migration. 
If you'd like to reset the entire destination directory, you can delete it manually.", + Value: false, + } preMigrationFlags = []cli.Flag{ oldDBPathFlag, @@ -101,6 +106,7 @@ var ( batchSizeFlag, bufferSizeFlag, memoryLimitFlag, + reset, } fullMigrationFlags = append( preMigrationFlags, @@ -116,11 +122,12 @@ var ( ) type preMigrationOptions struct { - oldDBPath string - newDBPath string - batchSize uint64 - bufferSize uint64 - memoryLimit int64 + oldDBPath string + newDBPath string + batchSize uint64 + bufferSize uint64 + memoryLimit int64 + resetNonAncients bool } type stateMigrationOptions struct { @@ -141,11 +148,12 @@ type fullMigrationOptions struct { func parsePreMigrationOptions(ctx *cli.Context) preMigrationOptions { return preMigrationOptions{ - oldDBPath: ctx.String(oldDBPathFlag.Name), - newDBPath: ctx.String(newDBPathFlag.Name), - batchSize: ctx.Uint64(batchSizeFlag.Name), - bufferSize: ctx.Uint64(bufferSizeFlag.Name), - memoryLimit: ctx.Int64(memoryLimitFlag.Name), + oldDBPath: ctx.String(oldDBPathFlag.Name), + newDBPath: ctx.String(newDBPathFlag.Name), + batchSize: ctx.Uint64(batchSizeFlag.Name), + bufferSize: ctx.Uint64(bufferSizeFlag.Name), + memoryLimit: ctx.Int64(memoryLimitFlag.Name), + resetNonAncients: ctx.Bool(reset.Name), } } @@ -270,6 +278,12 @@ func runPreMigration(opts preMigrationOptions) ([]*rawdb.NumberHash, uint64, err return nil, 0, fmt.Errorf("failed to create new db path: %w", err) } + if opts.resetNonAncients { + if err = cleanupNonAncientDb(opts.newDBPath); err != nil { + return nil, 0, fmt.Errorf("failed to cleanup non-ancient db: %w", err) + } + } + var numAncientsNewBefore uint64 var numAncientsNewAfter uint64 var strayAncientBlocks []*rawdb.NumberHash diff --git a/op-chain-ops/cmd/celo-migrate/non-ancients.go b/op-chain-ops/cmd/celo-migrate/non-ancients.go index 6627deb15abd..5ad184e2dd24 100644 --- a/op-chain-ops/cmd/celo-migrate/non-ancients.go +++ b/op-chain-ops/cmd/celo-migrate/non-ancients.go @@ -24,17 +24,18 @@ func 
copyDbExceptAncients(oldDbPath, newDbPath string) error { // Convert output to string outputStr := string(output) + opts := []string{"-v", "-a", "--exclude=ancient", "--checksum", "--delete"} + // Check for supported options - var cmd *exec.Cmd // Prefer --info=progress2 over --progress if strings.Contains(outputStr, "--info") { - cmd = exec.Command("rsync", "-v", "-a", "--info=progress2", "--exclude=ancient", "--delete", oldDbPath+"/", newDbPath) + opts = append(opts, "--info=progress2") } else if strings.Contains(outputStr, "--progress") { - cmd = exec.Command("rsync", "-v", "-a", "--progress", "--exclude=ancient", "--delete", oldDbPath+"/", newDbPath) - } else { - cmd = exec.Command("rsync", "-v", "-a", "--exclude=ancient", "--delete", oldDbPath+"/", newDbPath) + opts = append(opts, "--progress") } + cmd := exec.Command("rsync", append(opts, oldDbPath+"/", newDbPath)...) + // rsync copies any file with a different timestamp or size. // // '--exclude=ancient' excludes the ancient directory from the copy @@ -46,6 +47,8 @@ func copyDbExceptAncients(oldDbPath, newDbPath string) error { // // '--whole-file' This is the default when both the source and destination are specified as local paths, which they are here (oldDbPath and newDbPath). // This option disables rsync’s delta-transfer algorithm, which causes all transferred files to be sent whole. The delta-transfer algorithm is normally used when the destination is a remote system. + // + // '--checksum' This forces rsync to compare the checksums of all files to determine if they are the same. This slows down the transfer but ensures that source and destination directories end up with the same contents (excluding /ancients). log.Info("Running rsync command", "command", cmd.String()) cmd.Stdout = os.Stdout