Skip to content

Commit

Permalink
fix(restore): don't blacklist not restored tables
Browse files Browse the repository at this point in the history
The hard-coded blacklist was error-prone and difficult to test
(e.g. a Scylla cluster might have some leftover cdc tables
from previous releases that are not present in fresh clusters).
Removing it also allows for a more unified mechanism of excluding
raft managed tables (replicated locally) from being restored.

Fixes #3998
  • Loading branch information
Michal-Leszczynski committed Sep 17, 2024
1 parent 8f2e302 commit 7666c9e
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 41 deletions.
19 changes: 19 additions & 0 deletions pkg/scyllaclient/client_scylla.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,25 @@ func (c *Client) KeyspacesByType(ctx context.Context) (map[KeyspaceType][]string
return out, nil
}

// AllTables returns the tables reported by ColumnFamilyNameGet, keyed by keyspace name.
// Every payload entry must have the '<keyspace>:<table>' form (exactly one colon),
// otherwise an error is returned and no partial result is exposed.
func (c *Client) AllTables(ctx context.Context) (map[string][]string, error) {
	params := &operations.ColumnFamilyNameGetParams{Context: ctx}
	resp, err := c.scyllaOps.ColumnFamilyNameGet(params)
	if err != nil {
		return nil, err
	}

	byKeyspace := make(map[string][]string)
	for _, entry := range resp.Payload {
		// Validate the separator count up front so that the split below is unambiguous.
		if colons := strings.Count(entry, ":"); colons != 1 {
			return nil, errors.Errorf("GET /column_family/name: expected exactly 1 colon in '<keyspace>:<table>', got %d", colons)
		}
		keyspace, table, _ := strings.Cut(entry, ":")
		byKeyspace[keyspace] = append(byKeyspace[keyspace], table)
	}
	return byKeyspace, nil
}

// Tables returns a slice of table names in a given keyspace.
func (c *Client) Tables(ctx context.Context, keyspace string) ([]string, error) {
resp, err := c.scyllaOps.ColumnFamilyNameGet(&operations.ColumnFamilyNameGetParams{Context: ctx})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
"*.*",
"!system.*",
"!system_schema.*",
"!system_distributed_everywhere.cdc_generation_descriptions",
"!system_distributed_everywhere.cdc_generation_descriptions_v2",
"!system_distributed.cdc_streams_descriptions",
"!system_distributed.cdc_streams_descriptions_v2",
"!system_distributed.cdc_generation_timestamps",
"!*.*_scylla_cdc_log",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
"*.*",
"!system.*",
"!system_schema.*",
"!system_distributed_everywhere.cdc_generation_descriptions",
"!system_distributed_everywhere.cdc_generation_descriptions_v2",
"!system_distributed.cdc_streams_descriptions",
"!system_distributed.cdc_streams_descriptions_v2",
"!system_distributed.cdc_generation_timestamps",
"!*.*_scylla_cdc_log",
Expand Down
2 changes: 0 additions & 2 deletions pkg/service/restore/testdata/get_target/tables.target.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
"!ks1.table2",
"!system.*",
"!system_schema.*",
"!system_distributed_everywhere.cdc_generation_descriptions",
"!system_distributed_everywhere.cdc_generation_descriptions_v2",
"!system_distributed.cdc_streams_descriptions",
"!system_distributed.cdc_streams_descriptions_v2",
"!system_distributed.cdc_generation_timestamps",
"!*.*_scylla_cdc_log",
Expand Down
108 changes: 73 additions & 35 deletions pkg/service/restore/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"path"
"regexp"
"slices"
"strings"
"time"

Expand Down Expand Up @@ -68,43 +69,12 @@ func (w *worker) initTarget(ctx context.Context, properties json.RawMessage) err
t.Keyspace = []string{"system_schema"}
}
if t.RestoreTables {
// Skip restoration of those tables regardless of the '--keyspace' param
doNotRestore := []string{
"system", // system.* tables are recreated on every cluster and shouldn't even be backed-up
"system_schema", // Schema restoration is only possible with '--restore-schema' flag
// Don't restore tables related to CDC.
// Currently, it is forbidden to alter those tables, so SM wouldn't be able to ensure their data consistency.
// Moreover, those tables usually contain data with small TTL value,
// so their contents would probably expire right after restore has ended.
"system_distributed_everywhere.cdc_generation_descriptions",
"system_distributed_everywhere.cdc_generation_descriptions_v2",
"system_distributed.cdc_streams_descriptions",
"system_distributed.cdc_streams_descriptions_v2",
"system_distributed.cdc_generation_timestamps",
"*.*_scylla_cdc_log", // All regular CDC tables have "_scylla_cdc_log" suffix
}
if err := IsRestoreAuthAndServiceLevelsFromSStablesSupported(ctx, w.client); err != nil {
w.logger.Info(ctx, "Restore of auth and service levels will be skipped", "error", err)
doNotRestore = append(doNotRestore,
"system_auth",
"system_distributed.service_levels",
)
}

for _, ks := range doNotRestore {
t.Keyspace = append(t.Keyspace, "!"+ks)
}

// Filter out all materialized views and secondary indexes. They are not a part of restore procedure at the moment.
// See https://docs.scylladb.com/stable/operating-scylla/procedures/backup-restore/restore.html#repeat-the-following-steps-for-each-node-in-the-cluster.
views, err := query.GetAllViews(w.clusterSession)
notRestored, err := skipRestorePatterns(ctx, w.client, w.clusterSession)
if err != nil {
return errors.Wrap(err, "get cluster views")
}

for _, viewName := range views.List() {
t.Keyspace = append(t.Keyspace, "!"+viewName)
return errors.Wrap(err, "find not restored tables")
}
w.logger.Info(ctx, "Extended excluded tables pattern", "pattern", notRestored)
t.Keyspace = append(t.Keyspace, notRestored...)
}

status, err := w.client.Status(ctx)
Expand Down Expand Up @@ -183,6 +153,74 @@ func (w *worker) initTarget(ctx context.Context, properties json.RawMessage) err
return nil
}

// skipRestorePatterns builds the exclusion patterns (each prefixed with '!')
// for keyspaces and tables that must not be restored. It collects, in this order:
//   - keyspaces that are not listed as non-local,
//   - system_auth and system_distributed.service_levels when their restore
//     is reported as unsupported for the Scylla version,
//   - system cdc tables from non-local, non-user keyspaces,
//   - the user cdc log tables pattern,
//   - all views returned by query.GetAllViews.
//
// Keyspaces of the tables map are visited in sorted order and view names are sorted,
// so the result does not depend on Go's randomized map iteration order.
// An error is returned when any of the cluster queries fails or when the auth and
// service levels support check fails for a reason other than an unsupported version.
func skipRestorePatterns(ctx context.Context, client *scyllaclient.Client, session gocqlx.Session) ([]string, error) {
	keyspaces, err := client.KeyspacesByType(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "get keyspaces by type")
	}
	tables, err := client.AllTables(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "get all tables")
	}

	// Sets give constant time membership checks instead of repeated slice scans.
	nonLocal := make(map[string]struct{}, len(keyspaces[scyllaclient.KeyspaceTypeNonLocal]))
	for _, ks := range keyspaces[scyllaclient.KeyspaceTypeNonLocal] {
		nonLocal[ks] = struct{}{}
	}
	user := make(map[string]struct{}, len(keyspaces[scyllaclient.KeyspaceTypeUser]))
	for _, ks := range keyspaces[scyllaclient.KeyspaceTypeUser] {
		user[ks] = struct{}{}
	}

	var skip []string
	// Skip local data.
	// Note that this also covers the raft based tables (e.g. system and system_schema).
	for _, ks := range keyspaces[scyllaclient.KeyspaceTypeAll] {
		if _, ok := nonLocal[ks]; !ok {
			skip = append(skip, ks)
		}
	}

	// Skip outdated tables.
	// Note that even though system_auth is not used in Scylla 6.0,
	// it might still be present there (leftover after upgrade).
	// That's why SM should always skip known outdated tables so that backups
	// from older Scylla versions don't cause unexpected problems.
	if err := IsRestoreAuthAndServiceLevelsFromSStablesSupported(ctx, client); err != nil {
		if !errors.Is(err, ErrRestoreAuthAndServiceLevelsUnsupportedScyllaVersion) {
			return nil, errors.Wrap(err, "check auth and service levels restore support")
		}
		skip = append(skip, "system_auth", "system_distributed.service_levels")
	}

	// Skip system cdc tables.
	// Keyspace names are sorted first, as ranging over the map directly
	// would append the patterns in a different order on every call.
	systemCDCTableRegex := regexp.MustCompile(`(^|_)cdc(_|$)`)
	tableKeyspaces := make([]string, 0, len(tables))
	for ks := range tables {
		tableKeyspaces = append(tableKeyspaces, ks)
	}
	slices.Sort(tableKeyspaces)
	for _, ks := range tableKeyspaces {
		// Local keyspaces were already excluded
		if _, ok := nonLocal[ks]; !ok {
			continue
		}
		// Here we only skip system cdc tables
		if _, ok := user[ks]; ok {
			continue
		}
		for _, t := range tables[ks] {
			if systemCDCTableRegex.MatchString(t) {
				skip = append(skip, ks+"."+t)
			}
		}
	}

	// Skip user cdc tables
	skip = append(skip, "*.*_scylla_cdc_log")

	// Skip views
	views, err := query.GetAllViews(session)
	if err != nil {
		return nil, errors.Wrap(err, "get cluster views")
	}
	// Sort only the freshly appended tail, so that the slice returned
	// by views.List() itself is never reordered.
	firstView := len(skip)
	skip = append(skip, views.List()...)
	slices.Sort(skip[firstView:])

	// Exclude collected patterns
	out := make([]string, 0, len(skip))
	for _, p := range skip {
		out = append(out, "!"+p)
	}
	return out, nil
}

// ErrRestoreSchemaUnsupportedScyllaVersion signals that the restore schema procedure
// is not safe for the Scylla configuration in use.
var ErrRestoreSchemaUnsupportedScyllaVersion = errors.Errorf(
	"restore into cluster with given ScyllaDB version and consistent_cluster_management is not supported. See https://manager.docs.scylladb.com/stable/restore/restore-schema.html for a workaround.",
)
Expand Down

0 comments on commit 7666c9e

Please sign in to comment.