Skip to content

Commit

Permalink
Feature/transform fields (#294)
Browse files Browse the repository at this point in the history
* Implement overwrite function in extract-json feature.
* Added test to verify mapped properties
* Added copyright
  • Loading branch information
pravinbhat authored Aug 29, 2024
1 parent 0b1f50e commit 7d69c6b
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 3 deletions.
7 changes: 7 additions & 0 deletions src/main/java/com/datastax/cdm/feature/ExtractJson.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class ExtractJson extends AbstractFeature {

private String targetColumnName = "";
private Integer targetColumnIndex = -1;
private boolean overwriteTarget = false;

@Override
public boolean loadProperties(IPropertyHelper helper) {
Expand All @@ -49,6 +50,8 @@ public boolean loadProperties(IPropertyHelper helper) {

originColumnName = getColumnName(helper, KnownProperties.EXTRACT_JSON_ORIGIN_COLUMN_NAME);
targetColumnName = getColumnName(helper, KnownProperties.EXTRACT_JSON_TARGET_COLUMN_MAPPING);
overwriteTarget = helper.getBoolean(KnownProperties.EXTRACT_JSON_TARGET_OVERWRITE);

// Convert columnToFieldMapping to targetColumnName and originJsonFieldName
if (!targetColumnName.isBlank()) {
String[] parts = targetColumnName.split("\\:");
Expand Down Expand Up @@ -146,6 +149,10 @@ public String getTargetColumnName() {
return isEnabled ? targetColumnName : "";
}

public boolean overwriteTarget() {
return overwriteTarget;
}

private String getColumnName(IPropertyHelper helper, String colName) {
String columnName = CqlTable.unFormatName(helper.getString(colName));
return (null == columnName) ? "" : columnName;
Expand Down
11 changes: 9 additions & 2 deletions src/main/java/com/datastax/cdm/job/DiffJobSession.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public class DiffJobSession extends CopyJobSession {
boolean logDebug = logger.isDebugEnabled();
boolean logTrace = logger.isTraceEnabled();
private ExtractJson extractJsonFeature;
private boolean overwriteTarget;

public DiffJobSession(CqlSession originSession, CqlSession targetSession, SparkConf sc) {
super(originSession, targetSession, sc);
Expand Down Expand Up @@ -111,6 +112,7 @@ public DiffJobSession(CqlSession originSession, CqlSession targetSession, SparkC
}

extractJsonFeature = (ExtractJson) this.targetSession.getCqlTable().getFeature(Featureset.EXTRACT_JSON);
overwriteTarget = extractJsonFeature.isEnabled() && extractJsonFeature.overwriteTarget();

logger.info("CQL -- origin select: {}", this.originSession.getOriginSelectByPartitionRangeStatement().getCQL());
logger.info("CQL -- target select: {}", this.targetSession.getTargetSelectByPKStatement().getCQL());
Expand Down Expand Up @@ -270,7 +272,13 @@ private String isDifferent(Record record) {
logger.trace("PK {}, targetIndex {} skipping constant column {}", pk, targetIndex,
targetColumnNames.get(targetIndex));
return; // nothing to compare in origin
} else if (targetIndex == extractJsonFeature.getTargetColumnIndex()) {
}

targetAsOriginType = targetSession.getCqlTable().getAndConvertData(targetIndex, targetRow);
if (targetIndex == extractJsonFeature.getTargetColumnIndex()) {
if (!overwriteTarget && null != targetAsOriginType) {
return; // skip validation when target has data
}
originIndex = extractJsonFeature.getOriginColumnIndex();
origin = extractJsonFeature.extract(originRow.getString(originIndex));
} else {
Expand Down Expand Up @@ -301,7 +309,6 @@ private String isDifferent(Record record) {
+ explodeMapKeyIndex + ", valueIndex:" + explodeMapValueIndex + ")");
}
}
targetAsOriginType = targetSession.getCqlTable().getAndConvertData(targetIndex, targetRow);

if (logDebug)
logger.debug(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,15 @@ public enum PropertyType {
public static final String EXTRACT_JSON_EXCLUSIVE = "spark.cdm.feature.extractJson.exclusive";
public static final String EXTRACT_JSON_ORIGIN_COLUMN_NAME = "spark.cdm.feature.extractJson.originColumn";
public static final String EXTRACT_JSON_TARGET_COLUMN_MAPPING = "spark.cdm.feature.extractJson.propertyMapping";
public static final String EXTRACT_JSON_TARGET_OVERWRITE = "spark.cdm.feature.extractJson.overwrite";

static {
types.put(EXTRACT_JSON_EXCLUSIVE, PropertyType.BOOLEAN);
defaults.put(EXTRACT_JSON_EXCLUSIVE, "false");
types.put(EXTRACT_JSON_ORIGIN_COLUMN_NAME, PropertyType.STRING);
types.put(EXTRACT_JSON_TARGET_COLUMN_MAPPING, PropertyType.STRING);
}
types.put(EXTRACT_JSON_TARGET_OVERWRITE, PropertyType.BOOLEAN);
defaults.put(EXTRACT_JSON_TARGET_OVERWRITE, "false"); }

// ==========================================================================
// Guardrail Feature
Expand Down
6 changes: 6 additions & 0 deletions src/resources/cdm-detailed.properties
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,16 @@ spark.cdm.perfops.ratelimit.target 20000
# - If the specified JSON property does not exist in the JSON content, the Target column
# will be set to null.
# Note: This feature currently supports extraction of only one JSON property.
#
# .overwrite Default is false. This property only applies to Validation run (NA for Migration)
# When set to true, the extracted JSON value will overwrite any existing value in the
# Target column during Validation. False will skip validation if the Target column has
# any non-null value.
#-----------------------------------------------------------------------------------------------------------
#spark.cdm.feature.extractJson.exclusive false
#spark.cdm.feature.extractJson.originColumn origin_columnname_with_json_content
#spark.cdm.feature.extractJson.propertyMapping origin_json_propertyname:target_columnname
#spark.cdm.feature.extractJson.overwrite false

#===========================================================================================================
# Guardrail feature manages records that exceed guardrail checks. The Guardrail job will generate a
Expand Down
14 changes: 14 additions & 0 deletions src/test/java/com/datastax/cdm/feature/ExtractJsonTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public class ExtractJsonTest {

String standardOriginName = "content";
String standardTargetName = "age";
String mappedTargetName = "personAge:person_age";

@BeforeEach
public void setup() {
Expand Down Expand Up @@ -98,9 +99,22 @@ public void loadProperties() {
assertAll(
() -> assertTrue(loaded, "properties are loaded and valid"),
() -> assertTrue(feature.isEnabled()),
() -> assertFalse(feature.overwriteTarget()),
() -> assertEquals(standardTargetName, feature.getTargetColumnName())
);
}

@Test
public void loadPropertiesWithMapping() {
when(propertyHelper.getString(KnownProperties.EXTRACT_JSON_TARGET_COLUMN_MAPPING)).thenReturn(mappedTargetName);
boolean loaded = feature.loadProperties(propertyHelper);

assertAll(
() -> assertTrue(loaded, "properties are loaded and valid"),
() -> assertTrue(feature.isEnabled()),
() -> assertEquals("person_age", feature.getTargetColumnName())
);
}

@Test
public void loadPropertiesException() {
Expand Down
33 changes: 33 additions & 0 deletions src/test/java/com/datastax/cdm/feature/TrackRunTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright DataStax, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datastax.cdm.feature;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class TrackRunTest {

@Test
void test() {
assertEquals("MIGRATE", TrackRun.RUN_TYPE.MIGRATE.name());
assertEquals("DIFF_DATA", TrackRun.RUN_TYPE.DIFF_DATA.name());

assertEquals(2, TrackRun.RUN_TYPE.values().length);
assertEquals(5, TrackRun.RUN_STATUS.values().length);
}

}

0 comments on commit 7d69c6b

Please sign in to comment.