diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 3efdc3fd514..540378f23f8 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -84,6 +84,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.Search; @@ -788,6 +789,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context)); } + @Override + public LogicalPlan visitReplace(Replace node, AnalysisContext context) { + throw getOnlyForCalciteException("Replace"); + } + @Override public LogicalPlan visitJoin(Join node, AnalysisContext context) { throw getOnlyForCalciteException("Join"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 84f7bdbd4a6..e26f0d28307 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -72,6 +72,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -244,6 +245,10 @@ public T visitRename(Rename node, C context) { return visitChildren(node, context); } + public T visitReplace(Replace node, C context) { + return visitChildren(node, context); + } + public T visitEval(Eval node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java new file mode 100644 index 00000000000..0bc43854078 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -0,0 +1,97 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +public class Replace extends UnresolvedPlan { + private final UnresolvedExpression pattern; + private final UnresolvedExpression replacement; + private final List fieldList; + private UnresolvedPlan child; + + public Replace( + UnresolvedExpression pattern, UnresolvedExpression replacement, List fieldList) { + this.pattern = pattern; + this.replacement = replacement; + this.fieldList = fieldList; + validate(); + } + + public void validate() { + if (pattern == null) { + throw new IllegalArgumentException("Pattern expression cannot be null in Replace command"); + } + if (replacement == null) { + throw new IllegalArgumentException( + "Replacement expression cannot be null in Replace command"); + } + + // Validate pattern is a string literal + if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) { + throw new IllegalArgumentException("Pattern must be a string literal in Replace command"); + } + + // Validate replacement is a string literal + if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) { + throw new IllegalArgumentException("Replacement must be a string literal in Replace command"); + } + + if (fieldList == null || fieldList.isEmpty()) { + throw new IllegalArgumentException( + "Field list cannot be empty in Replace command. Use IN clause to specify the field."); + } + + Set uniqueFields = new HashSet<>(); + List duplicates = + fieldList.stream() + .map(field -> field.getField().toString()) + .filter(fieldName -> !uniqueFields.add(fieldName)) + .collect(Collectors.toList()); + + if (!duplicates.isEmpty()) { + throw new IllegalArgumentException( + String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates))); + } + } + + @Override + public Replace attach(UnresolvedPlan child) { + if (null == this.child) { + this.child = child; + } else { + this.child.attach(child); + } + return this; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitReplace(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index faa8a8fa6f9..b6f13cbd437 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -112,6 +112,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; import org.opensearch.sql.ast.tree.Search; @@ -144,6 +145,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + RexNode patternNode = rexVisitor.analyze(node.getPattern(), context); + RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context); + + List projectList = new ArrayList<>(); + List newFieldNames = new ArrayList<>(); + + // First add all original fields + for (String fieldName : fieldNames) { + RexNode fieldRef = context.relBuilder.field(fieldName); + projectList.add(fieldRef); + newFieldNames.add(fieldName); + } + + // Then add new fields with replaced content using new_ prefix + for (Field field : node.getFieldList()) { + String fieldName = field.getField().toString(); + RexNode fieldRef = context.relBuilder.field(fieldName); + + RexNode replaceCall = + context.relBuilder.call( + SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); + projectList.add(replaceCall); + newFieldNames.add(NEW_FIELD_PREFIX + fieldName); + } + + context.relBuilder.project(projectList, newFieldNames); + return context.relBuilder.peek(); + } + private void buildParseRelNode(Parse node, CalcitePlanContext context) { RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context); ParseMethod parseMethod = node.getParseMethod(); diff --git a/docs/category.json b/docs/category.json index 4eb954062a8..16b6916e2e9 100644 --- a/docs/category.json +++ b/docs/category.json @@ -63,6 +63,7 @@ "user/ppl/cmd/rex.rst", "user/ppl/cmd/stats.rst", "user/ppl/cmd/timechart.rst", - "user/ppl/cmd/search.rst" + "user/ppl/cmd/search.rst", + "user/ppl/cmd/replace.rst" ] } diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst new file mode 100644 index 00000000000..f2a193e54ec --- /dev/null +++ b/docs/user/ppl/cmd/replace.rst @@ -0,0 +1,107 @@ +============= +replace +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Using ``replace`` command to replace text in one or more fields in the search result. +* The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country') +* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0') + + +Version +======= +3.3.0 + + +Syntax +============ +replace '' WITH '' IN [, ]... + +Note: This command is only available when Calcite engine is enabled. + +* pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions). +* replacement: mandatory. The text you want to replace with. +* field list: mandatory. One or more field names where the replacement should occur. + + +Example 1: Replace text in one field +==================================== + +The example shows replacing text in one field. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state; + fetched rows / total rows = 4/4 + +-------+-----------+ + | state | new_state | + |-------+-----------| + | IL | Illinois | + | TN | TN | + | VA | VA | + | MD | MD | + +-------+-----------+ + + +Example 2: Replace text in multiple fields +========================================== + +The example shows replacing text in multiple fields. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address; + fetched rows / total rows = 4/4 + +-------+----------------------+-----------+----------------------+ + | state | address | new_state | new_address | + |-------+----------------------+-----------+----------------------| + | IL | 880 Holmes Lane | Illinois | 880 Holmes Lane | + | TN | 671 Bristol Street | TN | 671 Bristol Street | + | VA | 789 Madison Street | VA | 789 Madison Street | + | MD | 467 Hutchinson Court | MD | 467 Hutchinson Court | + +-------+----------------------+-----------+----------------------+ + + +Example 3: Replace with IN clause and other commands +==================================================== + +The example shows using replace with other commands. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state; + fetched rows / total rows = 3/3 + +-------+-----+-----------+ + | state | age | new_state | + |-------+-----+-----------| + | IL | 32 | Illinois | + | TN | 36 | TN | + | MD | 33 | MD | + +-------+-----+-----------+ + +Example 4: Pattern matching with LIKE and replace +================================================= + +Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. + +PPL query:: + + os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address; + fetched rows / total rows = 1/1 + +-----------------+-------+--------+-----+--------+-----------------+ + | address | state | gender | age | city | new_address | + |-----------------+-------+--------+-----+--------+-----------------| + | 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane | + +-----------------+-------+--------+-----+--------+-----------------+ + +Note +==== +* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged. \ No newline at end of file diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 09fad5c853d..76eac15273f 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -124,6 +124,8 @@ The query start with search command and then flowing a set of command delimited - `trendline command `_ + - `replace command `_ + - `where command `_ * **Functions** diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index d94bea7be77..a6f72b80e89 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -88,6 +88,7 @@ CalciteRegexCommandIT.class, CalciteRexCommandIT.class, CalciteRenameCommandIT.class, + CalciteReplaceCommandIT.class, CalciteResourceMonitorIT.class, CalciteSearchCommandIT.class, CalciteSettingsIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java new file mode 100644 index 00000000000..34eb31d97d8 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java @@ -0,0 +1,250 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.MatcherUtils.*; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteReplaceCommandIT extends PPLIntegTestCase { + + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + loadIndex(Index.STATE_COUNTRY); + } + + @Test + public void testReplaceWithFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | fields name, age," + + " new_country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, schema("name", "string"), schema("age", "int"), schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", 70, "United States"), + rows("Hello", 30, "United States"), + rows("John", 25, "Canada"), + rows("Jane", 20, "Canada")); + } + + @Test + public void testMultipleReplace() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | replace 'Jane' WITH" + + " 'Joseph' IN name", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_name", "string"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "United States", "Jake"), + rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "Hello"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "John"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Joseph")); + } + + @Test + public void testReplaceWithSort() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'US' WITH 'United States' IN country | sort new_country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + } + + @Test + public void testReplaceWithWhereClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | where country = 'US' | replace 'US' WITH 'United States' IN country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + } + + @Test + public void testEmptyStringReplacement() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH '' IN country", TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, ""), + rows("Hello", "USA", "New York", 4, 2023, 30, ""), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada")); + } + + @Test + public void testMultipleFieldsInClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country,state", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_state", "string"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "United States", "California"), + rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "New York"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "Ontario"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Quebec")); + } + + @Test + public void testReplaceNonExistentField() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN non_existent_field", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains( + e, + "field [non_existent_field] not found; input fields are: [name, country, state, month," + + " year, age, _id, _index, _score, _maxscore, _sort, _routing]"); + } + + @Test + public void testReplaceAfterFieldRemoved() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | fields name, age | replace 'USA' WITH 'United States' IN" + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "field [country] not found; input fields are: [name, age]"); + } + + @Test + public void testMissingInClause() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States'", + TEST_INDEX_STATE_COUNTRY))); + + verifyErrorMessageContains(e, "[] is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: 'IN'"); + } + + @Test + public void testDuplicateFieldsInReplace() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country, state," + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "Duplicate fields [country] in Replace command"); + } + + @Test + public void testNonStringLiteralPattern() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 23 WITH 'test' IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: DQUOTA_STRING, SQUOTA_STRING"); + } + + @Test + public void testNonStringLiteralReplacement() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'test' WITH 45 IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: DQUOTA_STRING, SQUOTA_STRING"); + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 0bc7b784338..dff96256e85 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -78,6 +78,7 @@ commands | regexCommand | timechartCommand | rexCommand + | replaceCommand ; commandName @@ -115,6 +116,7 @@ commandName | REGEX | APPEND | REX + | REPLACE ; searchCommand @@ -200,6 +202,10 @@ renameCommand : RENAME renameClasue (COMMA? renameClasue)* ; +replaceCommand + : REPLACE pattern=stringLiteral WITH replacement=stringLiteral IN fieldList + ; + statsCommand : STATS statsArgs statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (dedupSplitArg)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 774eb73dff6..e6bfeb97e37 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -94,6 +94,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -398,6 +399,25 @@ public UnresolvedPlan visitRenameCommand(RenameCommandContext ctx) { .collect(Collectors.toList())); } + /** Replace command. */ + @Override + public UnresolvedPlan visitReplaceCommand(OpenSearchPPLParser.ReplaceCommandContext ctx) { + UnresolvedExpression pattern = internalVisitExpression(ctx.pattern); + UnresolvedExpression replacement = internalVisitExpression(ctx.replacement); + + List fieldList = + ctx.fieldList().fieldExpression().stream() + .map(field -> (Field) internalVisitExpression(field)) + .collect(Collectors.toList()); + + return new Replace(pattern, replacement, fieldList); + } + + private String removeQuotes(String text) { + // Remove both single and double quotes + return text.replaceAll("^[\"']|[\"']$", ""); + } + /** Stats command. */ @Override public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d5c55d10258..c325878e728 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -77,6 +77,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.Search; @@ -276,6 +277,25 @@ public String visitRename(Rename node, String context) { return StringUtils.format("%s | rename %s", child, renames); } + @Override + public String visitReplace(Replace node, String context) { + // Get the child query string + String child = node.getChild().get(0).accept(this, context); + + // Get pattern and replacement expressions + String pattern = visitExpression(node.getPattern()); + String replacement = visitExpression(node.getReplacement()); + + // Get field list + String fieldListStr = + " IN " + + node.getFieldList().stream().map(Field::toString).collect(Collectors.joining(", ")); + + // Build the replace command string + return StringUtils.format( + "%s | replace %s WITH %s%s", child, pattern, replacement, fieldListStr); + } + /** Build {@link LogicalAggregation}. */ @Override public String visitAggregation(Aggregation node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java new file mode 100644 index 00000000000..95d37e9dcf7 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -0,0 +1,241 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import java.util.Collections; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.tree.Replace; + +public class CalcitePPLReplaceTest extends CalcitePPLAbstractTest { + + public CalcitePPLReplaceTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testBasicReplace() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;" + + " COMM=null; DEPTNO=20; new_JOB=MANAGER\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;" + + " COMM=1400.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;" + + " COMM=null; DEPTNO=30; new_JOB=MANAGER\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00;" + + " COMM=null; DEPTNO=10; new_JOB=MANAGER\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;" + + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;" + + " COMM=null; DEPTNO=10; new_JOB=PRESIDENT\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;" + + " COMM=0.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;" + + " COMM=null; DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;" + + " COMM=null; DEPTNO=30; new_JOB=EMPLOYEE\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;" + + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;" + + " COMM=null; DEPTNO=10; new_JOB=EMPLOYEE\n"; + + verifyResult(root, expectedResult); + } + + @Test + public void testMultipleFieldsReplace() { + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | replace \"20\" WITH \"RESEARCH\"" + + " IN DEPTNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR)]," + + " new_DEPTNO=[REPLACE($7, '20':VARCHAR, 'RESEARCH':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`, " + + "REPLACE(`DEPTNO`, '20', 'RESEARCH') `new_DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceSameValueInMultipleFields() { + // In EMP table, both JOB and MGR fields contain numeric values + String ppl = "source=EMP | replace \"7839\" WITH \"CEO\" IN MGR, EMPNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_MGR=[REPLACE($3, '7839':VARCHAR, 'CEO':VARCHAR)]," + + " new_EMPNO=[REPLACE($0, '7839':VARCHAR, 'CEO':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`MGR`, '7839', 'CEO') `new_MGR`, " + + "REPLACE(`EMPNO`, '7839', 'CEO') `new_EMPNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithPipeline() { + String ppl = + "source=EMP | where JOB = 'CLERK' | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | sort SAL"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)])\n" + + " LogicalFilter(condition=[=($2, 'CLERK':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `JOB` = 'CLERK'\n" + + "ORDER BY `SAL`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test(expected = Exception.class) + public void testReplaceWithoutWithKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithoutInKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" JOB"; + getRelNode(ppl); + } + + @Test(expected = RuntimeException.class) + public void testReplaceWithExpressionShouldFail() { + String ppl = "source=EMP | replace EMPNO + 1 WITH \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithInvalidFieldShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN INVALID_FIELD"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMultipleInKeywordsShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB IN ENAME"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMissingQuotesShouldFail() { + String ppl = "source=EMP | replace CLERK WITH EMPLOYEE IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMissingReplacementValueShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH IN JOB"; + getRelNode(ppl); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNullPatternShouldFail() { + Replace replace = + new Replace(null, new Literal("EMPLOYEE", DataType.STRING), Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNullReplacementShouldFail() { + Replace replace = + new Replace(new Literal("CLERK", DataType.STRING), null, Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNonStringPatternShouldFail() { + Replace replace = + new Replace( + new Literal(123, DataType.INTEGER), + new Literal("EMPLOYEE", DataType.STRING), + Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNonStringReplacementShouldFail() { + Replace replace = + new Replace( + new Literal("CLERK", DataType.STRING), + new Literal(456, DataType.INTEGER), + Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNullFieldListShouldFail() { + Replace replace = + new Replace( + new Literal("CLERK", DataType.STRING), new Literal("EMPLOYEE", DataType.STRING), null); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithEmptyFieldListShouldFail() { + Replace replace = + new Replace( + new Literal("CLERK", DataType.STRING), + new Literal("EMPLOYEE", DataType.STRING), + Collections.emptyList()); + replace.validate(); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 5dfc73f5483..be5ccf87870 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -554,6 +554,49 @@ public void testGrok() { anonymize("source=t | grok email '.+@%{HOSTNAME:host}' | fields email, host")); } + @Test + public void testReplaceCommandSingleField() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname")); + } + + @Test + public void testReplaceCommandMultipleFields() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + + " Field(field=fieldname2, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname1, fieldname2")); + } + + @Test(expected = Exception.class) + public void testReplaceCommandWithoutInShouldFail() { + anonymize("source=EMP | replace \"value\" WITH \"newvalue\""); + } + + @Test + public void testReplaceCommandSpecialCharactersInFields() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=user.name, fieldArgs=[])," + + " Field(field=user.email, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN user.name, user.email")); + } + + @Test + public void testReplaceCommandWithWildcards() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + anonymize("source=EMP | replace \"CLERK*\" WITH \"EMPLOYEE*\" IN fieldname")); + } + + @Test + public void testReplaceCommandWithMultipleWildcards() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + + " Field(field=fieldname2, fieldArgs=[])", + anonymize("source=EMP | replace \"*TEST*\" WITH \"*NEW*\" IN fieldname1, fieldname2")); + } + @Test public void testPatterns() { when(settings.getSettingValue(Key.PATTERN_METHOD)).thenReturn("SIMPLE_PATTERN");