-
Notifications
You must be signed in to change notification settings - Fork 181
Add replace command with Calcite #4248
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.ast.tree; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import java.util.HashSet; | ||
| import java.util.List; | ||
| import java.util.Set; | ||
| import java.util.stream.Collectors; | ||
| import lombok.EqualsAndHashCode; | ||
| import lombok.Getter; | ||
| import lombok.Setter; | ||
| import lombok.ToString; | ||
| import org.opensearch.sql.ast.AbstractNodeVisitor; | ||
| import org.opensearch.sql.ast.expression.DataType; | ||
| import org.opensearch.sql.ast.expression.Field; | ||
| import org.opensearch.sql.ast.expression.Literal; | ||
| import org.opensearch.sql.ast.expression.UnresolvedExpression; | ||
|
|
||
| @Getter | ||
| @Setter | ||
| @ToString | ||
| @EqualsAndHashCode(callSuper = false) | ||
| public class Replace extends UnresolvedPlan { | ||
| private final UnresolvedExpression pattern; | ||
| private final UnresolvedExpression replacement; | ||
| private final List<Field> fieldList; | ||
| private UnresolvedPlan child; | ||
|
|
||
| /** | ||
|  * Creates a Replace node and runs {@link #validate()} on the supplied arguments right away. | ||
|  * | ||
|  * @param pattern expression holding the text to search for | ||
|  * @param replacement expression holding the text to substitute | ||
|  * @param fieldList fields in which the replacement is applied | ||
|  * @throws IllegalArgumentException if {@link #validate()} rejects any argument | ||
|  */ | ||
| public Replace( | ||
|     UnresolvedExpression pattern, UnresolvedExpression replacement, List<Field> fieldList) { | ||
|   this.fieldList = fieldList; | ||
|   this.replacement = replacement; | ||
|   this.pattern = pattern; | ||
|   validate(); | ||
| } | ||
|
|
||
| /** | ||
|  * Checks this node's pattern, replacement and field list, throwing on the first violation. | ||
|  * | ||
|  * <p>Checks run in this order: pattern is non-null, replacement is non-null, pattern is a | ||
|  * STRING literal, replacement is a STRING literal, the field list is non-null and non-empty, | ||
|  * and no field name occurs more than once in the field list. | ||
|  * | ||
|  * @throws IllegalArgumentException if any of the checks above fails | ||
|  */ | ||
| public void validate() { | ||
| // Null checks come first so a missing argument gets its own message instead of the | ||
| // "must be a string literal" one (instanceof is false for null). | ||
| if (pattern == null) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The validation logic for the pattern expression hardcodes checks for mathematical operators ( Something like this: if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) {
throw new IllegalArgumentException("Replace pattern must be a string literal.");
}
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good call! Updated in the next revision. |
||
| throw new IllegalArgumentException("Pattern expression cannot be null in Replace command"); | ||
| } | ||
| if (replacement == null) { | ||
| throw new IllegalArgumentException( | ||
| "Replacement expression cannot be null in Replace command"); | ||
| } | ||
|
|
||
| // Validate pattern is a string literal | ||
| if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) { | ||
| throw new IllegalArgumentException("Pattern must be a string literal in Replace command"); | ||
| } | ||
|
|
||
| // Validate replacement is a string literal | ||
| if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) { | ||
| throw new IllegalArgumentException("Replacement must be a string literal in Replace command"); | ||
| } | ||
|
|
||
| if (fieldList == null || fieldList.isEmpty()) { | ||
RyanL1997 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| throw new IllegalArgumentException( | ||
| "Field list cannot be empty in Replace command. Use IN clause to specify the field."); | ||
| } | ||
|
|
||
| // Collect repeated names: Set.add returns false for a name that was already seen, | ||
| // so the filter below keeps only the second and later occurrences. | ||
| Set<String> uniqueFields = new HashSet<>(); | ||
| List<String> duplicates = | ||
| fieldList.stream() | ||
| .map(field -> field.getField().toString()) | ||
| .filter(fieldName -> !uniqueFields.add(fieldName)) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| if (!duplicates.isEmpty()) { | ||
| throw new IllegalArgumentException( | ||
| String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates))); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|  * Attaches a child plan to this node. | ||
|  * | ||
|  * <p>When no child is set yet, the given plan becomes this node's child; otherwise the call is | ||
|  * forwarded to the existing child's {@code attach}. | ||
|  * | ||
|  * @param child plan to attach | ||
|  * @return this node | ||
|  */ | ||
| @Override | ||
| public Replace attach(UnresolvedPlan child) { | ||
|   if (this.child != null) { | ||
|     // A child already exists: hand the new plan to it instead of overwriting it. | ||
|     this.child.attach(child); | ||
|     return this; | ||
|   } | ||
|   this.child = child; | ||
|   return this; | ||
| } | ||
|
|
||
| /** | ||
|  * Returns this node's child as a list. | ||
|  * | ||
|  * @return an empty immutable list when no child is set, otherwise an immutable list holding | ||
|  *     the single child | ||
|  */ | ||
| @Override | ||
| public List<UnresolvedPlan> getChild() { | ||
|   if (this.child != null) { | ||
|     return ImmutableList.of(this.child); | ||
|   } | ||
|   return ImmutableList.of(); | ||
| } | ||
|
|
||
| /** | ||
|  * Dispatches this node to the visitor's {@code visitReplace} method. | ||
|  * | ||
|  * @param nodeVisitor visitor to dispatch to | ||
|  * @param context context object passed through to the visitor | ||
|  * @return whatever {@code visitReplace} returns | ||
|  */ | ||
| @Override | ||
| public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) { | ||
|   final T visited = nodeVisitor.visitReplace(this, context); | ||
|   return visited; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -112,6 +112,7 @@ | |
| import org.opensearch.sql.ast.tree.Regex; | ||
| import org.opensearch.sql.ast.tree.Relation; | ||
| import org.opensearch.sql.ast.tree.Rename; | ||
| import org.opensearch.sql.ast.tree.Replace; | ||
| import org.opensearch.sql.ast.tree.Rex; | ||
| import org.opensearch.sql.ast.tree.SPath; | ||
| import org.opensearch.sql.ast.tree.Search; | ||
|
|
@@ -144,6 +145,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor<RelNode, CalciteP | |
|
|
||
| private final CalciteRexNodeVisitor rexVisitor; | ||
| private final CalciteAggCallVisitor aggVisitor; | ||
| private static final String NEW_FIELD_PREFIX = "new_"; | ||
|
|
||
| public CalciteRelNodeVisitor() { | ||
| this.rexVisitor = new CalciteRexNodeVisitor(this); | ||
|
|
@@ -2180,6 +2182,40 @@ public RelNode visitValues(Values values, CalcitePlanContext context) { | |
| } | ||
| } | ||
|
|
||
| /** | ||
|  * Translates a Replace node into a projection on top of its child plan. | ||
|  * | ||
|  * <p>After visiting the children, the projection keeps every field of the current top relation | ||
|  * under its original name and appends, for each field in the node's field list, one extra | ||
|  * expression {@code REPLACE(field, pattern, replacement)} named {@code NEW_FIELD_PREFIX} | ||
|  * followed by the field name. | ||
|  * | ||
|  * @param node the Replace AST node supplying pattern, replacement and field list | ||
|  * @param context planning context whose relBuilder holds the plan being built | ||
|  * @return the relBuilder's top node after the projection has been pushed | ||
|  */ | ||
| @Override | ||
| public RelNode visitReplace(Replace node, CalcitePlanContext context) { | ||
| visitChildren(node, context); | ||
|
|
||
| List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); | ||
| // Pattern and replacement are analyzed once and reused for every target field. | ||
| RexNode patternNode = rexVisitor.analyze(node.getPattern(), context); | ||
| RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context); | ||
|
|
||
| List<RexNode> projectList = new ArrayList<>(); | ||
| List<String> newFieldNames = new ArrayList<>(); | ||
|
|
||
| // First add all original fields | ||
| for (String fieldName : fieldNames) { | ||
| RexNode fieldRef = context.relBuilder.field(fieldName); | ||
| projectList.add(fieldRef); | ||
| newFieldNames.add(fieldName); | ||
| } | ||
|
|
||
| // Then add new fields with replaced content using new_ prefix | ||
| for (Field field : node.getFieldList()) { | ||
| String fieldName = field.getField().toString(); | ||
| RexNode fieldRef = context.relBuilder.field(fieldName); | ||
|
|
||
| RexNode replaceCall = | ||
| context.relBuilder.call( | ||
| SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); | ||
| projectList.add(replaceCall); | ||
| // NOTE(review): no explicit check here for an existing field with the same new_ name; | ||
| // presumably relBuilder.project de-duplicates output names - confirm against the user doc. | ||
| newFieldNames.add(NEW_FIELD_PREFIX + fieldName); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks it does not check existing field and add suffix number as written in the doc.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes every replace command adds a
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was wondering the following doc description is right since this logic doesn't check existence of the field. (Is it something automatically done?)
|
||
| } | ||
|
|
||
| context.relBuilder.project(projectList, newFieldNames); | ||
| return context.relBuilder.peek(); | ||
| } | ||
|
|
||
| private void buildParseRelNode(Parse node, CalcitePlanContext context) { | ||
| RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context); | ||
| ParseMethod parseMethod = node.getParseMethod(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| ============= | ||
ykmr1224 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| replace | ||
| ============= | ||
|
|
||
| .. rubric:: Table of contents | ||
|
|
||
| .. contents:: | ||
| :local: | ||
| :depth: 2 | ||
|
|
||
|
|
||
| Description | ||
ykmr1224 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ============ | ||
| | Use the ``replace`` command to replace text in one or more fields of the search result. | ||
| * The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country') | ||
| * If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0') | ||
|
|
||
|
|
||
| Version | ||
| ======= | ||
| 3.3.0 | ||
|
|
||
|
|
||
| Syntax | ||
| ============ | ||
| replace '<pattern>' WITH '<replacement>' IN <field-name>[, <field-name>]... | ||
|
|
||
| Note: This command is only available when Calcite engine is enabled. | ||
|
|
||
| * pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions). | ||
| * replacement: mandatory. The text you want to replace with. | ||
| * field list: mandatory. One or more field names where the replacement should occur. | ||
ykmr1224 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| Example 1: Replace text in one field | ||
| ==================================== | ||
|
|
||
| The example shows replacing text in one field. | ||
|
|
||
| PPL query:: | ||
|
|
||
| os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state; | ||
| fetched rows / total rows = 4/4 | ||
| +-------+-----------+ | ||
| | state | new_state | | ||
| |-------+-----------| | ||
| | IL | Illinois | | ||
| | TN | TN | | ||
| | VA | VA | | ||
| | MD | MD | | ||
| +-------+-----------+ | ||
|
|
||
|
|
||
| Example 2: Replace text in multiple fields | ||
| ========================================== | ||
|
|
||
| The example shows replacing text in multiple fields. | ||
|
|
||
| PPL query:: | ||
|
|
||
| os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address; | ||
| fetched rows / total rows = 4/4 | ||
| +-------+----------------------+-----------+----------------------+ | ||
| | state | address | new_state | new_address | | ||
| |-------+----------------------+-----------+----------------------| | ||
| | IL | 880 Holmes Lane | Illinois | 880 Holmes Lane | | ||
| | TN | 671 Bristol Street | TN | 671 Bristol Street | | ||
| | VA | 789 Madison Street | VA | 789 Madison Street | | ||
| | MD | 467 Hutchinson Court | MD | 467 Hutchinson Court | | ||
| +-------+----------------------+-----------+----------------------+ | ||
|
|
||
|
|
||
| Example 3: Replace with IN clause and other commands | ||
| ==================================================== | ||
|
|
||
| The example shows using replace with other commands. | ||
|
|
||
| PPL query:: | ||
|
|
||
| os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state; | ||
| fetched rows / total rows = 3/3 | ||
| +-------+-----+-----------+ | ||
| | state | age | new_state | | ||
| |-------+-----+-----------| | ||
| | IL | 32 | Illinois | | ||
| | TN | 36 | TN | | ||
| | MD | 33 | MD | | ||
| +-------+-----+-----------+ | ||
|
|
||
| Example 4: Pattern matching with LIKE and replace | ||
| ================================================= | ||
|
|
||
| Since the replace command only supports plain string literals, you can combine the LIKE function with replace when you need pattern matching. | ||
|
|
||
| PPL query:: | ||
|
|
||
| os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address; | ||
| fetched rows / total rows = 1/1 | ||
| +-----------------+-------+--------+-----+--------+-----------------+ | ||
| | address | state | gender | age | city | new_address | | ||
| |-----------------+-------+--------+-----+--------+-----------------| | ||
| | 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane | | ||
| +-----------------+-------+--------+-----+--------+-----------------+ | ||
|
|
||
| Note | ||
| ==== | ||
| * For each field specified in the IN clause, a new field is created with the *new_* prefix, containing the replaced text. The original fields remain unchanged. | ||
Uh oh!
There was an error while loading. Please reload this page.