Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Replace;
import org.opensearch.sql.ast.tree.Reverse;
import org.opensearch.sql.ast.tree.Rex;
import org.opensearch.sql.ast.tree.Search;
Expand Down Expand Up @@ -788,6 +789,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con
return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context));
}

/**
 * Handles the {@code replace} command in this analyzer by rejecting it.
 *
 * <p>No logical plan is produced: the method always throws the exception built by {@code
 * getOnlyForCalciteException("Replace")}.
 *
 * @param node the replace AST node (unused)
 * @param context the analysis context (unused)
 * @return never returns normally
 */
@Override
public LogicalPlan visitReplace(Replace node, AnalysisContext context) {
throw getOnlyForCalciteException("Replace");
}

@Override
public LogicalPlan visitJoin(Join node, AnalysisContext context) {
throw getOnlyForCalciteException("Join");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Replace;
import org.opensearch.sql.ast.tree.Reverse;
import org.opensearch.sql.ast.tree.Rex;
import org.opensearch.sql.ast.tree.SPath;
Expand Down Expand Up @@ -244,6 +245,10 @@ public T visitRename(Rename node, C context) {
return visitChildren(node, context);
}

/**
 * Visits a {@link Replace} node. The default implementation delegates to {@code visitChildren};
 * visitors that need replace-specific handling override this method.
 *
 * @param node the replace node being visited
 * @param context visitor-specific context passed through unchanged
 * @return whatever {@code visitChildren} returns for this node
 */
public T visitReplace(Replace node, C context) {
return visitChildren(node, context);
}

/**
 * Visits an {@code Eval} node. The default implementation delegates to {@code visitChildren}.
 *
 * @param node the eval node being visited
 * @param context visitor-specific context passed through unchanged
 * @return whatever {@code visitChildren} returns for this node
 */
public T visitEval(Eval node, C context) {
return visitChildren(node, context);
}
Expand Down
97 changes: 97 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/Replace.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import com.google.common.collect.ImmutableList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/**
 * AST node for the PPL {@code replace} command.
 *
 * <p>Holds the pattern expression, the replacement expression, the list of fields the command
 * applies to, and at most one child plan. All arguments are validated at construction time (see
 * {@link #validate()}).
 */
@Getter
@Setter
@ToString
@EqualsAndHashCode(callSuper = false)
public class Replace extends UnresolvedPlan {
// Pattern, replacement and field list are fixed at construction; only the child is reassignable.
private final UnresolvedExpression pattern;
private final UnresolvedExpression replacement;
private final List<Field> fieldList;
private UnresolvedPlan child;

/**
 * Creates a replace node and validates its arguments immediately.
 *
 * @param pattern the text to search for; must be a string literal
 * @param replacement the text to substitute; must be a string literal
 * @param fieldList the fields to apply the replacement to; must be non-empty and free of
 *     duplicates
 * @throws IllegalArgumentException if any argument fails {@link #validate()}
 */
public Replace(
UnresolvedExpression pattern, UnresolvedExpression replacement, List<Field> fieldList) {
this.pattern = pattern;
this.replacement = replacement;
this.fieldList = fieldList;
// NOTE(review): validate() is public and non-final, so a subclass override would be invoked
// from this constructor before the subclass's own fields are initialised.
validate();
}

/**
 * Checks the node's arguments, in this order: pattern is non-null, replacement is non-null,
 * pattern is a {@code DataType.STRING} literal, replacement is a {@code DataType.STRING} literal,
 * the field list is non-null and non-empty, and no field name appears more than once.
 *
 * @throws IllegalArgumentException on the first check that fails
 */
public void validate() {
if (pattern == null) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The validation logic for the pattern expression hardcodes checks for mathematical operators (+, -, *, /) on L44-47. This is may miss other invalid expressions in my opinion. Maybe consider validating that pattern is a string literal by checking its type (e.g., Literal with DataType.STRING)?

Something like this:

if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) {
    throw new IllegalArgumentException("Replace pattern must be a string literal.");
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call! Updated in the next revision.

throw new IllegalArgumentException("Pattern expression cannot be null in Replace command");
}
if (replacement == null) {
throw new IllegalArgumentException(
"Replacement expression cannot be null in Replace command");
}

// Validate pattern is a string literal
if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) {
throw new IllegalArgumentException("Pattern must be a string literal in Replace command");
}

// Validate replacement is a string literal
if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) {
throw new IllegalArgumentException("Replacement must be a string literal in Replace command");
}

if (fieldList == null || fieldList.isEmpty()) {
throw new IllegalArgumentException(
"Field list cannot be empty in Replace command. Use IN clause to specify the field.");
}

// Set.add returns false for a name that was already seen, so the filter keeps every repeated
// occurrence of a field name, in encounter order.
// NOTE(review): a name listed three or more times therefore shows up more than once in
// `duplicates` (and in the error message); the filter also mutates `uniqueFields` as a side
// effect of the stream pipeline.
Set<String> uniqueFields = new HashSet<>();
List<String> duplicates =
fieldList.stream()
.map(field -> field.getField().toString())
.filter(fieldName -> !uniqueFields.add(fieldName))
.collect(Collectors.toList());

if (!duplicates.isEmpty()) {
throw new IllegalArgumentException(
String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates)));
}
}

/**
 * Attaches a child plan. If this node has no child yet, {@code child} becomes its child;
 * otherwise the call is forwarded to the existing child's {@code attach}.
 *
 * @param child the plan to attach
 * @return this node
 */
@Override
public Replace attach(UnresolvedPlan child) {
if (null == this.child) {
this.child = child;
} else {
this.child.attach(child);
}
return this;
}

/**
 * Returns the child plans of this node.
 *
 * @return an empty immutable list when no child is set, otherwise an immutable single-element
 *     list containing the child
 */
@Override
public List<UnresolvedPlan> getChild() {
return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child);
}

/** Dispatches to {@link AbstractNodeVisitor#visitReplace} with this node and the given context. */
@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitReplace(this, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
import org.opensearch.sql.ast.tree.Regex;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Replace;
import org.opensearch.sql.ast.tree.Rex;
import org.opensearch.sql.ast.tree.SPath;
import org.opensearch.sql.ast.tree.Search;
Expand Down Expand Up @@ -144,6 +145,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor<RelNode, CalciteP

private final CalciteRexNodeVisitor rexVisitor;
private final CalciteAggCallVisitor aggVisitor;
private static final String NEW_FIELD_PREFIX = "new_";

public CalciteRelNodeVisitor() {
this.rexVisitor = new CalciteRexNodeVisitor(this);
Expand Down Expand Up @@ -2180,6 +2182,40 @@ public RelNode visitValues(Values values, CalcitePlanContext context) {
}
}

/**
 * Builds the relational plan for the {@code replace} command.
 *
 * <p>After visiting the node's children, this projects every field of the current row type
 * unchanged, followed by one {@code REPLACE(field, pattern, replacement)} call for each field in
 * the node's field list. Each added column is named with {@code NEW_FIELD_PREFIX} prepended to
 * the source field name, so the original fields are left untouched.
 *
 * @param node the replace AST node supplying pattern, replacement and field list
 * @param context the plan context whose {@code relBuilder} is read and extended
 * @return the node on top of {@code context.relBuilder} after the projection is pushed
 */
@Override
public RelNode visitReplace(Replace node, CalcitePlanContext context) {
visitChildren(node, context);

// Field names are captured before projecting, so they describe the child's output.
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
// Pattern and replacement are analyzed once and reused for every target field.
RexNode patternNode = rexVisitor.analyze(node.getPattern(), context);
RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context);

List<RexNode> projectList = new ArrayList<>();
List<String> newFieldNames = new ArrayList<>();

// First add all original fields
for (String fieldName : fieldNames) {
RexNode fieldRef = context.relBuilder.field(fieldName);
projectList.add(fieldRef);
newFieldNames.add(fieldName);
}

// Then add new fields with replaced content using new_ prefix
for (Field field : node.getFieldList()) {
String fieldName = field.getField().toString();
RexNode fieldRef = context.relBuilder.field(fieldName);

RexNode replaceCall =
context.relBuilder.call(
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
projectList.add(replaceCall);
newFieldNames.add(NEW_FIELD_PREFIX + fieldName);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks it does not check existing field and add suffix number as written in the doc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes every replace command adds a new_ column and do not conflict with existing column names. Let me know if you think we should change the behavior.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wondering the following doc description is right since this logic doesn't check existence of the field. (Is it something automatically done?)

* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0')

}

// NOTE(review): nothing in this method checks whether a "new_<field>" name already exists in
// the input. The numeric-suffix behaviour described in the user docs presumably comes from
// RelBuilder.project making output field names unique — confirm and cover with a test.
context.relBuilder.project(projectList, newFieldNames);
return context.relBuilder.peek();
}

private void buildParseRelNode(Parse node, CalcitePlanContext context) {
RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context);
ParseMethod parseMethod = node.getParseMethod();
Expand Down
3 changes: 2 additions & 1 deletion docs/category.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"user/ppl/cmd/rex.rst",
"user/ppl/cmd/stats.rst",
"user/ppl/cmd/timechart.rst",
"user/ppl/cmd/search.rst"
"user/ppl/cmd/search.rst",
"user/ppl/cmd/replace.rst"
]
}
107 changes: 107 additions & 0 deletions docs/user/ppl/cmd/replace.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
=============
replace
=============

.. rubric:: Table of contents

.. contents::
:local:
:depth: 2


Description
============
| Use the ``replace`` command to replace text in one or more fields of the search result.
* The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country')
* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0')


Version
=======
3.3.0


Syntax
============
replace '<pattern>' WITH '<replacement>' IN <field-name>[, <field-name>]...

Note: This command is only available when Calcite engine is enabled.

* pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions).
* replacement: mandatory. The text you want to replace with.
* field list: mandatory. One or more field names where the replacement should occur.


Example 1: Replace text in one field
====================================

The example shows replacing text in one field.

PPL query::

os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state;
fetched rows / total rows = 4/4
+-------+-----------+
| state | new_state |
|-------+-----------|
| IL | Illinois |
| TN | TN |
| VA | VA |
| MD | MD |
+-------+-----------+


Example 2: Replace text in multiple fields
==========================================

The example shows replacing text in multiple fields.

PPL query::

os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address;
fetched rows / total rows = 4/4
+-------+----------------------+-----------+----------------------+
| state | address | new_state | new_address |
|-------+----------------------+-----------+----------------------|
| IL | 880 Holmes Lane | Illinois | 880 Holmes Lane |
| TN | 671 Bristol Street | TN | 671 Bristol Street |
| VA | 789 Madison Street | VA | 789 Madison Street |
| MD | 467 Hutchinson Court | MD | 467 Hutchinson Court |
+-------+----------------------+-----------+----------------------+


Example 3: Replace with IN clause and other commands
====================================================

The example shows using replace with other commands.

PPL query::

os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state;
fetched rows / total rows = 3/3
+-------+-----+-----------+
| state | age | new_state |
|-------+-----+-----------|
| IL | 32 | Illinois |
| TN | 36 | TN |
| MD | 33 | MD |
+-------+-----+-----------+

Example 4: Pattern matching with LIKE and replace
=================================================

Since the ``replace`` command only supports plain string literals, you can combine it with the ``LIKE`` function when you need pattern matching: filter the rows with ``LIKE`` first, then apply ``replace``.

PPL query::

os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address;
fetched rows / total rows = 1/1
+-----------------+-------+--------+-----+--------+-----------------+
| address | state | gender | age | city | new_address |
|-----------------+-------+--------+-----+--------+-----------------|
| 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane |
+-----------------+-------+--------+-----+--------+-----------------+

Note
====
* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged.
2 changes: 2 additions & 0 deletions docs/user/ppl/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ The query start with search command and then flowing a set of command delimited

- `trendline command <cmd/trendline.rst>`_

- `replace command <cmd/replace.rst>`_

- `where command <cmd/where.rst>`_

* **Functions**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
CalciteRegexCommandIT.class,
CalciteRexCommandIT.class,
CalciteRenameCommandIT.class,
CalciteReplaceCommandIT.class,
CalciteResourceMonitorIT.class,
CalciteSearchCommandIT.class,
CalciteSettingsIT.class,
Expand Down
Loading
Loading