diff --git a/core/build.gradle b/core/build.gradle index c583c9c646..95873bddaa 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -54,6 +54,7 @@ dependencies { api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" + api group: 'com.jayway.jsonpath', name: 'json-path', version: '2.9.0' api group: 'com.google.code.gson', name: 'gson', version: '2.8.9' api group: 'com.tdunning', name: 't-digest', version: '3.3' api project(':common') diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index cde00fcc92..f284820639 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -687,6 +687,10 @@ public static FunctionExpression jsonValid(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions); } + public static FunctionExpression jsonExtract(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.JSON_EXTRACT, expressions); + } + public static FunctionExpression stringToJson(Expression value) { return compile(FunctionProperties.None, BuiltinFunctionName.JSON, value); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 366321bed2..4fccb95ded 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -207,6 +207,7 @@ public enum BuiltinFunctionName { /** Json Functions. 
*/ JSON_VALID(FunctionName.of("json_valid")), JSON(FunctionName.of("json")), + JSON_EXTRACT(FunctionName.of("json_extract")), /** GEOSPATIAL Functions. */ GEOIP(FunctionName.of("geoip")), diff --git a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java index 75f134aa4e..a9aa499897 100644 --- a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java @@ -23,6 +23,7 @@ public class JsonFunctions { public void register(BuiltinFunctionRepository repository) { repository.register(jsonValid()); repository.register(jsonFunction()); + repository.register(jsonExtract()); } private DefaultFunctionResolver jsonValid() { @@ -35,4 +36,12 @@ private DefaultFunctionResolver jsonFunction() { BuiltinFunctionName.JSON.getName(), impl(nullMissingHandling(JsonUtils::castJson), UNDEFINED, STRING)); } + + private DefaultFunctionResolver jsonExtract() { + return define( + BuiltinFunctionName.JSON_EXTRACT.getName(), + impl(JsonUtils::extractJson, UNDEFINED, STRING, STRING), + impl(JsonUtils::extractJson, UNDEFINED, STRING, STRING, STRING), + impl(JsonUtils::extractJson, UNDEFINED, STRING, STRING, STRING, STRING)); + } } diff --git a/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java index f38fe59789..6c2da05369 100644 --- a/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java @@ -12,6 +12,11 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.jayway.jsonpath.InvalidJsonException; +import com.jayway.jsonpath.InvalidPathException; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.PathNotFoundException; +import java.util.ArrayList; 
import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -79,6 +84,66 @@ public static ExprValue castJson(ExprValue json) {
     return processJsonNode(jsonNode);
   }
 
+  /**
+   * Extracts the value(s) found at the given JSON path(s) of a JSON string.
+   *
+   * <p>A NULL or MISSING {@code json} is returned as-is without evaluating any path. With exactly
+   * one path the extracted value is returned directly; otherwise the per-path results are
+   * returned, in argument order, as a collection.
+   *
+   * @param json JSON string (e.g. "{\"hello\": \"world\"}").
+   * @param paths one or more JSON paths (e.g. "$.hello").
+   * @return {@code json} itself when it is NULL or MISSING, the single extracted value, or a
+   *     collection holding one result per path.
+   */
+  public static ExprValue extractJson(ExprValue json, ExprValue... paths) {
+    // The document is the same for every path, so validate and unwrap it once up front.
+    if (json.isNull() || json.isMissing()) {
+      return json;
+    }
+
+    String jsonString = json.stringValue();
+    List<ExprValue> resultList = new ArrayList<>(paths.length);
+    for (ExprValue path : paths) {
+      resultList.add(extractJsonPath(jsonString, path.stringValue()));
+    }
+
+    return resultList.size() == 1 ? resultList.getFirst() : new ExprCollectionValue(resultList);
+  }
+
+  /**
+   * Reads a single JSON path from a JSON string.
+   *
+   * @param json JSON string to query.
+   * @param path JSON path expression.
+   * @return the converted value at {@code path}, or NULL when {@code json} is empty or the
+   *     literal "null", or when the path is not found.
+   * @throws SemanticCheckException if the path or the JSON string is not valid.
+   */
+  private static ExprValue extractJsonPath(String json, String path) {
+    if (json.isEmpty() || json.equals("null")) {
+      return LITERAL_NULL;
+    }
+
+    try {
+      Object results = JsonPath.parse(json).read(path);
+      return ExprValueUtils.fromObjectValue(results);
+      // A path that matches nothing is not an error; it yields NULL.
+    } catch (PathNotFoundException ignored) {
+      return LITERAL_NULL;
+    } catch (InvalidPathException invalidPathException) {
+      final String errorFormat = "JSON path '%s' is not valid. Error details: %s";
+      throw new SemanticCheckException(
+          String.format(errorFormat, path, invalidPathException.getMessage()),
+          invalidPathException);
+    } catch (InvalidJsonException invalidJsonException) {
+      final String errorFormat = "JSON string '%s' is not valid. 
Error details: %s"; + throw new SemanticCheckException( + String.format(errorFormat, json, invalidJsonException.getMessage()), + invalidJsonException); + } + } + private static ExprValue processJsonNode(JsonNode jsonNode) { switch (jsonNode.getNodeType()) { case ARRAY: diff --git a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java index bba8475c11..2d03329c20 100644 --- a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java @@ -13,15 +13,18 @@ import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.TestInstantiationException; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.data.model.ExprBooleanValue; import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprDoubleValue; +import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprIntegerValue; import org.opensearch.sql.data.model.ExprLongValue; import org.opensearch.sql.data.model.ExprNullValue; @@ -32,6 +35,7 @@ import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.LiteralExpression; @@ -216,5 +220,188 @@ void json_returnsSemanticCheckException() { SemanticCheckException.class, () -> DSL.castJson(expr).valueOf(), "Expected to throw 
SemanticCheckException when calling castJson with " + expr)); + + // invalid type + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("invalid")).valueOf()); + + // missing bracket + assertThrows(SemanticCheckException.class, () -> DSL.castJson(DSL.literal("{{[}}")).valueOf()); + + // missing quote + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("\"missing quote")).valueOf()); + } + + @Test + void json_extract_search() { + ExprValue expected = new ExprIntegerValue(1); + assert_equals_extract_json(expected, "{\"a\":1}", "$.a"); + } + + @Test + void json_extract_search_arrays() { + String jsonArray = "{\"a\":[1,2.3,\"abc\",true,null,{\"c\":{\"d\":1}},[1,2,3]]}"; + List expectedExprValues = + List.of( + new ExprIntegerValue(1), + new ExprFloatValue(2.3), + new ExprStringValue("abc"), + LITERAL_TRUE, + LITERAL_NULL, + ExprTupleValue.fromExprValueMap( + Map.of("c", ExprTupleValue.fromExprValueMap(Map.of("d", new ExprIntegerValue(1))))), + new ExprCollectionValue( + List.of( + new ExprIntegerValue(1), new ExprIntegerValue(2), new ExprIntegerValue(3)))); + + // extract specific index from JSON list + for (int i = 0; i < expectedExprValues.size(); i++) { + String path = String.format("$.a[%d]", i); + assert_equals_extract_json(expectedExprValues.get(i), jsonArray, path); + } + + // extract nested object + ExprValue nestedExpected = + ExprTupleValue.fromExprValueMap(Map.of("d", new ExprIntegerValue(1))); + assert_equals_extract_json(nestedExpected, jsonArray, "$.a[5].c"); + + // extract * from JSON list + ExprValue starExpected = new ExprCollectionValue(expectedExprValues); + assert_equals_extract_json(starExpected, jsonArray, "$.a[*]"); + } + + @Test + void json_extract_returns_null() { + List jsonStrings = + List.of( + "{\"a\":\"1\",\"b\":\"2\"}", + "{\"a\":1,\"b\":{\"c\":2,\"d\":3}}", + "{\"arr1\": [1,2,3], \"arr2\": [4,5,6]}", + "[1, 2, 3, 4]", + "[{\"a\":1,\"b\":2}, {\"c\":3,\"d\":2}]", + "\"abc\"", + "1234", + 
"12.34", + "true", + "false", + ""); + + jsonStrings.forEach( + str -> assert_equals_extract_json(LITERAL_NULL, str, "$.a.path_not_found_key")); + + // null string literal + assert_equals_extract_json(LITERAL_NULL, "null", "$.a"); + + // null json + assertEquals( + LITERAL_NULL, DSL.jsonExtract(DSL.literal(LITERAL_NULL), DSL.literal("$.a")).valueOf()); + + // missing json + assertEquals( + LITERAL_MISSING, + DSL.jsonExtract(DSL.literal(LITERAL_MISSING), DSL.literal("$.a")).valueOf()); + + // array out of bounds + assert_equals_extract_json(LITERAL_NULL, "{\"a\":[1,2,3]}", "$.a[4]"); + } + + @Test + void json_extract_throws_SemanticCheckException() { + // invalid path + SemanticCheckException invalidPathError = + assertThrows( + SemanticCheckException.class, + () -> DSL.jsonExtract(DSL.literal("{\"a\":1}"), DSL.literal("$a")).valueOf()); + assertEquals( + "JSON path '$a' is not valid. Error details: Illegal character at position 1 expected" + + " '.' or '['", + invalidPathError.getMessage()); + + // invalid json + SemanticCheckException invalidJsonError = + assertThrows( + SemanticCheckException.class, + () -> + DSL.jsonExtract( + DSL.literal("{\"invalid\":\"json\", \"string\"}"), DSL.literal("$.a")) + .valueOf()); + assertTrue( + invalidJsonError + .getMessage() + .startsWith( + "JSON string '{\"invalid\":\"json\", \"string\"}' is not valid. 
Error" + + " details:")); + } + + @Test + void json_extract_throws_ExpressionEvaluationException() { + // null path + assert_throws_extract_json(ExpressionEvaluationException.class, "{\"a\":1}", LITERAL_NULL); + + // missing path + assert_throws_extract_json(ExpressionEvaluationException.class, "{\"a\":1}", LITERAL_MISSING); + } + + @Test + void json_extract_search_list_of_paths() { + final String objectJson = + "{\"foo\": \"foo\", \"fuzz\": true, \"bar\": 1234, \"bar2\": 12.34, \"baz\": null, " + + "\"obj\": {\"internal\": \"value\"}, \"arr\": [\"string\", true, null]}"; + + // scalar results with one invalid path + ExprValue expected_scalar_results = + new ExprCollectionValue( + List.of(new ExprStringValue("foo"), new ExprFloatValue(12.34), LITERAL_NULL)); + + assert_equals_extract_json(expected_scalar_results, objectJson, "$.foo", "$.bar2", "$.potato"); + + ExprValue expected_multivalued_results = + new ExprCollectionValue( + List.of( + new ExprCollectionValue( + List.of(new ExprStringValue("string"), LITERAL_TRUE, LITERAL_NULL)), + ExprTupleValue.fromExprValueMap(Map.of("internal", new ExprStringValue("value"))), + new ExprFloatValue(12.34))); + + // path returns array and struct + assert_equals_extract_json( + expected_multivalued_results, objectJson, "$.arr", "$.obj", "$.bar2"); + + // path returns multivalued result + assert_equals_extract_json( + expected_multivalued_results, objectJson, "$.arr[*]", "$.obj", "$.bar2"); + } + + private static void assert_equals_extract_json(ExprValue expected, Object json, Object... paths) { + ExprValue actual = execute_extract_json(json, paths); + assertEquals(expected, actual); + } + + private static void assert_throws_extract_json( + Class expectedError, Object json, Object... 
paths) { + assertThrows(expectedError, () -> execute_extract_json(json, paths)); + } + + private static ExprValue execute_extract_json(Object json, Object[] paths) { + Expression jsonExpr = object_to_expr(json); + List pathExpressions = + Arrays.stream(paths).map(JsonFunctionsTest::object_to_expr).toList(); + + return switch (paths.length) { + case 1 -> DSL.jsonExtract(jsonExpr, pathExpressions.getFirst()).valueOf(); + case 2 -> DSL.jsonExtract(jsonExpr, pathExpressions.getFirst(), pathExpressions.get(1)) + .valueOf(); + case 3 -> DSL.jsonExtract( + jsonExpr, pathExpressions.getFirst(), pathExpressions.get(1), pathExpressions.get(2)) + .valueOf(); + default -> throw new TestInstantiationException("Invalid number of paths provided."); + }; + } + + private static Expression object_to_expr(Object val) { + return (val instanceof String) + ? DSL.literal(ExprValueUtils.stringValue((String) val)) + : DSL.literal((ExprValue) val); } } diff --git a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst index 77d9d00f45..54cd190650 100644 --- a/docs/user/ppl/functions/json.rst +++ b/docs/user/ppl/functions/json.rst @@ -22,18 +22,19 @@ Return type: BOOLEAN Example:: - > source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid - fetched rows / total rows = 6/6 - +---------------------+---------------------------------+----------+ - | test_name | json_string | is_valid | - |---------------------|---------------------------------|----------| - | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | - | json object | {"a":"1","b":"2"} | True | - | json array | [1, 2, 3, 4] | True | - | json scalar string | "abc" | True | - | json empty string | | True | - | json invalid object | {"invalid":"json", "string"} | False | - +---------------------+---------------------------------+----------+ + os> source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid + fetched rows / total 
rows = 7/7 + +---------------------+--------------------------------------+----------+ + | test_name | json_string | is_valid | + |---------------------+--------------------------------------+----------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | True | + | json object | {"a":"1","b":"2"} | True | + | json array | [1, 2, 3, 4] | True | + | json scalar string | "abc" | True | + | json empty string | | True | + | json invalid object | {"invalid":"json", "string"} | False | + +---------------------+--------------------------------------+----------+ JSON ---------- @@ -49,14 +50,73 @@ Return type: BOOLEAN/DOUBLE/INTEGER/NULL/STRUCT/ARRAY Example:: - > source=json_test | where json_valid(json_string) | eval json=json(json_string) | fields test_name, json_string, json - fetched rows / total rows = 5/5 - +---------------------+---------------------------------+-------------------------+ - | test_name | json_string | json | - |---------------------|---------------------------------|-------------------------| - | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {a:"1",b:{c:"2",d:"3"}} | - | json object | {"a":"1","b":"2"} | {a:"1",b:"2"} | - | json array | [1, 2, 3, 4] | [1,2,3,4] | - | json scalar string | "abc" | "abc" | - | json empty string | | null | - +---------------------+---------------------------------+-------------------------+ + os> source=json_test | where json_valid(json_string) | eval json=json(json_string) | fields test_name, json_string, json + fetched rows / total rows = 6/6 + +--------------------+--------------------------------------+-------------------------------------------+ + | test_name | json_string | json | + |--------------------+--------------------------------------+-------------------------------------------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {'a': '1', 'b': {'c': '2', 'd': '3'}} | + | json nested list | 
{"a":"1","b":[{"c":"2"}, {"c":"3"}]} | {'a': '1', 'b': [{'c': '2'}, {'c': '3'}]} |
+    | json object        | {"a":"1","b":"2"}                    | {'a': '1', 'b': '2'}                      |
+    | json array         | [1, 2, 3, 4]                         | [1,2,3,4]                                 |
+    | json scalar string | "abc"                                | abc                                       |
+    | json empty string  |                                      | null                                      |
+    +--------------------+--------------------------------------+-------------------------------------------+
+
+JSON_EXTRACT
+------------
+
+Description
+>>>>>>>>>>>
+
+Usage: `json_extract(doc, path[, path[, path]])` Extracts a JSON value from a JSON document based on the path(s) specified.
+
+Argument type: STRING, STRING[, STRING[, STRING]]
+
+Return type: STRING/BOOLEAN/DOUBLE/INTEGER/NULL/STRUCT/ARRAY
+
+- Up to 3 paths can be provided. When more than one `path` is given, the results are returned in an ARRAY with one element per `path`, in argument order.
+- If only one `path` is provided, returns an ARRAY if the `path` matches multiple results (e.g. $.a[*]) or if the `path` points to an array.
+- Returns null if `path` is not found in `doc`, or if `doc` is NULL; a MISSING `doc` yields MISSING.
+- If multiple paths are provided and some of them are not found, returns an ARRAY in which the results for those paths are null.
+- Throws SemanticCheckException if `doc` or any `path` is malformed.
+- Throws ExpressionEvaluationException if any `path` is NULL or MISSING.
+ +Example:: + + os> source=json_test | where json_valid(json_string) | eval json_extract=json_extract(json_string, '$.b') | fields test_name, json_string, json_extract + fetched rows / total rows = 6/6 + +--------------------+--------------------------------------+-------------------------+ + | test_name | json_string | json_extract | + |--------------------+--------------------------------------+-------------------------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {'c': '2', 'd': '3'} | + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | [{'c': '2'},{'c': '3'}] | + | json object | {"a":"1","b":"2"} | 2 | + | json array | [1, 2, 3, 4] | null | + | json scalar string | "abc" | null | + | json empty string | | null | + +--------------------+--------------------------------------+-------------------------+ + + os> source=json_test | where test_name="json nested list" | eval json_extract=json_extract(json_string, '$.b[1].c') | fields test_name, json_string, json_extract + fetched rows / total rows = 1/1 + +------------------+--------------------------------------+--------------+ + | test_name | json_string | json_extract | + |------------------+--------------------------------------+--------------| + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | 3 | + +------------------+--------------------------------------+--------------+ + + os> source=json_test | where test_name="json nested list" | eval json_extract=json_extract(json_string, '$.b[*].c') | fields test_name, json_string, json_extract + fetched rows / total rows = 1/1 + +------------------+--------------------------------------+--------------+ + | test_name | json_string | json_extract | + |------------------+--------------------------------------+--------------| + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | [2,3] | + +------------------+--------------------------------------+--------------+ + + os> source=json_test | where test_name="json nested list" | eval 
json_extract=json_extract(json_string, '$.a', '$.b[*].c') | fields test_name, json_string, json_extract + fetched rows / total rows = 1/1 + +------------------+--------------------------------------+--------------+ + | test_name | json_string | json_extract | + |------------------+--------------------------------------+--------------| + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | [1,[2,3]] | + +------------------+--------------------------------------+--------------+ diff --git a/doctest/test_data/json_test.json b/doctest/test_data/json_test.json index 7494fc4aa9..63e7f15011 100644 --- a/doctest/test_data/json_test.json +++ b/doctest/test_data/json_test.json @@ -1,4 +1,5 @@ {"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} +{"test_name":"json nested list", "json_string":"{\"a\":\"1\",\"b\":[{\"c\":\"2\"}, {\"c\":\"3\"}]}"} {"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} {"test_name":"json array", "json_string":"[1, 2, 3, 4]"} {"test_name":"json scalar string", "json_string":"\"abc\""} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java index b6a2d5e4aa..157e1d5b02 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java @@ -17,6 +17,7 @@ import org.json.JSONArray; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.opensearch.client.ResponseException; public class JsonFunctionsIT extends PPLIntegTestCase { @Override @@ -46,7 +47,8 @@ public void test_json_valid() throws IOException { rows("json scalar double"), rows("json scalar boolean true"), rows("json scalar boolean false"), - rows("json empty string")); + rows("json empty string"), + rows("json nested list")); } @Test @@ -88,7 +90,10 @@ public void test_cast_json() throws IOException { rows("json scalar 
double", 2.99792458e8),
         rows("json scalar boolean true", true),
         rows("json scalar boolean false", false),
-        rows("json empty string", null));
+        rows("json empty string", null),
+        rows(
+            "json nested list",
+            new JSONObject(Map.of("a", "1", "b", List.of(Map.of("c", "2"), Map.of("c", "3"))))));
   }
 
   @Test
@@ -120,7 +125,20 @@ public void test_json() throws IOException {
         rows("json scalar double", 2.99792458e8),
         rows("json scalar boolean true", true),
         rows("json scalar boolean false", false),
-        rows("json empty string", null));
+        rows("json empty string", null),
+        rows(
+            "json nested list",
+            new JSONObject(Map.of("a", "1", "b", List.of(Map.of("c", "2"), Map.of("c", "3"))))));
+  }
+
+  @Test
+  public void test_json_throws_SemanticCheckException() throws IOException {
+    // Invalid json provided
+    assertQueryFailsWithSemanticCheckException(
+        String.format(
+            "source=%s | where not json_valid(json_string) | eval"
+                + " casted=json(json_string) | fields test_name, casted",
+            TEST_INDEX_JSON_TEST));
   }
 
   @Test
@@ -183,4 +201,104 @@ public void test_cast_json_scalar_to_type() throws IOException {
     verifyDataRows(
         result, rows("json scalar boolean true", true), rows("json scalar boolean false", false));
   }
+
+  @Test
+  public void test_json_extract() throws IOException {
+    JSONObject result;
+    result =
+        executeQuery(
+            String.format(
+                "source=%s | where json_valid(json_string) | eval"
+                    + " extracted=json_extract(json_string, '$.b') | fields test_name, extracted",
+                TEST_INDEX_JSON_TEST));
+    verifySchema(
+        result, schema("test_name", null, "string"), schema("extracted", null, "undefined"));
+    verifyDataRows(
+        result,
+        rows("json nested object", new JSONObject(Map.of("c", "3"))),
+        rows("json object", "2"),
+        rows("json array", null),
+        rows("json nested array", null),
+        rows("json scalar string", null),
+        rows("json scalar int", null),
+        rows("json scalar float", null),
+        rows("json scalar double", null),
+        rows("json scalar boolean true", null),
+        rows("json scalar boolean false", null),
+        rows("json empty string", null),
+        rows("json nested list", new JSONArray(List.of(Map.of("c", "2"), Map.of("c", "3")))));
+  }
+
+  @Test
+  public void test_json_extract_multiple_paths() throws IOException {
+    JSONObject resultTwoPaths =
+        executeQuery(
+            String.format(
+                "source=%s | where test_name = 'json nested list' | eval"
+                    + " extracted=json_extract(json_string, '$.a', '$.b') | fields test_name,"
+                    + " extracted",
+                TEST_INDEX_JSON_TEST));
+    verifySchema(
+        resultTwoPaths,
+        schema("test_name", null, "string"),
+        schema("extracted", null, "undefined"));
+    verifyDataRows(
+        resultTwoPaths,
+        rows(
+            "json nested list",
+            List.of("1", new JSONArray(List.of(Map.of("c", "2"), Map.of("c", "3"))))));
+
+    JSONObject resultThreePaths =
+        executeQuery(
+            String.format(
+                "source=%s | where test_name = 'json nested list' | eval"
+                    + " extracted=json_extract(json_string, '$.a', '$.b[0].c', '$.b[1].c') | fields"
+                    + " test_name, extracted",
+                TEST_INDEX_JSON_TEST));
+    verifySchema(
+        resultThreePaths,
+        schema("test_name", null, "string"),
+        schema("extracted", null, "undefined"));
+    verifyDataRows(resultThreePaths, rows("json nested list", List.of("1", "2", "3")));
+  }
+
+  @Test
+  public void test_json_extract_throws_SemanticCheckException() throws IOException {
+    // Invalid json provided
+    assertQueryFailsWithSemanticCheckException(
+        String.format(
+            "source=%s | where not json_valid(json_string) | eval"
+                + " extracted=json_extract(json_string, '$.a') | fields test_name, extracted",
+            TEST_INDEX_JSON_TEST));
+
+    // Invalid path provided
+    assertQueryFailsWithSemanticCheckException(
+        String.format(
+            "source=%s | where test_name = 'json nested list' | eval"
+                + " extracted=json_extract(json_string, '$a') | fields test_name, extracted",
+            TEST_INDEX_JSON_TEST));
+
+    // Invalid path with multiple paths provided
+    assertQueryFailsWithSemanticCheckException(
+        String.format(
+            "source=%s | where test_name = 'json nested list' | eval"
+                + " extracted=json_extract(json_string, '$a', '$.b') | fields test_name,"
+                + " extracted",
+            TEST_INDEX_JSON_TEST));
+  }
+
+  /**
+   * Executes the query and asserts that it is rejected with a {@link ResponseException} whose
+   * message mentions SemanticCheckException; fails if the query unexpectedly succeeds.
+   */
+  private void assertQueryFailsWithSemanticCheckException(String query) throws IOException {
+    ResponseException thrown = null;
+    try {
+      executeQuery(query);
+    } catch (ResponseException e) {
+      thrown = e;
+    }
+    // A query that succeeds leaves 'thrown' null, which must fail the test instead of passing.
+    assertTrue(thrown != null && thrown.getMessage().contains("SemanticCheckException"));
+  }
 }
diff --git a/integ-test/src/test/resources/json_test.json b/integ-test/src/test/resources/json_test.json
index 6fd9211229..9d6984720e 100644
--- a/integ-test/src/test/resources/json_test.json
+++ b/integ-test/src/test/resources/json_test.json
@@ -24,3 +24,5 @@
 {"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"}
 {"index":{"_id":"12"}}
 {"test_name":"json null", "json_string":null}
+{"index":{"_id":"13"}}
+{"test_name":"json nested list", "json_string":"{\"a\":\"1\",\"b\":[{\"c\":\"2\"}, {\"c\":\"3\"}]}"}
diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
index 0307fb4ca1..a338e4fa3b 100644
--- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
@@ -334,6 +334,7 @@ CIDRMATCH: 'CIDRMATCH';
 // JSON FUNCTIONS
 JSON_VALID: 'JSON_VALID';
 JSON: 'JSON';
+JSON_EXTRACT: 'JSON_EXTRACT';
 
 // FLOWCONTROL FUNCTIONS
 IFNULL: 'IFNULL';
diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4
index 451edeb29b..54a5e7e57c 100644
--- a/ppl/src/main/antlr/OpenSearchPPLParser.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4
@@ -709,6 +709,7 @@ positionFunctionName
 
 jsonFunctionName
    : JSON
+   | JSON_EXTRACT
    ;
 
 // operators