diff --git a/compiler/test/stdlib/json.test.gr b/compiler/test/stdlib/json.test.gr new file mode 100644 index 000000000..9dd0dbf77 --- /dev/null +++ b/compiler/test/stdlib/json.test.gr @@ -0,0 +1,1101 @@ +/* +Copyright (c) 2016 Maciej Hirsz + +The MIT License (MIT) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +A Number of the tests in this file are taken from +https://github.com/maciejhirsz/json-rust/blob/master/tests/number.rs + +A few tests are taken from +http://www.json.org/JSON_checker/ + +*/ +module JsonTest + +from "json" include Json +from "result" include Result +from "buffer" include Buffer +from "char" include Char +from "list" include List +use Json.* +module Validation { + // Valid + assert Result.isOk( + parse( + "[\r\n \"JSON Test Pattern pass1\",\r\n {\"object with 1 member\":[\"array with 1 element\"]},\r\n {},\r\n [],\r\n -42,\r\n true,\r\n false,\r\n null,\r\n {\r\n \"integer\": 1234567890,\r\n \"real\": -9876.543210,\r\n \"e\": 0.123456789e-12,\r\n \"E\": 1.234567890E+34,\r\n \"\": 23456789012E66,\r\n \"zero\": 0,\r\n \"one\": 1,\r\n \"space\": \" \",\r\n \"quote\": \"\\\"\",\r\n \"backslash\": \"\\\\\",\r\n \"controls\": \"\\b\\f\\n\\r\\t\",\r\n \"slash\": \"\/ & \\\/\",\r\n \"alpha\": \"abcdefghijklmnopqrstuvwyz\",\r\n \"ALPHA\": \"ABCDEFGHIJKLMNOPQRSTUVWYZ\",\r\n \"digit\": \"0123456789\",\r\n \"0123456789\": \"digit\",\r\n \"special\": \"`1~!@#$%^&*()_+-={':[,]}|;.<\/>?\",\r\n \"hex\": \"\\u0123\\u4567\\u89AB\\uCDEF\\uabcd\\uef4A\",\r\n \"true\": true,\r\n \"false\": false,\r\n \"null\": null,\r\n \"array\":[ ],\r\n \"object\":{ },\r\n \"address\": \"50 St. 
James Street\",\r\n \"url\": \"http:\/\/www.JSON.org\/\",\r\n \"comment\": \"\/\/ \/* *\/\": \" \",\r\n \" s p a c e d \" :[1,2 , 3\r\n\r\n,\r\n\r\n4 , 5 , 6 ,7 ],\"compact\":[1,2,3,4,5,6,7],\r\n \"jsontext\": \"{\\\"object with 1 member\\\":[\\\"array with 1 element\\\"]}\",\r\n \"quotes\": \"" \\u0022 %22 0x22 034 "\",\r\n \"\\\/\\\\\\\"\\uCAFE\\uBABE\\uAB98\\uFCDE\\ubcda\\uef4A\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:',.\/<>?\"\r\n: \"A key can be any string\"\r\n },\r\n 0.5 ,98.6\r\n,\r\n99.44\r\n,\r\n\r\n1066,\r\n1e1,\r\n0.1e1,\r\n1e-1,\r\n1e00,2e+00,2e-00\r\n,\"rosebud\"]" + ) + ) + assert Result.isOk( + parse( + "{\r\n \"Image\": {\r\n \"Width\": 800,\r\n \"Height\": 600,\r\n \"Title\": \"View from 15th Floor\",\r\n \"Thumbnail\": {\r\n \"Url\": \"http:\/\/www.example.com\/image\/481989943\",\r\n \"Height\": 125,\r\n \"Width\": 100\r\n },\r\n \"Animated\" : false,\r\n \"IDs\": [116, 943, 234, 38793]\r\n }\r\n }" + ) + ) + assert Result.isOk( + parse( + "{ + \"JSON Test Pattern pass3\": { + \"The outermost value\": \"must be an object or array.\", + \"In this test\": \"It is an object.\" + } + }" + ) + ) + let depth = 256 + let text = Buffer.make(2052) + for (let mut i = 0; i < depth; i += 1) { + Buffer.addString("[{\"a\":", text) + } + Buffer.addString("null", text) + for (let mut i = 0; i < depth; i += 1) { + Buffer.addString("}]", text) + } + assert Result.isOk(parse(Buffer.toString(text))) + let text = Buffer.make(101) + Buffer.addString("8", text) + for (let mut i = 0; i < 100; i += 1) { + Buffer.addString("0", text) + } + assert Result.isOk(parse(Buffer.toString(text))) + // Invalid + for (let mut i = 0x0000; i <= 0x0001F; i += 1) { + assert Result.isErr(parse("\"" ++ Char.toString(Char.fromCode(i)) ++ "\"")) + } + assert Result.isErr(parse("[\"Unclosed array\"")) + assert Result.isErr(parse("{unquoted_key: \"keys must be quoted\"}")) + assert Result.isErr(parse("[\"extra comma\",]")) + assert Result.isErr(parse("[\"double extra comma\",,]")) + assert 
Result.isErr(parse("[ , \"<-- missing value\"]")) + assert Result.isErr(parse("[\"Comma after the close\"],")) + assert Result.isErr(parse("[\"Extra close\"]]")) + assert Result.isErr(parse("{\"Extra comma\": true,}")) + assert Result.isErr( + parse("{\"Extra value after close\": true} \"misplaced quoted value\"") + ) + assert Result.isErr(parse("{\"Illegal expression\": 1 + 2}")) + assert Result.isErr(parse("{\"Illegal invocation\": alert()}")) + assert Result.isErr(parse("{\"Numbers cannot have leading zeroes\": 013}")) + assert Result.isErr(parse("{\"Numbers cannot be hex\": 0x14}")) + assert Result.isErr(parse("[\"Illegal backslash escape: \\x15\"]")) + assert Result.isErr(parse("[\\naked]")) + assert Result.isErr(parse("[\"Illegal backslash escape: \\017\"]")) + assert Result.isErr(parse("{\"Missing colon\" null}")) + assert Result.isErr(parse("{\"Double colon\":: null}")) + assert Result.isErr(parse("{\"Comma instead of colon\", null}")) + assert Result.isErr(parse("[\"Colon instead of comma\": false]")) + assert Result.isErr(parse("[\"Bad value\", truth]")) + assert Result.isErr(parse("['single quote']")) + assert Result.isErr(parse("[\" tab character in string \"]")) + assert Result.isErr(parse("[\"tab\\ character\\ in\\ string\\ \"]")) + assert Result.isErr(parse("[\"line\nbreak\"]")) + assert Result.isErr(parse("[\"line\\\nbreak\"]")) + assert Result.isErr(parse("[0e]")) + assert Result.isErr(parse("[0e+]")) + assert Result.isErr(parse("[0e+-1]")) + assert Result.isErr(parse("{\"Comma instead if closing brace\": true,")) + assert Result.isErr(parse("[\"mismatch\"}")) + assert Result.isErr(parse("1.")) + assert Result.isErr(parse(".05")) + assert Result.isErr(parse("-01")) + assert Result.isErr(parse("01")) + assert Result.isErr(parse("0e")) + assert Result.isErr(parse("0e-")) + assert Result.isErr(parse("0e+")) + assert Result.isErr(parse("[,]")) + assert Result.isErr(parse("[1,]")) + assert Result.isErr(parse("[,1]")) + assert 
Result.isErr(parse("[,1,]")) + assert Result.isErr(parse("\"\\uD834 \\uDD1E\"")) + assert Result.isErr(parse("{}error")) + assert Result.isErr(parse("[]error")) + assert Result.isErr(parse("]error")) + assert Result.isErr(parse("]")) + assert Result.isErr(parse("}")) + assert Result.isErr(parse("{")) + assert Result.isErr(parse("\"k\":\"v\"")) + assert Result.isErr(parse("[")) + assert Result.isErr(parse("{\"k")) + assert Result.isErr(parse("{\"k\":")) + assert Result.isErr(parse("{\"k\":\"v")) + assert Result.isErr(parse("{\"k\":\"v\"")) + // UTF-16 surrogate pairs should be the high half followed by the low half. + + // Two high surrogates. + assert match (parse("\"\\uD801\\uD801\"")) { + Err(InvalidUTF16SurrogatePair(_)) => true, + _ => false, + } + + // Inverted low and high surrogate order. + assert match (parse("\"\\uDC37\\uD801\"")) { + Err(InvalidUTF16SurrogatePair(_)) => true, + _ => false, + } + + // Single low surrogate + assert match (parse("\"\\uDC37\"")) { + Err(InvalidUTF16SurrogatePair(_)) => true, + _ => false, + } + + // Single high surrogate + assert match (parse("\"\\uD801\"")) { + Err(UnexpectedToken(_)) => true, + _ => false, + } + + // High surrogate + non surrogate code point + assert match (parse("\"\\uD801\\u0524\"")) { + Err(InvalidUTF16SurrogatePair(_)) => true, + _ => false, + } + + // Low surrogate + non surrogate code point + assert match (parse("\"\\uDC37\\u0524\"")) { + Err(InvalidUTF16SurrogatePair(_)) => true, + _ => false, + } +} +module Parse { + // Constants + assert parse("true") == Ok(JsonBoolean(true)) + assert parse("false") == Ok(JsonBoolean(false)) + assert parse("null") == Ok(JsonNull) + // Numbers + assert parse("0") == Ok(JsonNumber(0)) + assert match (parse("-0")) { + Ok(JsonNumber(n)) => 1.0 / n == -Infinity, + _ => false, + } + assert match (parse("-0.0")) { + Ok(JsonNumber(n)) => 1.0 / n == -Infinity, + _ => false, + } + assert parse("3.141592653589793") == Ok(JsonNumber(3.141592653589793)) + assert parse("0.05") 
== Ok(JsonNumber(0.05)) + // These tests do not check accuracy: the literals below exceed the precision or range of an f64, so they only check that parsing succeeds and yields the same value as the equivalent Grain literal + assert parse("2.22507385850720113605740979670913197593481954635164564e-308") == + Ok(JsonNumber(2.22507385850720113605740979670913197593481954635164564e-308)) + assert parse("1e999999999999999999999999999999999999999999999999999999999999") == + Ok( + JsonNumber(1e999999999999999999999999999999999999999999999999999999999999), + ) + assert parse("42") == Ok(JsonNumber(42)) + assert parse("-42") == Ok(JsonNumber(-42)) + assert parse("5e2") == Ok(JsonNumber(5e2)) + assert parse("5E2") == Ok(JsonNumber(5e2)) + assert parse("5e+2") == Ok(JsonNumber(5e+2)) + assert parse("5E+2") == Ok(JsonNumber(5e+2)) + assert parse("5e-2") == Ok(JsonNumber(5e-2)) + assert parse("5E-2") == Ok(JsonNumber(5e-2)) + assert parse("18446744073709551616") == Ok(JsonNumber(18446744073709551616)) + assert parse("18446744073709551616") == Ok(JsonNumber(18446744073709551616)) + assert parse("1152921504606846976") == Ok(JsonNumber(1152921504606846976)) + assert parse("-10") == Ok(JsonNumber(-10)) + assert parse("-2") == Ok(JsonNumber(-2)) + assert parse("-1") == Ok(JsonNumber(-1)) + assert parse("1") == Ok(JsonNumber(1)) + assert parse("2") == Ok(JsonNumber(2)) + assert parse("10") == Ok(JsonNumber(10)) + assert parse("100") == Ok(JsonNumber(100)) + assert parse("1000") == Ok(JsonNumber(1000)) + assert parse("0.0") == Ok(JsonNumber(0.0)) + assert parse("0.1") == Ok(JsonNumber(0.1)) + assert parse("0.123") == Ok(JsonNumber(0.123)) + assert parse("0.9") == Ok(JsonNumber(0.9)) + assert parse("1.123") == Ok(JsonNumber(1.123)) + assert parse("0e0") == Ok(JsonNumber(0.0)) + assert parse("1e0") == Ok(JsonNumber(1.0)) + assert parse("1e1") == Ok(JsonNumber(10.0)) + assert parse("1E1") == Ok(JsonNumber(10.0)) + assert parse("1e2") == Ok(JsonNumber(100.0)) + assert parse("1e3") == Ok(JsonNumber(1000.0)) + assert parse("-1e2") 
== Ok(JsonNumber(-100.0)) + assert parse("1e-1") == Ok(JsonNumber(0.1)) + assert parse("1.23e-4") == Ok(JsonNumber(0.000123)) + assert parse("1E200") == Ok(JsonNumber(1E200)) + assert parse("1E-200") == Ok(JsonNumber(1E-200)) + assert parse("85070591730234615884290395931651604481") == + Ok(JsonNumber(85070591730234615884290395931651604481)) + assert parse("1.797693134862315708145274237317043567981e+308") == + Ok(JsonNumber(1.797693134862315708145274237317043567981e+308)) + assert parse("1.121333") == Ok(JsonNumber(1.121333)) + assert parse("1.001") == Ok(JsonNumber(1.001)) + // Strings + assert parse("\"\"") == Ok(JsonString("")) + assert parse("\"\\r\\n\\t\\b\\f\\\\\\/\\\"\"") == + Ok(JsonString("\r\n\t\u{8}\u{c}\\/\"")) + assert parse("\"\\u2764\\ufe0f\"") == Ok(JsonString("❀️")) + assert parse("\"\\uD834\\uDD1E\"") == Ok(JsonString("π„ž")) + assert parse("\"ASCII Hello world!\"") == Ok(JsonString("ASCII Hello world!")) + assert parse("\"Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!\"") == + Ok(JsonString("Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!")) + assert parse("\"A \\\"quoted\\\" string\"") == + Ok(JsonString("A \"quoted\" string")) + assert parse("\"\\uD801\\uDC37\"") == Ok(JsonString("𐐷")) + assert parse("\"🀘🏻\"") == Ok(JsonString("🀘🏻")) + assert parse("\"🀘🏻\"") == Ok(JsonString("🀘🏻")) + assert parse("\"🏴󠁧󠁒󠁷󠁬󠁳󠁿\"") == Ok(JsonString("🏴󠁧󠁒󠁷󠁬󠁳󠁿")) + assert parse( + "\"\\u6000 \\ud800\\udc82 \\ud83e\\udd18\\ud83c\\udffb \\ud83c\\udff4\\udb40\\udc67\\udb40\\udc62\\udb40\\udc77\\udb40\\udc6c\\udb40\\udc73\\udb40\\udc7f\"" + ) == + Ok(JsonString("ζ€€ 𐂂 🀘🏻 🏴󠁧󠁒󠁷󠁬󠁳󠁿")) + // Array + assert parse("[]") == Ok(JsonArray([])) + assert parse("[[]]") == Ok(JsonArray([JsonArray([])])) + assert parse("[ ]") == Ok(JsonArray([])) + assert parse("[1]") == Ok(JsonArray([JsonNumber(1)])) + assert parse("[10, \"foo\", true, null]") == + Ok( + JsonArray( + [JsonNumber(10), JsonString("foo"), JsonBoolean(true), JsonNull], + ), + ) + // Object + assert parse("{}") == Ok(JsonObject([])) + assert parse(" + { + 
\"foo\": \"bar\", + \"num\": 10 + } + ") == + Ok(JsonObject([("foo", JsonString("bar")), ("num", JsonNumber(10))])) + // Note: Grain-json does not worry about duplicate keys + assert parse( + " + { + \"foo\": \"bar\", + \"num\": 10, + \"foo\": 1 + } + " + ) == + Ok( + JsonObject( + [ + ("foo", JsonString("bar")), + ("num", JsonNumber(10)), + ("foo", JsonNumber(1)), + ], + ), + ) + // Nesting + assert parse("{\"foo\": [1, 2, 3]}") == + Ok( + JsonObject( + [("foo", JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)]))], + ), + ) + assert parse( + " + { + \"l10n\": [ { + \"product\": { + \"inStock\": { + \"DE\": \"Lieferung innerhalb von 1-3 Werktagen\" + } + } + } ] + }" + ) == + Ok( + JsonObject( + [ + ( + "l10n", + JsonArray( + [ + JsonObject( + [ + ( + "product", + JsonObject( + [ + ( + "inStock", + JsonObject( + [ + ( + "DE", + JsonString( + "Lieferung innerhalb von 1-3 Werktagen", + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ], + ), + ), + ], + ), + ) + assert parse("{ \"pi\": 3.14 }") == Ok(JsonObject([("pi", JsonNumber(3.14))])) + assert parse("[100, 200, false, null, \"foo\"]") == + Ok( + JsonArray( + [ + JsonNumber(100), + JsonNumber(200), + JsonBoolean(false), + JsonNull, + JsonString("foo"), + ], + ), + ) + assert parse("{ \"Hello\" : \"World!\" }") == + Ok(JsonObject([("Hello", JsonString("World!"))])) + + assert parse("{\"a\":\"A\",\"b\":\"B\"}") == + Ok(JsonObject([("a", JsonString("A")), ("b", JsonString("B"))])) + + assert parse( + "{ \"a\" : { \"0\": false, \"1\":true }, \"b\" : [\"A\",\"B\",\"C\"] }" + ) == + Ok( + JsonObject( + [ + ( + "a", + JsonObject([("0", JsonBoolean(false)), ("1", JsonBoolean(true))]), + ), + ("b", JsonArray([JsonString("A"), JsonString("B"), JsonString("C")])), + ], + ), + ) + + assert parse("[1,\"2\",true,false,null]") == + Ok( + JsonArray( + [ + JsonNumber(1), + JsonString("2"), + JsonBoolean(true), + JsonBoolean(false), + JsonNull, + ], + ), + ) + + assert parse("[[[[[[[[[[]]]]]]]]]]") == + Ok( + JsonArray( + [ 
+ JsonArray( + [ + JsonArray( + [ + JsonArray( + [ + JsonArray( + [ + JsonArray( + [ + JsonArray( + [JsonArray([JsonArray([JsonArray([])])])], + ), + ], + ), + ], + ), + ], + ), + ], + ), + ], + ), + ], + ), + ) + + assert parse( + "{\"1\":{\"2\":{\"3\":{\"4\":{\"5\":{\"6\":{\"7\":{\"8\":{\"9\":{\"10\":{}}}}}}}}}}}" + ) == + Ok( + JsonObject( + [ + ( + "1", + JsonObject( + [ + ( + "2", + JsonObject( + [ + ( + "3", + JsonObject( + [ + ( + "4", + JsonObject( + [ + ( + "5", + JsonObject( + [ + ( + "6", + JsonObject( + [ + ( + "7", + JsonObject( + [ + ( + "8", + JsonObject( + [ + ( + "9", + JsonObject( + [ + ( + "10", + JsonObject([]), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ) + + assert parse("[1,2,3]") == + Ok(JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)])) + assert parse("[\"a\",\"b\",\"c\"]") == + Ok(JsonArray([JsonString("a"), JsonString("b"), JsonString("c")])) + // White spaces + assert parse("\tnull \r\n") == Ok(JsonNull) + assert parse(" {\r}") == Ok(JsonObject([])) + assert parse("[1,\n2\n,3\n]\n") == + Ok(JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)])) + assert parse("{ \"a\" : \"A\" , \"b\" : \"B\" }") == + Ok(JsonObject([("a", JsonString("A")), ("b", JsonString("B"))])) +} + +module ToString { + // Emitting + + // Simple values of each type + assert toString(JsonNull, format=Compact) == Ok("null") + assert toString(JsonBoolean(true), format=Compact) == Ok("true") + assert toString(JsonBoolean(false), format=Compact) == Ok("false") + assert toString(JsonString(""), format=Compact) == Ok("\"\"") + assert toString(JsonNumber(0), format=Compact) == Ok("0") + assert toString(JsonArray([]), format=Compact) == Ok("[]") + assert toString(JsonObject([]), format=Compact) == Ok("{}") + + assert toString(JsonNull) == toString(JsonNull, format=Compact) + assert toString(JsonBoolean(true)) == + toString(JsonBoolean(true), format=Compact) + 
assert toString(JsonBoolean(false)) == + toString(JsonBoolean(false), format=Compact) + assert toString(JsonString("")) == toString(JsonString(""), format=Compact) + assert toString(JsonNumber(0)) == toString(JsonNumber(0), format=Compact) + assert toString(JsonArray([])) == toString(JsonArray([]), format=Compact) + assert toString(JsonObject([])) == toString(JsonObject([]), format=Compact) + + assert toString(JsonNull, format=Pretty) == Ok("null\n") + assert toString(JsonBoolean(true), format=Pretty) == Ok("true\n") + assert toString(JsonBoolean(false), format=Pretty) == Ok("false\n") + assert toString(JsonString(""), format=Pretty) == Ok("\"\"\n") + assert toString(JsonNumber(0), format=Pretty) == Ok("0\n") + assert toString(JsonArray([]), format=Pretty) == Ok("[]\n") + assert toString(JsonObject([]), format=Pretty) == Ok("{}\n") + + // Various strings. Escapes, emojis etc. + assert toString(JsonString("ASCII Hello world!")) == + Ok("\"ASCII Hello world!\"") + + assert toString(JsonString("Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!")) == + Ok("\"Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!\"") + assert toString(JsonString("A \"quoted\" string")) == + Ok("\"A \\\"quoted\\\" string\"") + assert toString(JsonString("🀘🏻")) == Ok("\"🀘🏻\"") + assert toString(JsonString("🀘🏻")) == Ok("\"🀘🏻\"") + assert toString(JsonString("🏴󠁧󠁒󠁷󠁬󠁳󠁿")) == Ok("\"🏴󠁧󠁒󠁷󠁬󠁳󠁿\"") + assert toString(JsonString("ζ€€ 𐂂 🀘🏻 🏴󠁧󠁒󠁷󠁬󠁳󠁿")) == Ok("\"ζ€€ 𐂂 🀘🏻 🏴󠁧󠁒󠁷󠁬󠁳󠁿\"") + assert toString(JsonString("ASCII Hello world!")) == + Ok("\"ASCII Hello world!\"") + + assert toString(JsonString("Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!")) == + Ok("\"Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!\"") + assert toString(JsonString("A \"quoted\" string")) == + Ok("\"A \\\"quoted\\\" string\"") + assert toString(JsonString("🀘🏻")) == Ok("\"🀘🏻\"") + assert toString(JsonString("🀘🏻")) == Ok("\"🀘🏻\"") + assert toString(JsonString("🏴󠁧󠁒󠁷󠁬󠁳󠁿")) == Ok("\"🏴󠁧󠁒󠁷󠁬󠁳󠁿\"") + assert toString(JsonString("ζ€€ 𐂂 🀘🏻 🏴󠁧󠁒󠁷󠁬󠁳󠁿")) == Ok("\"ζ€€ 𐂂 🀘🏻 🏴󠁧󠁒󠁷󠁬󠁳󠁿\"") + assert toString(JsonString("ASCII 
Hello world!"), format=Pretty) == + Ok("\"ASCII Hello world!\"\n") + + assert toString(JsonString("Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!"), format=Pretty) == + Ok("\"Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!\"\n") + assert toString(JsonString("A \"quoted\" string"), format=Pretty) == + Ok("\"A \\\"quoted\\\" string\"\n") + assert toString(JsonString("🀘🏻"), format=Pretty) == Ok("\"🀘🏻\"\n") + assert toString(JsonString("🀘🏻"), format=Pretty) == Ok("\"🀘🏻\"\n") + assert toString(JsonString("🏴󠁧󠁒󠁷󠁬󠁳󠁿"), format=Pretty) == Ok("\"🏴󠁧󠁒󠁷󠁬󠁳󠁿\"\n") + assert toString(JsonString("ζ€€ 𐂂 🀘🏻 🏴󠁧󠁒󠁷󠁬󠁳󠁿"), format=Pretty) == + Ok("\"ζ€€ 𐂂 🀘🏻 🏴󠁧󠁒󠁷󠁬󠁳󠁿\"\n") + assert toString(JsonString("ASCII Hello world!"), format=PrettyAndSafe) == + Ok("\"ASCII Hello world!\"\n") + assert toString( + JsonString("Unicode γ“γ‚“γ«γ‘γ―δΈ–η•Œ!"), + format=PrettyAndSafe + ) == + Ok("\"Unicode \\u3053\\u3093\\u306b\\u3061\\u306f\\u4e16\\u754c!\"\n") + assert toString(JsonString("A \"quoted\" string"), format=PrettyAndSafe) == + Ok("\"A \\\"quoted\\\" string\"\n") + assert toString(JsonString("🀘🏻"), format=PrettyAndSafe) == + Ok("\"\\ud83e\\udd18\\ud83c\\udffb\"\n") + assert toString(JsonString("🏴󠁧󠁒󠁷󠁬󠁳󠁿"), format=PrettyAndSafe) == + Ok( + "\"\\ud83c\\udff4\\udb40\\udc67\\udb40\\udc62\\udb40\\udc77\\udb40\\udc6c\\udb40\\udc73\\udb40\\udc7f\"\n", + ) + + // Note that number tests are sensitive both to compiler's interpretation of + // the constants as different number tags (simple numbers, float32, float64) + // and internal details of JSON formatting. We definitely want them to fail + // when the latter changes. The former shouldn't be an issue assuming here + // constants with decimal point or exponentials are float64. 
+ assert toString(JsonNumber(-10)) == Ok("-10") + assert toString(JsonNumber(-2)) == Ok("-2") + assert toString(JsonNumber(-1)) == Ok("-1") + assert toString(JsonNumber(1)) == Ok("1") + assert toString(JsonNumber(2)) == Ok("2") + assert toString(JsonNumber(10)) == Ok("10") + assert toString(JsonNumber(100)) == Ok("100") + assert toString(JsonNumber(1000)) == Ok("1000") + assert toString(JsonNumber(0.0)) == Ok("0.0") + assert toString(JsonNumber(0.1)) == Ok("0.1") + assert toString(JsonNumber(0.123)) == Ok("0.123") + assert toString(JsonNumber(0.9)) == Ok("0.9") + assert toString(JsonNumber(1.123)) == Ok("1.123") + assert toString(JsonNumber(0e0)) == Ok("0.0") + assert toString(JsonNumber(1e0)) == Ok("1.0") + assert toString(JsonNumber(1e1)) == Ok("10.0") + assert toString(JsonNumber(1E1)) == Ok("10.0") + assert toString(JsonNumber(1e2)) == Ok("100.0") + assert toString(JsonNumber(1e3)) == Ok("1000.0") + assert toString(JsonNumber(-1e2)) == Ok("-100.0") + assert toString(JsonNumber(1e-1)) == Ok("0.1") + assert toString(JsonNumber(1.23e-4)) == Ok("0.000123") + + // Rationals + assert toString(JsonNumber(1/3)) == Ok("0.3333333333333333") + assert toString(JsonNumber(2/3)) == Ok("0.6666666666666666") + assert toString(JsonNumber(3/3)) == Ok("1") + + // Big numbers + assert toString(JsonNumber(1152921504606846976)) == Ok("1152921504606846976") + assert toString(JsonNumber(1152921504606847000.0)) == + Ok("1152921504606847000.0") + + // Invalid numbers + assert match (toString(JsonNumber(NaN))) { + Err(InvalidNumber(_)) => true, + _ => false, + } + + assert match (toString(JsonNumber(Infinity))) { + Err(InvalidNumber(_)) => true, + _ => false, + } + + assert match (toString(JsonNumber(-Infinity))) { + Err(InvalidNumber(_)) => true, + _ => false, + } + + // Pretty printing + + let simplePrimitives = [ + JsonNull, + JsonBoolean(true), + JsonBoolean(false), + JsonNumber(2), + JsonString("abc"), + ] + + let comprehensiveNestingCombinations = [ + JsonArray([]), + 
JsonArray([JsonNumber(1)]), + JsonArray([JsonNumber(1), JsonNumber(2)]), + JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)]), + JsonArray( + [ + JsonArray([]), + JsonArray([JsonNumber(1)]), + JsonArray([JsonNumber(1), JsonNumber(2)]), + JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)]), + ], + ), + JsonArray([JsonArray([JsonArray([])])]), + JsonObject([]), + JsonObject([("a", JsonString("A"))]), + JsonObject([("a", JsonString("A")), ("b", JsonString("B"))]), + JsonObject( + [("a", JsonString("A")), ("b", JsonString("B")), ("c", JsonString("C"))], + ), + JsonObject( + [("a", JsonObject([("b", JsonObject([("c", JsonObject([]))]))]))], + ), + JsonObject( + [ + ( + "arrays", + JsonArray( + [ + JsonArray([]), + JsonArray([JsonNumber(1)]), + JsonArray([JsonNumber(1), JsonNumber(2)]), + JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)]), + ], + ), + ), + ( + "objects", + JsonObject( + [ + ("a", JsonString("A")), + ("b", JsonString("B")), + ("c", JsonString("C")), + ], + ), + ), + ], + ), + ] + + let comprehensiveJsonObject = JsonObject( + [ + ("primitives", JsonArray(simplePrimitives)), + ("nesting", JsonArray(comprehensiveNestingCombinations)), + ], + ) + + // Formatting - Indentation + assert toString( + JsonArray([JsonNumber(1)]), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\n 1\n]") + assert toString( + JsonArray([JsonArray([JsonNumber(1)])]), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\n [\n 1\n ]\n]") + assert toString( + 
JsonArray([JsonArray([JsonNumber(1)])]), + format=Custom{ + indentation: IndentWithSpaces(0), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\n[\n1\n]\n]") + + // Formatting - line endings + assert toString( + JsonArray([JsonNumber(1)]), + format=Custom{ + indentation: IndentWithSpaces(0), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\n1\n]") + assert toString( + JsonArray([JsonNumber(1)]), + format=Custom{ + indentation: IndentWithSpaces(0), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: NoLineEnding, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[1]") + assert toString( + JsonArray([JsonNumber(1)]), + format=Custom{ + indentation: IndentWithSpaces(0), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: CarriageReturnLineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\r\n1\r\n]") + assert toString( + JsonArray([JsonNumber(1)]), + format=Custom{ + indentation: IndentWithSpaces(0), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: CarriageReturn, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\r1\r]") + + // Formatting - finish with new line + assert toString( + JsonArray([JsonNumber(1)]), + format=Custom{ + indentation: IndentWithSpaces(0), + arrayFormat: OneArrayEntryPerLine, + objectFormat: 
CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\n1\n]") + assert toString( + JsonArray([JsonNumber(1)]), + format=Custom{ + indentation: IndentWithSpaces(0), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: true, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\n1\n]\n") + + // Formatting - array format + assert toString( + JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)]), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: CompactArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[1,2,3]") + assert toString( + JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)]), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: SpacedArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[1, 2, 3]") + assert toString( + JsonArray([JsonNumber(1), JsonNumber(2), JsonNumber(3)]), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("[\n 1,\n 2,\n 3\n]") + + // Formatting - object format + assert toString( + JsonObject( + [("one", JsonNumber(1)), ("two", JsonNumber(2)), ("three", JsonNumber(3))], + ), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: CompactArrayEntries, + objectFormat: 
CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("{\"one\":1,\"two\":2,\"three\":3}") + assert toString( + JsonObject( + [("one", JsonNumber(1)), ("two", JsonNumber(2)), ("three", JsonNumber(3))], + ), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: CompactArrayEntries, + objectFormat: SpacedObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("{\"one\": 1, \"two\": 2, \"three\": 3}") + assert toString( + JsonObject( + [("one", JsonNumber(1)), ("two", JsonNumber(2)), ("three", JsonNumber(3))], + ), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: CompactArrayEntries, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ) == + Ok("{\n \"one\": 1,\n \"two\": 2,\n \"three\": 3\n}") + // Formatting - escaping + assert toString( + JsonString("\nr🌾"), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: CompactArrayEntries, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: true + } + ) == + Ok("\"\\nr\\ud83c\\udf3e\"") + assert toString( + JsonString("\nrπŸŒΎΒ€"), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: CompactArrayEntries, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: true + } + ) == + Ok("\"\\nr\\ud83c\\udf3e\\u0080\"") + assert toString( + JsonString("rπŸŒΎΒ€"), + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: 
CompactArrayEntries, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: true + } + ) == + Ok("\"r\\ud83c\\udf3e\\u0080\"") + assert toString( + JsonString(" + toString( + json, + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: CompactArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ), + comprehensiveNestingCombinations + ) == + [ + Ok("[]"), + Ok("[1]"), + Ok("[1,2]"), + Ok("[1,2,3]"), + Ok("[[],[1],[1,2],[1,2,3]]"), + Ok("[[[]]]"), + Ok("{}"), + Ok("{\"a\":\"A\"}"), + Ok("{\"a\":\"A\",\"b\":\"B\"}"), + Ok("{\"a\":\"A\",\"b\":\"B\",\"c\":\"C\"}"), + Ok("{\"a\":{\"b\":{\"c\":{}}}}"), + Ok( + "{\"arrays\":[[],[1],[1,2],[1,2,3]],\"objects\":{\"a\":\"A\",\"b\":\"B\",\"c\":\"C\"}}", + ), + ] + + assert List.map( + json => + toString( + json, + format=Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false + } + ), + comprehensiveNestingCombinations + ) == + [ + Ok("[]"), + Ok("[\n 1\n]"), + Ok("[\n 1,\n 2\n]"), + Ok("[\n 1,\n 2,\n 3\n]"), + Ok( + "[\n [],\n [\n 1\n ],\n [\n 1,\n 2\n ],\n [\n 1,\n 2,\n 3\n ]\n]", + ), + Ok("[\n [\n []\n ]\n]"), + Ok("{}"), + Ok("{\n \"a\": \"A\"\n}"), + Ok("{\n \"a\": \"A\",\n \"b\": \"B\"\n}"), + Ok("{\n \"a\": \"A\",\n \"b\": \"B\",\n \"c\": \"C\"\n}"), + Ok("{\n \"a\": {\n \"b\": {\n \"c\": {}\n }\n }\n}"), + Ok( + "{\n \"arrays\": [\n [],\n [\n 1\n ],\n [\n 1,\n 2\n ],\n [\n 1,\n 2,\n 3\n ]\n ],\n \"objects\": {\n \"a\": \"A\",\n \"b\": \"B\",\n \"c\": \"C\"\n }\n}", + ), + ] + + // Round trips + assert Result.map(parse, 
toString(comprehensiveJsonObject)) == + Ok(Ok(comprehensiveJsonObject)) + + assert Result.map(parse, toString(comprehensiveJsonObject, format=Compact)) == + Ok(Ok(comprehensiveJsonObject)) + + assert Result.map(parse, toString(comprehensiveJsonObject, format=Pretty)) == + Ok(Ok(comprehensiveJsonObject)) + + assert Result.map( + parse, + toString(comprehensiveJsonObject, format=CompactAndSafe) + ) == + Ok(Ok(comprehensiveJsonObject)) + + assert Result.map( + parse, + toString(comprehensiveJsonObject, format=PrettyAndSafe) + ) == + Ok(Ok(comprehensiveJsonObject)) +} diff --git a/compiler/test/suites/stdlib.re b/compiler/test/suites/stdlib.re index 2d9438b73..4530c5a44 100644 --- a/compiler/test/suites/stdlib.re +++ b/compiler/test/suites/stdlib.re @@ -84,6 +84,7 @@ describe("stdlib", ({test, testSkip}) => { assertStdlib("int16.test"); assertStdlib("int32.test"); assertStdlib("int64.test"); + assertStdlib("json.test"); assertStdlib("uint8.test"); assertStdlib("uint16.test"); assertStdlib("uint32.test"); diff --git a/stdlib/json.gr b/stdlib/json.gr new file mode 100644 index 000000000..9578a8b62 --- /dev/null +++ b/stdlib/json.gr @@ -0,0 +1,2084 @@ +/** + * JSON (JavaScript Object Notation) parsing, printing, and access utilities. 
+ * + * @example from "json" include Json + * @example Json.parse("{\"currency\":\"€\",\"price\":99.99}") + * @example + * print( + * toString( + * format=Pretty, + * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) + * ) + * ) + */ +module Json + +from "runtime/bigint" include Bigint as BI +from "runtime/dataStructures" include DataStructures +from "runtime/numbers" include Numbers +from "runtime/numberUtils" include NumberUtils +from "runtime/string" include String as RuntimeString +from "runtime/unsafe/tags" include Tags +from "runtime/unsafe/wasmi32" include WasmI32 +from "runtime/unsafe/wasmi64" include WasmI64 +from "runtime/unsafe/wasmf64" include WasmF64 +from "runtime/atof/parse" include Parse as Atof +from "buffer" include Buffer +from "char" include Char +from "string" include String +from "list" include List +from "uint8" include Uint8 +use RuntimeString.{ toString as runtimeToString, getCodePoint } +use Numbers.{ coerceNumberToWasmI32 } +use DataStructures.{ tagSimpleNumber, untagSimpleNumber } + +// Primitive offsets +// TODO(#703): Get these offsets from the runtime +@unsafe +let _INT64_BOXED_VALUE_OFFSET = 8n +@unsafe +let _Float64_BOXED_VALUE_OFFSET = 8n + +/** + * Data structure representing JSON in Grain. + * + * @example + * assert Json.parse("{\"currency\":\"€\",\"price\":99.99}") == Ok(JsonObject([ + * ("currency", JsonString("€")), + * ("price", JsonNumber(99.99)), + * ])) + * + * @example + * assert Json.parse("{\n\"currency\":\"€\",\n\"price\":99.99\n}") == Ok(JsonObject([ + * ("currency", JsonString("€")), + * ("price", JsonNumber(99.99)), + * ])) + */ +provide enum rec Json { + JsonNull, + JsonBoolean(Bool), + JsonNumber(Number), + JsonString(String), + JsonArray(List<Json>), + // Note that JsonObject here is deliberately defined as a simple list of key value pair tuples as opposed + // to for example a Map in order to accommodate the fact that the ECMA-404 standard doesn't prohibit + // duplicate names in Objects. 
Such JSON should be representable by the JSON data structure for lossless + // processing. This also simplifies implementation by not requiring a purpose built data structure and + // has the benefit of List's immutability. It's a conscious decision that sacrifices ease of use of the + // API for lossless handing of these edge cases with intention of later building more ergonomic APIs on a + // higher level of abstraction. + JsonObject(List<(String, Json)>), +} + +/** + * Represents errors for cases where a `Json` data structure cannot be represented as a + * JSON string. + */ +provide enum JsonToStringError { + /** + * The `Json` data structure contains a number value of `NaN`, `Infinity`, or `-Infinity`. + */ + InvalidNumber(String), +} + +/** + * Controls how indentation is output in custom formatting. + */ +provide enum IndentationFormat { + /** + * No indentation is emitted. + * + * ```json + * { + * "currency": "€", + * "price": 99.9 + * } + * ``` + */ + NoIndentation, + /** + * Tabs are emitted. + * + * ```json + * { + * "currency": "€", + * "price": 99.9 + * } + * ``` + */ + IndentWithTab, + /** + * The desired number of spaces are emitted. + * + * `IndentWithSpaces(2)` + * ```json + * { + * "currency": "€", + * "price": 99.9 + * } + * ``` + * + * `IndentWithSpaces(4)` + * ```json + * { + * "currency": "€", + * "price": 99.9 + * } + * ``` + */ + IndentWithSpaces(Number), +} + +/** + * Controls how arrays are output in custom formatting. + */ +provide enum ArrayFormat { + /** + * Arrays are emitted in a compact manner. + * + * ```json + * [] + * ``` + * + * ```json + * [1] + * ``` + * + * ```json + * [1,2,3] + * ``` + */ + CompactArrayEntries, + /** + * Arrays are emitted with spaces between elements. + * + * ```json + * [ ] + * ``` + * + * ```json + * [1] + * ``` + * + * ```json + * [1, 2, 3] + * ``` + */ + SpacedArrayEntries, + /** + * Arrays are emitted with newlines and indentation between each element. 
+ * + * ```json + * [] + * ``` + * + * ```json + * [ + * 1 + * ] + * ``` + * + * ```json + * [ + * 1, + * 2, + * 3 + * ] + * ``` + */ + OneArrayEntryPerLine, +} + +/** + * Controls how objects are output in custom formatting. + */ +provide enum ObjectFormat { + /** + * Objects are emitted in a compact manner. + * + * ```json + * {} + * ``` + * + * ```json + * {"a":1} + * ``` + * + * ```json + * {"a":1,"b":2,"c":3} + * ``` + */ + CompactObjectEntries, + /** + * Objects are emitted with spaces between entries. + * + * ```json + * { } + * ``` + * + * ```json + * {"a": 1} + * ``` + * + * ```json + * {"a": 1, "b": 2, "c": 3} + * ``` + */ + SpacedObjectEntries, + /** + * Objects are emitted with each entry on a new line. + * + * ``` + * {} + * ``` + * + * ``` + * { + * "a": 1 + * } + * ``` + * + * ``` + * { + * "a": 1, + * "b": 2, + * "c": 3 + * } + * ``` + */ + OneObjectEntryPerLine, +} + +/** + * Controls how line endings are output in custom formatting. + */ +provide enum LineEnding { + /** + * No line endings will be emitted. + */ + NoLineEnding, + /** + * A `\n` will be emitted at the end of each line. + */ + LineFeed, + /** + * A `\r\n` will be emitted at the end of each line. + */ + CarriageReturnLineFeed, + /** + * A `\r` will be emitted at the end of each line. + */ + CarriageReturn, +} + +/* + * Allows fine-grained control of formatting in JSON output. + */ +record FormattingSettings { + indentation: IndentationFormat, + arrayFormat: ArrayFormat, + objectFormat: ObjectFormat, + lineEnding: LineEnding, + finishWithNewLine: Bool, + escapeAllControlPoints: Bool, + escapeHTMLUnsafeSequences: Bool, + escapeNonASCII: Bool, +} + +/** + * Allows control of formatting in JSON output. + */ +provide enum FormattingChoices { + /** + * Recommended human readable formatting. 
+ * + * Escapes all control points for the sake of clarity, but outputs unicode + * codepoints directly so the result needs to be treated as proper unicode and + * is not safe to be transported in ASCII encoding. + * + * Roughly Equivalent to: + * ```grain + * Custom{ + * indentation: IndentWithSpaces(2), + * arrayFormat: OneArrayEntryPerLine, + * objectFormat: OneObjectEntryPerLine, + * lineEnding: LineFeed, + * finishWithNewLine: true, + * escapeAllControlPoints: true, + * escapeHTMLUnsafeSequences: false, + * escapeNonASCII: false, + * } + * ``` + * + * ```json + * { + * "currency": "€", + * "price": 99.9, + * "currencyDescription": "EURO\u007f", + * } + * ``` + */ + Pretty, + /** + * Compact formatting that minimizes the size of resulting JSON at cost of not + * being easily human readable. + * + * Only performs minimal string escaping as required by the ECMA-404 standard, + * so the result needs to be treated as proper unicode and is not safe to be + * transported in ASCII encoding. + * + * Roughly Equivalent to: + * ```grain + * Custom{ + * indentation: NoIndentation, + * arrayFormat: CompactArrayEntries, + * objectFormat: CompactObjectEntries, + * lineEnding: NoLineEnding, + * finishWithNewLine: false, + * escapeAllControlPoints: false, + * escapeHTMLUnsafeSequences: false, + * escapeNonASCII: false, + * } + * ``` + * + * ```json + * {"currency":"€","price":99.9,"currencyDescription":"EURO␑"} + * ``` + */ + Compact, + /** + * Pretty and conservative formatting to maximize compatibility and + * embeddability of the resulting JSON. + * + * Should be safe to copy and paste directly into HTML and to be transported in + * plain ASCII. 
+ * + * Roughly Equivalent to: + * ```grain + * Custom{ + * indentation: IndentWithSpaces(2), + * arrayFormat: OneArrayEntryPerLine, + * objectFormat: OneObjectEntryPerLine, + * lineEnding: LineFeed, + * finishWithNewLine: true, + * escapeAllControlPoints: true, + * escapeHTMLUnsafeSequences: true, + * escapeNonASCII: true, + * } + * ``` + * + * ```json + * { + * "currency": "\u20ac", + * "price": 99.9, + * "currencyDescription": "EURO\u007f" + * } + * ``` + */ + PrettyAndSafe, + /** + * Compact and conservative formatting to maximize compatibility and + * embeddability of the resulting JSON. + * + * Should be safe to copy and paste directly into HTML and to be transported in + * plain ASCII. + * + * Roughly Equivalent to: + * ```grain + * Custom{ + * indentation: NoIndentation, + * arrayFormat: CompactArrayEntries, + * objectFormat: CompactObjectEntries, + * lineEnding: NoLineEnding, + * finishWithNewLine: false, + * escapeAllControlPoints: true, + * escapeHTMLUnsafeSequences: true, + * escapeNonASCII: true, + * } + * ``` + * + * ```json + * {"currency":"\u20ac","price":99.9,"currencyDescription":"EURO\u007f"} + * ``` + */ + CompactAndSafe, + /** + * Allows for fine-grained control of the formatting output. + */ + Custom{ + indentation: IndentationFormat, + arrayFormat: ArrayFormat, + objectFormat: ObjectFormat, + lineEnding: LineEnding, + finishWithNewLine: Bool, + escapeAllControlPoints: Bool, + escapeHTMLUnsafeSequences: Bool, + escapeNonASCII: Bool, + }, +} + +// Internal bundle of everything printElement needs while writing: the resolved +// formatting settings, the output buffer, and the closures prepared once by +// makeJsonWriter. printNewLine and printIndentation are None when the chosen +// format emits no line endings or no indentation. +record JsonWriterConfig { + format: FormattingSettings, + buffer: Buffer.Buffer, + emitEscapedQuotedString: String => Void, + printNewLine: Option<() => Void>, + printIndentation: Option<Number => Void>, +} + +// The idea for this type is to allow reusing a bit of work done in preparing for printing JSON. +// For now this is not exposed and remains an internal implementation detail. 
+// It may make sense in the future to expose it and let the user reuse a writer for multiple +// JSON emit operations without reallocating new closures and buffers each time. +// `emit` writes the given Json into the writer's buffer and returns None on success +// or Some(error) when the value cannot be represented as JSON. +record JsonWriter { + emit: Json => Option<JsonToStringError>, +} + +let emitUTF16EscapeSequence = (codePoint: Number, buffer: Buffer.Buffer) => { + // Emit the "\u" followed by hexadecimal representation of the codepoint + // with fixed length of 4 hexadecimal digits corresponding to the two byte + // codepoint. No checks are performed here if the codepoint is in the + // "Basic Multilingual Plane" (0000-FFFF) as this function is only called + // internally. + // An alternative to this implementation was to use NumberUtils.itoa32, + // but this avoids unnecessary heap allocations. As a possible future + // optimization this loop could be unrolled possibly even converted to be + // branchless and SIMD optimized, but it could be a bit of an overkill as + // this codepath is only for escape sequences, which probably aren't all + // that common. + + Buffer.addChar('\\', buffer) + Buffer.addChar('u', buffer) + // Loop over the four digits from most to least significant. + for (let mut digitIndex = 3; digitIndex >= 0; digitIndex -= 1) { + // Use bit masking and shifting to extract from the codepoint a number + // with just the bits corresponding to this hexadecimal digit. + let shift = digitIndex * 4 + let mask = 0b1111 << shift + let digit = (codePoint & mask) >>> shift + + // Digit now is a number in the range 0..15 and we need to translate it + // into a unicode codepoint representing the hexadecimal digit + // (0..9/a..f). We can use the fact that digits and latin letters in + // ASCII and by extension in Unicode are adjacent and ordered. 
+ let hexDigitCodePoint = if (digit <= 9) { + // 48 is codepoint for char '0' + digit + 48 + } else { + // 97 is codepoint for char 'a' + // But we also need to subtract 10 from it because we need + // the 10..15 number range translated to 0..5 in order to + // serve as an index in the ASCII range 'a'..'f'. + digit + 87 + } + + Buffer.addCharFromCodePoint(hexDigitCodePoint, buffer) + } +} + +let emitEscapedUnicodeSequence = (codePoint: Number, buffer: Buffer.Buffer) => { + // See the String section in the ECMA-404 doc. + // If the code point is "in the Basic Multilingual Plane", that is in range + // 0..65535, it is emitted as a single escape sequence. Greater values need + // to be split into two UTF-16 chunks (a surrogate pair). + if (codePoint <= 0xFFFF) { + emitUTF16EscapeSequence(codePoint, buffer) + } else { + // The following three lines are copied from String module of Grain's + // stdlib. It would be nice to share more code. On the other hand it + // may make sense to just have these few instructions directly here + // from the performance standpoint so we can print millions of emojis + // per second 😄. 
+ + // https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF + let uPrime = codePoint - 0x10000 + let highSurrogate = ((uPrime & 0b11111111110000000000) >>> 10) + 0xD800 + // High surrogate + let lowSurrogate = (uPrime & 0b00000000001111111111) + 0xDC00 + // Low surrogate + + emitUTF16EscapeSequence(highSurrogate, buffer) + emitUTF16EscapeSequence(lowSurrogate, buffer) + } +} + +let emitEscapedCodePoint = (codePoint: Number, buffer: Buffer.Buffer) => { + match (codePoint) { + 0x0008 => { // backspace + Buffer.addChar('\\', buffer) + Buffer.addChar('b', buffer) + }, + 0x0009 => { // tab + Buffer.addChar('\\', buffer) + Buffer.addChar('t', buffer) + }, + 0x000A => { // line feed + Buffer.addChar('\\', buffer) + Buffer.addChar('n', buffer) + }, + 0x000C => { // form feed + Buffer.addChar('\\', buffer) + Buffer.addChar('f', buffer) + }, + 0x000D => { // carriage return + Buffer.addChar('\\', buffer) + Buffer.addChar('r', buffer) + }, + 0x0022 => { // quotation mark + Buffer.addChar('\\', buffer) + Buffer.addChar('"', buffer) + }, + 0x005C => { // backslash or "Reverse Solidus" + Buffer.addChar('\\', buffer) + Buffer.addChar('\\', buffer) + }, + _ => { + emitEscapedUnicodeSequence(codePoint, buffer) + }, + } +} + +let printNull = (buffer: Buffer.Buffer) => Buffer.addString("null", buffer) + +let printBool = (b: Bool, buffer: Buffer.Buffer) => { + if (b) { + Buffer.addString("true", buffer) + } else { + Buffer.addString("false", buffer) + } +} + +@unsafe +let printNumberWasmI32 = (value: WasmI32, buffer: Buffer.Buffer) => { + let s = NumberUtils.itoa32(value, 10n) + Buffer.addString(s, buffer) +} + +@unsafe +let printNumberWasmI64 = (value: WasmI64, buffer: Buffer.Buffer) => { + let s = NumberUtils.itoa64(value, 10n) + Buffer.addString(s, buffer) +} + +@unsafe +let isFinite = (value: WasmF64) => { + use WasmF64.{ (==), (-) } + value - value == 0.0W +} + +@unsafe +let isNaN = (value: WasmF64) => { + use WasmF64.{ (!=) } + value != value +} + +@unsafe 
+let printNumberWasmF64 = (value: WasmF64, buffer: Buffer.Buffer) => { + if (isFinite(value)) { + let s = NumberUtils.dtoa(value) + Buffer.addString(s, buffer) + None + } else { + use WasmF64.{ (<) } + // JSON standard doesn't allow NaN or infinite values in numbers, + // but WASM f64 (IEEE 754-2008), as well as Grain's number types do + // (Float64 as well as Number). This is the only reason that the + // formatting needs to return a Result and not just a String + // directly. Other possible choices were to throw exceptions or to + // continue formatting without representing these values correctly + // (like JavaScript's JSON.stringify). + if (isNaN(value)) { + Some(InvalidNumber("NaN is not allowed in JsonNumber")) + } else if (value < 0.0W) { + Some(InvalidNumber("-Infinity is not allowed in JsonNumber")) + } else { + Some(InvalidNumber("Infinity is not allowed in JsonNumber")) + } + } +} + +@unsafe +let printNumber = (value: Number, buffer: Buffer.Buffer) => { + use WasmI32.{ (&), (==), (!=), (<<), (>>) } + + let ptr = WasmI32.fromGrain(value) + let ret = if ((ptr & 1n) != 0n) { + printNumberWasmI32(untagSimpleNumber(value), buffer) + None + } else if ((ptr & 7n) == Tags._GRAIN_GENERIC_HEAP_TAG_TYPE) { + let tag = WasmI32.load(ptr, 0n) + match (tag) { + t when t == Tags._GRAIN_BOXED_NUM_HEAP_TAG => { + let numberTag = WasmI32.load(ptr, 4n) + match (numberTag) { + t when t == Tags._GRAIN_INT64_BOXED_NUM_TAG => { + let asWasmI64 = WasmI64.load(ptr, _INT64_BOXED_VALUE_OFFSET) + printNumberWasmI64(asWasmI64, buffer) + None + }, + t when t == Tags._GRAIN_BIGINT_BOXED_NUM_TAG => { + Buffer.addString(BI.bigIntToString10(ptr), buffer) + None + }, + t when t == Tags._GRAIN_RATIONAL_BOXED_NUM_TAG => { + // JSON does not support rationals as a compromise + // we coerce them to an f64 and print that + // this means there is a slight loss in precision + let asFloat64 = Numbers.coerceNumberToFloat64(value) + let ptr = WasmI32.fromGrain(asFloat64) + let asWasmF64 = 
WasmF64.load(ptr, _Float64_BOXED_VALUE_OFFSET) + printNumberWasmF64(asWasmF64, buffer) + }, + t when t == Tags._GRAIN_FLOAT64_BOXED_NUM_TAG => { + let asWasmF64 = WasmF64.load(ptr, _Float64_BOXED_VALUE_OFFSET) + printNumberWasmF64(asWasmF64, buffer) + }, + _ => { + fail "Impossible: Json.toString encountered an unknown number tag" + }, + } + }, + _ => { + fail "Impossible: Json.toString encountered an unknown number tag" + }, + } + } else { + fail "Impossible: Json.toString encountered an unknown number tag" + } + // This keeps the gc from prematurely freeing the value + ignore(value) + ret +} + +// Note that this compromises on peak performance by also handling +// the compact printing case, merging these two together greatly simplifies the amount +// of code we need to maintain so it seems worth it. +let rec printElement = ( + json: Json, + implHelper: JsonWriterConfig, + indentationLevel: Number, +) => { + let buffer = implHelper.buffer + match (json) { + JsonNull => { + printNull(buffer) + return None + }, + JsonBoolean(b) => { + printBool(b, buffer) + return None + }, + JsonNumber(n) => return printNumber(n, buffer), + JsonString(s) => { + implHelper.emitEscapedQuotedString(s) + return None + }, + JsonArray(elems) => { + match (elems) { + [] => { + Buffer.addChar('[', buffer) + if (implHelper.format.arrayFormat == SpacedArrayEntries) { + Buffer.addChar(' ', buffer) + } + Buffer.addChar(']', buffer) + return None + }, + [e] => { + let format = implHelper.format + + Buffer.addChar('[', buffer) + + if (format.arrayFormat == OneArrayEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + } + + let elemLevel = indentationLevel + 1 + + if (format.arrayFormat == OneArrayEntryPerLine) { + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(elemLevel), + None => void, + } + } + + match (printElement(e, implHelper, elemLevel)) { + None => void, + err => return err, + } + + if 
(format.arrayFormat == OneArrayEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(indentationLevel), + None => void, + } + } + + Buffer.addChar(']', buffer) + + return None + }, + [initialHead, ...initialRest] => { + let format = implHelper.format + + Buffer.addChar('[', buffer) + + if (format.arrayFormat == OneArrayEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + } + + let mut currentHead = initialHead + let mut currentRest = initialRest + + let elemLevel = indentationLevel + 1 + + for (let mut index = 0;; index += 1) { + if (index > 0) { + Buffer.addChar(',', buffer) + if (format.arrayFormat == SpacedArrayEntries) { + Buffer.addChar(' ', buffer) + } + + if (format.arrayFormat == OneArrayEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + } + } + + if (format.arrayFormat == OneArrayEntryPerLine) { + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(elemLevel), + None => void, + } + } + + match (printElement(currentHead, implHelper, elemLevel)) { + None => void, + err => return err, + } + + match (currentRest) { + [] => break, + [newHead, ...newRest] => { + currentHead = newHead + currentRest = newRest + }, + } + } + + if (format.arrayFormat == OneArrayEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(indentationLevel), + None => void, + } + } + + Buffer.addChar(']', buffer) + + return None + }, + } + }, + JsonObject(entries) => { + match (entries) { + [] => { + Buffer.addChar('{', buffer) + if (implHelper.format.objectFormat == SpacedObjectEntries) { + Buffer.addChar(' ', buffer) + } + Buffer.addChar('}', buffer) + 
return None + }, + [(key, value)] => { + let format = implHelper.format + + Buffer.addChar('{', buffer) + + let elemLevel = indentationLevel + 1 + + if (format.objectFormat == OneObjectEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(elemLevel), + None => void, + } + } + + implHelper.emitEscapedQuotedString(key) + + Buffer.addChar(':', buffer) + match (format.objectFormat) { + CompactObjectEntries => void, + SpacedObjectEntries | OneObjectEntryPerLine => { + Buffer.addChar(' ', buffer) + }, + } + + match (printElement(value, implHelper, elemLevel)) { + None => void, + err => return err, + } + + if (format.objectFormat == OneObjectEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(indentationLevel), + None => void, + } + } + + Buffer.addChar('}', buffer) + + return None + }, + [initialHead, ...initialRest] => { + let format = implHelper.format + + Buffer.addChar('{', buffer) + + if (format.objectFormat == OneObjectEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + } + + let mut currentHead = initialHead + let mut currentRest = initialRest + + let elemLevel = indentationLevel + 1 + + for (let mut index = 0;; index += 1) { + if (index > 0) { + Buffer.addChar(',', buffer) + if (format.objectFormat == SpacedObjectEntries) { + Buffer.addChar(' ', buffer) + } + + if (format.objectFormat == OneObjectEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + } + } + + if (format.objectFormat == OneObjectEntryPerLine) { + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(elemLevel), + None => void, + } + } + + let (key, value) = 
currentHead + + implHelper.emitEscapedQuotedString(key) + + Buffer.addChar(':', buffer) + match (format.objectFormat) { + CompactObjectEntries => void, + SpacedObjectEntries | OneObjectEntryPerLine => { + Buffer.addChar(' ', buffer) + }, + } + + match (printElement(value, implHelper, elemLevel)) { + None => void, + err => return err, + } + + match (currentRest) { + [] => break, + [newHead, ...newRest] => { + currentHead = newHead + currentRest = newRest + }, + } + } + + if (format.objectFormat == OneObjectEntryPerLine) { + match (implHelper.printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + match (implHelper.printIndentation) { + Some(printIndentation) => printIndentation(indentationLevel), + None => void, + } + } + + Buffer.addChar('}', buffer) + + return None + }, + } + }, + } +} + +let isCodePointInBasicMultilingualPlane = (code: Number) => + code >= 0x0000 && code <= 0xFFFF + +let isHighSurrogate = (code: Number) => code >= 0xD800 && code <= 0xDBFF + +let isLowSurrogate = (code: Number) => code >= 0xDC00 && code <= 0xDFFF + +let combineSurrogatePairToCodePoint = ( + highSurrogate: Number, + lowSurrogate: Number, +) => { + // If this was a method exposed by itself in a library then it should check the + // ranges of the input surrogates, but here it's unnecessary because checks are made + // as part of the parsing logic. 
+ ((highSurrogate - 0xD800) << 10) + (lowSurrogate - 0xDC00) + 0x10000 +} + +let makeJsonWriter = (format: FormattingSettings, buffer: Buffer.Buffer) => { + let printNewLine = match (format.lineEnding) { + NoLineEnding => None, + LineFeed => Some(() => { + Buffer.addChar('\n', buffer) + }), + CarriageReturnLineFeed => Some(() => { + Buffer.addChar('\r', buffer) + Buffer.addChar('\n', buffer) + }), + CarriageReturn => Some(() => { + Buffer.addChar('\r', buffer) + }), + } + + let printIndentation = match (format.indentation) { + IndentWithTab => Some(indentationLevel => { + for (let mut count = 0; count < indentationLevel; count += 1) { + Buffer.addChar('\t', buffer) + } + }), + // Implement fast path, for common indentation level to avoid closure + IndentWithSpaces(spacesPerIndentation) when spacesPerIndentation == 2 => + Some(indentationLevel => { + let spaceCount = indentationLevel * 2 + for (let mut count = 0; count < spaceCount; count += 1) { + Buffer.addChar(' ', buffer) + } + }), + // Implement fast path, for common indentation level to avoid closure + IndentWithSpaces(spacesPerIndentation) when spacesPerIndentation == 4 => + Some(indentationLevel => { + let spaceCount = indentationLevel * 4 + for (let mut count = 0; count < spaceCount; count += 1) { + Buffer.addChar(' ', buffer) + } + }), + IndentWithSpaces(spacesPerIndentation) => Some(indentationLevel => { + let spaceCount = indentationLevel * spacesPerIndentation + for (let mut count = 0; count < spaceCount; count += 1) { + Buffer.addChar(' ', buffer) + } + }), + NoIndentation => None, + } + + // A possible optimization to make this faster would be to + // prepare a different closure for each combination of escaping options. + // This way unnecessary branching is avoided. + // The most important thing is that the non pretty printed format is optimized for + // as this is where the performance is most likely to matter. 
+ + // In every case code points 0..31 must be escaped as + // required by ECMA-404 (the so called "C0" control point group). + + // For the non pretty printed case it is fastest to escape only what is + // strictly required to avoid increasing output size. + // But for pretty printing or compatibility it may be desirable to escape other control points + // or even everything other than printable ASCII characters. + // For this reason options to control this have been exposed; otherwise + // just a sane default would suffice. + // Additionally many JSON libraries escape additional two character + // sequences for direct embedding into html for example. This is + // specifically to avoid emitting the sequence "</". + // The lazy approach would be to just escape the slash (which can become + // "\\/", not necessarily "\u002F"). This more conservative approach only + // escapes it when needed, but requires to keep track of the previous code + // point in the iteration so it's more complicated and handled separately. + let emitCodePoint = if ( + !format.escapeAllControlPoints && + !format.escapeNonASCII + ) { + (codePoint: Number) => { + if (codePoint > 31 && codePoint != 0x0022 && codePoint != 0x005C) { + Buffer.addCharFromCodePoint(codePoint, buffer) + } else { + emitEscapedCodePoint(codePoint, buffer) + } + } + } else if (!format.escapeAllControlPoints && format.escapeNonASCII) { + // If desired, escape all non ASCII code points. So the only non + // escaped code points are those in the range of ASCII characters + // from 32 to 127 (except the quotation mark and the backslash). + (codePoint: Number) => { + if ( + codePoint > 31 && + codePoint != 0x0022 && + codePoint != 0x005C && + codePoint < 128 + ) { + Buffer.addCharFromCodePoint(codePoint, buffer) + } else { + emitEscapedCodePoint(codePoint, buffer) + } + } + } else if (format.escapeAllControlPoints && !format.escapeNonASCII) { + // If desired, in addition to the required 0..31 control points, + // also escape Delete (127) and the unicode control point group C1 (128-159). 
+ // There could be more control points or otherwise escape worthy + // codepoints, but covering that would be overkill. + (codePoint: Number) => { + if ( + codePoint > 31 && + codePoint != 0x0022 && + codePoint != 0x005C && + codePoint < 127 || + codePoint > 159 + ) { + Buffer.addCharFromCodePoint(codePoint, buffer) + } else { + emitEscapedCodePoint(codePoint, buffer) + } + } + } else { + // And this is just the combination of both flags, which means + // doing almost the same as for the case above for + // escapeNonASCII=true, but also escape the ASCII control codepoint + // 127 (Delete). + (codePoint: Number) => { + if ( + codePoint > 31 && + codePoint != 0x0022 && + codePoint != 0x005C && + codePoint < 127 + ) { + // fast path for chars that never need any escaping + Buffer.addCharFromCodePoint(codePoint, buffer) + } else { + emitEscapedCodePoint(codePoint, buffer) + } + } + } + + let emitEscapedQuotedString = if (!format.escapeHTMLUnsafeSequences) { + (s: String) => { + Buffer.addChar('"', buffer) + + // Note that it's important for performance that the closure passed to forEachCodePoint + // is not allocated inline here, but just once when creating the writer. + + String.forEachCodePoint(emitCodePoint, s) + + Buffer.addChar('"', buffer) + } + } else { + // Special handling for the escapeHTMLUnsafeSequences flag. + // Escaping a sequence requires keeping track of previous characters, + // which is difficult and suboptimal when using a function to iterate + // the input string. So we don't want to pay the price in other cases. + // This cannot be done just in the emitCodePoint function. + // It could be possible to implement more optimally, but would + // complicate things even more than this. 
+ (s: String) => { + Buffer.addChar('"', buffer) + + let mut prevCodePoint = 0 + + String.forEachCodePoint(codePoint => { + // 47 is the codepoint for '/' and 60 is the codepoint for '<': + // the slash is escaped only when it directly follows a '<'. + if (codePoint == 47) { + if (prevCodePoint == 60) { + Buffer.addChar('\\', buffer) + Buffer.addChar('/', buffer) + } else { + // otherwise just emit the slash as-is + Buffer.addChar('/', buffer) + } + } else { + emitCodePoint(codePoint) + } + + prevCodePoint = codePoint + }, s) + + Buffer.addChar('"', buffer) + } + } + + let implHelper = { + format, + buffer, + emitEscapedQuotedString, + printNewLine, + printIndentation, + }: JsonWriterConfig + + { emit: json => { + match (printElement(json, implHelper, 0)) { + None => void, + err => return err, + } + if (format.finishWithNewLine) { + match (printNewLine) { + Some(printNewLine) => printNewLine(), + None => void, + } + } + return None + }, }: JsonWriter +} + +/** + * Converts the `Json` data structure into a JSON string with specific formatting settings. + * + * @param format: Formatting options + * @param json: The `Json` data structure to convert + * @returns `Ok(str)` containing the JSON string or `Err(err)` if the provided `Json` data structure cannot be converted to a string + * + * @example + * assert toString( + * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) + * ) == Ok("{\"currency\":\"€\",\"price\":99.9}") + * @example + * assert toString( + * format=Compact, + * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) + * ) == Ok("{\"currency\":\"€\",\"price\":99.9}") + * @example + * assert toString( + * format=Pretty, + * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) + * ) == Ok("{ + * \"currency\": \"€\", + * \"price\": 99.9 + * }") + * @example + * assert toString( + * format=Custom{ + * indentation: NoIndentation, + * arrayFormat: CompactArrayEntries, + * objectFormat: CompactObjectEntries, + * lineEnding: NoLineEnding, + * finishWithNewLine: false, + * escapeAllControlPoints: true, + * 
escapeHTMLUnsafeSequences: true, + * escapeNonASCII: true, + * }, + * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) + * ) == Ok("{\"currency\":\"\\u20ac\",\"price\":99.9}") + * + * @since v0.6.0 + */ +provide let toString = (format=Compact, json: Json) => { + // Expand the selected preset into the explicit settings record handed to the writer. + let settings = match (format) { + Compact => + { + indentation: NoIndentation, + arrayFormat: CompactArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: NoLineEnding, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false, + }, + CompactAndSafe => + { + indentation: NoIndentation, + arrayFormat: CompactArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: NoLineEnding, + finishWithNewLine: false, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: true, + escapeNonASCII: true, + }, + Pretty => + { + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: true, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false, + }, + PrettyAndSafe => + { + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: true, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: true, + escapeNonASCII: true, + }, + // A custom format already spells out every setting, so it is copied over field by field. + Custom{ + indentation, + arrayFormat, + objectFormat, + lineEnding, + finishWithNewLine, + escapeAllControlPoints, + escapeHTMLUnsafeSequences, + escapeNonASCII, + } => + { + indentation, + arrayFormat, + objectFormat, + lineEnding, + finishWithNewLine, + escapeAllControlPoints, + escapeHTMLUnsafeSequences, + escapeNonASCII, + }, + } + + // The writer is created over this buffer, which is read back once emitting succeeds. + let output = Buffer.make(16) + let writer = makeJsonWriter(settings, output) + + match (writer.emit(json)) { + Some(failure) => Err(failure), + None => Ok(Buffer.toString(output)), + } +} + +/** + * Represents errors for JSON 
parsing along with a human readable message. + */ +provide enum JsonParseError { + UnexpectedEndOfInput(String), + UnexpectedToken(String), + InvalidUTF16SurrogatePair(String), +} + +/* + * Internal data structure used during parsing. + */ +record JsonParserState { + string: String, + bufferParse: Buffer.Buffer, + mut currentCodePoint: Number, + mut pos: Number, + mut bytePos: Number, +} + +let isInterTokenWhiteSpace = (codePoint: Number) => { + // tab, line feed, carriage return, space + codePoint == 0x0009 || + codePoint == 0x000A || + codePoint == 0x000D || + codePoint == 0x0020 +} + +// Sentinel kept in `currentCodePoint` once there is nothing left to read. +let _END_OF_INPUT = -1 + +@unsafe +let readCodePoint = (bytePosition: Number, string: String) => { + use WasmI32.{ (+), (<) } + + let stringPtr = WasmI32.fromGrain(string) + + // The word loaded at offset 4 is used as the byte length; bytes are read from offset 8 onwards. + let totalBytes = WasmI32.load(stringPtr, 4n) + + let offset = coerceNumberToWasmI32(bytePosition) + + let target = stringPtr + 8n + offset + + if (offset < totalBytes) { + let decoded = getCodePoint(target) + tagSimpleNumber(decoded) + } else { + _END_OF_INPUT + } +} + +let codePointUTF8ByteCount = (usv: Number) => { + if (!Char.isValid(usv)) { + fail "Impossible: JSON parser encountered an invalid unicode scalar value in codePointUTF8ByteCount" + } + + // The thresholds are checked in ascending order, so the first match wins. + match (usv) { + v when v <= 0x7f => 1, + v when v <= 0x7ff => 2, + v when v <= 0xffff => 3, + _ => 4, + } +} + +let isAtEndOfInput = (parserState: JsonParserState) => { + parserState.currentCodePoint == _END_OF_INPUT +} + +let next = (parserState: JsonParserState) => { + // Advances by one code point and returns the new current code point; + // once the end of input is reached the state is left untouched. + let current = parserState.currentCodePoint + if (current == _END_OF_INPUT) { + current + } else { + parserState.bytePos += codePointUTF8ByteCount(current) + + let following = readCodePoint(parserState.bytePos, parserState.string) + + parserState.currentCodePoint = following + parserState.pos += 1 + following + } +} + +let skipWhiteSpace = (parserState: JsonParserState) => { + // isAtEndOfInput is not strictly necessary here + // could remove as an optimization + while ( + 
isInterTokenWhiteSpace(parserState.currentCodePoint) && + !isAtEndOfInput(parserState) + ) { + next(parserState) + void + } +} + +let buildUnexpectedTokenError = (parserState: JsonParserState, detail: String) => { + // Builds the parse error for the parser's current position. The message text is + // the same in both cases; only the variant tells end of input apart from a token. + let message = "Unexpected token at position " ++ + runtimeToString(parserState.pos) ++ + ": " ++ + detail + if (parserState.currentCodePoint == _END_OF_INPUT) { + UnexpectedEndOfInput(message) + } else { + UnexpectedToken(message) + } +} + +@unsafe +let toHex = (n: Number) => { + // Formats the number in base 16 via the 32-bit integer formatter. + let x = coerceNumberToWasmI32(n) + NumberUtils.itoa32(x, 16n) +} + +let toHexWithZeroPadding = (n: Number, padTo: Number) => { + // Note that this function is only called in exceptional cases so no effort + // was made to optimize it. + let mut result = toHex(n) + // Prepend zeros until the text is at least padTo characters long. + for (let mut i = String.length(result); i < padTo; i += 1) { + result = "0" ++ result + } + result +} + +let formatCodePointOrEOF = (codePoint: Number) => { + if (codePoint >= 32 && codePoint <= 126) { + // If the codepoint is in the range of printable ASCII characters, then + // display the character itself. Whether it's a good idea to display + // all of them, especially space is up for debate. + "'" ++ Char.toString(Char.fromCode(codePoint)) ++ "'" + } else if (codePoint == _END_OF_INPUT) { + // Special case for value used by the parsing code to avoid heap allocations. + "end of input" + } else { + // Format any other code point as hexadecimal value. 
+ "U+" ++ toHexWithZeroPadding(codePoint, 4) + } +} + +let expectCodePointAndAdvance = ( + expectedCodePoint: Number, + parserState: JsonParserState, +) => { + // Consumes the current code point and yields None when it is the expected one; + // otherwise nothing is consumed and Some(error) is yielded. + let found = parserState.currentCodePoint + if (found != expectedCodePoint) { + let detail = "expected " ++ + formatCodePointOrEOF(expectedCodePoint) ++ + ", found " ++ + formatCodePointOrEOF(found) + Some(buildUnexpectedTokenError(parserState, detail)) + } else { + ignore(next(parserState)) + None + } +} +let atoiFast = buffer => { + // Folds the bytes of the buffer into a number, one decimal digit per byte. + // NOTE(review): assumes the buffer holds only ASCII digits - confirm against callers. + let digitCount = Buffer.length(buffer) + let mut total = 0 + for (let mut index = 0; index < digitCount; index += 1) { + use Uint8.{ (-) } + // 48 is the code of '0'; (total << 3) + (total << 1) is total * 8 + total * 2. + let digit = Uint8.toNumber(Buffer.getUint8(index, buffer) - 48us) + total = (total << 3) + (total << 1) + digit + } + total +} +let rec parseValue = (parserState: JsonParserState) => { + // Whitespace is skipped on both sides of the value. + skipWhiteSpace(parserState) + + // The first code point alone decides which kind of value follows. + let parsed = match (parserState.currentCodePoint) { + 0x22 => parseStringValue(parserState), // '"' + 0x5B => parseArray(parserState), // '[' + 0x7B => parseObject(parserState), // '{' + 0x66 => parseFalseValue(parserState), // 'f' + 0x6E => parseNullValue(parserState), // 'n' + 0x74 => parseTrueValue(parserState), // 't' + 0x2D => parseNumberValue(parserState), // '-' + digit when digit >= 0x30 && digit <= 0x39 => parseNumberValue(parserState), // '0'..'9' + other => { + let detail = "expected start of a JSON value, found " ++ + formatCodePointOrEOF(other) + Err(buildUnexpectedTokenError(parserState, detail)) + }, + } + + skipWhiteSpace(parserState) + + parsed +} +and parseNullValue = (parserState: JsonParserState) => { + match (expectCodePointAndAdvance(0x6E, parserState)) { + // 'n' + 
Some(e) => Err(e), + None => { + match (expectCodePointAndAdvance(0x75, parserState)) { + // 'u' + Some(e) => Err(e), + None => { + match (expectCodePointAndAdvance(0x6C, parserState)) { + // 'l' + Some(e) => Err(e), + None => { + match (expectCodePointAndAdvance(0x6C, parserState)) { + // 'l' + Some(e) => Err(e), + None => Ok(JsonNull), + } + }, + } + }, + } + }, + } +} +and parseTrueValue = (parserState: JsonParserState) => { + // Matches the literal `true` one code point at a time and stops at the first mismatch. + // 't' + match (expectCodePointAndAdvance(0x74, parserState)) { + Some(err) => return Err(err), + None => void, + } + // 'r' + match (expectCodePointAndAdvance(0x72, parserState)) { + Some(err) => return Err(err), + None => void, + } + // 'u' + match (expectCodePointAndAdvance(0x75, parserState)) { + Some(err) => return Err(err), + None => void, + } + // 'e' + match (expectCodePointAndAdvance(0x65, parserState)) { + Some(err) => return Err(err), + None => void, + } + return Ok(JsonBoolean(true)) +} +and parseFalseValue = (parserState: JsonParserState) => { + // Matches the literal `false` one code point at a time and stops at the first mismatch. + // 'f' + match (expectCodePointAndAdvance(0x66, parserState)) { + Some(err) => return Err(err), + None => void, + } + // 'a' + match (expectCodePointAndAdvance(0x61, parserState)) { + Some(err) => return Err(err), + None => void, + } + // 'l' + match (expectCodePointAndAdvance(0x6C, parserState)) { + Some(err) => return Err(err), + None => void, + } + // 's' + match (expectCodePointAndAdvance(0x73, parserState)) { + Some(err) => return Err(err), + None => void, + } + // 'e' + match (expectCodePointAndAdvance(0x65, parserState)) { + Some(err) => return Err(err), + None => void, + } + return Ok(JsonBoolean(false)) +} +and parseString = (parserState: JsonParserState) => { + match (expectCodePointAndAdvance(0x22, parserState)) { + // '"' + Some(e) => return Err(e), + None => { + let mut done = false + let buffer = parserState.bufferParse + Buffer.clear(buffer) + + while (!done) { + match (parserState.currentCodePoint) { + 0x22 => { // '"' + next(parserState) + done = true + break + }, + -1 => { + // just end the loop without setting done to true + break + }, + 0x5C => { // '\' + // Keep the starting 
position for better error reporting. + let escapeStartPos = parserState.pos + + // Step past the backslash, then dispatch on the character that follows it. + next(parserState) + + match (parserState.currentCodePoint) { + 0x22 => { // '"' + Buffer.addChar('"', buffer) + ignore(next(parserState)) + }, + 0x5C => { // '\' + Buffer.addChar('\\', buffer) + ignore(next(parserState)) + }, + 0x2F => { // '/' + Buffer.addChar('/', buffer) + ignore(next(parserState)) + }, + 0x62 => { // letter 'b' as in Backspace + // emit backspace control code + Buffer.addChar('\u{08}', buffer) + ignore(next(parserState)) + }, + 0x66 => { // letter 'f' as in Form Feed + // emit Form Feed control code + Buffer.addChar('\u{0C}', buffer) + ignore(next(parserState)) + }, + 0x6E => { // letter 'n' as in New line + // emit Line Feed control code + Buffer.addChar('\u{0A}', buffer) + ignore(next(parserState)) + }, + 0x72 => { // letter 'r' as in carriage Return + // emit Carriage Return control code + Buffer.addChar('\u{0D}', buffer) + ignore(next(parserState)) + }, + 0x74 => { // letter 't' as in Tab + // emit Tab control code + Buffer.addChar('\u{09}', buffer) + ignore(next(parserState)) + }, + 0x75 => { // 'u' (start of hexadecimal UTF-16 escape sequence) + next(parserState) + + // The escape sequence can either be a standalone code point or + // a UTF-16 surrogate pair made of two code units that have to + // be combined to form a code point. This is a legacy of + // JavaScript's UTF-16 string representation, despite JSON + // mandating UTF-8 (kind of, as stated in rfc8259: "JSON text + // exchanged between systems that are not part of a closed + // ecosystem MUST be encoded using UTF-8"). + // This would be easy to do using a function for shared logic, + // but in order to avoid heap allocation I've chosen to instead + // use a loop and local state. 
+ + // -1 means that no high surrogate has been read yet. + let mut highSurrogate = -1 + + while (true) { + let mut codeUnit = 0 + + // Read four hex digits, most significant first (digitIndex 3 down to 0). + for ( + let mut digitIndex = 3; + digitIndex >= 0; + digitIndex -= 1 + ) { + let hexDigitCodePoint = parserState.currentCodePoint + + let mut digit = hexDigitCodePoint + + if (hexDigitCodePoint >= 48 && hexDigitCodePoint <= 57) { // 0..9 + digit -= 48 + } else if ( + hexDigitCodePoint >= 65 && + hexDigitCodePoint <= 70 + ) { // A..F + digit -= 55 // (65 - 10) + } else if ( + hexDigitCodePoint >= 97 && + hexDigitCodePoint <= 102 + ) { // a..f + digit -= 87 // (97 - 10) + } else { + let digitsSoFar = 3 - digitIndex + let detail = + "expected exactly 4 hexadecimal digits in the UTF-16 escape sequence, found only " ++ + runtimeToString(digitsSoFar) + return Err(buildUnexpectedTokenError(parserState, detail)) + } + + // Each hex digit contributes 4 bits at its own position in the code unit. + let shift = digitIndex * 4 + codeUnit = codeUnit | digit << shift + + ignore(next(parserState)) + } + + if (highSurrogate == -1) { + // This is the first iteration of the loop. + // The code unit should either be the high surrogate of the + // pair or a full code point in the Basic Multilingual + // Plane (U+0000..U+FFFF). + if (isHighSurrogate(codeUnit)) { + // Next characters should be "\u" + // '\' + match (expectCodePointAndAdvance(0x5C, parserState)) { + Some(e) => return Err(e), + None => void, + } + // 'u' + match (expectCodePointAndAdvance(0x75, parserState)) { + Some(e) => return Err(e), + None => void, + } + + // Keep the high surrogate and proceed to the second + // iteration of the loop. 
+ highSurrogate = codeUnit + } else if ( + isCodePointInBasicMultilingualPlane(codeUnit) && + !isLowSurrogate(codeUnit) + ) { + // A standalone code unit is emitted as-is and ends the escape sequence. + let codePoint = codeUnit + Buffer.addCharFromCodePoint(codePoint, buffer) + break + } else { + let message = + "Invalid character escape sequence at position " ++ + runtimeToString(escapeStartPos) ++ + ": expected a Unicode code point in Basic Multilingual Plane (U+0000..U+FFFF) or a high surrogate (0xD800..0xDBFF) of a UTF-16 surrogate pair, found " ++ + "0x" ++ + toHexWithZeroPadding(codeUnit, 4) + return Err(InvalidUTF16SurrogatePair(message)) + } + } else { + // This is the second iteration of the loop. + // The code unit should be the low surrogate of the pair. + if (isLowSurrogate(codeUnit)) { + let lowSurrogate = codeUnit + let combinedCodePoint = combineSurrogatePairToCodePoint( + highSurrogate, + lowSurrogate + ) + Buffer.addCharFromCodePoint(combinedCodePoint, buffer) + break + } else { + let message = + "Invalid character escape sequence at position " ++ + runtimeToString(escapeStartPos) ++ + ": expected a low surrogate (0xDC00..0xDFFF) in the second code unit of the UTF-16 sequence, found " ++ + "0x" ++ + toHexWithZeroPadding(codeUnit, 4) + return Err(InvalidUTF16SurrogatePair(message)) + } + } + } + }, + unexpectedCodePoint => { + // JSON doesn't allow arbitrary characters to be preceded by backslash escape. + // Only the ones above. + let detail = + "expected a valid escape sequence or the end of string, found " ++ + formatCodePointOrEOF(unexpectedCodePoint) + return Err(buildUnexpectedTokenError(parserState, detail)) + }, + } + }, + c => { + // Code points U+0000..U+001F are rejected when they appear unescaped. + if (c >= 0x00 && c <= 0x1F) { + return Err( + buildUnexpectedTokenError( + parserState, + "Bad control character in string literal" + ), + ) + } + // Finally the happy case of a simple unescaped code point. 
+ next(parserState) + Buffer.addCharFromCodePoint(c, buffer) + }, + } + } + + // `done` is only set once the closing quote was consumed; otherwise the loop + // stopped because the input ended inside the string. + if (done) { + let s = Buffer.toString(buffer) + return Ok(s) + } else { + return Err( + buildUnexpectedTokenError( + parserState, + "unexpected end of string value" + ), + ) + } + }, + } +} +and parseStringValue = (parserState: JsonParserState) => { + // Parses a string literal and wraps the decoded text in a JsonString value. + match (parseString(parserState)) { + Ok(s) => Ok(JsonString(s)), + Err(e) => Err(e), + } +} +and parseNumberValue = (parserState: JsonParserState) => { + // TODO(#1878): Use a streaming-optimized way to parse numbers + let buffer = parserState.bufferParse + Buffer.clear(buffer) + // First char can optionally be a minus sign. + // The sign is only recorded in isNegative here; it is not copied to the buffer. + let mut c = parserState.currentCodePoint + let mut isFloat = false + let isNegative = c == 0x2D + // '-' + if (isNegative) { + c = next(parserState) + } + + // After that, the first/second char can only be a decimal digit ('0'..'9'). + match (c) { + 0x30 => { // '0' + // JSON doesn't allow numbers with additional leading zeros like + // "01". Which means that if a number starts with zero then the + // integer part is just zero and the next one can only be one of + // '.', 'e' or 'E'. In any case all that needs to be done here is + // to advance over the zero character and proceed to the optional + // fractional and exponential parts. If another digit follows then + // a parsing error will occur as expected, but implicitly because + // this function finishes with the parser positioned on a digit + // and not on a token expected after a number like ',', ']', '}' or + // EOF. + Buffer.addCharFromCodePoint(c, buffer) + c = next(parserState) + }, + x when x >= 0x31 && x <= 0x39 => { // '1'..'9' + while (true) { + Buffer.addCharFromCodePoint(c, buffer) + c = next(parserState) + if (c < 0x30 || c > 0x39) { + break + } + } + }, + unexpectedCodePoint => { + // The integer part of the number has to have at least one digit. + // JSON doesn't allow numbers starting with decimal separator like ".1". 
+ let detail = "expected a decimal digit, found " ++ + formatCodePointOrEOF(unexpectedCodePoint) + return Err(buildUnexpectedTokenError(parserState, detail)) + }, + } + // Optional fractional part of the number. + if (c == 0x2E) { // '.' + isFloat = true + Buffer.addChar('.', buffer) + c = next(parserState) + // followed by one or more digits (0-9) + let mut hasHitDigit = false + while (c >= 0x30 && c <= 0x39) { + hasHitDigit = true + Buffer.addCharFromCodePoint(c, buffer) + c = next(parserState) + } + // A decimal point without any digit after it (like "1.") is not a valid number. + if (!hasHitDigit) + return Err( + buildUnexpectedTokenError( + parserState, + "fraction part is missing in number" + ), + ) + } + // Optional exponential part of the number. + if (c == 0x65 || c == 0x45) { // 'e' or 'E' + isFloat = true + // The exponent marker is always written to the buffer as a lowercase 'e'. + Buffer.addChar('e', buffer) + c = next(parserState) + // can start with optional plus or minus sign + match (c) { + 0x2D => { // '-' + c = next(parserState) + Buffer.addChar('-', buffer) + }, + 0x2B => { // '+' + // a plus sign is skipped without being copied to the buffer + c = next(parserState) + }, + _ => void, + } + // followed by one or more digits (0-9) + let mut hasHitDigit = false + while (c >= 0x30 && c <= 0x39) { + hasHitDigit = true + Buffer.addCharFromCodePoint(c, buffer) + c = next(parserState) + } + if (!hasHitDigit) + return Err( + buildUnexpectedTokenError( + parserState, + "exponent part is missing in number" + ), + ) + } + // Note that unlike all other JSON value types there's no explicit ending + // character like ('"' for strings, ']' for arrays,'}' for objects etc). We + // just leave the parser state at current position and the reading of next + // token will succeed or fail, but number parsing just ends here. 
+ let result = match (isFloat) { + false => atoiFast(buffer), + true => { + let str = Buffer.toString(buffer) + match (Atof.parseFloat(str)) { + Err(err) => fail "Impossible: Json parse float on invalid float", + Ok(n) => n, + } + }, + } + if (result == 0 && isNegative) + return Ok(JsonNumber(-0.0)) + else + return Ok(JsonNumber(if (isNegative) result * -1 else result)) +} +and parseArray = (parserState: JsonParserState) => { + // Parses `[ value (, value)* ]`. Elements must be separated by exactly one + // comma; leading, trailing and repeated commas are rejected, and so are two + // values that follow each other without a comma. + match (expectCodePointAndAdvance(0x5B, parserState)) { + // '[' + Some(e) => return Err(e), + None => { + skipWhiteSpace(parserState) + + // Elements are collected in reverse order and flipped once at the end. + let mut elems = []: List<Json> + + let mut done = false + // `first` stays true until one element has been parsed. + let mut first = true + // `trailingComma` is true while the last thing consumed was a comma. + let mut trailingComma = false + while (!done) { + let c = parserState.currentCodePoint + match (c) { + 0x2C => { // ',' + if (first) { + return Err( + buildUnexpectedTokenError( + parserState, + "unexpected comma at beginning of array" + ), + ) + } + // A comma directly after another comma leaves an empty slot, e.g. "[1,,2]". + if (trailingComma) { + return Err( + buildUnexpectedTokenError( + parserState, + "expected a value after ',' in array, found ','" + ), + ) + } + trailingComma = true + next(parserState) + skipWhiteSpace(parserState) + }, + 0x5D => { // ']' + next(parserState) + done = true + break + }, + -1 => { + // just end the loop without setting done to true + break + }, + _ => { + // Every element after the first one must be preceded by a comma, + // otherwise inputs like "[1 2]" or "[01]" would be accepted. + if (!first && !trailingComma) { + return Err( + buildUnexpectedTokenError( + parserState, + "expected ',' or ']' after array element" + ), + ) + } + // note that parseValue skips initial and final whitespace + match (parseValue(parserState)) { + Ok(elem) => { + first = false + trailingComma = false + elems = [elem, ...elems] + }, + Err(e) => return Err(e), + } + }, + } + } + + if (trailingComma) { + return Err( + buildUnexpectedTokenError(parserState, "unexpected end of array"), + ) + } else if (done) { + return Ok(JsonArray(List.reverse(elems))) + } else { + return Err( + buildUnexpectedTokenError(parserState, "unexpected end of array"), + ) + } + }, + } +} +and parseObject = (parserState: JsonParserState) => { + match (expectCodePointAndAdvance(0x7B, parserState)) { + // '{' + Some(e) => return Err(e), + None => { + let mut entries = []: List<(String, Json)> + + let mut done = false + let mut first = true + + // one iteration of this loop should correspond to a key-value pair + let mut trailingComma = false 
+ while (!done) { + skipWhiteSpace(parserState) + + let c = parserState.currentCodePoint + match (c) { + -1 => { + let detail = "expected a key-value pair or the end of the object" + return Err(buildUnexpectedTokenError(parserState, detail)) + }, + 0x2C => { // ',' + // A comma is only valid directly after an entry: it is rejected at the + // start of the object and right after another comma (e.g. `{"a":1,,"b":2}`). + if (first || trailingComma) { + let detail = + "expected a key-value pair or the end of the object, found ','" + return Err(buildUnexpectedTokenError(parserState, detail)) + } else { + trailingComma = true + ignore(next(parserState)) + } + }, + 0x7D => { // '}' + if (trailingComma) { + let detail = "unexpected trailing comma in object" + return Err(buildUnexpectedTokenError(parserState, detail)) + } + next(parserState) + done = true + break + }, + _ => { + // Every entry after the first one must be preceded by a comma, + // otherwise inputs like `{"a":1 "b":2}` would be accepted. + if (!first && !trailingComma) { + let detail = "expected ',' or '}' after object entry" + return Err(buildUnexpectedTokenError(parserState, detail)) + } + trailingComma = false + // A new entry in current object. + // Just call parseString directly. In case the current character is not '"', it will return an error we can pass along. + match (parseString(parserState)) { + Ok(key) => { + skipWhiteSpace(parserState) + + match (expectCodePointAndAdvance(0x3A, parserState)) { + // ':' + None => { + // note that parseValue skips initial and final whitespace + match (parseValue(parserState)) { + Ok(value) => { + // Entries are collected in reverse order and flipped once at the end. + entries = [(key, value), ...entries] + first = false + }, + Err(e) => return Err(e), + } + }, + Some(e) => return Err(e), + } + }, + Err(e) => return Err(e), + } + }, + } + } + // end of entry loop + + if (done) { + return Ok(JsonObject(List.reverse(entries))) + } else { + // This branch is not expected to actually execute, + // but in case it does, may just as well do the right thing. + return Err( + buildUnexpectedTokenError(parserState, "unexpected end of object"), + ) + } + }, + } +} + +/** + * Parses JSON string into a `Json` data structure. 
+ * + * @param str: The JSON string to parse + * @returns `Ok(json)` containing the parsed data structure on a successful parse or `Err(err)` containing a parse error otherwise + * + * @example + * assert parse("{\"currency\":\"$\",\"price\":119}") == Ok( + * JsonObject([ + * ("currency", JsonString("$")), + * ("price", JsonNumber(119)) + * ]) + * ) + * + * @since v0.6.0 + */ +provide let parse: (str: String) => Result<Json, JsonParseError> = (str: String) => { + // The parser state starts on the first code point of the input. + let parserState = { + string: str, + bufferParse: Buffer.make(16), + currentCodePoint: readCodePoint(0, str), + pos: 0, + bytePos: 0, + }: JsonParserState + + let root = parseValue(parserState) + + skipWhiteSpace(parserState) + + if (isAtEndOfInput(parserState)) { + root + } else { + // Anything left after a successfully parsed value is an error; + // an error from parsing the value itself is passed through unchanged. + match (root) { + Ok(_) => { + let detail = "expected end of input, found " ++ + formatCodePointOrEOF(parserState.currentCodePoint) + Err(buildUnexpectedTokenError(parserState, detail)) + }, + e => e, + } + } +} diff --git a/stdlib/json.md b/stdlib/json.md new file mode 100644 index 000000000..26d7947c6 --- /dev/null +++ b/stdlib/json.md @@ -0,0 +1,608 @@ +--- +title: Json +--- + +JSON (JavaScript Object Notation) parsing, printing, and access utilities. + +```grain +from "json" include Json +``` + +```grain +Json.parse("{\"currency\":\"€\",\"price\":99.99}") +``` + +```grain +print( + toString( + format=Pretty, + JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) + ) +) +``` + +## Types + +Type declarations included in the Json module. + +### Json.**Json** + +```grain +enum Json { + JsonNull, + JsonBoolean(Bool), + JsonNumber(Number), + JsonString(String), + JsonArray(List<Json>), + JsonObject(List<(String, Json)>), +} +``` + +Data structure representing JSON in Grain. 
+ +Examples: + +```grain +assert Json.parse("{\"currency\":\"€\",\"price\":99.99}") == Ok(JsonObject([ + ("currency", JsonString("€")), + ("price", JsonNumber(99.99)), +])) +``` + +```grain +assert Json.parse("{\n\"currency\":\"€\",\n\"price\":99.99\n}") == Ok(JsonObject([ + ("currency", JsonString("€")), + ("price", JsonNumber(99.99)), +])) +``` + +### Json.**JsonToStringError** + +```grain +enum JsonToStringError { + InvalidNumber(String), +} +``` + +Represents errors for cases where a `Json` data structure cannot be represented as a +JSON string. + +Variants: + +```grain +InvalidNumber(String) +``` + +The `Json` data structure contains a number value of `NaN`, `Infinity`, or `-Infinity`. + +### Json.**IndentationFormat** + +```grain +enum IndentationFormat { + NoIndentation, + IndentWithTab, + IndentWithSpaces(Number), +} +``` + +Controls how indentation is output in custom formatting. + +Variants: + +```grain +NoIndentation +``` + +No indentation is emitted. + +```json +{ +"currency": "€", +"price": 99.9 +} +``` + +```grain +IndentWithTab +``` + +Tabs are emitted. + +```json +{ + "currency": "€", + "price": 99.9 +} +``` + +```grain +IndentWithSpaces(Number) +``` + +The desired number of spaces is emitted. + +`IndentWithSpaces(2)` +```json +{ + "currency": "€", + "price": 99.9 +} +``` + +`IndentWithSpaces(4)` +```json +{ + "currency": "€", + "price": 99.9 +} +``` + +### Json.**ArrayFormat** + +```grain +enum ArrayFormat { + CompactArrayEntries, + SpacedArrayEntries, + OneArrayEntryPerLine, +} +``` + +Controls how arrays are output in custom formatting. + +Variants: + +```grain +CompactArrayEntries +``` + +Arrays are emitted in a compact manner. + +```json +[] +``` + +```json +[1] +``` + +```json +[1,2,3] +``` + +```grain +SpacedArrayEntries +``` + +Arrays are emitted with spaces between elements. 
+ +```json +[ ] +``` + +```json +[1] +``` + +```json +[1, 2, 3] +``` + +```grain +OneArrayEntryPerLine +``` + +Arrays are emitted with newlines and indentation between each element. + +```json +[] +``` + +```json +[ + 1 +] +``` + +```json +[ + 1, + 2, + 3 +] +``` + +### Json.**ObjectFormat** + +```grain +enum ObjectFormat { + CompactObjectEntries, + SpacedObjectEntries, + OneObjectEntryPerLine, +} +``` + +Controls how objects are output in custom formatting. + +Variants: + +```grain +CompactObjectEntries +``` + +Objects are emitted in a compact manner. + +```json +{} +``` + +```json +{"a":1} +``` + +```json +{"a":1,"b":2,"c":3} +``` + +```grain +SpacedObjectEntries +``` + +Objects are emitted with spaces between entries. + +```json +{ } +``` + +```json +{"a": 1} +``` + +```json +{"a": 1, "b": 2, "c": 3} +``` + +```grain +OneObjectEntryPerLine +``` + +Objects are emitted with each entry on a new line. + +``` +{} +``` + +``` +{ + "a": 1 +} +``` + +``` +{ + "a": 1, + "b": 2, + "c": 3 +} +``` + +### Json.**LineEnding** + +```grain +enum LineEnding { + NoLineEnding, + LineFeed, + CarriageReturnLineFeed, + CarriageReturn, +} +``` + +Controls how line endings are output in custom formatting. + +Variants: + +```grain +NoLineEnding +``` + +No line endings will be emitted. + +```grain +LineFeed +``` + +A `\n` will be emitted at the end of each line. + +```grain +CarriageReturnLineFeed +``` + +A `\r\n` will be emitted at the end of each line. + +```grain +CarriageReturn +``` + +A `\r` will be emitted at the end of each line. + +### Json.**FormattingChoices** + +```grain +enum FormattingChoices { + Pretty, + Compact, + PrettyAndSafe, + CompactAndSafe, + Custom{ + indentation: IndentationFormat, + arrayFormat: ArrayFormat, + objectFormat: ObjectFormat, + lineEnding: LineEnding, + finishWithNewLine: Bool, + escapeAllControlPoints: Bool, + escapeHTMLUnsafeSequences: Bool, + escapeNonASCII: Bool, + }, +} +``` + +Allows control of formatting in JSON output. 
+ +Variants: + +```grain +Pretty +``` + +Recommended human-readable formatting. + +Escapes all control points for the sake of clarity, but outputs Unicode +codepoints directly so the result needs to be treated as proper Unicode and +is not safe to be transported in ASCII encoding. + +Roughly equivalent to: +```grain +Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: true, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false, +} +``` + +```json +{ + "currency": "€", + "price": 99.9, + "currencyDescription": "EURO\u007f" +} +``` + +```grain +Compact +``` + +Compact formatting that minimizes the size of the resulting JSON at the cost of not +being easily human-readable. + +Only performs minimal string escaping as required by the ECMA-404 standard, +so the result needs to be treated as proper Unicode and is not safe to be +transported in ASCII encoding. + +Roughly equivalent to: +```grain +Custom{ + indentation: NoIndentation, + arrayFormat: CompactArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: NoLineEnding, + finishWithNewLine: false, + escapeAllControlPoints: false, + escapeHTMLUnsafeSequences: false, + escapeNonASCII: false, +} +``` + +```json +{"currency":"€","price":99.9,"currencyDescription":"EURO␡"} +``` + +```grain +PrettyAndSafe +``` + +Pretty and conservative formatting to maximize compatibility and +embeddability of the resulting JSON. + +Should be safe to copy and paste directly into HTML and to be transported in +plain ASCII. 
+ +Roughly equivalent to: +```grain +Custom{ + indentation: IndentWithSpaces(2), + arrayFormat: OneArrayEntryPerLine, + objectFormat: OneObjectEntryPerLine, + lineEnding: LineFeed, + finishWithNewLine: true, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: true, + escapeNonASCII: true, +} +``` + +```json +{ + "currency": "\u20ac", + "price": 99.9, + "currencyDescription": "EURO\u007f" +} +``` + +```grain +CompactAndSafe +``` + +Compact and conservative formatting to maximize compatibility and +embeddability of the resulting JSON. + +Should be safe to copy and paste directly into HTML and to be transported in +plain ASCII. + +Roughly equivalent to: +```grain +Custom{ + indentation: NoIndentation, + arrayFormat: CompactArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: NoLineEnding, + finishWithNewLine: false, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: true, + escapeNonASCII: true, +} +``` + +```json +{"currency":"\u20ac","price":99.9,"currencyDescription":"EURO\u007f"} +``` + +```grain +Custom{ + indentation: IndentationFormat, + arrayFormat: ArrayFormat, + objectFormat: ObjectFormat, + lineEnding: LineEnding, + finishWithNewLine: Bool, + escapeAllControlPoints: Bool, + escapeHTMLUnsafeSequences: Bool, + escapeNonASCII: Bool, +} +``` + +Allows for fine-grained control of the formatting output. + +### Json.**JsonParseError** + +```grain +enum JsonParseError { + UnexpectedEndOfInput(String), + UnexpectedToken(String), + InvalidUTF16SurrogatePair(String), +} +``` + +Represents errors for JSON parsing along with a human-readable message. + +## Values + +Functions and constants included in the Json module. + +### Json.**toString** + +
+Added in next +No other changes yet. +
+ +```grain +toString : + (?format: FormattingChoices, json: Json) => + Result<String, JsonToStringError> +``` + +Converts the `Json` data structure into a JSON string with specific formatting settings. + +Parameters: + +|param|type|description| +|-----|----|-----------| +|`?format`|`FormattingChoices`|Formatting options| +|`json`|`Json`|The `Json` data structure to convert| + +Returns: + +|type|description| +|----|-----------| +|`Result<String, JsonToStringError>`|`Ok(str)` containing the JSON string or `Err(err)` if the provided `Json` data structure cannot be converted to a string| + +Examples: + +```grain +assert toString( + JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) +) == Ok("{\"currency\":\"€\",\"price\":99.9}") +``` + +```grain +assert toString( + format=Compact, + JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) +) == Ok("{\"currency\":\"€\",\"price\":99.9}") +``` + +```grain +assert toString( + format=Pretty, + JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) +) == Ok("{ + \"currency\": \"€\", + \"price\": 99.9 +}") +``` + +```grain +assert toString( + format=Custom{ + indentation: NoIndentation, + arrayFormat: CompactArrayEntries, + objectFormat: CompactObjectEntries, + lineEnding: NoLineEnding, + finishWithNewLine: false, + escapeAllControlPoints: true, + escapeHTMLUnsafeSequences: true, + escapeNonASCII: true, + }, + JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))]) +) == Ok("{\"currency\":\"\\u20ac\",\"price\":99.9}") +``` + +### Json.**parse** + +
+Added in next +No other changes yet. +
+ +```grain +parse : (str: String) => Result +``` + +Parses JSON string into a `Json` data structure. + +Parameters: + +|param|type|description| +|-----|----|-----------| +|`str`|`String`|The JSON string to parse| + +Returns: + +|type|description| +|----|-----------| +|`Result`|`Ok(json)` containing the parsed data structure on a successful parse or `Err(err)` containing a parse error otherwise| + +Examples: + +```grain +assert parse("{\"currency\":\"$\",\"price\":119}") == Ok( + JsonObject([ + ("currency", JsonString("$")), + ("price", JsonNumber(119)) + ]) +) +``` + diff --git a/stdlib/runtime/string.gr b/stdlib/runtime/string.gr index 0c78ac178..f1b889db8 100644 --- a/stdlib/runtime/string.gr +++ b/stdlib/runtime/string.gr @@ -39,6 +39,9 @@ primitive (&&) = "@and" primitive (||) = "@or" primitive builtinId = "@builtin.id" primitive ignore = "@ignore" +primitive throw = "@throw" + +exception MalformedUnicode @unsafe primitive typeMetadata = "@heap.type_metadata" @@ -874,3 +877,54 @@ provide let print = (value, suffix="\n") => { Memory.free(buf) void } +
+// Decodes the single UTF-8 encoded code point whose first byte is at `ptr`
+// and returns it as a raw WasmI32.
+//
+// No length is passed in: the lead byte alone decides how many continuation
+// bytes are read (at most four bytes in total), so the caller has to make
+// sure those bytes are readable.
+//
+// Throws `MalformedUnicode` when the first byte is not an accepted lead byte
+// or when a continuation byte is outside the range allowed for its position.
+@unsafe
+provide let getCodePoint = (ptr: WasmI32) => {
+  // Algorithm from https://encoding.spec.whatwg.org/#utf-8-decoder
+  // `<=` and `>=` are bound to the unsigned WasmI32 comparisons here.
+  use WasmI32.{ (+), (&), (|), (<<), leU as (<=), geU as (>=), (==) }
+
+  // Decoder state: the bits accumulated so far, how many continuation bytes
+  // have been consumed / are expected, and the inclusive range the next
+  // continuation byte must fall in.
+  let mut codePoint = 0n
+  let mut bytesSeen = 0n
+  let mut bytesNeeded = 0n
+  let mut lowerBoundary = 0x80n
+  let mut upperBoundary = 0xBFn
+
+  // Index of the next byte to read, relative to `ptr`.
+  let mut offset = 0n
+
+  while (true) {
+    let byte = WasmI32.load8U(ptr + offset, 0n)
+    offset += 1n
+    if (bytesNeeded == 0n) {
+      // Lead byte: it fixes the sequence length and supplies the top bits.
+      if (byte >= 0x00n && byte <= 0x7Fn) {
+        // Single-byte (ASCII) sequence: the byte is the code point.
+        return byte
+      } else if (byte >= 0xC2n && byte <= 0xDFn) {
+        // Two-byte sequence. 0xC0 and 0xC1 are not accepted and end up in
+        // the error branch below.
+        bytesNeeded = 1n
+        codePoint = byte & 0x1Fn
+      } else if (byte >= 0xE0n && byte <= 0xEFn) {
+        // Three-byte sequence. After 0xE0 the first continuation byte must
+        // be >= 0xA0 (excludes overlong encodings); after 0xED it must be
+        // <= 0x9F (excludes the surrogate range U+D800-U+DFFF).
+        if (byte == 0xE0n) lowerBoundary = 0xA0n
+        if (byte == 0xEDn) upperBoundary = 0x9Fn
+        bytesNeeded = 2n
+        codePoint = byte & 0xFn
+      } else if (byte >= 0xF0n && byte <= 0xF4n) {
+        // Four-byte sequence. After 0xF0 the first continuation byte must
+        // be >= 0x90 (excludes overlong encodings); after 0xF4 it must be
+        // <= 0x8F (excludes values above U+10FFFF).
+        if (byte == 0xF0n) lowerBoundary = 0x90n
+        if (byte == 0xF4n) upperBoundary = 0x8Fn
+        bytesNeeded = 3n
+        codePoint = byte & 0x7n
+      } else {
+        throw MalformedUnicode
+      }
+      // Lead byte handled; go read the first continuation byte.
+      continue
+    }
+    // Continuation byte: reject it if it is outside the current range.
+    if (!(lowerBoundary <= byte && byte <= upperBoundary)) {
+      throw MalformedUnicode
+    }
+    // A narrowed range only applies to the first continuation byte, so the
+    // default range is restored for any that follow.
+    lowerBoundary = 0x80n
+    upperBoundary = 0xBFn
+    // Append the six payload bits of this continuation byte.
+    codePoint = codePoint << 6n | byte & 0x3Fn
+    bytesSeen += 1n
+    if (bytesSeen == bytesNeeded) {
+      return codePoint
+    }
+  }
+  // Not reached: the loop above only exits through `return` or `throw`.
+  // NOTE(review): presumably kept so the function body ends in a WasmI32
+  // value - confirm before removing.
+  return 0n
+}
 diff --git a/stdlib/runtime/string.md b/stdlib/runtime/string.md index b835617ca..565078e69 100644 --- a/stdlib/runtime/string.md +++ b/stdlib/runtime/string.md @@ -86,3 +86,9 @@ Parameters: |`value`|`a`|The operand| |`?suffix`|`String`|The string to print after the argument| +### String.**getCodePoint** + +```grain +getCodePoint : (ptr: WasmI32) => WasmI32 +``` +