ネコと和解せよ

JSON Canonicalization Scheme(JCS)の簡易バリデータを実装した

JSONのハッシュを取るために、JSONが正規化されているかチェックする実装が必要になった。


AIさんに適当に頼んだら適当なものが出てきたので、手組することにした。

参考資料

RFC 8785 - JSON Canonicalization Scheme (JCS)

GitHubにもいくつか実装があるけれども、殆どがバリデータではなくエンコーダだった。

実装

とりあえずPythonで書いた。このコードはJSON Schemaも併用するので、いくつかの機能は省略した。
どこでも動いてほしいので古典的な字句解析で実装する。

  • エスケープ文字の範囲チェック
  • 数値表現の桁数

テスト

テストコードはAIさんにお願いしたものにチェックしたい項目を追加して作成。
割と抜けているので油断ならない。

# 単体実行時にテスト実行
if __name__ == "__main__":
    # Inputs that the validator under test is expected to accept.
    # NOTE(review): several entries (e.g. "+3.0", ".5", "5.", "1e10", "\u0041")
    # do not look like RFC 8785 canonical output; presumably they are accepted
    # only because number-format and escape-range checks are delegated
    # elsewhere -- confirm against the validator implementation.
    S=[
        "[true,false,null]",
        "{\"a\":true,\"b\":false,\"c\":null}",
        "{\"age\":30,\"name\":\"Alice\"}",
        "{\"address\":{\"city\":\"New York\",\"state\":\"NY\"},\"name\":\"Alice\"}",
        "[\"apple\",\"banana\",\"cherry\"]",
        "[{\"age\":30,\"name\":\"Alice\"},{\"age\":25,\"name\":\"Bob\"}]",
        "{}",
        "[]",
        "{\"location\":{\"city\":\"New York\",\"country\":\"USA\"},\"person\":{\"age\":30,\"name\":\"Alice\"}}",
        "[[1,2e3,+3.0],[-4.1e5,5,6],[7,8,9,.5,5.e+1,.5e-3,1.111e0]]",
        # Escape sequences (patterns the author considers JCS-conformant)
        "{\"key\":\"value with \\n newline\"}",
        "{\"key\":\"value with \\t tab\"}",
        "{\"key\":\"value with \\\" quote\"}",
        "{\"key\":\"backslash \\\\\"}",
        "{\"key\":\"unicode \\u0041\"}",  # \u0041 = "A"
        "{\"key\":\"multiple escapes \\\" \\\\ \\n \\t \\b \\f \\r\"}",

        # Number formats (boundary and edge cases)
        "[0,1,-1,1.0,-1.0,1e10,1E-10,-1.23456789,0.5,.5,5.]",
        "[12345678901234567890]",
        "[1e-1,1e+1,1E-1,1E+1]",

        # Nested / composite structures (more complex shapes)
        "{\"a\":{\"b\":[1,2,3],\"c\":true},\"d\":[false,null,3.14]}",
        "[{\"nested\":{\"key\":\"value\"}},[\"array in array\"],{\"k\":1}]"        
    ]
    # Inputs that the validator under test is expected to reject.
    #
    # Every entry must end with a comma: Python silently concatenates adjacent
    # string literals, so a missing comma merges two test cases into one and
    # the second case is never exercised on its own.
    #
    # NOTE(review): many entries contain a space after ':' or ','. The recorded
    # run shows these being rejected at the whitespace, so the defect named in
    # the section comment (trailing comma, key order, ...) is probably not what
    # triggers the failure for those inputs -- confirm.
    F = [
        "{\"a\":1,}",
        "1",
        "true",
        "\"Hello, World!\"",
        "\"Hello \\xWorld\"",
        "\"Hello \\\"World\\\"\"",
        "[true,false,null ]",
        "[\"a\":true,\"b\":false,\"c\":null]",
        "{ }",
        "[ ]",
        "[, ]",
        "{\"z\": 1, \"a\": 2}",
        "[5.+e1]",
        "{\"a\":1,\"a\":2}",
        "{\"person\":{\"name\":\"Alice\",\"age\":30},\"location\":{\"city\":\"New York\",\"country\":\"USA\"}}",
        "[{\"name\":\"Alice\",\"age\":30},{\"name\":\"Bob\",\"age\":25}]",
        "{\"name\":\"Alice\",\"age\":30}",
        "{\"age\":30\"name\":\"Alice\"}",
        "{\"name\":\"Alice\",\"address\":{\"city\":\"New York\",\"state\":\"NY\"}}",
        "\"Hello, World!",
        "{: \"value\"}",
        "{123: \"value\"}",
        "[\"apple\" \"banana\"]",
        "[{\"name\": \"Alice\", \"age\": \"thirty\"}, {\"name\": \"Bob\", \"age\": \"twenty\"}]",
        "{\"name\" \"Alice\"}",
        "[\"apple\", \"banana\",]",
        "{\"name\": \"Alice\", \"age\": 30,}",
        # The trailing comma below was missing, which fused this case with
        # "[1e]" into the single input '[{"name": "Alice"}, "banana"][1e]'.
        "[{\"name\": \"Alice\"}, \"banana\"]",
        # Escapes (strings containing invalid escape sequences).
        # Disabled: presumably because escape validation is not implemented
        # in the validator yet -- confirm before enabling.
        # "{\"key\":\"value with \\x invalid escape\"}",
        # "{\"key\":\"value with \\u123 invalid unicode\"}",
        # "{\"key\":\"value with \\u12 invalid unicode\"}",
        # "{\"key\":\"value with \\uGGGG invalid unicode\"}",

        # Number formats (malformed numeric notation)
        "[1e]",
        "[1e+]",
        "[1e-]",
        "[.e1]",
        # Trailing commas / stray spaces
        "[1,2,3,]",
        "{\"a\":1, \"b\":2,}",

        # Bare top-level scalars (not accepted as a JCS document here)
        "true",
        "null",
        "\"string\"",
        "123",

        # Key-order violation (keys in reverse order)
        "{\"b\":1,\"a\":2}",
    ]



    # テスト実行関数
    def run_tests():
        """Run every case in S and F through the validator and print a verdict.

        Cases in S are reported as PASS when validation finishes without an
        exception; cases in F are reported as PASS when validation raises
        InvalidJcsException or StopIteration. Nothing is returned; results
        are only printed.
        """
        validator = JCSValidator()

        def classify(text):
            # Outcome of one validation run: ("ok" | "invalid" | "eof", error).
            try:
                validator.isJcsToken(CharIterator(text))
            except InvalidJcsException as err:
                return "invalid", err
            except StopIteration:
                return "eof", None
            return "ok", None

        print("=== PASS CASES (Expected to pass) ===")
        for case in S:
            status, err = classify(case)
            if status == "ok":
                print(f"PASS: {case}")
            elif status == "invalid":
                print(f"FAIL (Unexpected Failure): {case} => {err}")
            else:
                print(f"FAIL (Unexpected StopIteration): {case}")

        print("\n=== FAIL CASES (Expected to fail) ===")
        for case in F:
            status, err = classify(case)
            if status == "ok":
                print(f"FAIL (Should have failed): {case}")
            elif status == "invalid":
                print(f"PASS (Expected Failure): {case} => {err}")
            else:
                print(f"PASS (Expected Failure): {case} => StopIteration")

    run_tests()


結果

=== PASS CASES (Expected to pass) ===
PASS: [true,false,null]
PASS: {"a":true,"b":false,"c":null}
PASS: {"age":30,"name":"Alice"}
PASS: {"address":{"city":"New York","state":"NY"},"name":"Alice"}
PASS: ["apple","banana","cherry"]
PASS: [{"age":30,"name":"Alice"},{"age":25,"name":"Bob"}]
PASS: {}
PASS: []
PASS: {"location":{"city":"New York","country":"USA"},"person":{"age":30,"name":"Alice"}}
PASS: [[1,2e3,+3.0],[-4.1e5,5,6],[7,8,9,.5,5.e+1,.5e-3,1.111e0]]
PASS: {"key":"value with \n newline"}
PASS: {"key":"value with \t tab"}
PASS: {"key":"value with \" quote"}
PASS: {"key":"backslash \\"}
PASS: {"key":"unicode \u0041"}
PASS: {"key":"multiple escapes \" \\ \n \t \b \f \r"}
PASS: [0,1,-1,1.0,-1.0,1e10,1E-10,-1.23456789,0.5,.5,5.]
PASS: [12345678901234567890]
PASS: [1e-1,1e+1,1E-1,1E+1]
PASS: {"a":{"b":[1,2,3],"c":true},"d":[false,null,3.14]}
PASS: [{"nested":{"key":"value"}},["array in array"],{"k":1}]

=== FAIL CASES (Expected to fail) ===
PASS (Expected Failure): {"a":1,} => Error 8 at ['{', '"', 'a', '"', ':', '1', ',', '}']
PASS (Expected Failure): 1 => Error 1 at 1
PASS (Expected Failure): true => Error 1 at ['t']
PASS (Expected Failure): "Hello, World!" => Error 1 at ['"']
PASS (Expected Failure): "Hello \xWorld" => Error 1 at ['"']
PASS (Expected Failure): "Hello \"World\"" => Error 1 at ['"']
PASS (Expected Failure): [true,false,null ] => Error 17 at ['a', 'l', 's', 'e', ',', 'n', 'u', 'l', 'l', ' ']
PASS (Expected Failure): ["a":true,"b":false,"c":null] => Error 5 at ['[', '"', 'a', '"', ':']
PASS (Expected Failure): { } => Error 2 at ['{', ' ']
PASS (Expected Failure): [ ] => Error 2 at ['[', ' ']
PASS (Expected Failure): [, ] => Error 2 at ['[', ',']
PASS (Expected Failure): {"z": 1, "a": 2} => Error 6 at ['{', '"', 'z', '"', ':', ' ']
PASS (Expected Failure): [5.+e1] => Error 4 at ['[', '5', '.', '+']
PASS (Expected Failure): {"a":1,"a":2} => Error 13 at ['"', ':', '1', ',', '"', 'a', '"', ':', '2', '}']
PASS (Expected Failure): {"person":{"name":"Alice","age":30},"location":{"city":"New York","country":"USA"}} => Error 35 at [',', '"', 'a', 'g', 'e', '"', ':', '3', '0', '}']
PASS (Expected Failure): [{"name":"Alice","age":30},{"name":"Bob","age":25}] => Error 26 at [',', '"', 'a', 'g', 'e', '"', ':', '3', '0', '}']
PASS (Expected Failure): {"name":"Alice","age":30} => Error 25 at [',', '"', 'a', 'g', 'e', '"', ':', '3', '0', '}']
PASS (Expected Failure): {"age":30"name":"Alice"} => Error 10 at ['{', '"', 'a', 'g', 'e', '"', ':', '3', '0', '"']
PASS (Expected Failure): {"name":"Alice","address":{"city":"New York","state":"NY"}} => Error 59 at ['t', 'e', '"', ':', '"', 'N', 'Y', '"', '}', '}']
PASS (Expected Failure): "Hello, World! => Error 1 at ['"']
PASS (Expected Failure): {: "value"} => Error 2 at ['{', ':']
PASS (Expected Failure): {123: "value"} => Error 5 at ['{', '1', '2', '3', ':']
PASS (Expected Failure): ["apple" "banana"] => Error 9 at ['[', '"', 'a', 'p', 'p', 'l', 'e', '"', ' ']
PASS (Expected Failure): [{"name": "Alice", "age": "thirty"}, {"name": "Bob", "age": "twenty"}] => Error 10 at ['[', '{', '"', 'n', 'a', 'm', 'e', '"', ':', ' ']
PASS (Expected Failure): {"name" "Alice"} => Error 8 at ['{', '"', 'n', 'a', 'm', 'e', '"', ' ']
PASS (Expected Failure): ["apple", "banana",] => Error 10 at ['[', '"', 'a', 'p', 'p', 'l', 'e', '"', ',', ' ']
PASS (Expected Failure): {"name": "Alice", "age": 30,} => Error 9 at ['{', '"', 'n', 'a', 'm', 'e', '"', ':', ' ']
PASS (Expected Failure): [{"name": "Alice"}, "banana"][1e] => Error 10 at ['[', '{', '"', 'n', 'a', 'm', 'e', '"', ':', ' ']
PASS (Expected Failure): [1e+] => Error 5 at ['[', '1', 'e', '+', ']']
PASS (Expected Failure): [1e-] => Error 5 at ['[', '1', 'e', '-', ']']
PASS (Expected Failure): [.e1] => Error 3 at ['[', '.', 'e']
PASS (Expected Failure): [1,2,3,] => Error 8 at ['[', '1', ',', '2', ',', '3', ',', ']']
PASS (Expected Failure): {"a":1, "b":2,} => Error 8 at ['{', '"', 'a', '"', ':', '1', ',', ' ']
PASS (Expected Failure): true => Error 1 at ['t']
PASS (Expected Failure): null => Error 1 at ['n']
PASS (Expected Failure): "string" => Error 1 at ['"']
PASS (Expected Failure): 123 => Error 1 at ['1']
PASS (Expected Failure): {"b":1,"a":2} => Error 13 at ['"', ':', '1', ',', '"', 'a', '"', ':', '2', '}']

良き。