for-GET / jesse

jesse (JSon Schema Erlang) is an implementation of a JSON Schema validator for Erlang.
https://github.com/for-get/jesse
Apache License 2.0
124 stars 64 forks source link

anyOf / OneOf returns unexpected error with allowed_errors=infinity #80

Closed seriyps closed 5 years ago

seriyps commented 5 years ago

Might be caused by fix for #22

Minimized example:

data.json

{
  "unknown_field1":true,
  "known_field1":{
    "some_id":"410391-8351"
  },
  "known_field2":{
    "client_ip":"127.0.0.1"
  }
}

schema.json

{
    "$schema": "http://json-schema.org/draft-04/schema#",
    "additionalProperties": false,
    "properties": {
        "known_field2": {
            "additionalProperties": false,
            "properties": {
                "client_ip": {
                    "type": "string",
                    "anyOf": [
                        { "format": "ipv4" },
                        { "format": "ipv6" }
                    ]
                }
            }
        },
        "known_field1": {
            "additionalProperties": false,
            "properties": {
                "some_id": {
                    "anyOf": [
                        {
                            "pattern": "^[0-9]{6}[+-]?[0-9]{3}[0-9A-Z]{1}$"
                        },
                        {
                            "pattern": "^[8-9][0-9]{8}$"
                        },
                        {
                            "pattern": "^([0-9]{8}|[0-9]{6})[+-]?[0-9]{4}$"
                        }
                    ]
                }
            }
        }
    }
}
{ok, Data} = file:read_file("data.json"),
{ok, Schema} = file:read_file("schema.json"),
jesse:validate_with_schema(
      Schema,
      Data,
      [{allowed_errors,infinity}, {parser_fun, fun jsx:decode/1}]).
%-----
{error,[{data_invalid,[<...>],
                      no_extra_properties_allowed,
                      [{<<"unknown_field1">>,true},
                       {<<"known_field1">>,[{<<"some_id">>,<<"4103918351">>}]},
                       {<<"known_field2">>,[{<<"client_ip">>,<<"127.0.0.1">>}]}],
                      [<<"unknown_field1">>]},
        {data_invalid,[{<<"type">>,<<"string">>},
                       {<<"anyOf">>,
                        [[{<<"format">>,<<"ipv4">>}],[{<<"format">>,<<"ipv6">>}]]}],
                      any_schemas_not_valid,<<"127.0.0.1">>,
                      [<<"known_field2">>,<<"client_ip">>]},
        {data_invalid,[{<<"anyOf">>,
                        [[{<<"pattern">>,<<"^[0-9]{6}[+-]?[0-9]{3}[0-9A-Z]{1}$">>}],
                         [{<<"pattern">>,<<"^[8-9][0-9]{8}$">>}],
                         [{<<"pattern">>,
                           <<"^([0-9]{8}|[0-9]{6})[+-]?[0-9]{4}$">>}]]}],
                      any_schemas_not_valid,<<"410321-9202">>,
                      [<<"known_field1">>,<<"some_id">>]}]}

I expect it to only generate no_extra_properties_allowed, but it also generates any_schemas_not_valid

seriyps commented 5 years ago

I believe the problem might be here:

https://github.com/for-GET/jesse/blob/9f9d050627093d6a43e02a102d78122f0c4989c8/src/jesse_validator_draft4.erl#L1142-L1146

If State already have some errors in it's accumulator (because wefailed "additionalProperties": false check earlier), we will end up in 2nd clause of case even if this subschema of anyOf was checked without any errors.

I believe what should be done is that we should reset error_list of State before checking the subschema and restore it after (or check if number of errors in error_list increased).

seriyps commented 5 years ago

Ok, this does the trick:

diff --git a/src/jesse_validator_draft4.erl b/src/jesse_validator_draft4.erl
index 0583d3f..1a36402 100644
--- a/src/jesse_validator_draft4.erl
+++ b/src/jesse_validator_draft4.erl
@@ -1139,10 +1139,11 @@ check_any_of(_Value, _InvalidSchemas, State) ->
 check_any_of_(Value, [], State) ->
   handle_data_invalid(?any_schemas_not_valid, Value, State);
 check_any_of_(Value, [Schema | Schemas], State) ->
+  NumErrsBefore = length(jesse_state:get_error_list(State)),
   case validate_schema(Value, Schema, State) of
     {true, NewState} ->
-        case jesse_state:get_error_list(NewState) of
-            [] -> NewState;
+        case length(jesse_state:get_error_list(NewState)) of
+            NumErrsBefore -> NewState;
             _  -> check_any_of_(Value, Schemas, State)
         end;
     {false, _} -> check_any_of_(Value, Schemas, State)
@@ -1176,10 +1177,11 @@ check_one_of_(Value, [], State, 0) ->
 check_one_of_(Value, _Schemas, State, Valid) when Valid > 1 ->
   handle_data_invalid(?not_one_schema_valid, Value, State);
 check_one_of_(Value, [Schema | Schemas], State, Valid) ->
+  NumErrsBefore = length(jesse_state:get_error_list(State)),
   case validate_schema(Value, Schema, State) of
     {true, NewState} ->
-        case jesse_state:get_error_list(NewState) of
-            [] -> check_one_of_(Value, Schemas, NewState, Valid + 1);
+        case length(jesse_state:get_error_list(NewState)) of
+            NumErrsBefore -> check_one_of_(Value, Schemas, NewState, Valid + 1);
             _  -> check_one_of_(Value, Schemas, State, Valid)
         end;
     {false, _} ->

Will try to make a PR