zentity-io / zentity

Entity resolution for Elasticsearch.
https://zentity.io
Apache License 2.0
157 stars 28 forks source link

Match field which is of type "array of objects" #46

Closed abhishekpratap13 closed 3 years ago

abhishekpratap13 commented 4 years ago

Hello Dave, thanks for developing such a wonderful project. I am trying to match a field which is of type 'array of objects', but I am getting a ValidationException. I might be missing something here; can you please look into it? index mapping: "test" : { "mappings" : { "properties" : { "education" : { "properties" : { "major" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256 } } }, "school" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256 } } } } }, "name" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256 } } } } } } sample doc: { "_index" : "test", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "name" : "John Wick", "education" : [ { "major" : "Master Of Science In Information Management", "school" : "Syracuse University" }, { "major" : "Certification Of Advanced Study In Data Science" }, { "major" : "Bachelor Of Technology", "school" : "Charotar University Of Science And Technology" } ] } }

zentity model: PUT _zentity/models/name_education { "attributes" : { "name" : { "type": "string" }, "school" : { "type": "string" } }, "resolvers" : { "name_education" : { "attributes" : ["name", "school"] } }, "matchers" : { "simple" : { "clause" : { "match" : { "{{ field }}" : "{{ value }}" } } }, "fuzzy" : { "clause" : { "match" : { "{{ field }}" : { "query" : "{{ value }}", "fuzziness" : "1" } } } }, "exact" : { "clause" : { "term" : { "{{ field }}" : "{{ value }}" } } } }, "indices" : { "test" : { "fields" : { "name" : { "attribute" : "name", "matcher" : "simple" }, "education.school" : { "attribute" : "school", "matcher" : "simple" } } } } } resolution request: POST _zentity/resolution/name_education?pretty&_source=true&_explanation=true&_score=true { "attributes": { "school": [ "Syracuse University", "", "Charotar University Of Science And Technology" ], "name": [ "John Wick" ] } }

Here I am trying to do a simple match for both attributes, name and education.school. The above request results in the following error (the same error occurs if I use the 'fuzzy' matcher): "error": { "by": "zentity", "type": "io.zentity.model.ValidationException", "reason": "Expected 'string' attribute data type.", "stack_trace": "io.zentity.model.ValidationException: Expected 'string' attribute data type.\n\tat io.zentity.resolution.input.value.StringValue.validate(StringValue.java:35)\n\tat io.zentity.resolution.input.value.Value.<init>(Value.java:18)\n\tat io.zentity.resolution.input.value.StringValue.<init>(StringValue.java:11)\n\tat io.zentity.resolution.input.value.Value.create(Value.java:40)\n\tat io.zentity.resolution.Job.traverse(Job.java:1346)\n\tat io.zentity.resolution.Job.run(Job.java:1539)\n\tat org.elasticsearch.plugin.zentity.ResolutionAction.lambda$prepareRequest$0(ResolutionAction.java:118)\n\tat org.elasticsearch.rest.BaseRestHandler.handleRequest(BaseRestHandler.java:108)\n\tat org.elasticsearch.xpack.security.rest.SecurityRestFilter.lambda$handleRequest$0(SecurityRestFilter.java:58)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$writeAuthToContext$24(AuthenticationService.java:570)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.writeAuthToContext(AuthenticationService.java:579)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.finishAuthentication(AuthenticationService.java:560)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.consumeUser(AuthenticationService.java:510)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$consumeToken$16(AuthenticationService.java:404)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)\n\tat 
org.elasticsearch.action.support.ContextPreservingActionListener.onResponse(ContextPreservingActionListener.java:43)\n\tat org.elasticsearch.xpack.core.common.IteratingActionListener.onResponse(IteratingActionListener.java:120)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$consumeToken$13(AuthenticationService.java:374)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)\n\tat org.elasticsearch.xpack.security.authc.support.CachingUsernamePasswordRealm.lambda$authenticateWithCache$1(CachingUsernamePasswordRealm.java:145)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)\n\tat org.elasticsearch.xpack.security.authc.support.CachingUsernamePasswordRealm.handleCachedAuthentication(CachingUsernamePasswordRealm.java:196)\n\tat org.elasticsearch.xpack.security.authc.support.CachingUsernamePasswordRealm.lambda$authenticateWithCache$2(CachingUsernamePasswordRealm.java:137)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)\n\tat org.elasticsearch.common.util.concurrent.ListenableFuture$1.doRun(ListenableFuture.java:112)\n\tat org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)\n\tat org.elasticsearch.common.util.concurrent.EsExecutors$DirectExecutorService.execute(EsExecutors.java:225)\n\tat org.elasticsearch.common.util.concurrent.ListenableFuture.notifyListener(ListenableFuture.java:106)\n\tat org.elasticsearch.common.util.concurrent.ListenableFuture.addListener(ListenableFuture.java:68)\n\tat org.elasticsearch.xpack.security.authc.support.CachingUsernamePasswordRealm.authenticateWithCache(CachingUsernamePasswordRealm.java:132)\n\tat org.elasticsearch.xpack.security.authc.support.CachingUsernamePasswordRealm.authenticate(CachingUsernamePasswordRealm.java:103)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$consumeToken$15(AuthenticationService.java:365)\n\tat 
org.elasticsearch.xpack.core.common.IteratingActionListener.run(IteratingActionListener.java:102)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.consumeToken(AuthenticationService.java:408)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$extractToken$11(AuthenticationService.java:335)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.extractToken(AuthenticationService.java:345)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$checkForApiKey$3(AuthenticationService.java:288)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)\n\tat org.elasticsearch.xpack.security.authc.ApiKeyService.authenticateWithApiKeyIfPresent(ApiKeyService.java:325)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.checkForApiKey(AuthenticationService.java:269)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$authenticateAsync$0(AuthenticationService.java:252)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)\n\tat org.elasticsearch.xpack.security.authc.TokenService.getAndValidateToken(TokenService.java:379)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$authenticateAsync$2(AuthenticationService.java:248)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$lookForExistingAuthentication$6(AuthenticationService.java:306)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lookForExistingAuthentication(AuthenticationService.java:317)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.authenticateAsync(AuthenticationService.java:244)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.access$000(AuthenticationService.java:196)\n\tat 
org.elasticsearch.xpack.security.authc.AuthenticationService.authenticate(AuthenticationService.java:122)\n\tat org.elasticsearch.xpack.security.rest.SecurityRestFilter.handleRequest(SecurityRestFilter.java:55)\n\tat org.elasticsearch.rest.RestController.dispatchRequest(RestController.java:222)\n\tat org.elasticsearch.rest.RestController.tryAllHandlers(RestController.java:295)\n\tat org.elasticsearch.rest.RestController.dispatchRequest(RestController.java:166)\n\tat org.elasticsearch.http.AbstractHttpServerTransport.dispatchRequest(AbstractHttpServerTransport.java:322)\n\tat org.elasticsearch.http.AbstractHttpServerTransport.handleIncomingRequest(AbstractHttpServerTransport.java:372)\n\tat org.elasticsearch.http.AbstractHttpServerTransport.incomingRequest(AbstractHttpServerTransport.java:301)\n\tat org.elasticsearch.http.netty4.Netty4HttpRequestHandler.channelRead0(Netty4HttpRequestHandler.java:69)\n\tat org.elasticsearch.http.netty4.Netty4HttpRequestHandler.channelRead0(Netty4HttpRequestHandler.java:31)\n\tat io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat org.elasticsearch.http.netty4.Netty4HttpPipeliningHandler.channelRead(Netty4HttpPipeliningHandler.java:58)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat 
io.netty.handler.codec.MessageToMessageCodec.channelRead(MessageToMessageCodec.java:111)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:326)\n\tat io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:300)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)\n\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat io.netty.handler.ssl.SslHandler.unwrap(SslHandler.java:1478)\n\tat io.netty.handler.ssl.SslHandler.decodeJdkCompatible(SslHandler.java:1227)\n\tat io.netty.handler.ssl.SslHandler.decode(SslHandler.java:1274)\n\tat io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:503)\n\tat io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:442)\n\tat io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:281)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)\n\tat io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1422)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)\n\tat io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:931)\n\tat io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:700)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:600)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:554)\n\tat 
io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:514)\n\tat io.netty.util.concurrent.SingleThreadEventExecutor$6.run(SingleThreadEventExecutor.java:1050)\n\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n\tat java.base/java.lang.Thread.run(Thread.java:830)\n" }

However, if I use 'education.school.keyword' and do an exact match, there's no error and it matches the record: PUT _zentity/models/name_education { "attributes" : { "name" : { "type": "string" }, "school" : { "type": "string" } }, "resolvers" : { "name_education" : { "attributes" : ["name", "school"] } }, "matchers" : { "simple" : { "clause" : { "match" : { "{{ field }}" : "{{ value }}" } } }, "fuzzy" : { "clause" : { "match" : { "{{ field }}" : { "query" : "{{ value }}", "fuzziness" : "1" } } } }, "exact" : { "clause" : { "term" : { "{{ field }}" : "{{ value }}" } } } }, "indices" : { "test" : { "fields" : { "name" : { "attribute" : "name", "matcher" : "simple" }, "education.school.keyword" : { "attribute" : "school", "matcher" : "exact" } } } } } response for the same resolution request (used earlier): "hits" : { "total" : 1, "hits" : [ { "_index" : "test", "_type" : "_doc", "_id" : "1", "_hop" : 0, "_query" : 0, "_score" : null, "_attributes" : { "name" : [ "John Wick" ] }, "_explanation" : { "resolvers" : { "name_education" : { "attributes" : [ "name", "school" ] } }, "matches" : [ { "attribute" : "name", "target_field" : "name", "target_value" : "John Wick", "input_value" : "John Wick", "input_matcher" : "simple", "input_matcher_params" : { }, "score" : null }, { "attribute" : "school", "target_field" : "education.school.keyword", "target_value" : null, "input_value" : "Charotar University Of Science And Technology", "input_matcher" : "exact", "input_matcher_params" : { }, "score" : null }, { "attribute" : "school", "target_field" : "education.school.keyword", "target_value" : null, "input_value" : "Syracuse University", "input_matcher" : "exact", "input_matcher_params" : { }, "score" : null } ] }, "_source" : { "name" : "John Wick", "education" : [ { "major" : "Master Of Science In Information Management", "school" : "Syracuse University" }, { "major" : "Certification Of Advanced Study In Data Science" }, { "major" : "Bachelor Of Technology", "school" : "Charotar University Of Science And Technology" } ] } } ] }

Thanks, Abhishek

abhishekpratap13 commented 4 years ago

I am using zentity v1.6.0 for Elasticsearch v7.5.0.

davemoore- commented 3 years ago

@abhishekpratap13 Thanks for pointing this out. I'm able to reproduce the issue, and I'll work toward a resolution in an upcoming patch release.

davemoore- commented 3 years ago

Closing issue and tracking progress on #85