name and namespace of my key schema were overwritten by this connector which took me quite some time because the error message in the serde was that a ConnectDefault class couldn't be found. First I was looking up that class in the internet and couldn't find any dependency that I could have missed again. The the inspection of the schema revealed that the SpecificAvroSerde is using that name.
A simple extensions of the ValueToKey class with a new name field helped to bring in the name again. Here the changed class:
import org.apache.kafka.common.cache.Cache;
import org.apache.kafka.common.cache.LRUCache;
import org.apache.kafka.common.cache.SynchronizedCache;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.transforms.Transformation;
import org.apache.kafka.connect.transforms.util.NonEmptyListValidator;
import org.apache.kafka.connect.transforms.util.SimpleConfig;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.apache.kafka.connect.transforms.util.Requirements.requireMap;
import static org.apache.kafka.connect.transforms.util.Requirements.requireStruct;
public class ValueToKey<R extends ConnectRecord<R>> implements Transformation<R> {
public static final String OVERVIEW_DOC = "Replace the record key with a new key formed from a subset of fields in the record value.";
public static final String FIELDS_CONFIG = "fields";
public static final String NAME_CONFIG = "name";
public static final ConfigDef CONFIG_DEF = new ConfigDef()
.define(FIELDS_CONFIG, ConfigDef.Type.LIST, ConfigDef.NO_DEFAULT_VALUE, new NonEmptyListValidator(), ConfigDef.Importance.HIGH,
"Field names on the record value to extract as the record key.")
.define(NAME_CONFIG, ConfigDef.Type.STRING, ConfigDef.NO_DEFAULT_VALUE, ConfigDef.Importance.HIGH,
"The name of the key schema.");
private static final String PURPOSE = "copying fields from value to key";
private List<String> fields;
private String name;
private Cache<Schema, Schema> valueToKeySchemaCache;
@Override
public void configure(Map<String, ?> configs) {
final SimpleConfig config = new SimpleConfig(CONFIG_DEF, configs);
fields = config.getList(FIELDS_CONFIG);
name = config.getString(NAME_CONFIG);
valueToKeySchemaCache = new SynchronizedCache<>(new LRUCache<>(16));
}
@Override
public R apply(R record) {
if (record.valueSchema() == null) {
return applySchemaless(record);
} else {
return applyWithSchema(record);
}
}
private R applySchemaless(R record) {
final Map<String, Object> value = requireMap(record.value(), PURPOSE);
final Map<String, Object> key = new HashMap<>(fields.size());
for (String field : fields) {
key.put(field, value.get(field));
}
return record.newRecord(record.topic(), record.kafkaPartition(), null, key, record.valueSchema(), record.value(), record.timestamp());
}
private R applyWithSchema(R record) {
final Struct value = requireStruct(record.value(), PURPOSE);
Schema keySchema = valueToKeySchemaCache.get(value.schema());
if (keySchema == null) {
final SchemaBuilder keySchemaBuilder;
if (name == null) {
keySchemaBuilder = SchemaBuilder.struct();
} else {
keySchemaBuilder = SchemaBuilder.struct().name(name);
}
for (String field : fields) {
final Field fieldFromValue = value.schema().field(field);
if (fieldFromValue == null) {
throw new DataException("Field does not exist: " + field);
}
keySchemaBuilder.field(field, fieldFromValue.schema());
}
keySchema = keySchemaBuilder.build();
valueToKeySchemaCache.put(value.schema(), keySchema);
}
final Struct key = new Struct(keySchema);
for (String field : fields) {
key.put(field, value.get(field));
}
return record.newRecord(record.topic(), record.kafkaPartition(), keySchema, key, value.schema(), value, record.timestamp());
}
@Override
public ConfigDef config() {
return CONFIG_DEF;
}
@Override
public void close() {
valueToKeySchemaCache = null;
}
}
name and namespace of my key schema were overwritten by this connector which took me quite some time because the error message in the serde was that a ConnectDefault class couldn't be found. First I was looking up that class in the internet and couldn't find any dependency that I could have missed again. The the inspection of the schema revealed that the SpecificAvroSerde is using that name.
A simple extensions of the ValueToKey class with a new name field helped to bring in the name again. Here the changed class: