Johnwickdev opened this issue 3 months ago
New splitter
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class FlinkMongoDBSplit {

    public static void main(String[] args) throws Exception {
        // Initialize the Flink execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read parameters; you can provide MongoDB connection details here
        final ParameterTool params = ParameterTool.fromArgs(args);
        env.getConfig().setGlobalJobParameters(params);

        // Create a DataStream from MongoDB. MongoDBSourceFunction is a
        // user-supplied source; substitute whichever MongoDB Flink connector you use.
        DataStream<String> jsonStream = env.addSource(new MongoDBSourceFunction(params));

        // Deserialize JSON to your document class
        DataStream<MyDocumentClass> documentStream = jsonStream.map(new MapFunction<String, MyDocumentClass>() {
            @Override
            public MyDocumentClass map(String value) throws Exception {
                // Use your preferred JSON library to convert the JSON string to MyDocumentClass
                return MyDocumentClass.fromJson(value);
            }
        });

        // Key the stream by the primary key. Note that keyBy returns a
        // KeyedStream, not a SingleOutputStreamOperator.
        KeyedStream<MyDocumentClass, String> keyedStream = documentStream
                .keyBy(document -> document.getPrimaryKey());

        // Process each key's partition independently. Keep a reference to the
        // returned stream; otherwise the map operator's output is discarded.
        SingleOutputStreamOperator<MyDocumentClass> processedStream = keyedStream
                .map(new MapFunction<MyDocumentClass, MyDocumentClass>() {
                    @Override
                    public MyDocumentClass map(MyDocumentClass value) throws Exception {
                        // Process the document
                        return processDocument(value);
                    }
                });

        // Sink the processed stream (not the unprocessed keyed stream) to
        // another MongoDB collection or other sinks
        processedStream.addSink(new MongoDBSinkFunction(params));

        // Execute the Flink job
        env.execute("Flink MongoDB Split by Primary Key");
    }

    // Placeholder for the per-document processing logic
    private static MyDocumentClass processDocument(MyDocumentClass document) {
        return document;
    }
}
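The snippet above references a MyDocumentClass with fromJson and getPrimaryKey, but never shows it. A minimal sketch of such a class, assuming Jackson for deserialization and a String primary key (the field names here are illustrative, not from the issue):

import com.fasterxml.jackson.databind.ObjectMapper;

public class MyDocumentClass {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    private String primaryKey; // hypothetical primary-key field
    private String payload;    // hypothetical payload field

    // Jackson (and Flink's POJO serializer) need a no-arg constructor
    public MyDocumentClass() {}

    public static MyDocumentClass fromJson(String json) throws Exception {
        return MAPPER.readValue(json, MyDocumentClass.class);
    }

    public String getPrimaryKey() { return primaryKey; }
    public void setPrimaryKey(String primaryKey) { this.primaryKey = primaryKey; }

    public String getPayload() { return payload; }
    public void setPayload(String payload) { this.payload = payload; }
}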
Split JSON with Java 8
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Map;
import java.util.stream.Collectors;
public class JsonSplitter {

    public static void main(String[] args) throws Exception {
        String json = "{ \"key1\": {\"name\": \"John\", \"age\": 30}, " +
                "\"key2\": {\"name\": \"Jane\", \"age\": 25}, " +
                "\"key3\": {\"name\": \"Doe\", \"age\": 22}}";

        // Parse JSON to a typed Map; the TypeReference avoids the unchecked
        // cast that readValue(json, Map.class) would produce
        ObjectMapper mapper = new ObjectMapper();
        Map<String, Map<String, Object>> map = mapper.readValue(
                json, new TypeReference<Map<String, Map<String, Object>>>() {});

        // Split the map by primary key
        Map<String, Map<String, Object>> key1Map = map.entrySet().stream()
                .filter(entry -> "key1".equals(entry.getKey()))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

        Map<String, Map<String, Object>> key2Map = map.entrySet().stream()
                .filter(entry -> "key2".equals(entry.getKey()))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

        // Output the results
        System.out.println("Key1 Map: " + mapper.writeValueAsString(key1Map));
        System.out.println("Key2 Map: " + mapper.writeValueAsString(key2Map));
    }
}
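Repeating the filter/collect pipeline once per key duplicates code for every key in the input. A sketch of the same split done generically over all top-level keys (same hypothetical input, Java 8 only):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Collections;
import java.util.Map;
import java.util.stream.Collectors;

public class JsonSplitterGeneric {

    public static void main(String[] args) throws Exception {
        String json = "{ \"key1\": {\"name\": \"John\", \"age\": 30}, "
                + "\"key2\": {\"name\": \"Jane\", \"age\": 25}}";

        ObjectMapper mapper = new ObjectMapper();
        Map<String, Map<String, Object>> map = mapper.readValue(
                json, new TypeReference<Map<String, Map<String, Object>>>() {});

        // One single-entry map per top-level key, without a filter per key
        Map<String, Map<String, Map<String, Object>>> split = map.entrySet().stream()
                .collect(Collectors.toMap(
                        Map.Entry::getKey,
                        e -> Collections.singletonMap(e.getKey(), e.getValue())));

        for (Map.Entry<String, Map<String, Map<String, Object>>> e : split.entrySet()) {
            System.out.println(e.getKey() + " -> " + mapper.writeValueAsString(e.getValue()));
        }
    }
}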
Convert Document to JSON object
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.mongodb.client.FindIterable;
import org.bson.Document;
public class MongoToJsonExample {

    public static JsonArray convertToJSON(FindIterable<Document> documents) {
        JsonArray jsonArray = new JsonArray();
        // Iterate over the documents and add them to the JSON array.
        // Document.toJson() preserves field types and tolerates null values;
        // calling value.toString() on every field would stringify numbers and
        // throw a NullPointerException on null fields.
        for (Document document : documents) {
            JsonObject jsonObject = JsonParser.parseString(document.toJson()).getAsJsonObject();
            jsonArray.add(jsonObject);
        }
        return jsonArray;
    }

    public static void main(String[] args) {
        // Replace with your MongoDB query, e.g. collection.find();
        // see the usage example below
        FindIterable<Document> documents = null; // placeholder
        JsonArray jsonArray = convertToJSON(documents);

        // Convert the JsonArray to a JSON string
        String jsonString = jsonArray.toString();
        System.out.println(jsonString);
    }
}
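For completeness, here is one way to obtain the FindIterable the class above expects, using the synchronous MongoDB Java driver. The connection string, database, and collection names are placeholders:

import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import org.bson.Document;

public class MongoToJsonUsage {

    public static void main(String[] args) {
        try (MongoClient client = MongoClients.create("mongodb://localhost:27017")) {
            MongoCollection<Document> collection = client
                    .getDatabase("my_db")           // placeholder database
                    .getCollection("my_collection"); // placeholder collection
            FindIterable<Document> documents = collection.find();
            System.out.println(MongoToJsonExample.convertToJSON(documents).toString());
        }
    }
}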
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.connector.mongodb.source.MongoSource;
import org.apache.flink.connector.mongodb.source.MongoSourceBuilder;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MongoDBFlinkExample {
}
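The last snippet imports the connector classes but leaves the class body empty. A minimal sketch of what it might contain, assuming the official flink-connector-mongodb API (MongoSource.builder() with setUri, setDatabase, setCollection, and setDeserializationSchema); the connection string, database, collection, and "_id" key field are placeholders:

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.connector.mongodb.source.MongoSource;
import org.apache.flink.connector.mongodb.source.reader.deserializer.MongoDeserializationSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.bson.BsonDocument;

public class MongoDBFlinkExample {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Build a MongoSource that emits each document as a JSON string
        MongoSource<String> source = MongoSource.<String>builder()
                .setUri("mongodb://localhost:27017") // placeholder URI
                .setDatabase("my_db")                // placeholder database
                .setCollection("my_coll")            // placeholder collection
                .setDeserializationSchema(new MongoDeserializationSchema<String>() {
                    @Override
                    public String deserialize(BsonDocument document) {
                        return document.toJson();
                    }

                    @Override
                    public TypeInformation<String> getProducedType() {
                        return BasicTypeInfo.STRING_TYPE_INFO;
                    }
                })
                .build();

        DataStream<String> jsonStream =
                env.fromSource(source, WatermarkStrategy.noWatermarks(), "MongoDB Source");

        // Key by the "_id" field parsed from the JSON, mirroring the
        // split-by-primary-key idea from the first snippet
        jsonStream
                .keyBy(MongoDBFlinkExample::extractId)
                .print();

        env.execute("MongoDB Flink Example");
    }

    private static String extractId(String json) throws Exception {
        JsonNode node = MAPPER.readTree(json);
        return node.get("_id").asText();
    }
}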