uniVocity-parsers is a suite of extremely fast and reliable parsers for Java. It provides a consistent interface for handling different file formats, and a solid framework for the development of new parsers.
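For reference, a minimal sketch of the library's basic usage, parsing an entire file into memory in one call (the file name and settings here are assumptions for illustration only):

    import com.univocity.parsers.csv.CsvParser;
    import com.univocity.parsers.csv.CsvParserSettings;
    import java.io.FileReader;
    import java.util.List;

    public class QuickStart {
        public static void main(String[] args) throws Exception {
            CsvParserSettings settings = new CsvParserSettings();
            settings.setHeaderExtractionEnabled(true); // treat the first row as headers
            CsvParser parser = new CsvParser(settings);
            // parseAll reads every row into a list, so the whole file ends up in memory
            List<String[]> rows = parser.parseAll(new FileReader("data.csv")); // "data.csv" is a placeholder path
            System.out.println("parsed " + rows.size() + " rows");
        }
    }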
I'm using the univocity parser to process CSV files. I need to process each and every line, so I'm storing each line along with its corresponding processed data. The process ends up with a heap space issue, and the memory is not released afterwards.
try {
    List<Map<String, String>> batchRecords = new ArrayList<>();
    StringBuilder keyBuilder = new StringBuilder();
    StringBuilder lineDataBuilder = new StringBuilder();
    StringBuilder missedColumnFilterValueBuilder = new StringBuilder();
    Map<String, String> cleanRecord = new HashMap<>();

    CsvParserSettings parserSettings = new CsvParserSettings();
    parserSettings.setHeaderExtractionEnabled(true);
    parserSettings.getFormat().setDelimiter(compareFileMetaData.userDelimiter);

    parser = new CsvParser(parserSettings);
    parser.beginParsing(new FileReader(inputFile));

    String[] headers = parser.getContext().headers();
    System.out.println("header processing ......");
    // normalize headers: strip control characters and lower-case
    headers = Arrays.stream(headers)
            .map(header -> header.replaceAll("\\p{C}", "").toLowerCase())
            .toArray(String[]::new);

    Map<String, String> row;
    Record record;
    int i = 1;
    while ((record = parser.parseNextRecord()) != null) {
        row = record.toFieldMap(headers);
        batchRecords.add(row);
        if (batchRecords.size() == BATCH_SIZE) {
            System.out.println(i * BATCH_SIZE + " records: data preparation started");
            for (Map<String, String> batchCleanRecord : batchRecords) {
                preprocessMetaData.setRecord(batchCleanRecord);
                preprocessMetaData.setFileLineData(lineDataBuilder);
                String actualLineData = fileDataPreprocessing(preprocessMetaData, keyBuilder, missedColumnFilterValueBuilder, cleanRecord);
                fileContent.put(keyBuilder.toString(), actualLineData);
                fileMissedFieldFilterMap.put(actualLineData, missedColumnFilterValueBuilder.toString());
                keyBuilder.setLength(0);
                missedColumnFilterValueBuilder.setLength(0);
                lineDataBuilder.setLength(0);
                cleanRecord.clear();
            }
            batchRecords.clear();
            i++;
        }
    }

    System.out.println("remaining batch processing");
    for (Map<String, String> batchCleanRecord : batchRecords) {
        preprocessMetaData.setRecord(batchCleanRecord);
        preprocessMetaData.setFileLineData(lineDataBuilder);
        String actualLineData = fileDataPreprocessing(preprocessMetaData, keyBuilder, missedColumnFilterValueBuilder, cleanRecord);
        fileContent.put(keyBuilder.toString(), actualLineData);
        fileMissedFieldFilterMap.put(actualLineData, missedColumnFilterValueBuilder.toString());
        keyBuilder.setLength(0);
        missedColumnFilterValueBuilder.setLength(0);
        lineDataBuilder.setLength(0);
        cleanRecord.clear();
    }
    batchRecords.clear();
    return fileContent;
} catch (Exception e) {
    System.out.println("file content size ==> " + fileContent.size());
    e.printStackTrace();
    throw new MatchingServiceException(e.getMessage(), e.getCause());
} finally {
    if (parser != null) {
        // stop parsing to release the underlying reader and internal buffers
        parser.stopParsing();
    }
}
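For comparison, uniVocity also offers a callback-based API where each row is handed to a Processor as it is parsed, so rows do not have to be accumulated in a list. A minimal sketch, assuming each row can be handled independently (the handleRow method and file name are placeholders, not part of the code above):

    import com.univocity.parsers.common.ParsingContext;
    import com.univocity.parsers.common.processor.AbstractRowProcessor;
    import com.univocity.parsers.csv.CsvParser;
    import com.univocity.parsers.csv.CsvParserSettings;
    import java.io.FileReader;

    public class CallbackParsing {
        public static void main(String[] args) throws Exception {
            CsvParserSettings settings = new CsvParserSettings();
            settings.setHeaderExtractionEnabled(true);
            settings.setProcessor(new AbstractRowProcessor() {
                @Override
                public void rowProcessed(String[] row, ParsingContext context) {
                    handleRow(row); // process one row, then let it be garbage collected
                }
            });
            CsvParser parser = new CsvParser(settings);
            parser.parse(new FileReader("input.csv")); // "input.csv" is a placeholder path
        }

        private static void handleRow(String[] row) {
            // placeholder for per-row work
        }
    }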