I tried now everything but I’m not able with the newest beta to get the LineFeed chars there are not in the string objects and they are not inside the operator type.
Did I do something wrong?
I just want also the Backslash n chars in my returned string.
I use the following class:
public static class PdfTextExtractor
{
public static string GetText(string pdfFileName)
{
using (var _document = PdfReader.Open(pdfFileName, PdfDocumentOpenMode.ReadOnly))
{
var result = new StringBuilder();
foreach (var page in _document.Pages)
{
ExtractText(ContentReader.ReadContent(page), result);
result.AppendLine();
}
return result.ToString();
}
}
#region CObject Visitor
private static void ExtractText(CObject obj, StringBuilder target)
{
if (obj is CArray)
ExtractText((CArray)obj, target);
else if (obj is CComment)
ExtractText((CComment)obj, target);
else if (obj is CInteger)
ExtractText((CInteger)obj, target);
else if (obj is CName)
ExtractText((CName)obj, target);
else if (obj is CNumber)
ExtractText((CNumber)obj, target);
else if (obj is COperator)
ExtractText((COperator)obj, target);
else if (obj is CReal)
ExtractText((CReal)obj, target);
else if (obj is CSequence)
ExtractText((CSequence)obj, target);
else if (obj is CString)
ExtractText((CString)obj, target);
else
throw new NotImplementedException(obj.GetType().AssemblyQualifiedName);
}
private static void ExtractText(CArray obj, StringBuilder target)
{
foreach (var element in obj)
{
ExtractText(element, target);
}
}
private static void ExtractText(CComment obj, StringBuilder target) { /* nothing */ }
private static void ExtractText(CInteger obj, StringBuilder target) { /* nothing */ }
private static void ExtractText(CName obj, StringBuilder target) { /* nothing */ }
private static void ExtractText(CNumber obj, StringBuilder target) { /* nothing */ }
private static void ExtractText(COperator obj, StringBuilder target)
{
//// target.Append($"((({obj.OpCode.OpCodeName})))");
if (obj.OpCode.OpCodeName == OpCodeName.Tj || obj.OpCode.OpCodeName == OpCodeName.TJ)
{
foreach (var element in obj.Operands)
{
ExtractText(element, target);
}
target.Append(" ");
}
}
private static void ExtractText(CReal obj, StringBuilder target) { /* nothing */ }
private static void ExtractText(CSequence obj, StringBuilder target)
{
foreach (var element in obj)
{
ExtractText(element, target);
}
}
private static void ExtractText(CString obj, StringBuilder target)
{
target.Append(obj.Value);
}
#endregion
}
I tried now everything but I’m not able with the newest beta to get the LineFeed chars there are not in the string objects and they are not inside the operator type.
Did I do something wrong? I just want also the Backslash n chars in my returned string.
I use the following class: