Improving gdocs2latex.gs

professor commented 6 years ago

Hello,

I would be interested in improving gdocs2latex.gs to handle these situations: bullet lists, numbered lists, subsubsections, and quotes.

However, I'm not aware of how to easily debug the script and would need guidance in understanding the data structure element.

Thanks!

Hola,

Me interesaría mejorar gdocs2latex.gs para manejar estas situaciones: listas de viñetas, listas numeradas, subsubsecciones y citas.

Sin embargo, no estoy al tanto de cómo depurar fácilmente el script y necesitaría una guía para comprender el elemento de estructura de datos.

¡Gracias!

jjconti commented 6 years ago

I have this unpublished version of the file, which handles bullet lists and numbered lists, and also subsections. I don't handle quotes yet. What I'd really like to add is footnotes.

That said, I don't have a good way to debug this. What I've done so far is develop via trial and error with a very small doc file.

// Based on http://github.com/mangini/gdocs2md

/**
 * Converts the active document to simplified LaTeX and e-mails the result to
 * the active user as a plain-text attachment named "<document name>.txt".
 *
 * Each child of the active section is passed to processParagraph(). Results
 * tagged with '{itemize}' or '{enumerate}' become \item lines wrapped in the
 * matching LaTeX list environment; a null result becomes vertical space; any
 * other non-empty string is appended verbatim.
 *
 * @param {number=} startIndex Index of the first child to convert. When it is
 *     omitted or is not a non-negative number, 9 is used (the value the
 *     original author hard-coded to skip the first two pages of the document).
 */
function ConvertToSimpleLatex(startIndex) {
  var ITEMIZE_TAG = '{itemize}';
  var ENUMERATE_TAG = '{enumerate}';
  var DEFAULT_START = 9;

  var first = (typeof startIndex === 'number' && startIndex >= 0) ? startIndex : DEFAULT_START;

  // Fetch the document and its section once instead of on every iteration.
  var doc = DocumentApp.getActiveDocument();
  var section = doc.getActiveSection();
  var numChildren = section.getNumChildren();
  var docName = doc.getName();

  var text = '';
  var openList = '';  // '' when no list is open, otherwise 'itemize' or 'enumerate'.

  // Emits the \end{...} for the list that is currently open, if any.
  function closeList() {
    if (openList !== '') {
      text += '\\end{' + openList + '}\n';
      openList = '';
    }
  }

  // Makes `environment` the open list, closing a different open list first so
  // that the generated environments never end up mis-nested.
  function switchToList(environment) {
    if (openList !== environment) {
      closeList();
      text += '\\begin{' + environment + '}\n';
      openList = environment;
    }
  }

  // Walk through the child elements of the doc.
  for (var i = first; i < numChildren; i++) {
    var result = processParagraph(i, section.getChild(i));

    if (result === null) {
      // Nothing convertible here: end any running list, then add a gap.
      closeList();
      text += '\n\n\\vspace{1cm}\n\n';
      continue;
    }
    if (typeof result !== 'string' || result.length === 0) {
      continue;  // Element produced no text; emit nothing.
    }

    if (starts(result, ITEMIZE_TAG)) {
      switchToList('itemize');
      text += '\\item ' + result.substring(ITEMIZE_TAG.length);
    } else if (starts(result, ENUMERATE_TAG)) {
      switchToList('enumerate');
      text += '\\item ' + result.substring(ENUMERATE_TAG.length);
    } else {
      closeList();
      text += result;
    }
    text += '\n';
  }

  // A list that runs to the end of the document still has to be closed.
  closeList();

  var attachments = [
    {'fileName': docName + '.txt', 'mimeType': 'text/plain', 'content': text}
  ];

  MailApp.sendEmail(Session.getActiveUser().getEmail(),
                    '[Automágica] ' + docName,
                    'Convertiste el adjunto a Latex simplificado para usar con Automágica (' + doc.getUrl() + ')' +
                    '\n\nMás información en http://www.juanjoconti.com/automagica/\n',
                    { 'attachments': attachments });
}

/**
 * Tells whether `string` begins with `prefix`.
 *
 * @param {string} string Text to inspect.
 * @param {string} prefix Leading text to look for.
 * @return {boolean} true when `prefix` occurs at position 0 of `string`.
 */
function starts(string, prefix) {
  // The first occurrence is at index 0 exactly when the string starts with it.
  var firstMatch = string.indexOf(prefix);
  return firstMatch === 0;
}

/**
 * Converts one child element of the document (not just paragraphs) into a
 * fragment of simplified LaTeX.
 *
 * @param {number} index Position of the element within its parent. Not used by
 *     the conversion; kept so existing callers keep working.
 * @param {Object} element Document element; must provide getNumChildren(),
 *     getType() and getChild().
 * @return {?string} null for elements without children, tables of contents and
 *     tables; '' when the element has no direct TEXT child; otherwise the
 *     prefix chosen by findPrefix(), the converted text, and the matching
 *     suffix.
 */
function processParagraph(index, element) {
  var HEADING2_PREFIX = '\n\n\\vspace{0.5cm}\\hrulefill \\hspace{0.1cm}\\decofourleft\\hspace{0.2cm} ';
  var HEADING2_SUFFIX = ' \\hspace{0.2cm}\\decofourright \\hspace{0.1cm}\\hrulefill \\nopagebreak \\vspace{0.5cm} \\nopagebreak';

  // First, check for things that require no processing.
  if (element.getNumChildren() === 0) {
    return null;
  }
  var elementType = element.getType();
  if (elementType === DocumentApp.ElementType.TABLE_OF_CONTENTS ||
      elementType === DocumentApp.ElementType.TABLE) {
    return null;
  }

  // Only direct TEXT children contribute output; other child types are ignored.
  var textElements = [];
  var childCount = element.getNumChildren();
  for (var i = 0; i < childCount; i++) {
    var child = element.getChild(i);
    if (child.getType() === DocumentApp.ElementType.TEXT) {
      textElements.push(child);
    }
  }

  if (textElements.length === 0) {
    // Always hand back a string so callers can rely on the return type.
    return '';
  }

  // Pick the suffix that balances whatever the prefix opened.
  var prefix = findPrefix(element);
  var suffix = '';
  if (starts(prefix, '\\afterpage')) {
    suffix = '}}';  // Closes the two braces opened by \afterpage{\includepdf{
  } else if (starts(prefix, '\\')) {
    suffix = '}';   // Closes the single brace of a command such as \subsection*{
  } else if (prefix === HEADING2_PREFIX) {
    suffix = HEADING2_SUFFIX;
  } else if (prefix === '\n\n') {
    suffix = '\n\n';
  }

  var body = '';
  for (var j = 0; j < textElements.length; j++) {
    body += processTextElement(textElements[j]);
  }

  return prefix + body + suffix;
}

/**
 * Chooses the text that must precede an element's converted content.
 *
 * Paragraphs are mapped by heading level: HEADING4 opens \subsection*{,
 * HEADING3 opens \afterpage{\includepdf{, HEADING2 yields a decorated rule and
 * HEADING1 yields a blank line; every other heading yields ''. List items
 * yield the marker '{itemize}' for bullet glyphs and '{enumerate}' for any
 * other glyph. Every other element type yields ''.
 *
 * @param {Object} element Document element to inspect.
 * @return {string} The prefix, possibly empty.
 */
function findPrefix(element) {
  var type = element.getType();

  if (type === DocumentApp.ElementType.PARAGRAPH) {
    switch (element.getHeading()) {
      case DocumentApp.ParagraphHeading.HEADING4:
        return '\\subsection*{';
      case DocumentApp.ParagraphHeading.HEADING3:
        return '\\afterpage{\\includepdf{';
      case DocumentApp.ParagraphHeading.HEADING2:
        return '\n\n\\vspace{0.5cm}\\hrulefill \\hspace{0.1cm}\\decofourleft\\hspace{0.2cm} ';
      case DocumentApp.ParagraphHeading.HEADING1:
        return '\n\n';
      default:
        return '';
    }
  }

  if (type === DocumentApp.ElementType.LIST_ITEM) {
    var glyph = element.getGlyphType();
    // Bullet list (<ul>):
    var isBullet = glyph === DocumentApp.GlyphType.BULLET ||
        glyph === DocumentApp.GlyphType.HOLLOW_BULLET ||
        glyph === DocumentApp.GlyphType.SQUARE_BULLET;
    // Anything else is treated as an ordered list (<ol>).
    return isBullet ? '{itemize}' : '{enumerate}';
  }

  return '';
}

/**
 * Renders a text element as LaTeX, wrapping each formatted run in a command.
 *
 * Runs are delimited by the offsets from getTextAttributeIndices(). For each
 * run the first matching rule wins: bold (plus italic when both are set),
 * italic, "Courier New" font, strikethrough. A result consisting solely of
 * '*' is replaced by a centred asterisk.
 *
 * @param {(string|Object)} txt A plain string, returned untouched, or a text
 *     element providing getText() and optionally the attribute accessors.
 * @return {string} The converted text.
 */
function processTextElement(txt) {
  if (typeof txt === 'string') {
    return txt;
  }

  var rendered = txt.getText();
  if (!txt.getTextAttributeIndices) {
    return rendered;
  }

  var runStarts = txt.getTextAttributeIndices();
  var runEnd = rendered.length;

  // Go from the last run to the first so that text inserted for a later run
  // never shifts the offsets of the runs still to be handled.
  var k = runStarts.length;
  while (k-- > 0) {
    var runStart = runStarts[k];
    var opening = '';
    var closing = '';

    if (txt.isBold(runStart)) {
      if (txt.isItalic(runStart)) {
        opening = '\\textbf{\\textit{';
        closing = '}}';
      } else {
        opening = '\\textbf{';
        closing = '}';
      }
    } else if (txt.isItalic(runStart)) {
      opening = '\\textit{';
      closing = '}';
    } else if (txt.getFontFamily(runStart) === "Courier New") {
      opening = '\\small{\\texttt{';
      closing = '}}';
    } else if (txt.isStrikethrough(runStart)) {
      opening = '\\st{';
      closing = '}';
    }

    if (opening !== '') {
      var before = rendered.substring(0, runStart);
      var run = rendered.substring(runStart, runEnd);
      var after = rendered.substring(runEnd);
      rendered = before + opening + run + closing + after;
    }
    runEnd = runStart;
  }

  return rendered === '*' ? '\\begin{center} * \\end{center}' : rendered;
}

professor commented 6 years ago

Thank you for providing me with your latest. For my google document, all the section headers are coming out as \vspace{1cm}

jjconti / automagica

Improving gdocs2latex.gs #21