File: layouter.nim

package info (click to toggle)
nim 2.2.0-1
links: PTS, VCS
area: main
in suites: sid
size: 1,911,644 kB
sloc: sh: 24,603; ansic: 1,761; python: 1,492; makefile: 1,013; sql: 298; asm: 141; xml: 13
file content (609 lines) | stat: -rw-r--r-- 19,550 bytes
#
#
#           The Nim Compiler
#        (c) Copyright 2018 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Layouter for nimpretty.

import idents, lexer, ast, lineinfos, llstream, options, msgs, strutils, pathutils

const
  # Length threshold used by `isLongEnough` (a line must be longer than this
  # to be worth splitting) and added as slack to `maxLineLen` in a few
  # comparisons inside `optionalIsGood` and `renderTokens`.
  MinLineLen = 15

type
  # Categories of split positions. Within this file the values are only used
  # as the index type of `Emitter.altSplitPos` and as the (currently unused)
  # argument of the `rememberSplit` template.
  SplitKind = enum
    splitComma, splitParLe, splitAnd, splitOr, splitIn, splitBinary

  # State of the ';' versus ',' decision made by `commaWasSemicolon`.
  # `detectSemicolonKind` is the first member and hence the default value.
  SemicolonKind = enum
    detectSemicolonKind, useSemicolon, dontTouch

  # Kind tag stored in `Emitter.kinds` for every entry of `Emitter.tokens`.
  LayoutToken* = enum
    ltSpaces,
    ltCrucialNewline, ## a semantically crucial newline (indentation!)
    ltSplittingNewline, ## newline used for splitting up long
                        ## expressions (like after a comma or a binary operator)
    ltTab,
    ltOptionalNewline, ## optional newline introduced by nimpretty
    ltComment, ltLit, ltKeyword, ltExportMarker, ltIdent,
    ltOther, ltOpr, ltSomeParLe, ltSomeParRi,
    ltBeginSection, ltEndSection

  # Accumulates the layout tokens of one file; `renderTokens` turns them into
  # the final text.
  Emitter* = object
    config: ConfigRef
    fid: FileIndex
    lastTok: TokType # type of the token handled by the previous `emitTok` call
    # `inquote` is toggled by every backtick (tkAccent) token;
    # `lastTokWasTerse` records that the previous token was written without
    # surrounding whitespace (set for '.' and tightly written operators).
    inquote, lastTokWasTerse: bool
    semicolons: SemicolonKind
    # `col` is advanced by `wr` and reset to 0 by `wrNewline`.
    # `indWidth` == 0 makes `openEmitter` determine the width itself.
    col, lastLineNumber, lineSpan, indentLevel, indWidth*, inSection: int
    keepIndents*: int # > 0 between "#!nimpretty off" and "#!nimpretty on"
    doIndentMore*: int # > 0 doubles the indentation used by `moreIndent`
    # `kinds` and `tokens` are parallel sequences: `wr` appends to both and
    # asserts that their lengths stay equal.
    kinds*: seq[LayoutToken]
    tokens*: seq[string]
    indentStack: seq[int] # `openEmitter` seeds it with a single 0
    fixedUntil: int # marks where we must not go in the content
    altSplitPos: array[SplitKind, int] # alternative split positions
    maxLineLen*: int

proc openEmitter*(em: var Emitter, cache: IdentCache;
                  config: ConfigRef, fileIdx: FileIndex) =
  ## Prepares `em` for emitting the tokens of the file `fileIdx`.
  ##
  ## If `em.indWidth` is still 0, the indentation width is obtained by running
  ## `getIndentWidth` over the file itself; should that also yield 0, the
  ## width falls back to 2. A non-zero `em.indWidth` is left untouched.
  let sourcePath = AbsoluteFile(toFullPath(config, fileIdx))
  if em.indWidth == 0:
    let detected = getIndentWidth(fileIdx, llStreamOpen(sourcePath, fmRead),
                                  cache, config)
    em.indWidth = if detected == 0: 2 else: detected

  em.config = config
  em.fid = fileIdx

  # start from a clean lexical state
  em.lastTok = tkInvalid
  em.inquote = false
  em.col = 0
  em.lastLineNumber = 1

  # the indentation stack always has column 0 at its bottom
  em.indentStack = newSeqOfCap[int](30)
  em.indentStack.add 0

proc computeMax(em: Emitter; pos: int): int =
  ## Scans the tokens from `pos` up to the next `ltEndSection` (or the end of
  ## the token list) and returns the widest "left-hand side" found.
  ##
  ## Only lines that are terminated by a crucial/splitting newline, contain an
  ## `ltTab` and whose total length does not exceed `em.maxLineLen` count.
  ## For such a line the candidate is the summed length of the tokens before
  ## its first tab plus `padding`, where `padding` is 0 if the most recently
  ## seen tab directly follows an `ltSpaces` token and 1 otherwise.
  result = 0
  var padding = 0 # deliberately not reset per line
  var lhsWidth = 0
  var lineWidth = 0
  var sawTab = false
  var idx = pos
  while idx < em.tokens.len and em.kinds[idx] != ltEndSection:
    case em.kinds[idx]
    of ltCrucialNewline, ltSplittingNewline:
      if sawTab and lineWidth <= em.maxLineLen:
        result = max(result, lhsWidth + padding)
      # a new line starts: forget the per-line measurements
      lhsWidth = 0
      lineWidth = 0
      sawTab = false
    of ltTab:
      padding = if em.kinds[idx-1] == ltSpaces: 0 else: 1
      sawTab = true
    else:
      let width = em.tokens[idx].len
      if not sawTab:
        inc lhsWidth, width
      inc lineWidth, width
    inc idx

proc computeRhs(em: Emitter; pos: int): int =
  ## Sums the lengths of the tokens from `pos` up to (excluding) the next
  ## crucial or splitting newline, or up to the end of the token list.
  result = 0
  for idx in pos ..< em.tokens.len:
    if em.kinds[idx] in {ltCrucialNewline, ltSplittingNewline}: break
    inc result, em.tokens[idx].len

proc isLongEnough(lineLen, startPos, endPos: int): bool =
  ## True when the line is longer than `MinLineLen` characters *and* spans
  ## more than four token positions (`endPos` lies beyond `startPos + 4`).
  if lineLen <= MinLineLen:
    result = false
  else:
    result = startPos + 4 < endPos

proc findNewline(em: Emitter; p, lineLen: var int) =
  ## Advances `p` to the next crucial/splitting newline token (or to
  ## `em.tokens.len` if there is none) and adds the length of every token
  ## that was stepped over to `lineLen`.
  while p < em.tokens.len:
    if em.kinds[p] in {ltCrucialNewline, ltSplittingNewline}: break
    lineLen += em.tokens[p].len
    p += 1

proc countNewlines(s: string): int =
  ## Number of line feed (`'\L'`) characters contained in `s`.
  result = 0
  for ch in s:
    if ch == '\L':
      result += 1

proc calcCol(em: var Emitter; s: string) =
  ## Sets `em.col` to the number of characters that follow the last line feed
  ## in `s` (the whole length of `s` if it contains no line feed).
  em.col = 0
  for idx in countdown(s.high, 0):
    if s[idx] == '\L': break
    inc em.col

proc optionalIsGood(em: var Emitter; pos, currentLen: int): bool =
  ## Decides whether the optional newline at index `pos` should be turned
  ## into a real line break. `currentLen` is the length of the output line
  ## produced so far.
  ##
  ## Besides returning the verdict this may rewrite tokens: when the following
  ## line has the same indentation, the newline between them can be converted
  ## into an optional newline so that the two lines are joined.
  # the text of an optional newline token is its indentation
  let ourIndent = em.tokens[pos].len
  var p = pos+1
  var lineLen = 0
  # `p` ends up at the next crucial/splitting newline (or past the end),
  # `lineLen` holds the length of everything between `pos` and `p`
  em.findNewline(p, lineLen)
  if p == pos+1: # optionalNewline followed by another newline
    result = false
  elif em.kinds[p-1] == ltComment and currentLen+lineLen < em.maxLineLen+MinLineLen:
    # the line ends in a comment and overshoots by less than `MinLineLen`
    result = false
  elif p+1 < em.tokens.len and em.kinds[p+1] == ltSpaces and
      em.kinds[p-1] == ltOptionalNewline:
    # the rest of the line ends in another optional newline and the next
    # line starts with indentation spaces
    if em.tokens[p+1].len == ourIndent:
      # concatenate lines with the same indentation
      var nlPos = p
      var lineLenTotal = lineLen
      inc p
      em.findNewline(p, lineLenTotal)
      if isLongEnough(lineLenTotal, nlPos, p):
        em.kinds[nlPos] = ltOptionalNewline
        if em.kinds[nlPos+1] == ltSpaces:
          # inhibit extra spaces when concatenating two lines
          em.tokens[nlPos+1] = if em.tokens[nlPos-2] == ",": " " else: ""
      result = true
    elif em.tokens[p+1].len < ourIndent:
      result = isLongEnough(lineLen, pos, p)
    # a deeper indented next line leaves `result` at its default (false)
  elif em.kinds[pos+1] in {ltOther, ltSomeParLe, ltSomeParRi}: # note: pos+1, not p+1
    result = false
  else:
    result = isLongEnough(lineLen, pos, p)

proc lenOfNextTokens(em: Emitter; pos: int): int =
  ## Total length of the tokens that follow index `pos`, stopping in front of
  ## the first newline token of any kind (crucial, splitting or optional).
  result = 0
  var idx = pos + 1
  while idx < em.tokens.len and
      em.kinds[idx] notin {ltCrucialNewline, ltSplittingNewline, ltOptionalNewline}:
    inc result, em.tokens[idx].len
    inc idx

proc guidingInd(em: Emitter; pos: int): int =
  ## Looks for an indentation to copy for a line break at index `pos`.
  ##
  ## Walks back from `pos` to the nearest `ltSomeParLe` token, then forward
  ## from there until an `ltSomeParRi` token (or the end of the list). The
  ## first splitting newline on that way which is directly followed by
  ## `ltSpaces` determines the result: the length of those spaces.
  ## Returns -1 if no such newline exists or if there is no opening token
  ## before `pos` at all.
  var i = pos - 1
  while i >= 0 and em.kinds[i] != ltSomeParLe:
    dec i
  if i < 0:
    # No opening token precedes `pos`; without this guard the loop below
    # would read `em.kinds[-1]`.
    return -1
  while i+1 <= em.kinds.high and em.kinds[i] != ltSomeParRi:
    if em.kinds[i] == ltSplittingNewline and em.kinds[i+1] == ltSpaces:
      return em.tokens[i+1].len
    inc i
  result = -1

proc renderTokens*(em: var Emitter): string =
  ## Render Emitter tokens to a string of code
  ##
  ## Walks `em.tokens`/`em.kinds` once from front to back, appending to the
  ## output while tracking the length of the current output line. Tabs are
  ## resolved to alignment spaces and optional newlines are either dropped or
  ## turned into real line breaks. `em` is `var` because `optionalIsGood` may
  ## rewrite tokens and `calcCol` updates `em.col`.
  template defaultCase() =
    # emit the token text unchanged and account for its length
    content.add em.tokens[i]
    inc lineLen, em.tokens[i].len
  var content = newStringOfCap(16_000)
  var maxLhs = 0 # alignment column for `ltTab`; 0 means "do not align"
  var lineLen = 0 # length of the output line produced so far
  var lineBegin = 0 # token index at which the current line/section starts
  var openPars = 0 # nesting depth of ltSomeParLe/ltSomeParRi
  var i = 0
  while i <= em.tokens.high:
    when defined(debug):
      echo (token: em.tokens[i], kind: em.kinds[i])
    case em.kinds[i]
    of ltBeginSection:
      maxLhs = computeMax(em, lineBegin)
    of ltEndSection:
      maxLhs = 0
      lineBegin = i+1
    of ltTab:
      if i >= 2 and em.kinds[i-2] in {ltCrucialNewline, ltSplittingNewline} and
          em.kinds[i-1] in {ltCrucialNewline, ltSplittingNewline, ltSpaces}:
        # a previous section has ended
        maxLhs = 0

      if maxLhs == 0:
        # no alignment: write the tab's own text unless spaces precede it
        if em.kinds[i-1] != ltSpaces:
          content.add em.tokens[i]
          inc lineLen, em.tokens[i].len
      else:
        # pick the shorter indentation token:
        var spaces = maxLhs - lineLen
        if spaces < em.tokens[i].len or computeRhs(em, i+1)+maxLhs <= em.maxLineLen+MinLineLen:
          # never glue the following token to the preceding one
          if spaces <= 0 and content[^1] notin {' ', '\L'}: spaces = 1
          for j in 1..spaces: content.add ' '
          inc lineLen, spaces
        else:
          content.add em.tokens[i]
          inc lineLen, em.tokens[i].len
    of ltCrucialNewline, ltSplittingNewline:
      content.add em.tokens[i]
      lineLen = 0
      lineBegin = i+1
    of ltOptionalNewline:
      # An optional newline produces output only if the line would become
      # too long otherwise and `optionalIsGood` approves the break.
      let totalLineLen = lineLen + lenOfNextTokens(em, i)
      if totalLineLen > em.maxLineLen and optionalIsGood(em, i, lineLen):
        if i-1 >= 0 and em.kinds[i-1] == ltSpaces:
          # drop the spaces written just before the break
          let spaces = em.tokens[i-1].len
          content.setLen(content.len - spaces)
        content.add "\L"
        # inside parentheses try to reuse an indentation found by `guidingInd`
        let guide = if openPars > 0: guidingInd(em, i) else: -1
        if guide >= 0:
          content.add repeat(' ', guide)
          lineLen = guide
        else:
          # the token text of an optional newline is its indentation
          content.add em.tokens[i]
          lineLen = em.tokens[i].len
        lineBegin = i+1
        if i+1 < em.kinds.len and em.kinds[i+1] == ltSpaces:
          # inhibit extra spaces at the start of a new line
          inc i
    of ltLit:
      let lineSpan = countNewlines(em.tokens[i])
      if lineSpan > 0:
        # multi-line literal: the line length restarts after its last newline
        em.calcCol(em.tokens[i])
        lineLen = em.col
      else:
        inc lineLen, em.tokens[i].len
      content.add em.tokens[i]
    of ltSomeParLe:
      inc openPars
      defaultCase()
    of ltSomeParRi:
      # a closing token without a matching opening one aborts rendering
      doAssert openPars > 0
      dec openPars
      defaultCase()
    else:
      defaultCase()
    inc i

  return content

type
  # Callback consulted by `writeOut` with the rendered text and the original
  # AST; the output file is only written when it returns true.
  FinalCheck = proc (content: string; origAst: PNode): bool {.nimcall.}

proc writeOut*(em: Emitter; content: string; origAst: PNode; check: FinalCheck) =
  ## Write to disk
  ##
  ## Writes `content` to `em.config.absOutFile`, with two exceptions:
  ## * nothing is written when the file already exists with exactly this
  ##   content;
  ## * nothing is written when `check(content, origAst)` returns false.
  ##
  ## If the output file cannot be opened, an error is reported through
  ## `rawMessage`.
  let outFile = em.config.absOutFile
  if fileExists(outFile) and readFile(outFile.string) == content:
    discard "do nothing, see #9499"
    return

  if check(content, origAst):
    let f = llStreamOpen(outFile, fmWrite)
    if f == nil:
      rawMessage(em.config, errGenerated, "cannot open file: " & outFile.string)
      return
    try:
      f.llStreamWrite content
    finally:
      # close the stream even if writing raised, so the handle is not leaked
      llStreamClose(f)

proc closeEmitter*(em: var Emitter; origAst: PNode; check: FinalCheck) =
  ## Renders the collected tokens and hands the resulting text to `writeOut`,
  ## which decides whether the output file is actually written.
  let rendered = em.renderTokens()
  writeOut(em, rendered, origAst, check)

proc wr(em: var Emitter; x: string; lt: LayoutToken) =
  ## Appends the token text `x` with kind `lt` and advances the current
  ## column by the length of `x`.
  add(em.kinds, lt)
  add(em.tokens, x)
  em.col += x.len
  assert em.kinds.len == em.tokens.len

proc wrNewline(em: var Emitter; kind = ltCrucialNewline) =
  ## Appends a line feed token of the given newline `kind` and resets the
  ## current column to 0.
  add(em.kinds, kind)
  add(em.tokens, "\L")
  em.col = 0

proc newlineWasSplitting*(em: var Emitter) =
  ## If the third-last token is a crucial newline, reclassifies it as a
  ## splitting newline. Does nothing when fewer than three tokens exist.
  if em.kinds.len < 3: return
  if em.kinds[^3] == ltCrucialNewline:
    em.kinds[^3] = ltSplittingNewline

#[
Splitting newlines can occur:
- after commas, semicolon, '[', '('.
- after binary operators, '='.
- after ':' type

We only need parser support for the "after type" case.
]#

proc wrSpaces(em: var Emitter; spaces: int) =
  ## Appends one `ltSpaces` token consisting of `spaces` blanks; a count of
  ## zero or less appends nothing.
  if spaces <= 0: return
  em.wr(strutils.repeat(' ', spaces), ltSpaces)

proc wrSpace(em: var Emitter) =
  ## Appends a single blank as an `ltSpaces` token.
  em.wr(" ", ltSpaces)

proc wrTab(em: var Emitter) =
  ## Appends an `ltTab` token whose text is a single blank.
  em.wr(" ", ltTab)

proc beginSection*(em: var Emitter) =
  ## Inserts an empty `ltBeginSection` marker in front of the last two tokens
  ## (at the very front if there are fewer than two) and increments the
  ## section nesting counter.
  let at = if em.tokens.len > 2: em.tokens.len - 2 else: 0
  insert(em.tokens, "", at)
  insert(em.kinds, ltBeginSection, at)
  em.inSection += 1

#wr(em, "", ltBeginSection)
proc endSection*(em: var Emitter) =
  ## Inserts an empty `ltEndSection` marker in front of the last two tokens
  ## and decrements the section nesting counter.
  ##
  ## The insert position is clamped to 0, as `beginSection` does, so that
  ## an emitter holding fewer than two tokens does not produce a negative
  ## index.
  let pos = max(0, em.tokens.len-2)
  em.tokens.insert "", pos
  em.kinds.insert ltEndSection, pos
  dec em.inSection

#wr(em, "", ltEndSection)

proc removeSpaces(em: var Emitter) =
  ## Drops all trailing `ltSpaces` tokens and moves the current column back
  ## by their combined length.
  while em.kinds.len > 0 and em.kinds[^1] == ltSpaces:
    let width = em.tokens[^1].len
    em.tokens.setLen(em.tokens.high)
    em.kinds.setLen(em.kinds.high)
    em.col -= width


const
  # token types written as `ltSomeParLe` by `emitTok`
  openPars = {tkParLe, tkParDotLe,
              tkBracketLe, tkBracketDotLe, tkBracketLeColon,
              tkCurlyDotLe, tkCurlyLe}
  # token types written as `ltSomeParRi` by `emitTok`
  closedPars = {tkParRi, tkParDotRi,
                tkBracketRi, tkBracketDotRi,
                tkCurlyDotRi, tkCurlyRi}

  # `splitters` and `oprSet` together form the set of tokens after which
  # `emitTok` treats a line break as being inside an expression.
  splitters = openPars + {tkComma, tkSemiColon} # do not add 'tkColon' here!
  oprSet = {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs,
            tkIsnot, tkNot, tkOf, tkAs, tkFrom, tkDotDot, tkAnd, tkOr, tkXor}

# True once `col` has reached at least half of the maximum line length.
# Relies on a variable named `em` being visible at the call site.
template goodCol(col): bool =
  em.maxLineLen div 2 <= col

# Extra indentation for continuation lines: one indentation width, or two
# widths while `em.doIndentMore` is positive.
template moreIndent(em): int =
  em.indWidth * (if em.doIndentMore > 0: 2 else: 1)

# Records a possible split position by appending an `ltOptionalNewline` token
# whose text is the indentation the continuation line would get. This only
# happens outside of backticks, once the column is "good" (see `goodCol`) and
# if that indentation is positive and smaller than the current column.
# `kind` is not used at the moment. Expects `em` at the call site.
template rememberSplit(kind) =
  if not em.inquote and goodCol(em.col):
    let continuationIndent = em.indentLevel + moreIndent(em)
    if continuationIndent > 0 and continuationIndent < em.col:
      wr(em, strutils.repeat(' ', continuationIndent), ltOptionalNewline)
    #em.altSplitPos[kind] = em.tokens.len

proc emitMultilineComment(em: var Emitter, lit: string, col: int; dontIndent: bool) =
  ## Emits the comment text `lit` line by line, re-aligning each line.
  ##
  ## * If the first line ends in a backslash or `[`, or `dontIndent` is set,
  ##   all lines are written exactly as they are.
  ## * Otherwise every line is stripped; lines after the first are placed at
  ##   column `col`, shifted by one indentation width each time the original
  ##   indentation grows or shrinks relative to the previous line.
  ## * Once an empty line was seen and the target column is 15 or more, the
  ##   original number of leading blanks is kept instead.
  var lineNo = 0
  var prevIndent =
    if em.keepIndents > 0: em.indentLevel
    else: em.indentStack[^1]
  var shift = 0
  var verbatim = dontIndent
  var sawEmptyLine = false
  for commentLine in splitLines(lit):
    if lineNo == 0 and (commentLine.endsWith("\\") or commentLine.endsWith("[")):
      verbatim = true
      wr(em, commentLine, ltComment)
    elif verbatim:
      if lineNo > 0: wrNewline(em)
      wr(em, commentLine, ltComment)
    else:
      let stripped = strip(commentLine)
      if lineNo == 0:
        # the first line only needs a tab in front of it
        if em.kinds.len > 0 and em.kinds[^1] != ltTab:
          wr(em, "", ltTab)
      elif stripped.len == 0:
        wrNewline(em)
        sawEmptyLine = true
      else:
        # count the leading blanks of the original line
        var leading = 0
        while leading < commentLine.len and commentLine[leading] == ' ':
          inc leading

        if leading > prevIndent:
          inc shift, em.indWidth
          prevIndent = leading
        elif leading < prevIndent:
          dec shift, em.indWidth
          prevIndent = leading
        wrNewline(em)
        if sawEmptyLine and col + shift >= 15:
          wr(em, repeat(' ', leading), ltSpaces)
        elif col + shift > 0:
          wr(em, repeat(' ', col + shift), ltTab)
        else:
          wr(em, "", ltTab)
      wr(em, stripped, ltComment)
    inc lineNo

proc lastChar(s: string): char =
  ## Last character of `s`, or `'\0'` for the empty string.
  if s.len == 0:
    result = '\0'
  else:
    result = s[^1]

proc endsInWhite(em: Emitter): bool =
  ## True if the last token, ignoring section markers, is spaces, a crucial
  ## or splitting newline or a tab. Also true when no such token exists.
  for idx in countdown(em.tokens.high, 0):
    if em.kinds[idx] notin {ltBeginSection, ltEndSection}:
      return em.kinds[idx] in {ltSpaces, ltCrucialNewline,
                               ltSplittingNewline, ltTab}
  result = true

proc endsInNewline(em: Emitter): bool =
  ## True if the last token, ignoring section markers and spaces, is a
  ## crucial or splitting newline or a tab. Also true when no such token
  ## exists.
  for idx in countdown(em.tokens.high, 0):
    if em.kinds[idx] notin {ltBeginSection, ltEndSection, ltSpaces}:
      return em.kinds[idx] in {ltCrucialNewline, ltSplittingNewline, ltTab}
  result = true

proc endsInAlpha(em: Emitter): bool =
  ## True if the text of the last token, ignoring section markers, ends in a
  ## character from `SymChars` or an underscore. False when no such token
  ## exists.
  result = false
  for idx in countdown(em.tokens.high, 0):
    if em.kinds[idx] notin {ltBeginSection, ltEndSection}:
      return lastChar(em.tokens[idx]) in SymChars + {'_'}

proc emitComment(em: var Emitter; tok: Token; dontIndent: bool) =
  ## Emits the comment belonging to `tok`. The comment text is read from the
  ## source file via the token's comment offsets and stripped.
  ##
  ## A single-line comment is written as one `ltComment` token, preceded by a
  ## tab unless the output currently ends in a newline. A comment spanning
  ## several lines is forwarded to `emitMultilineComment` together with the
  ## column at which it starts. `em.lineSpan` is set to the number of line
  ## feeds in the comment.
  let startCol = em.col # remembered before `calcCol` overwrites `em.col`
  let lit = strip fileSection(em.config, em.fid, tok.commentOffsetA, tok.commentOffsetB)
  em.lineSpan = countNewlines(lit)
  if em.lineSpan > 0:
    calcCol(em, lit)
    var col = startCol
    if not endsInWhite(em):
      wrTab em
      inc col # account for the tab that was just written
    emitMultilineComment(em, lit, col, dontIndent)
  else:
    if not endsInNewline(em):
      wrTab em
    wr em, lit, ltComment

proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
  ## Appends the layout tokens for the lexer token `tok` to `em`.
  ##
  ## The work happens in three steps:
  ## 1. `#!nimpretty` directive comments are handled separately and return
  ##    early.
  ## 2. Either an inline comment is emitted, or (for `tok.indent >= 0`) the
  ##    line break and indentation in front of the token are written.
  ## 3. The token itself is written, with surrounding spaces and optional
  ##    split positions chosen according to its type.
  ##
  ## Finally `lastTok`, `lastTokWasTerse` and `lastLineNumber` are updated.
  ## The parameter `L` is not used in this proc.
  template wasExportMarker(em): bool =
    em.kinds.len > 0 and em.kinds[^1] == ltExportMarker

  if tok.tokType == tkComment and tok.literal.startsWith("#!nimpretty"):
    # "off" starts a region whose indentation is kept, "on" ends it
    case tok.literal
    of "#!nimpretty off":
      inc em.keepIndents
      wrNewline em
      em.lastLineNumber = tok.line + 1
    of "#!nimpretty on":
      dec em.keepIndents
      em.lastLineNumber = tok.line
    wrNewline em
    wr em, tok.literal, ltComment
    em.col = 0
    em.lineSpan = 0
    return

  var preventComment = false
  if tok.tokType == tkComment and tok.line == em.lastLineNumber:
    # we have an inline comment so handle it before the indentation token:
    emitComment(em, tok, dontIndent = (em.inSection == 0))
    preventComment = true
    em.fixedUntil = em.tokens.high

  elif tok.indent >= 0:
    # NOTE(review): a non-negative `tok.indent` presumably marks the first
    # token of a source line; this is lexer behaviour not visible here.
    var newlineKind = ltCrucialNewline
    if em.keepIndents > 0:
      # formatting is switched off: take the indentation as it is
      em.indentLevel = tok.indent
    elif (em.lastTok in (splitters + oprSet) and
        tok.tokType notin (closedPars - {tkBracketDotRi})):
      if tok.tokType in openPars and tok.indent > em.indentStack[^1]:
        while em.indentStack[^1] < tok.indent:
          em.indentStack.add(em.indentStack[^1] + em.indWidth)
      while em.indentStack[^1] > tok.indent:
        discard em.indentStack.pop()

      # aka: we are in an expression context:
      let alignment = max(tok.indent - em.indentStack[^1], 0)
      em.indentLevel = alignment + em.indentStack.high * em.indWidth
      newlineKind = ltSplittingNewline
    else:
      if tok.indent > em.indentStack[^1]:
        em.indentStack.add tok.indent
      else:
        # dedent?
        while em.indentStack.len > 1 and em.indentStack[^1] > tok.indent:
          discard em.indentStack.pop()
      # the nesting depth, not the original column, determines the indentation
      em.indentLevel = em.indentStack.high * em.indWidth
    #[ we only correct the indentation if it is not in an expression context,
       so that code like

        const splitters = {tkComma, tkSemicolon, tkParLe, tkParDotLe,
                          tkBracketLe, tkBracketLeColon, tkCurlyDotLe,
                          tkCurlyLe}

       is not touched.
    ]#
    # remove trailing whitespace:
    removeSpaces em
    wrNewline em, newlineKind
    # reproduce the empty lines between the previous token and this one
    for i in 2..tok.line - em.lastLineNumber: wrNewline(em)
    wrSpaces em, em.indentLevel
    em.fixedUntil = em.tokens.high

  var lastTokWasTerse = false
  case tok.tokType
  of tokKeywordLow..tokKeywordHigh:
    if endsInAlpha(em):
      wrSpace em
    elif not em.inquote and not endsInWhite(em) and
        em.lastTok notin (openPars+{tkOpr, tkDotDot}) and not em.lastTokWasTerse:
      #and tok.tokType in oprSet
      wrSpace em

    if not em.inquote:
      wr(em, $tok.tokType, ltKeyword)
      if tok.tokType in {tkAnd, tkOr, tkIn, tkNotin}:
        rememberSplit(splitIn)
        wrSpace em
    else:
      # keywords in backticks are not normalized:
      wr(em, tok.ident.s, ltIdent)

  of tkColon:
    wr(em, $tok.tokType, ltOther)
    wrSpace em
  of tkSemiColon, tkComma:
    wr(em, $tok.tokType, ltOther)
    rememberSplit(splitComma)
    wrSpace em
  of openPars:
    if tsLeading in tok.spacing and not em.endsInWhite and
        (not em.wasExportMarker or tok.tokType == tkCurlyDotLe):
      wrSpace em
    wr(em, $tok.tokType, ltSomeParLe)
    if tok.tokType != tkCurlyDotLe:
      rememberSplit(splitParLe)
  of closedPars:
    wr(em, $tok.tokType, ltSomeParRi)
  of tkColonColon:
    wr(em, $tok.tokType, ltOther)
  of tkDot:
    lastTokWasTerse = true
    wr(em, $tok.tokType, ltOther)
  of tkEquals:
    # outside of backticks '=' always gets a space on both sides
    if not em.inquote and not em.endsInWhite: wrSpace(em)
    wr(em, $tok.tokType, ltOther)
    if not em.inquote: wrSpace(em)
  of tkOpr, tkDotDot:
    if em.inquote or (tok.spacing == {} and
        tok.ident.s notin ["<", ">", "<=", ">=", "==", "!="]):
      # bug #9504: remember to not spacify a keyword:
      lastTokWasTerse = true
      # if not surrounded by whitespace, don't produce any whitespace either:
      wr(em, tok.ident.s, ltOpr)
    else:
      if not em.endsInWhite: wrSpace(em)
      wr(em, tok.ident.s, ltOpr)
      template isUnary(tok): bool =
        tok.spacing == {tsLeading}

      if not isUnary(tok):
        rememberSplit(splitBinary)
        wrSpace(em)
  of tkAccent:
    if not em.inquote and endsInAlpha(em): wrSpace(em)
    wr(em, $tok.tokType, ltOther)
    # every backtick toggles the "inside backticks" state
    em.inquote = not em.inquote
  of tkComment:
    if not preventComment:
      emitComment(em, tok, dontIndent = false)
  of tkIntLit..tkStrLit, tkRStrLit, tkTripleStrLit, tkGStrLit, tkGTripleStrLit, tkCharLit:
    if not em.inquote:
      # copy the literal from the source file via the token's offsets
      let lit = fileSection(em.config, em.fid, tok.offsetA, tok.offsetB)
      if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}: wrSpace(em)
      em.lineSpan = countNewlines(lit)
      if em.lineSpan > 0: calcCol(em, lit)
      wr em, lit, ltLit
    else:
      if endsInAlpha(em): wrSpace(em)
      wr em, tok.literal, ltLit
  of tkEof: discard
  else:
    let lit = if tok.ident != nil: tok.ident.s else: tok.literal
    if endsInAlpha(em): wrSpace(em)
    wr em, lit, ltIdent

  em.lastTok = tok.tokType
  em.lastTokWasTerse = lastTokWasTerse
  # a token spanning several lines ends `lineSpan` lines below `tok.line`
  em.lastLineNumber = tok.line + em.lineSpan
  em.lineSpan = 0

proc endsWith(em: Emitter; k: varargs[string]): bool =
  ## True if the texts of the last `k.len` tokens equal `k`, in order.
  let offset = em.tokens.len - k.len
  if offset < 0: return false
  for i, expected in k:
    if em.tokens[offset + i] != expected:
      return false
  result = true

proc rfind(em: Emitter, t: string): int =
  ## Searches the last five tokens, newest first, for the text `t`.
  ##
  ## Returns the 1-based distance from the end (usable as `em.tokens[^result]`)
  ## or 0 if `t` is not among them. The search is bounded by the number of
  ## tokens, so it is safe while fewer than five tokens have been emitted.
  result = 0
  for i in 1..min(5, em.tokens.len):
    if em.tokens[^i] == t:
      return i

proc starWasExportMarker*(em: var Emitter) =
  ## If the output ends in the three tokens `" "`, `"*"`, `" "`, replaces
  ## them by a single `"*"` token of kind `ltExportMarker` and moves the
  ## current column back by two.
  if not em.endsWith(" ", "*", " "): return
  em.tokens.setLen(em.tokens.len - 3)
  em.kinds.setLen(em.kinds.len - 3)
  em.tokens.add "*"
  em.kinds.add ltExportMarker
  em.col -= 2

proc commaWasSemicolon*(em: var Emitter) =
  ## On the first call, decides once whether semicolons are used: if a `";"`
  ## is among the most recent tokens the emitter switches to `useSemicolon`,
  ## otherwise to `dontTouch`. In `useSemicolon` mode the most recent `","`
  ## token (if `rfind` locates one) is rewritten to `";"`.
  if em.semicolons == detectSemicolonKind:
    if em.rfind(";") > 0:
      em.semicolons = useSemicolon
    else:
      em.semicolons = dontTouch
  if em.semicolons != useSemicolon: return
  let commaPos = em.rfind(",")
  if commaPos > 0:
    em.tokens[^commaPos] = ";"

proc curlyRiWasPragma*(em: var Emitter) =
  ## If the last token is `"}"`, rewrites it to `".}"` and advances the
  ## current column by one for the added character.
  if not em.endsWith("}"): return
  em.tokens[^1] = ".}"
  em.col += 1