{}

#!/usr/bin/python3
#
# gmi2html - convert Gemtext to HTML
#
# SPDX-FileCopyrightText: 2023 Daniel Kalak
# SPDX-License-Identifier: GPL-3.0-or-later

import sys

# Line categories. Plain integers stand in for an enum; unpacking range(11)
# numbers them 0..10 in the order they are listed.
(
  PRE_TOGGLE,
  PRE,
  H1,
  H2,
  H3,
  LIST,
  LINK,
  QUOTE_TEXT,
  QUOTE_GAP,
  BODY_TEXT,
  BODY_GAP,
) = range(11)

# Two-element groups: (category for a line with text, category for a line
# that is empty or whitespace only).
QUOTE = (QUOTE_TEXT, QUOTE_GAP)
BODY = (BODY_TEXT, BODY_GAP)

PRE_TOGGLE_PREFIX = "```"

# Maps each line prefix to its possible categories; categorize() takes the
# first entry whose prefix matches. The ("", BODY) entry has to stay last,
# since the empty prefix matches every line. QUOTE and BODY are used as-is
# because they already are sequences.
PREFIXES = [
  (PRE_TOGGLE_PREFIX, [PRE_TOGGLE]),
  ("# ", [H1]),
  ("## ", [H2]),
  ("### ", [H3]),
  ("* ", [LIST]),
  ("=>", [LINK]),
  (">", QUOTE),
  ("", BODY),
]

# Replacements that escape() applies in list order. "&" has to stay first;
# otherwise the "&" of the entities produced by the later replacements would
# itself be replaced.
ESCAPES = [
  ("&", "&amp;"),
  ("<", "&lt;"),
  (">", "&gt;"),
  ('"', "&quot;"),
  ("'", "&apos;"),
]

def escape(s):
  """Return s with every character listed in ESCAPES replaced by its entity."""
  # The pairs are applied in list order, one full pass over s per pair.
  for raw, entity in ESCAPES:
    s = s.replace(raw, entity)
  return s

def categorize(line, pre):
  """Classify one raw input line.

  line is a line as read from the input; its trailing newline is optional,
  because the last line of the input may not have one. pre is true while the
  converter is inside a preformatted block.

  Returns a (category, text) tuple. text is the HTML-escaped content of the
  line without its trailing newline and, outside preformatted blocks, without
  the matched prefix.
  """
  # Drop only a real trailing newline. Slicing off the last character
  # unconditionally would eat a content character on a final line that has no
  # newline.
  content = line.removesuffix("\n")
  # Inside a preformatted block everything except a toggle line is verbatim.
  if pre and not content.startswith(PRE_TOGGLE_PREFIX):
    return (PRE, escape(content))
  # The loop only returns if PREFIXES contains ("", _).
  for prefix, categories in PREFIXES:
    if content.startswith(prefix):
      content = content[len(prefix):]
      # An empty remainder counts as blank too; str.isspace() alone is False
      # for the empty string.
      blank = content == "" or content.isspace()
      # categories[-1] works independently of whether categories has 1 or 2
      # elements: with one element both choices are the same category.
      category = categories[-1] if blank else categories[0]
      return (category, escape(content))

def close_tags(prev, curr, pre):
  """Write the closing tag, if any, that has to precede the current line.

  prev and curr are the categories of the previous and the current line; curr
  is None when called after the last line of the input. pre is true while
  inside a preformatted block (before curr has been processed).
  """
  # (prev == PRE) instead of (pre) doesn't work because there could be 2
  # adjacent PRE_TOGGLE lines with no PRE lines in between.
  # curr is None at end of input: a preformatted block that was never toggled
  # off still gets its closing tag, so the output has no dangling <pre>.
  if pre and curr in (PRE_TOGGLE, None):
    sys.stdout.write("</pre>\n")
  elif prev == LIST and curr != LIST:
    sys.stdout.write("</ul>\n")
  elif prev in QUOTE and curr not in QUOTE:
    sys.stdout.write("</blockquote>\n")

def open_tags(prev, curr, pre):
  """Write the opening tag, if any, that the current line starts.

  prev and curr are the categories of the previous and the current line. pre
  is true while inside a preformatted block (before curr has been processed).
  """
  # The pre flag is consulted rather than (prev != PRE): a toggle line can
  # directly follow another toggle line, so prev alone does not tell whether
  # a preformatted block is currently open.
  if curr == PRE_TOGGLE and not pre:
    tag = "<pre>\n"
  elif curr == LIST and prev != LIST:
    tag = "<ul>\n"
  elif curr in QUOTE and prev not in QUOTE:
    tag = "<blockquote>\n"
  else:
    # Nothing starts on this line.
    return
  sys.stdout.write(tag)

def print_line(prev, curr, escaped_line):
  """Write the HTML for one line to stdout, followed by a newline.

  prev and curr are the categories of the previous and the current line.
  escaped_line is the already HTML-escaped content of the current line.
  Categories without a branch below (the gap categories) produce only the
  newline.
  """
  if curr == PRE:
    sys.stdout.write(escaped_line)
  elif curr == H1:
    sys.stdout.write("<h1>{}</h1>".format(escaped_line))
  elif curr == H2:
    sys.stdout.write("<h2>{}</h2>".format(escaped_line))
  elif curr == H3:
    sys.stdout.write("<h3>{}</h3>".format(escaped_line))
  elif curr == LIST:
    sys.stdout.write("<li>{}".format(escaped_line))
  elif curr == LINK:
    # The first line of a run starts a paragraph; later lines of the same
    # category are separated by line breaks.
    sys.stdout.write("<p>" if prev != curr else "<br>")
    # escaped_line.removeprefix(" ") isn't necessary here because split()
    # covers that case.
    parts = escaped_line.split(maxsplit=1)
    if parts:
      # parts[-1] works independently of whether parts has 1 or 2 elements:
      # without a description the URL doubles as the link text.
      url, description = parts[0], parts[-1]
    else:
      # A link line with no content makes split() return an empty list.
      # Emit an empty link instead of raising IndexError.
      url = description = ""
    sys.stdout.write("<a href=\"{}\">{}</a>".format(url, description))
  elif curr == QUOTE_TEXT:
    sys.stdout.write("<p>" if prev != curr else "<br>")
    sys.stdout.write(escaped_line.removeprefix(" "))
  elif curr == BODY_TEXT:
    sys.stdout.write("<p>" if prev != curr else "<br>")
    sys.stdout.write(escaped_line)
  sys.stdout.write("\n")

def print_error(i, msg):
  """Write "Line <i>: <msg>" plus a newline to stderr.

  Returns whatever sys.stderr.write() returns for that text.
  """
  report = f"Line {i}: {msg}\n"
  return sys.stderr.write(report)

def check_line_sequence(i, prev, curr):
  """Report problems in how the current line follows the previous one.

  i is the number of the current line; prev and curr are the categories of
  the previous and the current line. prev is None before the first line and
  curr is None after the last one. Messages about the previous line are
  reported for line i - 1.

  Returns True on error, False otherwise.

  Two adjacent PRE_TOGGLE lines produce both "code block is empty" and
  "empty line missing".
  """
  reports = []
  pair = (prev, curr)

  # At most one of these specific diagnostics applies; the first match wins.
  if pair == (BODY_GAP, BODY_GAP):
    reports.append((i, "consecutive empty lines"))
  elif pair == (QUOTE_GAP, QUOTE_GAP):
    reports.append((i, "consecutive empty lines in quote"))
  elif curr == QUOTE_GAP and prev not in QUOTE:
    reports.append((i, "first line in quote is empty"))
  elif prev == QUOTE_GAP and curr not in QUOTE:
    reports.append((i - 1, "last line in quote is empty"))
  elif prev is None and curr == BODY_GAP:
    reports.append((i, "first line is empty"))
  elif prev == BODY_GAP and curr is None:
    reports.append((i - 1, "last line is empty"))
  elif pair == (PRE_TOGGLE, PRE_TOGGLE):
    reports.append((i, "code block is empty"))

  # Independently of the above: a new element has to be preceded by an empty
  # body line unless it is the very first line.
  if prev is not None and prev != BODY_GAP:
    starts_element = (
      (curr == PRE_TOGGLE and prev != PRE)
      or curr in (H1, H2, H3)
      or (curr in (LIST, LINK, BODY_TEXT) and curr != prev)
      or (curr in QUOTE and prev not in QUOTE)
    )
    if starts_element:
      reports.append((i, "empty line missing"))

  # print_error() returns what the write to stderr returned; a positive total
  # means that something was reported.
  return sum(print_error(number, text) for number, text in reports) > 0

def check_character_sequence(i, curr, escaped_line):
  """Report problems with the characters inside the current line.

  i is the number of the current line, curr its category and escaped_line its
  HTML-escaped content as returned by categorize().

  Returns True on error, False otherwise.
  """
  problems = []

  if curr == PRE_TOGGLE and escaped_line:
    problems.append("text after ```")

  # Whitespace-only content is not caught here, although it technically is
  # "no content" as well; the trailing-whitespace check below reports those
  # lines.
  if not escaped_line and curr in (H1, H2, H3, LINK, LIST):
    problems.append("no content")

  # Slicing keeps the checks safe on an empty string: "".isspace() is False.
  if escaped_line[-1:].isspace():
    problems.append("trailing whitespace")

  # Links and quote text are expected to have exactly one space after the
  # prefix; that space is skipped before looking for leading whitespace.
  remainder = escaped_line
  if curr in (LINK, QUOTE_TEXT):
    if escaped_line[:1] in ("", " "):
      remainder = escaped_line[1:]
    else:
      problems.append("missing space after >")

  if curr != PRE and remainder[:1].isspace():
    problems.append("leading whitespace")

  # print_error() returns what the write to stderr returned; a positive total
  # means that something was reported.
  return sum(print_error(i, text) for text in problems) > 0

# Conversion state: i is the number of the line being processed (it stays 0
# for empty input), prev/curr are the categories of the previous and current
# line, pre tells whether a preformatted block is open, and errors records
# whether any check has reported a problem.
i = 0
prev = curr = None
pre = False
errors = False

for i, line in enumerate(sys.stdin, start=1):
  prev = curr
  curr, escaped_line = categorize(line, pre)

  # Tags are closed before new ones are opened, and both see the pre state
  # from before this line.
  close_tags(prev, curr, pre)
  open_tags(prev, curr, pre)
  if curr != PRE_TOGGLE:
    print_line(prev, curr, escaped_line)
  else:
    # Toggle lines produce no output of their own; they only flip the state.
    pre = not pre

  # Both checks always run so that every problem on the line is reported.
  bad_sequence = check_line_sequence(i, prev, curr)
  bad_characters = check_character_sequence(i, curr, escaped_line)
  errors = errors or bad_sequence or bad_characters

# End of input is signalled to the helpers by passing None as the current
# category.
close_tags(curr, None, pre)
if check_line_sequence(i + 1, curr, None):
  errors = True
sys.exit(1 if errors else 0)