#
# Copyright (c) 2023 supercell
#
# SPDX-License-Identifier: BSD-3-Clause
#

module Luce
  # Parses paragraphs of regular text.
  class ParagraphSyntax < BlockSyntax
    # Matches up to three leading spaces followed by `[`. Used to test
    # whether a line could begin a reference link definition.
    @@reflink_definition_start : Regex = Regex.new(%q([ ]{0,3}\[))

    # Matches a string that is empty or consists only of whitespace.
    # Used to reject reference labels that have no visible characters.
    @@whitespace_pattern : Regex = Regex.new(%q(^\s*$))

    # Returns `Luce.dummy_pattern` rather than a syntax-specific regex.
    #
    # This class overrides `#can_parse?` to return `true` unconditionally
    # and never consults the pattern itself.
    def pattern : Regex
      Luce.dummy_pattern
    end

    # Always returns `false`: this syntax never reports that it ends
    # another block, regardless of the state of *parser*.
    #
    # NOTE(review): presumably so that a plain text line continues an open
    # paragraph instead of terminating it — confirm against
    # `BlockSyntax.at_block_end?`.
    def can_end_block?(parser : BlockParser) : Bool
      false
    end

    # Always returns `true`: this syntax accepts any line, regardless of
    # the state of *parser*.
    #
    # NOTE(review): presumably relies on being tried after the more
    # specific block syntaxes — confirm the ordering in the block parser.
    def can_parse?(parser : BlockParser) : Bool
      true
    end

    # Gathers lines from *parser* until `BlockSyntax.at_block_end?` reports
    # the end of the block, strips any leading reference link definitions,
    # and wraps whatever remains in a `p` element.
    #
    # Returns an empty `Text` node when every gathered line belonged to a
    # reference link definition; otherwise returns a `p` `Element` whose
    # single child is the unparsed, right-trimmed paragraph text.
    def parse(parser : BlockParser) : Node
      collected = [] of String

      # Keep taking lines until something ends the paragraph.
      loop do
        break if BlockSyntax.at_block_end? parser
        collected << parser.current
        parser.advance
      end

      remaining = extract_reflink_definitions(parser, collected)

      # `nil` means the paragraph consisted solely of reference link
      # definitions, so there is nothing to render.
      return Text.new("") if remaining.nil?

      body = remaining.join("\n").rstrip
      Element.new("p", [UnparsedContent.new(body)] of Node)
    end

    # Outcome of the inner scan in `#extract_reflink_definitions`. It
    # stands in for a labelled `continue` of the outer loop, which the
    # inner `while` cannot express directly.
    private enum InnerLoopRes
      # The inner scan did not request another outer-loop iteration.
      None
      # A definition was parsed; restart the outer loop at the next
      # candidate line.
      Next
    end

    # Extracts reference link definitions from the front of the
    # paragraph, and returns the remaining paragraph lines.
    #
    # Each candidate is handed to `#parse_reflink_definition`, which
    # registers a successfully parsed definition on the parser's document
    # as a side effect.
    #
    # Returns `nil` when every line in *lines* was consumed by reference
    # link definitions, otherwise the trailing lines that are ordinary
    # paragraph content.
    #
    # NOTE(review): `lines[0]` is read unconditionally, so *lines* is
    # assumed to be non-empty — confirm that callers guarantee this.
    private def extract_reflink_definitions(
      parser : BlockParser,
      lines : Array(String)
    ) : Array(String)?
      # Whether the line at index *i* begins like a reference link
      # definition (optional indentation followed by `[`).
      line_starts_reflink_definition = ->(i : Int32) {
        lines[i].starts_with? @@reflink_definition_start
      }

      # Index of the first line not yet consumed by a definition.
      i = 0
      loop do
        # Check for reflink definitions.
        if !line_starts_reflink_definition.call(i)
          # It's paragraph content from here on out.
          break
        end
        # Accumulates the candidate definition, which may span several
        # lines (for example a title on its own line).
        contents = lines[i]
        j = i + 1
        inner_loop_res : InnerLoopRes = InnerLoopRes::None
        while j < lines.size
          # Check to see if the _next_ line might start a new reflink
          # definition. Even if it turns out not to be, but it started
          # with a '[', then it is not part of _this_ possible reflink
          # definition.
          if line_starts_reflink_definition.call(j)
            # Try to parse `contents` as a reflink definition.
            if parse_reflink_definition(parser, contents)
              # Loop again, starting at the next possible reflink
              # definition.
              i = j
              inner_loop_res = InnerLoopRes::Next
              break
            else
              # Could not parse `contents` as a reflink definition.
              break
            end
          else
            contents = "#{contents}\n#{lines[j]}"
            j += 1
          end
        end

        # Emulates a labelled `continue` of the outer loop.
        next if inner_loop_res == InnerLoopRes::Next

        # End of the block
        if parse_reflink_definition(parser, contents)
          i = j
          break
        end

        # It may be that there is a reflink definition starting at [i],
        # but it does not extend all the way to [j], such as:
        #
        #     [link]: url      # line i
        #     "title"
        #     garbage
        #     [link2]: url     # line j
        #
        # In this case, [i..i+1] is a reflink definition, and the rest
        # is paragraph content.
        while j >= i
          # This isn't the most efficient loop: every step allocates a
          # sub-array (`lines[i...j]`) and then a joined String, but we
          # must walk backwards, checking each range.
          contents = lines[i...j].join("\n")
          if parse_reflink_definition(parser, contents)
            # That is the last reflink definition. The rest is
            # paragraph content.
            i = j
            break
          end
          j -= 1
        end

        # The ending was not a reflink definition at all. Just
        # paragraph content.
        break
      end

      if i == lines.size
        # No paragraph content
        nil
      else
        # Ends with paragraph content
        lines[i..]
      end
    end

    # Matches one complete reference link definition:
    #
    # 1. up to three spaces of leading indentation,
    # 2. the label in brackets, a `:`, and the destination (optionally
    #    wrapped in `<...>`),
    # 3. an optional title in double quotes, single quotes, or parens.
    #
    # Compiled once here instead of on every call, because
    # `#parse_reflink_definition` is invoked repeatedly while scanning a
    # paragraph.
    @@reflink_definition_pattern : Regex = Regex.new(
      # Leading indentation
      %q(^[ ]{0,3}) +
      # Reference id in brackets, and URL.
      %q(\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*) +
      # Title in double or single quotes, or parens.
      %q{("[^"]+"|'[^']+'|\([^)]+\)|)\s*$},
      Regex::Options::MULTILINE
    )

    # Parses *contents* as a reference link definition.
    #
    # On success the definition is added to the document's link
    # references, unless a reference with the same normalized label is
    # already present, in which case the existing one is kept.
    #
    # Returns whether *contents* could be parsed as a reference link
    # definition.
    private def parse_reflink_definition(parser : BlockParser, contents : String) : Bool
      match = @@reflink_definition_pattern.match(contents)

      # Not a reference link definition.
      return false if match.nil?

      # Trailing text after the definition. No good.
      return false if match[0].size < contents.size

      label = match[1]

      # The label must contain at least one non-whitespace character.
      return false if @@whitespace_pattern.matches? label

      # Group 2 is the `<...>` form of the destination, group 3 the bare
      # form; exactly one of them takes part in a successful match.
      destination = match[2]? || match[3]

      # Group 4 always takes part in the match (it has an empty
      # alternative), so an empty string means "no title". Otherwise
      # remove the surrounding "", '', or ().
      raw_title = match[4]
      title = raw_title.empty? ? nil : raw_title[1...-1]

      # References are case-insensitive, and internal whitespace is
      # compressed.
      label = Luce.normalize_link_label(label)

      unless parser.document.link_references.has_key? label
        parser.document.link_references[label] = LinkReference.new(label,
          destination, title)
      end
      true
    end
  end
end
