# frozen_string_literal: true

module Banzai
  module Filter
    # TaskListFilter annotates task items and task table items with aria-labels, and
    # adds strikethroughs to the text body of inapplicable task items (created with `[~]`).
    #
    # This should be run on the HTML generated by the Markdown filter, which handles the actual
    # parsing, after the SanitizationFilter.
    class TaskListFilter < HTML::Pipeline::Filter
      prepend Concerns::PipelineTimingCheck

      CSS   = 'input.task-list-item-checkbox'
      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze

      def call
        # Gives every task checkbox in doc an aria-label, strikes through the text of
        # inapplicable task items, and returns the (mutated) doc.
        filter = self.class

        doc.xpath(XPATH).each do |checkbox|
          # Table checkboxes are described by their column header; list checkboxes by the
          # (truncated) text of their own item. Either may come back blank, leaving label nil.
          label =
            if filter.task_table_item?(checkbox)
              column = filter.text_for_task_table_item_column_header_from_input(checkbox)
              format(_('Check option in column "%{column}"'), column: column) if column.present?
            else
              option = filter.text_for_task_item_from_input(checkbox)
                             .truncate(100, separator: ' ', omission: '…')
              format(_('Check option: %{option}'), option: option) if option.present?
            end

          checkbox['aria-label'] = label if label
          # Generic fallback, applied only when the checkbox still has no aria-label.
          checkbox['aria-label'] ||= _('Check option')

          next unless checkbox.has_attribute?('data-inapplicable')

          # Inapplicable items get their text struck through. In non-tight lists the item text
          # sits inside a paragraph and styles alone take care of it. In tight lists the text is
          # not contained in a paragraph, so every text node after the checkbox is wrapped in an
          # <s> here. The walk never descends into <p> or <div> (they indicate a non-tight list)
          # nor into <ul> or <ol> (sublists may hold applicable task items, which must not be
          # struck through).
          owner = checkbox.document

          # A single-space text node is inserted right after the checkbox. The text node that
          # follows the checkbox starts with a space, and without this extra node the
          # strikethrough would begin immediately next to the <input>.
          spacer = checkbox.add_next_sibling(owner.create_text_node(' '))

          # Wrapper element handed to #wrap for each text node that is struck through.
          strike = owner.create_element('s')
          strike['class'] = 'inapplicable'

          filter.yield_text_nodes_without_descending_into(spacer.next_sibling, %w[p div ul ol]) do |text_node|
            text_node.wrap(strike)
          end
        end

        doc
      end

      class << self
        # Determines whether the given <input> checkbox belongs to a task table rather than a task list.
        def task_table_item?(input)
          # The marker class lives on the checkbox's direct parent, not on the checkbox itself.
          parent_classes = input.parent.classes
          parent_classes.include?('task-table-item')
        end

        # Gets the text for the task item, given the <input> checkbox that declares it.
        # Task list items return the text following the checkbox, up until any sublist.
        # Task table items return the text of every non-task table item in the row.
        # Used here in TaskListFilter to provide aria-label for task list items,
        # and by Taskable.get_tasks to provide system note information for both kinds.
        def text_for_task_item_from_input(input)
          # Returns the stripped plain text describing the task that input belongs to.
          collected = +''

          if task_table_item?(input)
            # Table item: join the text of every cell in the row that is not itself a task cell.
            # A task cell with no <tr> ancestor contributes nothing.
            row = input.ancestors('tr').first
            cells = row ? row.elements : []
            cells.each do |cell|
              next if cell.classes.include?('task-table-item')

              # The separator is only inserted once something non-blank has been gathered, so
              # leading empty cells do not produce a dangling ' | '.
              collected << ' | ' if collected.present?
              collected << cell.text
            end
          else
            # List item: everything after the checkbox, stopping at the first sibling sublist.
            yield_next_siblings_until(input, %w[ol ul]) { |sibling| collected << sibling.text }
          end

          collected.strip
        end

        # Get the column header text for the given task table item, which is used as the
        # aria-label for the input; it's a more useful hint to screen reader users than
        # adjacent columns, which are comparatively more easily navigated to, and don't
        # provide context on the meaning of the checkbox in relation to that information.
        def text_for_task_table_item_column_header_from_input(input)
          # Returns the stripped text of the cell in the table's first row that sits in the same
          # column as input's cell, or nil when that cell cannot be located.

          # This is only reached when task_table_item?(input) holds, so the parent carrying the
          # 'task-table-item' class exists; the sanitizer only permits that class on <td>/<th>.
          owning_cell = input.parent

          # Intended to be the enclosing <tr>, though it may be anything. It is never nil: at
          # "worst" it is the document (fragment).
          owning_row = owning_cell.parent

          # Always found, because owning_row was reached through owning_cell.parent. Only
          # elements are counted (here and for the header row) since text nodes may differ in
          # number or presence from one row to the next.
          column = owning_row.elements.index(owning_cell)

          # Each step below may come up empty: there may be no enclosing <table>, the table may
          # contain no <tr> at all, and its first <tr> may have fewer cells than our row.
          table = input.ancestors('table').first
          header_row = table.css('tr').first if table
          header_cell = header_row.elements[column] if header_row

          header_cell.text.strip if header_cell
        end

        # Gets the HTML corresponding to the task item text, given the <input> checkbox that declares it.
        # This should be used for task item matching **only**; see Taskable.get_tasks. We never return
        # this to the user.
        # Task list items return the HTML of all nodes following the checkbox, up until any sublist.
        # Task table items return the HTML of all nodes in the row, excluding task table items.
        def text_html_for_task_item_from_input(input)
          # Returns the concatenated outer HTML (without data-sourcepos attributes) of the nodes
          # that make up the task item's text.
          markup = +''

          if task_table_item?(input)
            # Table item: every cell of the row except task cells. No <tr> ancestor, no output.
            row = input.ancestors('tr').first
            if row
              row.elements
                 .reject { |cell| cell.classes.include?('task-table-item') }
                 .each { |cell| markup << to_html_without_sourcepos(cell) }
            end
          else
            # List item: every node after the checkbox, stopping at the first sibling sublist.
            yield_next_siblings_until(input, %w[ol ul]) do |sibling|
              markup << to_html_without_sourcepos(sibling)
            end
          end

          markup
        end

        # Yields the #next_sibling of start, and then the #next_sibling of that, until either
        # there are no more next siblings or a matching element is encountered.
        #
        # The following #next_sibling is evaluated *before* each element is yielded, so they
        # can safely be reparented or removed without affecting iteration.
        def yield_next_siblings_until(start, els)
          # start itself is never yielded; iteration begins with its next sibling and ends when
          # the siblings run out or one whose name is listed in els is reached (not yielded).
          node = start.next_sibling
          until node.nil? || els.include?(node.name)
            # Look ahead before yielding so the block may move or remove the node it receives.
            upcoming = node.next_sibling
            yield node
            node = upcoming
          end
        end

        # Starting from start, iteratively yield text nodes contained within its children,
        # and its (repeated) #next_siblings and their children, not descending into any of
        # the elements given by els.
        #
        # The following #next_sibling is evaluated before yielding, as above.
        def yield_text_nodes_without_descending_into(start, els)
          # Depth-first walk driven by an explicit stack of nodes still to visit. A nil start
          # (nothing to walk) simply yields nothing.
          pending = start ? [start] : []

          until pending.empty?
            node = pending.pop

            # Queue the sibling before anything is yielded, so the block may reparent the node.
            # It is pushed first and therefore visited after the node's own children.
            sibling = node.next_sibling
            pending.push(sibling) if sibling

            if node.text?
              # Whitespace-only text nodes are skipped.
              yield node unless node.content.blank?
            elsif els.exclude?(node.name)
              # Descend via the first child only; its siblings follow through next_sibling.
              first_child = node.children.first
              pending.push(first_child) if first_child
            end
          end
        end

        # Return the given element's outer HTML (i.e. including el's tag itself), but with
        # all data-sourcepos attributes removed from the node and all children.
        def to_html_without_sourcepos(el)
          # Operate on a clone so the attributes are removed from the copy, not from el.
          el.clone
            .tap { |copy| copy.xpath('.//@data-sourcepos').remove }
            .to_html
        end
      end
    end
  end
end
