if $0 == __FILE__
  require 'cgikit'
  require 'cgikit/lang/ja'  
end

module CGIKit::HTMLParser
  
  # Renders an attribute collection as an HTML attribute string.
  #
  # Every key/value pair yielded by +attrs.each+ becomes <tt> key="value"</tt>
  # (note the leading space), in iteration order. Values are interpolated
  # as-is; no escaping is performed here.
  #
  # attrs:: Hash-like collection of attribute names and values, or +nil+.
  #
  # Returns the concatenated string; '' when +attrs+ is +nil+ or empty.
  def self.attribute_string(attrs)
    str = ''
    # A node whose attributes were never assigned hands us nil.
    return str if attrs.nil?

    attrs.each do |key, value|
      str << " #{key}=\"#{value}\""
    end
    str
  end
  
  # CGIKit::HTMLParser::Node represents a node in a template's tree structure.
  # Node is the base class; the tree itself is built from its subclasses.
  class Node

    attr_accessor :node, :name, :content, :parent, :attributes

    # Creates a node with no children and empty content.
    #
    # name:: node name; +nil+ is stored as ''.
    def initialize( name = nil )
      @node = []
      @name = name || ''
      @content = ''
    end

    # Returns the +index+th child node.
    def []( index )
      @node[index]
    end

    # Returns the number of children.
    def size
      @node.size
    end

    # Returns the last child node.
    def last
      @node.last
    end

    # Returns the topmost ancestor, found by following +parent+ until it is
    # nil. In a parsed template this is expected to be the
    # CGIKit::HTMLParser::RootNode.
    def root
      cur = self
      cur = cur.parent while cur.parent
      cur
    end

    # Returns true if +other+ has the same name and equal children.
    #
    # Objects that do not expose both +name+ and +node+ (nil, strings, ...)
    # are never equal to a node: the comparison returns false instead of
    # raising NoMethodError.
    def ==( other )
      return false unless other.respond_to?(:name) && other.respond_to?(:node)
      (@name == other.name) && (@node == other.node)
    end

    # Adds +node+ as the last child and makes +self+ its parent.
    #
    # Returns the array of children (not +self+), so a chained
    # <tt>a << b << c</tt> would append +c+ to that array without setting
    # its parent.
    def <<( node )
      node.parent = self
      @node << node
    end

    # Returns true if +self+ has no children.
    def empty?
      @node.nil? || @node.empty?
    end

    # Forwards the "take values from request" phase to every child.
    def take_values_from_request( request, context )
      @node.each do |child|
        child.take_values_from_request( request, context )
      end
    end

    # Forwards the "invoke action" phase to every child, in order.
    #
    # Returns the last truthy result produced by a child, or nil when no
    # child produced one.
    def invoke_action( request, context )
      result = nil
      @node.each do |child|
        child_result = child.invoke_action(request, context)
        result = child_result if child_result
      end
      result
    end

    # Forwards the "append to response" phase to every child, in order.
    def append_to_response( response, context )
      @node.each do |child|
        child.append_to_response(response, context)
      end
    end

    # Converts +self+ to HTML by concatenating the children's +to_s+ output.
    def to_s
      @node.inject('') { |html, child| html << child.to_s }
    end

    # Returns a copy of this subtree built from +cache_copy+ of each child.
    # +attributes+ and +content+ are assigned by reference (not duplicated).
    def cache_copy
      copy = self.class.new(@name)
      copy.attributes = @attributes
      copy.content = @content
      @node.each do |child|
        copy << child.cache_copy
      end
      copy
    end

    # Calls +reset+ on every child. Subclasses may leave @node unset, hence
    # the nil guard.
    def reset
      return unless @node
      @node.each do |child|
        child.reset
      end
    end

  end
  
  # Root node of a template's tree structure.
  # Of all nodes, only this one holds a reference to a CGIKit::Component object.
  class RootNode < Node
    attr_accessor :component

    # Marshal hook: returns a Hash of instance-variable name => value for
    # every instance variable except @component, so the component is not
    # serialized together with the tree.
    def marshal_dump
      dump = {}
      instance_variables.each do |var|
        # instance_variables yields Strings on Ruby 1.8 but Symbols on 1.9+;
        # comparing by name skips @component on both.
        next if var.to_s == '@component'
        dump[var] = instance_variable_get(var)
      end
      dump
    end

    # Marshal hook: restores the instance variables stored by marshal_dump.
    #
    # object:: Hash of instance-variable name => value.
    def marshal_load( object )
      object.each do |key, value|
        instance_variable_set(key, value)
      end
    end

    # A root node never has a parent, whatever was assigned through parent=.
    def parent
      nil
    end

  end
  
  # This class represents literal text. When CGIKit expands a component's
  # template, a TextNode appends its +content+ to the +response+ object.
  #
  # NOTE(review): initialize does not call super, so @node and @name stay
  # unset, and the inherited == compares only name and children - two
  # TextNodes with different content therefore compare equal. Confirm
  # whether that is intended.
  class TextNode < Node

    # name:: accepted for signature compatibility with Node; it is not stored.
    def initialize(name = nil)
      @content = ''
    end

    # Text carries no request values; nothing to do.
    def take_values_from_request( request, context )
    end

    # Text has no action; always returns nil.
    def invoke_action( request, context )
    end

    # Appends the text to +response.content+.
    def append_to_response( response, context )
      response.content << @content
    end

    # Returns the text itself (the same String object as +content+).
    def to_s
      @content
    end

    # Returns a new node of the same class sharing this node's content.
    def cache_copy
      duplicate = self.class.new(@name)
      duplicate.content = @content
      duplicate
    end

  end
  
  
  # Node for a tag bound to a CGIKit element. The element (a
  # CGIKit::DynamicElement or a subcomponent) declared under +ckid+ is created
  # lazily, and the request/response phases are forwarded to the request
  # handler obtained from the context's application.
  class CGIKitNode < Node

    attr_accessor :element, :ckid

    # name:: tag name; a String is converted to a Symbol before being stored.
    def initialize( name = nil )
      name = name.intern if ::String === name
      super
    end

    # Lets the request handler copy request values into this node's element.
    def take_values_from_request( request, context )
      element = element_for_context(context)
      handler(context).take_values_from_request(element, request, context)
    end

    # Lets the request handler invoke the element's action.
    #
    # A truthy result is also passed to set_result. Returns the handler's
    # result.
    def invoke_action( request, context )
      element = element_for_context(context)
      result = handler(context).invoke_action(element, request, context)
      set_result(result) if result
      result
    end

    # Stores an action result as this node's element.
    def set_result( result )
      @element = result
    end

    # Lets the request handler render this node's element into +response+.
    # Does nothing when no element is available.
    def append_to_response( response, context )
      element = element_for_context(context)
      return unless element
      handler(context).append_to_response(element, response, context)
    end

    # Returns the element for this node, creating it on first use and
    # handing it this node's children.
    def element_for_context( context )
      return @element if @element

      @element = create_element(context)
      @element.node = @node
      @element
    end

    # Instantiates the declared element class as either a dynamic element or
    # a subcomponent, then wires up its parent when nested in a dynamic
    # element.
    #
    # Raises RuntimeError when no element class can be determined.
    def create_element( context )
      klass = element_class(context)
      unless klass
        # NOTE(review): +content+ is '' unless assigned elsewhere; the ckid
        # may have been intended here - confirm.
        raise "Element class for #{context.parent.class}:#{content} is not specified."
      end

      # Module#> : true when klass is a (strict) subclass of DynamicElement.
      element = if CGIKit::DynamicElement > klass
                  create_dynamic_element(klass, context)
                else
                  create_subcomponent(klass, context)
                end

      init_element_for_form(element)
      element
    end

    # Builds a dynamic element from the ckid, its associations and the root
    # component.
    def create_dynamic_element( klass, context )
      klass.new(@ckid, associations(context), root.component)
    end

    # Returns the subcomponent for the current context id: the one supplied
    # by the context's component when available, otherwise a new +klass+
    # instance attached to the root component. The declaration name and
    # associations are (re)assigned in both cases.
    def create_subcomponent( klass, context )
      subcomponent = context.component.next_subcomponent(context.context_id)
      unless subcomponent
        subcomponent = klass.new(context)
        subcomponent.parent = root.component
        subcomponent.context_id = context.context_id
      end
      subcomponent.declaration_name = @ckid
      subcomponent.associations = associations(context)
      subcomponent
    end

    # When this node is nested inside a CGIKitNode whose element is a dynamic
    # element, sets +element.parent+: components get the root component,
    # anything else gets the enclosing element.
    def init_element_for_form( element )
      return unless (CGIKitNode === @parent) && (CGIKit::DynamicElement === @parent.element)

      element.parent = if CGIKit::Component === element
                         root.component
                       else
                         @parent.element
                       end
    end

    # Resolves the declared element type to a class. A Class is used as-is;
    # otherwise the type is evaluated as a key path on the root component and
    # a non-Class result is looked up through the application.
    def element_class( context )
      type = declaration_store.element_type
      return type if Class === type

      evaluated = root.component.value_for_keypath(type)
      return evaluated if Class === evaluated

      context.application.class_named(evaluated)
    end

    # Returns the root component's declaration for this node's ckid.
    #
    # Raises RuntimeError when the ckid has no declaration.
    def declaration_store
      dec = root.component.declaration_store[@ckid]
      unless dec
        raise "Element '#@ckid' is not declared in '#{root.component.class}'."
      end
      dec
    end

    # Returns the association hash of this node's declaration.
    def associations( context )
      declaration_store.association_hash
    end

    # Returns the application's request handler for the current request.
    def handler( context )
      context.application.request_handler(context.request.request_handler_key)
    end

    # Renders the tag with its attributes: self-closing when the children
    # render to an empty string, otherwise wrapping the rendered children.
    def to_s
      # Render the children once and reuse the result for both branches.
      inner = super
      open_tag = "<#@name#{CGIKit::HTMLParser.attribute_string(@attributes)}"
      if inner.empty?
        "#{open_tag} />"
      else
        "#{open_tag}>#{inner}</#@name>"
      end
    end

    # Copies the subtree like Node#cache_copy and carries the ckid over.
    # The element itself is not copied.
    def cache_copy
      copy = super
      copy.ckid = @ckid
      copy
    end

    # Drops the cached element and resets every child.
    def reset
      @element = nil
      @node.each do |child|
        child.reset
      end
    end

  end
  
  
  
  class HTMLParser
    
    # Raised by the parser when a template cannot be processed (REXML parse
    # failure, unrecognized parser event, or mismatched character encoding).
    class ParseError < CGIKit::CGIKitError #:nodoc:
    end
    
    # Name of the template attribute that marks a tag as a CGIKit element.
    def self.cgikit_attribute
      :ckid
    end
    
    # Pattern for the "ck:" prefix that marks an +id+ attribute value as a
    # CGIKit element id.
    def self.cgikit_regexp
      %r{\Ack:}u
    end
    
    # Pattern for the leading "ck" marker (optional whitespace, "ck", one
    # whitespace character) that makes the parser process a comment's body
    # as template markup.
    def self.parse_comment_reg
      %r{\A\s*ck\s}u
    end
    
    # Declaration key under which an element's class is stored
    # (CGIKit::Declaration::ELEMENT_KEY).
    def self.element_attribute
      CGIKit::Declaration::ELEMENT_KEY
    end
    
    attr_accessor :html_string, :declarations, :node, :encoding, \
    :last_token, :next_token, :inner_nodes
    
    # Sets up the parser and, when a file name is given, reads and parses
    # that file immediately.
    #
    # filename:: path of the template to parse, or +nil+ to parse later
    #            through #parse.
    def initialize( filename = nil )
      @filename = filename

      parser_class = self.class
      @ckid = parser_class.cgikit_attribute.to_s
      @id_reg = parser_class.cgikit_regexp
      @parse_comment_reg = parser_class.parse_comment_reg

      return unless filename

      #CGIKit::FileLock.shared_lock(filename) do |f|
      source = File.open(filename) { |f| f.read }
      parse(source)
    end
    
    # Resets the per-parse state: the pending-output buffer, collected
    # declarations, tag-depth bookkeeping, doctype flag and source string.
    def init_listener
      # Output and results.
      @buf = []
      @html_string = ''
      @declarations = {}

      # Depth tracking: current level plus the level saved per node
      # (0 for nodes never recorded).
      @tag_level = 1
      @node2tag_level = Hash.new(0)

      # Doctype handling.
      @in_doctype = false
      @doctype_buf = []
    end
    
    # Parses +string+ as a template and returns the resulting RootNode.
    #
    # REXML is loaded on first use. The encoding of the root component is
    # read from, and after the first successful parse stored in, the
    # thread-local keys :ck_root_component_encoding and
    # :ck_root_component_parsed.
    #
    # Raises ParseError when REXML reports a parse exception.
    def parse(string)
      unless Object.const_defined?('REXML')
        require 'rexml/document'
        require 'rexml/streamlistener'
        app = CGIKit::Application
        if app.respond_to?(:precede_iconv_as_rexml_encoding_module) && !app.precede_iconv_as_rexml_encoding_module
          require 'cgikit/lang/encoding-patch.rb'
        end
      end

      @html_string = string
      init_listener

      @node = CGIKit::HTMLParser::RootNode.new
      @cur = @node

      @parser = REXML::Parsers::BaseParser.new(string)

      thread = Thread.current
      @encoding = thread[:ck_root_component_encoding]
      @root_component_parsed = thread[:ck_root_component_parsed]

      begin
        __parse
      rescue REXML::ParseException => e
        raise CGIKit::HTMLParser::HTMLParser::ParseError, "REXML raises Error when parsing #{@filename}.\nREXML error message: #{e.to_s.gsub(/\r?\n/, ' ')}\n"
      end

      # The first template parsed on this thread fixes the expected encoding.
      unless Thread.current[:ck_root_component_parsed]
        Thread.current[:ck_root_component_encoding] = @encoding
        Thread.current[:ck_root_component_parsed] = true
      end

      # Flush whatever output is still pending after the last event.
      @cur << buffer2textnode if @buf.size > 0

      self.node
    end
    
    #
    # Event handling built on the REXML::Parsers::BaseParser pull API,
    # adapted from REXML::Parsers::TreeParser and REXML::Parsers::StreamParser.
    
    # This is dirty but may be a bit faster.
    # Pulls events from @parser until :end_document and builds the node tree.
    #
    # Markup that is not a CGIKit element is re-serialized into @buf and later
    # turned into text nodes; tags recognized by cgikit_element? become
    # CGIKitNode children of the current node (@cur) and get a declaration.
    #
    # Depth bookkeeping: @tag_level counts open tags relative to the current
    # CGIKit node. When a CGIKit element starts, the enclosing node's level is
    # saved in @node2tag_level and the counter restarts at 1, so reaching 0 on
    # an end tag means "the current CGIKit element is closed".
    #
    # Raises ParseError for an event type this parser does not handle.
    def __parse
      name = attrs = nil
      while true
        event = @parser.pull
        case event[0]
        when :end_document
          # end of XML
          return
        when :start_element
          # name and attributes are used as delivered by the parser
          @tag_level += 1
          name = event[1]
          attrs = event[2]

          if match_pattern = self.cgikit_element?(name, attrs)
            # Flush pending plain markup before the element node.
            if tn = buffer2textnode
              @cur << tn
            end

            new_node = CGIKit::HTMLParser::CGIKitNode.new(name)

            ck_attrs = {}
            attrs.each do |k,v|
              ck_attrs[k.intern] = self.value_for_string(v)
            end
            case match_pattern
            when :id
              # id="ck:name" form: the ckid is the id without its prefix.
              ck_attrs[@ckid.intern] = attrs['id'].sub(@id_reg, '')
            when :ns_id
              # ck:id="name" form: move the value under the ckid key.
              ck_attrs[@ckid.intern] = attrs['ck:id']
              ck_attrs.delete(:'ck:id')
            end

            new_node.attributes = ck_attrs
            add_declaration(new_node.name, new_node.attributes)
            new_node.ckid = new_node.attributes[@ckid.intern].intern

            # Remember the enclosing node's depth, then descend.
            @node2tag_level[@cur] = @tag_level - 1
            @cur << new_node
            @cur = new_node
            @node2tag_level[new_node] = 1
            @tag_level = 1
          else
            @buf << "<#{name}#{CGIKit::HTMLParser.attribute_string(attrs)}>"
          end
        when :end_element
          @tag_level -= 1
          name = event[1]
          if @tag_level == 0
            # The current CGIKit element ends: flush and go back to its parent.
            if node = buffer2textnode
              @cur << node
            end

            unless RootNode === @cur
              @cur = @cur.parent
            end
            @tag_level = @node2tag_level[@cur]
          else
            last = @buf.last
            # Collapse "<x ...>" + "</x>" into "<x ... />" only when the last
            # buffered item is the still-open start tag of this very element.
            # A bare prefix test would also hit an already collapsed child
            # ("<div />") or a longer tag name ("<br />" when closing "b")
            # and swallow the closing tag.
            if last && (last !~ %r{/>\z}) &&
               (last.index("<#{name}>") == 0 || last.index("<#{name} ") == 0)
              @buf.pop
              @buf << last.sub(/>\z/um, ' />')
            else
              @buf << "</#{name}>"
            end
          end
        when :text
          unless @in_doctype
            # text is buffered as delivered by the parser
            @buf << event[1]
          end
        when :end_doctype
          @in_doctype = false
          end_doctype
        when :start_doctype
          @in_doctype = true
          start_doctype( *event[1..-1] )
        when :processing_instruction
          instruction( event[1], event[2] )
        when :externalentity
          externalentity( event[1] )
        when :elementdecl
          elementdecl(event[1])
        when :entitydecl
          entitydecl(event)
        when :comment, :attlistdecl, :cdata, :xmldecl, :notationdecl
          # These handlers are named after their event and take its payload.
          __send__( event[0], *event[1..-1] )
        else
          raise CGIKit::HTMLParser::HTMLParser::ParseError, "#{@filename}: CGIKit doesn't recognize the event(#{event.inspect})"
        end
      end
    end
  
    #def tag_start(name, attrs)
    #end
    #def tag_end(name)     
    #end
    #def text(text)
    #end
    
    # Buffers a processing instruction as "<?" + name + instruction + "?>".
    # No separator is inserted between +name+ and +instruction+.
    def instruction(name, instruction)
      @buf << "<?#{name}#{instruction}?>"
    end
    
    # Handles a comment event.
    #
    # An ordinary comment is buffered verbatim between "<!--" and "-->".
    # When the comment starts with the "ck" marker (@parse_comment_reg), the
    # pending buffer (including the "<!--" just added) is flushed to a text
    # node and the rest of the comment is parsed as template markup with a
    # temporary parser; "-->" is buffered afterwards in both cases.
    def comment(comment)
      # comment has already been converted to UTF-8.
      @buf << '<!--'

      if @parse_comment_reg =~ comment
        @cur << buffer2textnode

        markup = comment.sub(@parse_comment_reg, '')
        # don't need to set encoding.
        nested = REXML::Parsers::BaseParser.new(markup)
        outer = @parser
        @parser = nested
        __parse
        @parser = outer
      else
        @buf << comment
      end

      @buf << '-->'
    end
    
    # Handles the start of a DOCTYPE declaration: flushes pending output to a
    # text node, then buffers "<!DOCTYPE name pub_sys [long_name] [uri][".
    #
    # long_name and uri are appended (space separated) only when given.
    def start_doctype(name, pub_sys, long_name, uri)
      pending = buffer2textnode
      @cur << pending if pending

      decl = ''
      decl << "<!DOCTYPE #{name} #{pub_sys}"
      [long_name, uri].each do |part|
        decl << ' ' << part if part
      end

      # for the time being, "[" is used; end_doctype rewrites it when the
      # declaration turns out to have no internal subset.
      decl << '['

      @buf << decl
    end
    
    # Handles the end of a DOCTYPE declaration.
    #
    # When the last buffered item is still the "<!DOCTYPE ...[" string (no
    # markup declarations followed), its trailing "[" is replaced by ">".
    # Otherwise the internal subset is closed with "]>".
    def end_doctype
      if REXML::Parsers::BaseParser::DOCTYPE_START =~ @buf.last
        # There is no `markupdecl`
        @buf << @buf.pop.sub(/\[\z/um, '>')
      else
        @buf << ']>'
      end
    end
    
    # Buffers an external entity reference followed by a newline.
    def externalentity(content)
      @buf.push(content + "\n")
    end
    
    # Buffers an element declaration, appending the closing ">".
    def elementdecl(content)
      @buf.push(content + '>')
    end
    
    # Buffers an attribute-list declaration exactly as it appeared in the
    # source; only +raw_content+ is used.
    def attlistdecl(element_name, attributes, raw_content)
      @buf.push(raw_content)
    end
    
    # Buffers an entity declaration, serialized by building a REXML::Entity
    # from the whole parser event and writing it to a string.
    def entitydecl(event)
      rendered = ''
      entity_decl = REXML::Entity.new(event)
      entity_decl.write(rendered)
      @buf << rendered
    end
    
    # Buffers a notation declaration in the form
    # <!NOTATION name 'middle rest'>.
    def notationdecl(name, middle, rest)
      @buf.push("<!NOTATION #{name} '#{middle} #{rest}'>")
    end
    
    # Buffers a parameter-entity reference in the form %content;.
    def entity(content)
      @buf << "%#{content};"
    end
    
    # Buffers a CDATA section, re-wrapping +content+ in its delimiters.
    def cdata(content)
      @buf << "<![CDATA[#{content}]]>"
    end
    
    # Buffers the XML declaration, rebuilt from its parts, and records the
    # document encoding.
    #
    # version::    value for the version pseudo-attribute, or nil to omit it.
    # encoding::   declared encoding name, or nil to omit it.
    # standalone:: value for the standalone pseudo-attribute, or nil to omit.
    #
    # When an encoding is already known (@encoding, i.e. the root
    # component's), the declared one must name the same encoding. Names are
    # compared without regard to ASCII case, so "utf-8" and "UTF-8" agree.
    #
    # Raises ParseError when the declared encoding differs from @encoding.
    def xmldecl(version, encoding, standalone)
      s = '<?xml '

      if version
        s << %Q|version="#{version}"|
      end

      if encoding
        # casecmp returns 0 only for a case-insensitive match.
        if @encoding and @encoding.casecmp(encoding) != 0
          raise ParseError, "#{@filename}: charcter encoding does not match that of root component.\nRoot component is #{@encoding}.\n#{@filename} is #{encoding}\n"
        end
        # Keep the spelling already recorded for the root component.
        @encoding ||= encoding
        s  << %Q| encoding="#{encoding}"|
      end

      if standalone
        s << %Q| standalone="#{standalone}"|
      end

      s <<  %Q|?>|
      @buf << s
    end
    
    #
    # end of BaseParser API
    #
    
    
    # Tells whether a tag is a CGIKit element, and how it was marked.
    #
    # tag::   tag name (not used for the decision).
    # attrs:: the tag's attributes, keyed by attribute name.
    #
    # Returns :ckid when the ckid attribute is present, :ns_id for a "ck:id"
    # attribute, :id when the +id+ value matches the "ck:" prefix pattern,
    # and false otherwise. Checks are made in that order.
    def cgikit_element?(tag, attrs)
      return false if attrs.size == 0
      return :ckid if attrs.key?(@ckid)
      return :ns_id if attrs.key?('ck:id')
      return :id if @id_reg =~ attrs['id']

      false
    end
    
    # Turns the pending output buffer into a TextNode and empties the buffer.
    #
    # Returns the new node, or nil when nothing is buffered.
    def buffer2textnode
      return nil unless @buf.size > 0

      text_node = CGIKit::HTMLParser::TextNode.new(nil)

      # REXML::Output encodes the joined buffer (per @encoding) and appends
      # the result to the node's content string.
      out = REXML::Output.new(text_node.content, @encoding)
      out << @buf.join('')

      @buf.clear
      text_node
    end
    
    # Records the declaration for one CGIKit element in @declarations, keyed
    # by the element's ckid as a Symbol.
    #
    # tag::      tag name (currently unused).
    # ck_attrs:: the element's attributes keyed by Symbol, including the ckid.
    #
    # Every attribute except the ckid goes into the declaration: the value
    # under the element key is resolved with class_for_name, all others are
    # passed through value_for_string. Returns the declaration Hash.
    def add_declaration( tag, ck_attrs )
      id_key = @ckid.intern
      declared_name = ck_attrs[id_key].intern
      dec = {}

      #if klass = class_for_element(tag, ck_attrs)
      #	 dec[self.class.element_attribute] = klass
      #end

      (ck_attrs.keys - [id_key]).each do |key|
        raw = ck_attrs[key]
        dec[key] = if key == self.class.element_attribute
                     class_for_name(raw)
                   else
                     value_for_string(raw)
                   end
      end

      @declarations[declared_name] = dec
    end
    
    # Resolves a class name through CGIKit::Utilities.class_named_from.
    # Also available as class_for_name.
    def class_named( name )
      CGIKit::Utilities.class_named_from(name)
    end
    alias :class_for_name :class_named
    
    # Maps an HTML tag to the element class that would represent it.
    #
    # tag_type::   tag name as a Symbol (:a, :form, :img, :textarea, :select,
    #              :input).
    # attributes:: the tag's attributes keyed by Symbol; consulted for
    #              :select (:multiple, :size) and :input (:type).
    #
    # Returns the element class, or nil for any other tag.
    # NOTE(review): the only call site visible here is commented out, and the
    # class constants are referenced unqualified - confirm they resolve
    # before re-enabling.
    def class_for_element( tag_type, attributes )
      case tag_type
      when :a        then Link
      when :form     then Form
      when :img      then Image
      when :textarea then Text
      when :select
        # A multi-select or a sized list is a browser; otherwise a popup.
        (attributes[:multiple] || attributes.key?(:size)) ? Browser : Popup
      when :input
        input_type = attributes[:type]
        return TextField unless input_type

        case input_type.downcase
        when 'checkbox' then Checkbox
        when 'radio'    then Radio
        when 'submit'   then Submit
        when 'reset'    then Reset
        when 'file'     then Upload
        else
          # 'text', 'password', 'hidden' and every unknown type
          TextField
        end
      end
    end
    
    # Converts an attribute value from its template spelling to a Ruby value.
    #
    # * ":name" and "ruby:name" (optional whitespace around "ruby") become the
    #   Symbol made of the text after the prefix
    # * "true", "false" and "nil" become the corresponding Ruby objects
    # * anything else is returned unchanged
    def value_for_string( value )
      return value.sub(/\A:/, '').intern if /\A:/ === value
      # it should be add `i` option
      return value.sub(/\A\s*ruby\s*:/, '').intern if /\A\s*ruby\s*:/ === value

      return true  if 'true' == value
      return false if 'false' == value
      return nil   if 'nil' == value

      value
    end
    
  end
  
end


# Script entry point: when this file is executed directly, parse the template
# named by the first command-line argument and pretty-print its declarations.
if __FILE__ == $0
  require 'pp'

  template_parser = CGIKit::HTMLParser::HTMLParser.new(ARGV[0])
  #pp template_parser.node
  pp template_parser.declarations
end
