# ==============================================================================
# Extract full composition exclusion data from Unicode data file
#
# Copyright (c) 2012-2013 by the developers. See the LICENSE file for details.


# ==============================================================================
# Represent extracted data as initialized C array

BEGIN \
{
   # Banner comment of the generated C code, emitted as a single line
   printf("%s%s\n", "/* Unicode full composition exclusion data",
          " created by build1.mk */")

   # Open the definition of the table; the entries are appended per record
   printf("%s\n{\n", "static const struct uc_fce  uc_fce_table[] =")
}

END \
{
   # Terminate the table with the end marker (a range starting at -1)
   printf("   /* Range starting at -1 is the end marker */\n   { -1L, -1L }\n")

   # Close the array initializer and finish the generated output
   printf("};\n\n\n/* EOF */\n")
}


# ==============================================================================
# Ignore comment lines

$0 ~ /^#/ \
{
   # Records starting with '#' carry no data; continue with the next record
   next
}


# ==============================================================================
# Process lines

{
   # Only records whose 2nd field starts with " Full_Composition_Exclusion"
   # contain relevant data; skip everything else
   if(!match($2, /^[ ]Full_Composition_Exclusion/))
   {
      next
   }

   # The 1st field holds the codepoint or the range of codepoints
   range = extract_fce($1)
   if("" == range)
   {
      next
   }

   # One initializer per record: { start codepoint, end codepoint }
   printf("   { %s },\n", range)
   next
}


# ==============================================================================
# Extract full composition exclusions

# Parameter s: Text containing a single codepoint ("0958") or a range of
#              codepoints ("0340..0341") in uppercase hexadecimal notation
#
# Returns the string "0x<start>L, 0x<end>L" (start and end are identical for a
# single codepoint) or the empty string if s contains no codepoint.
#
# The additional parameters are local variables (AWK has no other way to
# declare them) and must not be passed by the caller.

function extract_fce(s,    first, second, remainder) \
{
   # Without a start codepoint there is nothing to extract.
   # This covers the empty string too. Returning here prevents that values
   # from a previous call are reused for the result.
   if(!match(s, /[0-9A-F]+/))
   {
      return("")
   }

   first = "0x" substr(s, RSTART, RLENGTH) "L"

   # Look for the end codepoint of a range behind the start codepoint
   remainder = substr(s, RSTART + RLENGTH)
   if(match(remainder, /[.][.][0-9A-F]+/))
   {
      # Skip the two dots that separate start and end of the range
      second = "0x" substr(remainder, RSTART + 2, RLENGTH - 2) "L"
   }
   else
   {
      # If single codepoint insert the value as start and end of range
      second = first
   }

   return(first ", " second)
}


# EOF
