import re

# This script prepares the English string files for translation.
# It is a temporary, short-term measure.  Post-Endor, the translations should be automatic.


def main():
  """Run processFile on each of the two English string files."""
  for propertiesFile in ('visRecommenderStrings_en.properties',
                         'visRecommenderExtStrings_en.properties'):
    processFile(propertiesFile)

def _numberPlaceholders(line):
  """Return line with a 1-based running index inserted after every '{'.

  '{NAME} and {OTHER}' becomes '{1,NAME} and {2,OTHER}'.  The counter
  restarts for every call, and names are kept intact however many digits
  the index has.
  """
  head, *rest = line.split('{')
  return head + ''.join(
      '{' + str(n) + ',' + chunk for n, chunk in enumerate(rest, 1))


def processFile(file):
  """Read one string file, transform every line and hand it to writeOutput.

  For each line of file this:
    * reports ids (upper-case letters and underscores before '=') that were
      already seen earlier in the file, printing both the first and the
      repeated definition to stdout;
    * collects the names used in {NAME} placeholders;
    * numbers the placeholders, i.e. {NAME} becomes {1,NAME};
    * marks leading ('=\\ ') and trailing (' ' before the newline) spaces
      with '&nbsp;'.

  The transformed lines and the sorted, de-duplicated placeholder names are
  passed to writeOutput(file, lines, names).

  file -- path of the string file to read; opened in text mode with the
          platform default encoding.
  """
  idPattern = re.compile('([A-Z_]*)=(.*)')
  placeholderPattern = re.compile(r'\{([a-zA-Z_]*)\}')

  firstValues = {}   # id -> value of its first definition
  names = set()      # placeholder names seen anywhere in the file
  converted = []

  with open(file, 'r') as f:
    for line in f:
      # collect ids and look for duplicates
      m = idPattern.match(line)
      if m is not None:
        key, val = m.groups()
        if key in firstValues:
          print('*** Duplicate: ' + key + '=' + firstValues[key])
          print('*** Duplicate: ' + key + '=' + val)
        else:
          firstValues[key] = val

      # harvest variable names from the untouched line, before numbering
      names.update(placeholderPattern.findall(line))

      # replace {NAME} with {1,NAME}
      line = _numberPlaceholders(line)
      # replace leading space (written as backslash + space) with &nbsp;
      line = line.replace('=\\ ', '=&nbsp;')
      # replace trailing space with &nbsp;
      line = line.replace(' \n', '&nbsp;\n')
      converted.append(line)

  writeOutput(file, converted, sorted(names))

def writeOutput(file, lines, vars):
  """Write the converted strings to 'new_' + file.

  The output starts with the NLS header comments and a glossary: one
  comment line per entry of vars, followed by its description when the
  module-level explain dict has one.  The given lines are then written
  unchanged.

  file  -- name of the input file; 'new_' is prefixed to build the output name
  lines -- the converted lines, each already ending in its own newline
  vars  -- placeholder names to list in the glossary, in the order given
  """
  header = (
      '# NLS_ENCODING=UTF-8\n'
      '# NLS_MESSAGEFORMAT_ALL\n'
      '# \n'
      '# Glossary of variables\n'
      '# \n'
  )
  with open('new_' + file, 'w') as out:
    out.write(header)
    for name in vars:
      # names without a glossary entry are still listed, just undescribed
      note = ': ' + explain[name] if name in explain else ''
      out.write('# ' + name + note + '\n')
    out.writelines(lines)
# Glossary: placeholder name -> explanation.  writeOutput copies these
# explanations into the comment header of each generated file.
explain = {
  'COORDINATE_COLOR': 'Column in the data used to color points in a tiled map layer defined by latitude and longitude.',
  'COORDINATE_LATITUDE': 'Column in the data used as latitude in a tiled map layer defined by latitude and longitude.',
  'COORDINATE_LONGITUDE': 'Column in the data used as longitude in a tiled map layer defined by latitude and longitude.',
  'COORDINATE_SIZE': 'Column in the data used to size points in a tiled map layer defined by latitude and longitude.',
  'MEASURES': 'For a tiled map use a list of measures as its label.',
  'POINT_COLOR': 'Column in the data used to color points in a tiled map layer showing points as defined by a location name.',
  'POINT_LOCATION': 'Column in the data used to position points in a tiled map layer showing points as defined by a location name.',
  'POINT_SIZE': 'Column in the data used to size points in a tiled map layer showing points as defined by a location name.',
  'REGION_COLOR': 'Column in the data used to color regions in a tiled map layer showing regions as defined by a location name.',
  'REGION_LOCATION': 'Column in the data used to select a region in a tiled map layer showing regions as defined by a location name.',
  '_ALL': 'Use all the columns in the data for this string.',
  'ACTUAL': 'Column in the data that is the actual value of a measure, e.g., Revenue or Quantity.  This is as opposed to PLANNED which would be Planned Revenue or Budgeted Expenses.',
  'BARS': 'Column in the data used to define the bars in a bar chart.',
  'BIVALUED': 'Column in the data that has only two unique values such as male and female or left and right.',
  'CATEGORY': 'Column in the data used in a chart as a category.  Usually this is to define points or bars or other shapes in a chart.',
  'COLOR': 'Column in the data used to color an element in a chart.  This item will typically show up in the legend of the chart.',
  'COLUMN': 'Column in the data used to define the columns in a column chart or crosstab.',
  'COLUMN_HIERARCHY': 'A collection of columns in the data that form a hierarchy which is shown in a chart or crosstab.',
  'CUMULATIVE': 'Column in the data that represents a cumulative value such as balance or inventory.  Typically shown in a waterfall chart.',
  'CYCLICAL': 'Column in the data that represents a cyclical attribute and is shown in a chart.  Often the chart will be a radar chart.  Examples are months or seasons.',
  'END_DATE': 'Column in the data which is a date and represents the end of something such as a project, that is shown in a gantt chart.',
  'FROM': 'Column in the data that identifies the beginning of edges in a network chart.',
  'HIERARCHY': 'Collection of columns in the data that form a hierarchy which are shown in a chart.',
  'LENGTH': 'Column in the data used to determine the length of a bar in a floating bar chart.',
  'LINK_WIDTH': 'Column in the data which is used to determine the width of edges in a network chart.',
  'MATRIX_COLUMN': 'Column in the data which is used to create the column of a matrix of charts.',
  'MATRIX_ROW': 'Column in the data which is used to create the row of a matrix of charts.',
  'MAXIMUM': 'Column in the data which defines the maximum acceptable value for that actual measure in a bullet chart.',
  'MIDDLE': 'Column in the data which defines the acceptable middle value for that actual measure in a bullet chart.',
  'MINIMUM': 'Column in the data which defines the minimum acceptable value for that actual measure in a bullet chart.',
  'ORDINAL': 'Column in the data that has a natural order to its values.  This could be a date or the numbers on the exits of a highway.',
  'PLANNED': 'Column in the data that is the planned value of a measure, e.g., Planned Revenue or Target Income.  This is as opposed to the ACTUAL measure.  This is shown in a bullet chart.',
  'POINTS': 'Column in the data that defines the points in a scatter chart.',
  'ROW': 'Column in the data that is used for the rows in a crosstab.',
  'ROW_HIERARCHY': 'Collection of columns that define a hierarchy and are shown in a crosstab.',
  'SIZE': 'Column in the data that is used to determine the size of chart shapes.  Typically this is used in a scatter chart or treemap.',
  'START_DATE': 'Column in the data which is a date and represents the start of something such as a project, that is shown in a gantt chart.',
  'TARGET': 'Column in the data that is the planned or target value of a measure, e.g., Planned Revenue or Target Income.  This is as opposed to the ACTUAL measure.  This is shown in a bullet chart.',
  'TO': 'Column in the data that identifies the end of edges in a network chart.',
  'VALUE': 'Column in the data that is a value shown in a chart.',
  'WIDTH': 'Column in the data that is used to determine the width of chart shapes such as bars or boxs in a treemap.',
  'X': 'Column in the data to position points on the X axis of a scatter chart.',
  'Y': 'Column in the data to position points on the Y axis of a scatter chart.',
  'ColumnName': 'Name of a column in the data.  Used in the description of the chart.',
  'Concept': 'Name of a concept associated with a column.  Used in the description of the chart.',
  'Hierarchy': 'Name of the hierarchy formed by a group of columns.  Used in the description of the chart.',
  'Nulls': 'Number of nulls in the column.  Used in the description of the chart.',
  'Unique': 'Number of unique values in the column.  Used in the description of the chart.',
  'max': 'Maximum value of the column.  Used in the description of the chart.',
  'min': 'Minimum value of the column.  Used in the description of the chart.',
}

# Script entry point.  The call is unconditional (no __main__ guard), so it
# runs both when the file is executed and when it is imported.
main()

# find unique list of variables and output them as comments at top of file
# add 1. and 2. etc. to each variable in the file (requires code change!)
# add marker for trailing spaces
# check for duplicate ids and report them

# Requested from the translation team:
# X 1. NLS_ENCODING
# X 2. NLS_MESSAGEFORMAT
# X 3. Remove duplicate IDs
# X 4. Convert variables to 1,NAME
# X 5. Variable explanation
# x 6. Trailing and leading spaces
# x 7. Partial or single word strings
#   8. Language codes and naming conventions
#   9. Fall back mechanism