# -*- coding: utf-8 -*-

# Change log
#
#
#
#

"""
cwtex 5.1 system
(cwtex.py, tex2xtc.py, cwbiblatex.py, cwmkidx.py, cwmkgls.py, cwcjksort, cwhtml)

Copyright (C) 2018 Tsong-Min Wu and Tsong-Huey Wu
  with support of ...

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, see <http://www.gnu.org/licenses/>
"""


import os, sys, logging, unicodedata, argparse, re
import shutil
from cwfont_decode import *


# Command-line interface: one positional argument naming the document,
# given with or without a file extension.
parser = argparse.ArgumentParser(description="cwmkgls processor version 5.1")
parser.add_argument("infile")

# Options that are currently disabled:
#
# parser.add_argument("-s", "--style", help="specify style filename")
#
# parser.add_argument("-cwe", "--cjklast", action="store_true", help="Print English index before Chinese")
# parser.add_argument("-cwp", "--phonetic", action="store_true", help="Sort cjk index by phonetic characters")

args = parser.parse_args()

inputfile = args.infile
if not inputfile:
    # An empty string would otherwise turn into the bogus file name ".gls".
    parser.error("infile must not be empty")

# Drop only the final extension.  Splitting on the first "." (as was done
# before) breaks names such as "./doc.gls" or "thesis.v2.gls", because the
# first dot is not necessarily the one that starts the extension.
temp = os.path.splitext(inputfile)[0]

# The glossary to rewrite (.gls) and the main source file (.ctx) share the
# same base name.
inputfile = temp + ".gls"
#glofile = temp + ".glo"
ctxfile = temp + ".ctx"

# Log records are printed as "<line number of the logging call> <message>".
logging.basicConfig(format='%(lineno)d %(message)s')

# Patterns used while scanning the .gls and .ctx files.  They are applied
# with .match() at successive columns of a line, so none of them is anchored.

# Heading of the "glssymbols" group in the .gls file.
glssym = re.compile(r'\\glsgroupheading{glssymbols}')
# Heading of a group named by a single ASCII letter or digit.
glseng = re.compile(r'\\glsgroupheading{[a-zA-Z0-9]}')
# Start of a glossary entry; the entry label follows the opening brace.
glsentr = re.compile(r'\\glossentry{')
# Start of the page list that belongs to an entry.
glspage = re.compile(r'{page}\\glsnumberformat{')
# \loadglsentries{<file>} in the .ctx source.
loadgls = re.compile(r'\\loadglsentries{')
# Entry definitions whose label consists of ASCII letters only; the label is
# captured in group 1.
newglsentry = re.compile(r'\\newglossaryentry{([a-zA-Z]+)}')
lnewglsentry = re.compile(r'\\longnewglossaryentry{([a-zA-Z]+)}')

# Pass 1: split the existing .gls file.
#
# * Lines that belong to letter/digit groups (the "English" part) are copied
#   verbatim to glseng.tmp.
# * For every entry of the "glssymbols" group the string "<label>{<pages>"
#   is appended to glslist.
#
# `chinese` records which part of the file is being read:
#   0 - no group heading seen yet (these lines are dropped),
#   1 - inside the "glssymbols" group,
#   2 - inside a letter/digit group.
chinese = 0
glslabel = ""
glslist = []

# Both handles are managed by `with` so they are closed even if a line
# cannot be processed.
with open(inputfile, encoding='utf-8') as file, \
        open("glseng.tmp", 'w', encoding='utf-8') as file_eng:
    for str1 in file:
        # Try every column in turn; the first pattern that matches decides
        # what happens to the whole line.  pattern.match(str1, i) avoids
        # building a new slice of the line for every column.
        for i in range(len(str1)):
            if glssym.match(str1, i):
                chinese = 1
                break
            if glseng.match(str1, i):
                file_eng.write(str1)
                chinese = 2
                break
            if chinese == 2:
                # In the English part only column 0 is inspected: a line that
                # does not start with a group heading is copied unchanged.
                file_eng.write(str1)
                break
            if chinese != 1:
                continue

            entry = glsentr.match(str1, i)
            if entry:
                # The label runs from just after "\glossentry{" to the next "}".
                closing = str1.find('}', entry.end())
                if closing == -1:
                    logging.warning("unterminated \\glossentry label skipped: %r", str1)
                    glslabel = ""
                else:
                    glslabel = str1[entry.end():closing]
                break

            page = glspage.match(str1, i)
            if page:
                # Start ON the "{" that opens the page list: it is kept as the
                # separator between label and pages ("<label>{<pages>").
                start = page.end() - 1
                closing = str1.find('}', start)
                if closing == -1:
                    logging.warning("unterminated page list skipped: %r", str1)
                else:
                    glslabel = glslabel + str1[start:closing]
                    glslist.append(glslabel)
                break

## If there is no English entry, glseng.tmp is empty.  Its content is later
## appended to the generated glossary, so it must at least close the
## theglossary environment.

if os.stat("glseng.tmp").st_size == 0:
    with open('glseng.tmp', 'w', encoding='utf-8') as file:
        # Backslashes are written explicitly: "\e" and "\g" are not valid
        # escape sequences and trigger a SyntaxWarning on recent Python.
        # The text written to the file is unchanged.
        file.write("\\end{theglossary}\\glossarypostamble")
   
#for i in range(len(glslist)):
#    print(glslist[i])

# Look in the main .ctx file for the first \loadglsentries{<name>} that is
# not on a comment line.
#   glsload - "<name>.ctx" when found, "" otherwise
#   nf      - 1 when a non-empty name was found, 0 otherwise
glsload = ""
nf = 0
with open(ctxfile, encoding='utf-8') as file:
    for str1 in file:
        if str1.startswith('%'):
            continue
        # Only the first occurrence on a line is considered.
        found = loadgls.search(str1)
        if not found:
            continue
        closing = str1.find('}', found.end())
        if closing == -1:
            logging.warning("unterminated \\loadglsentries skipped: %r", str1)
            continue
        glsload = str1[found.end():closing]
        if glsload != "":
            nf = 1
            break
        # An empty argument ("\loadglsentries{}") is ignored; keep looking.

if nf == 1:
    glsload = glsload + ".ctx"

#for i in range(len(glslist)):
#    print(glslist[i])


##  glsload is glo-defns.ctx which defines all the glossary entries 
##  Next, we need to extract all the entries and then sort

#glsname = re.compile(r'name = {')

def glsutf8(str1, m, glsstr):
    """Build the sort record for the CJK characters of an entry name.

    Characters of ``str1`` are read from index ``m`` up to (not including)
    the first ``}``.  Every character matched by the module-level ``cjk``
    pattern is passed to ``uni2utf8`` together with the module-level
    ``sorton``; the second value it returns is used as the group number
    (presumably a stroke count or zhuyin index, depending on ``sorton`` -
    to be confirmed against cwfont_decode).

    The record has the shape::

        <prefix><code 1>{<group of char 1>{<code 2><code 3>...{<glsstr>

    where ``<code n>`` is the decimal code point of the n-th CJK character
    and ``<prefix>`` is a letter derived from the group number (``"ZZ"``
    plus a letter for groups above 26).

    Returns the record, or ``""`` when no CJK character was found.
    Raises IndexError when ``str1`` holds no ``}`` at or after ``m``.
    """
    pieces = []
    pos = m
    while str1[pos] != '}':
        ch = str1[pos]
        pos += 1
        if not cjk.match(ch):
            continue

        code = ord(ch)
        _, group = uni2utf8(code, sorton)
        # Letters A..Z stand for groups 1..26; larger groups continue after
        # a "ZZ" marker.
        if group > 26:
            prefix = "ZZ" + chr(group - 26 + 64)
        else:
            prefix = chr(group + 64)

        if pieces:
            pieces.append(str(code))
        else:
            # Only the first character contributes the prefix and the group.
            pieces.append(prefix + str(code) + "{" + str(group) + "{")

    if pieces:
        pieces.append("{" + glsstr)
    return "".join(pieces)

def gtgd(str2):
    r"""Classify one line of a glossary entry definition.

    ``str2`` is the line with all spaces removed.  The result is the next
    value of the caller's ``nameon`` state:

    * 0 - the line contains ``description=`` and ends with ``}}\n``
    * 3 - the line contains ``description=`` but does not end with ``}}\n``
    * 4 - the line contains ``text=`` but no ``description=``
    * 2 - the line contains neither key
    """
    if "description=" in str2:
        return 0 if str2.endswith('}}\n') else 3
    if "text=" in str2:
        return 4
    return 2

	
# Matches one or more characters in the ranges U+3041-U+30FF (kana) or
# U+4E00-U+9FFF (CJK unified ideographs).  glsutf8() applies it to a single
# character at a time.
cjk = re.compile(u'[\u3041-\u30ff\u4e00-\u9fff]+', re.UNICODE)

# Pass 2: copy every glossary entry definition found in the main .ctx file
# to gls2files.tmp.  file_gls is left open on purpose: the code that follows
# may append the \loadglsentries file to it.
file = open(ctxfile, encoding='utf-8') # main file
file_gls = open("gls2files.tmp", 'w', encoding='utf-8')
lines = file.readlines()

gf = "\\newglossaryentry{"
gfl = "\\longnewglossaryentry{"
gn = "name="
gt = "text="
gd = "description="

# nameon is the state of the copier:
#   0 - outside a definition
#   1 - definition header copied, "name=" not seen yet
#   2 - "name=" seen, neither "text=" nor "description=" seen yet
#   3 - "description=" seen but the entry was not closed on that line;
#       the next line is copied and the entry is considered finished
#   4 - "text=" seen, waiting for "description="
nameon = 0

for str1 in lines:
    str2 = str1.replace(" ", "")
    # Deliberately a module-level name: the label-matching pass further down
    # reads linelength.
    linelength = len(str2)

    if nameon not in (1, 2, 3, 4):
        # Lines outside a definition are skipped until a header shows up.
        if gf not in str2 and gfl not in str2:
            continue
        nameon = 1

    # Every line that belongs to a definition is copied unchanged.
    file_gls.write(str1)

    if nameon == 1:
        if gn in str2:
            nameon = gtgd(str2)
            if nameon == 0:
                file_gls.write(" \n")
    elif nameon == 2:
        nameon = gtgd(str2)
    elif nameon == 3:
        nameon = 0
        file_gls.write(" \n")
    elif nameon == 4:
        if gd in str2:
            nameon = 0 if str2.endswith('}}\n') else 3

file.close()

# Append the file named by \loadglsentries (when one was found and exists)
# to gls2files.tmp.
#
# file_gls must be closed in every case: the next step re-reads
# gls2files.tmp, and text still sitting in the write buffer would be
# missing from what it sees.  Previously the file was closed only when the
# load file existed.
try:
    if nf == 1 and os.path.isfile(glsload):
        with open(glsload, encoding='utf-8') as file:  # loadgls file
            file_gls.writelines(file)
finally:
    file_gls.close()
	
# Pass 3: for every entry collected from the "glssymbols" group, find its
# definition in gls2files.tmp and turn the "name={...}" value into a sort
# record (see glsutf8).  The records are collected in chinlist.
nameon = 0
chinlist = []
# Read as a module global by glsutf8(), which passes it on to uni2utf8().
sorton = 1  # 1: strokes, 2:zhuyin

file = open("gls2files.tmp", encoding='utf-8')  # main and sub gls file
lines = file.readlines()

for i in range(len(glslist)):
    # State for the current entry:
    #   0 - still looking for the definition header
    #   1 - header found on a line of its own; "name={" expected below
    #   2 - header found with "{" on the same line but fewer than 10
    #       characters after it; "name={" expected on a following line
    #   3 - finished with this entry (reset to 0 before the next one)
    nameon = 0
    # glslist items look like "<label>{<pages>", so fn[0] is the label.
    fn = glslist[i].split('{')
    fnblank = fn[0]
    fnblank=fnblank.replace(" ", "")
    # A header that stands alone on its line, compared with the label as-is
    # (spaces included).
    f = "\\newglossaryentry{" + fn[0] + "}\n"
    fl = "\\longnewglossaryentry{" + fn[0] + "}\n"

    # The whole "<label>{<pages>" string is carried along into the record.
    glsstr = glslist[i]
    for linei in range(len(lines)):
        str1 = lines[linei]

        if str1 == f or str1 == fl:
            nameon = 1
        elif nameon == 2 or nameon == 1  :  # bug 2019/03/09
            # Inside the wanted definition: look for "name={" anywhere on the
            # line (spaces removed).  If it is not on this line the state is
            # kept and the next line is examined.
            nstr=str1.replace(" ", "")
            nstrlength = len(nstr)
            w = 0
            while w != nstrlength:
                if re.match(r'name={', nstr[w:]):
                    chinstr = glsutf8(nstr,w,glsstr)
                    chinlist.append(chinstr)

                    # NOTE(review): linelength is not assigned in this branch;
                    # it holds whatever an earlier line or an earlier pass left
                    # in it.  n is not read again before it is reassigned.
                    n = linelength ; nameon=3
                    break
                w = w + 1
        else:
            # Not inside a definition yet: scan the line for a header of the
            # form \newglossaryentry{<label>}{ ... on a single line.
            n = 0
            # ith char in current line
            linelength = len(str1)
            while n < linelength:
                if newglsentry.match(str1[n:]) or lnewglsentry.match(str1[n:]):
                    # 18 is the length of "\newglossaryentry{".
                    # NOTE(review): the same offset is used when the
                    # \longnewglossaryentry pattern matched, although that
                    # prefix is 22 characters long, so the text read below
                    # starts inside the macro name and cannot equal fnblank.
                    n = n + 18
                    # Label read from this line (spelled "glslebel"; it is a
                    # separate variable from glslabel used in pass 1).
                    glslebel=""
                    while str1[n] != '}' and str1[n] != '\n':
                        glslebel = glslebel + str1[n]
                        n = n + 1
                    # Step over the closing "}".
                    n = n + 1
                    if glslebel == fnblank and str1[n] == '{':
                        nameon = 3
                    else :
                        # A different entry (or no "{" right after the label):
                        # give up on this line.
                        break
                if nameon == 3 and str1[n] == '{' :
                    if linelength - n >= 10 :
                        # Enough text follows the "{": expect "name={" on this
                        # same line.  If it is missing, nameon stays 3 and the
                        # entry is dropped without a record.
                        nstr = str1[n+1:]
                        nstr=nstr.replace(" ","")
                        nstrlength = len(nstr)
                        w = 0
                        while w != nstrlength:
                            if re.match(r'name={', nstr[w:]):
                                chinstr = glsutf8(nstr,w,glsstr)
                                chinlist.append(chinstr)
                                n = linelength
                                break
                            w = w + 1
                    else :
                        # The name is expected on one of the following lines.
                        nameon=2
                        break

                if nameon == 3 :
                    break

                n = n + 1
        if nameon == 3 :
            # Done with this entry; continue with the next item of glslist.
            nameon=0; break

file.close()

# Sort the records in place.  (The previous `sorted(chinlist)` built a sorted
# copy and threw it away, so the list was written out unsorted.)  The letter
# prefix of each record makes plain string order follow the group number.
chinlist.sort()

# Group number of the previous record; -2 means "no record written yet".
aprej = -2

# Write the CJK part of the new glossary to glsall.tmp.
with open("glsall.tmp", 'w', encoding='utf-8') as file_glo:
    file_glo.write("\\providecommand{\\cwglsgroupheading}[1]{\\par #1\\par}\n")
    ### file_glo.write("\\providecommand{\\cwglsgroupheading}[1]{\\medskip\\par #1\\par}\n")
    file_glo.write("\\glossarysection[\\glossarytoctitle]{\\glossarytitle}\\glossarypreamble\n")
    file_glo.write("\\begin{theglossary}\\glossaryheader\n")

    for chinstr in chinlist:
        # Record layout: <sort key>{<group>{<more code points>{<label>{<pages>
        fn = chinstr.split('{')
        if len(fn) < 5:
            # E.g. the empty record produced for a name without any CJK
            # character; it cannot be placed in a group.
            logging.warning("glossary record skipped (no CJK name?): %r", chinstr)
            continue
        #print(fn[1] + "," +fn[3] + "," + fn[4])
        j = int(fn[1])
        if j != aprej:
            # A new group starts: separate it from the previous one and
            # print its heading.
            if aprej != -2:
                file_glo.write("\\glsgroupskip\n")
            aprej = j
            file_glo.write("\\cwglsgroupheading{" + fn[1] + "}\\relax \\glsresetentrylist %\n")
        else:
            # Terminates the previous entry, which is written without a
            # trailing newline.
            file_glo.write("%\n")
        file_glo.write("\\glossentry{" + fn[3] + "}{\\glossaryentrynumbers{\\relax\n")
        file_glo.write("\t\t\\setentrycounter[]{page}\\glsnumberformat{" + fn[4] + "}}}")

# Append the English part saved in glseng.tmp to glsall.tmp, preceded by a
# group skip.  Both handles are managed by `with` so they are closed even if
# copying fails.
with open("glseng.tmp", encoding='utf-8') as file, \
        open("glsall.tmp", 'a', encoding='utf-8') as file_glo:
    file_glo.write("\\glsgroupskip\n")
    file_glo.writelines(file)

# Disabled code, kept as a bare string literal (it has no effect at run
# time).  It would copy the .ctx file to glsctx.tmp, save a
# "gls_backup_" copy of the .ctx file and then overwrite the .ctx file with
# glsctx.tmp.
"""
file = open(ctxfile, encoding='utf-8')
file_ctx = open("glsctx.tmp", 'w', encoding='utf-8')

lines = file.readlines()
for linei in range(len(lines)):
    str1 = lines[linei]
    file_ctx.write(str1)

file_ctx.close()


ctxback = "gls_backup_" + ctxfile
shutil.copy(ctxfile, ctxback)

ctxtmp = "glsctx.tmp"
shutil.copy(ctxtmp, ctxfile)
"""

# Replace the original .gls file with the regenerated glossary, keeping a
# backup copy of the original.
inputfile = temp + ".gls"

# The "gls_backup_" prefix belongs in front of the file NAME.  Prepending it
# to the whole path (as before) produced a path in a non-existent directory
# whenever the document was given with a directory part, e.g. "out/doc".
# For a bare file name the result is unchanged.
gls_dir, gls_name = os.path.split(inputfile)
glsback = os.path.join(gls_dir, "gls_backup_" + gls_name)
shutil.copy(inputfile, glsback)

gls_new = "glsall.tmp"
shutil.copy(gls_new, inputfile)

# Only gls2files.tmp is removed; the other intermediate files and the backup
# are left in place (their removal is disabled).
# os.remove("glsall.tmp")
# os.remove("glseng.tmp")
# os.remove(glsback)
os.remove("gls2files.tmp")
