# -*- coding: utf-8 -*-
#
# Change log
#  20220812:
#     * \ctxidxstroke{.}\nopagebreak ("\nopagebreak" added)
#
#  20220809: 
#     * do not delete _tmp.tmp

"""
cwtex 5.1 system
(cwtex.py, tex2xtc.py, cwbiblatex.py, cwmkidx.py, cwmkgls.py, cwcjksort, cwhtml)

Copyright (C) 2018 Tsong-Min Wu and Tsong-Huey Wu
  with support of ...

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, see <http://www.gnu.org/licenses/>
"""


import os, sys, logging, unicodedata, argparse, re
import shutil
from cwfont_decode import *

# Command-line interface: one positional input file plus mode/sort switches.
# NOTE: argparse derives the attribute name from the first long option
# string, so "-gls/--glossary" is stored as args.glossary (not args.gls).
parser = argparse.ArgumentParser(description="cwmkidx processor version 5.1")
parser.add_argument("infile")
parser.add_argument("-ind", "--ind", action="store_true", help="for index")
parser.add_argument("-gls", "--glossary", action="store_true", help="for glossary")
parser.add_argument("-s", "--style", help="specify style filename")
parser.add_argument("-cwe", "--cjklast", action="store_true", help="Print English index before Chinese")
parser.add_argument("-cwp", "--phonetic", action="store_true", help="Sort cjk index by phonetic characters")

args = parser.parse_args()

# An empty "infile" used to leave `inputfile` unbound and surfaced much later
# as a NameError; report it as a normal usage error instead.
if not args.infile:
    parser.error("infile must not be empty")
inputfile = args.infile

# Default: inputfile has file extension .xtc, so next line is not necessary
# if inputfile.split(".")[0] == inputfile:  # No file extension
#     temp = inputfile
# else :
#     temp = inputfile.split(".")[0]
# 
# inputfile = temp + ".xtc"
# ilgname = temp + ".ilg"
# realname = temp + ".ind"
# cjkindname = temp + "-cjk.ind"
# ilgname = inputfile.split(".")[0] + ".ilg"
# realname = inputfile.split(".")[0] + ".ind"

### cjkindname = inputfile.split(".")[0] + "_cjk.ind"            ### should be in cwpdf.py
### input_cjk_ind = inputfile.split(".")[0] + "_cjk.ind"         ### cjkindname      :final but in cjk

# Derive every working file name from the input path.
#
# Only the last path component is cut at its first "." (the rule this script
# has always applied to plain file names).  The directory prefix is kept
# untouched, so an input such as "./doc/book.xtc" no longer collapses to an
# empty stem because of the dot in "./".
file_basename = os.path.basename(inputfile)
file_stem = inputfile[:len(inputfile) - len(file_basename)] + file_basename.split(".")[0]

if args.ind:
    input_xtc = file_stem + ".xtc"                  # input file
    input_ilg = file_stem + ".ilg"                  # makeindex log (legacy name: ilgname)
    input_tmp = file_stem + "_tmp.tmp"              # legacy "cwindexg.tmp": makeindex input, cjk preceded with "00..."
    input_tmp_ind = file_stem + "_tmp.ind"          # legacy "cwindexg.ind": sorted, cjk preceded with "00..."
    input_tmp_bin = file_stem + "_tmp_bin.tmp"      # legacy "cwindexg4.tmp": sorted, cjk preceded with the marker char
    input_sorted_ind = file_stem + "_sorted.ind"    # legacy "cwindexs.ind": sorted, still needs cjk header and stroke numbers
    input_ind = file_stem + ".ind"                  # legacy realname: final index
# The flag is declared as "-gls/--glossary", which argparse exposes as
# args.glossary; the former args.gls lookup raised AttributeError.
elif args.glossary:
    input_xtc = file_stem + ".xtc"                  # input file
    input_glg = file_stem + ".glg"                  # makeindex log for the glossary
    input_tmp = file_stem + "_tmp.tmp"              # makeindex-style input, cjk preceded with "00..."
    input_tmp_gls = file_stem + "_tmp.gls"          # sorted, cjk preceded with "00..."
    input_tmp_bin = file_stem + "_tmp_bin.tmp"      # sorted, cjk preceded with the marker char
    input_sorted_gls = file_stem + "_sorted.gls"    # sorted, still needs cjk header and stroke numbers
    input_gls = file_stem + ".gls"                  # final glossary
    input_ist = file_stem + ".ist"                  # makeindex style file for the glossary
    input_glo = file_stem + ".glo"                  # raw glossary entries fed to makeindex

# Remember the optional style file name.  argparse itself rejects "-s" given
# without a value, and the old "args.style == None" test sat inside
# "if args.style:", so the "You forgot to give style filename." branch could
# never run; it has been dropped.
# NOTE(review): stylefile is not read anywhere else in this script (makeindex
# is invoked with the fixed style "cwtex") -- confirm whether it should be.
if args.style:
    stylefile = args.style

# Prefix every log record with the source line number of the logging call.
logging.basicConfig(format='%(lineno)d %(message)s')

print("\ncwmkidx processing file ... ")

# Pass 1: copy the .xtc file to the makeindex input file, replacing every
# U+FB66 marker character (code point 64102) with the ASCII text "00".
# Both files are closed even if reading or writing fails.
with open(input_xtc, encoding='utf-8') as file, \
        open(input_tmp, 'w', encoding='utf-8') as file_object:
    # `linei` stays the loop variable on purpose: the closing report at the
    # end of the script prints linei + 1.
    for linei, str1 in enumerate(file):
        file_object.write(str1.replace(chr(64102), "00"))

# if (makendxno==1)   spawnlp(P_WAIT,"makeindex.exe","makeindex.exe",makendxoption,"cwindexg.tmp",NULL);  /* 2.2g*/
# else  spawnlp(P_WAIT,"makeindex.exe","makeindex.exe","cwindexg.tmp",NULL);
#
# -c means blank compression. see LaTeX Companion, 2nd ed., p.655
# -l means "seal" before "seal lion"
#
# For an index, makeindex generates .ind and .ilg files.
# For glossaries, makeindex generates .gls and .glg files (?)

# Run makeindex on the prepared input.
# The import lives outside the branches: it used to sit inside the "-ind"
# branch only, so the glossary branch failed with NameError on `call`.
from subprocess import call

if args.ind:
    # -l: letter ordering, -c: blank compression, -s: style file.
    makeindex_status = call(["makeindex", "-l", "-c", "-s", "cwtex", input_tmp])
# "-gls/--glossary" is stored by argparse as args.glossary, not args.gls.
elif args.glossary:
    # -s: style file, -o: output file; the last argument is the input file.
    makeindex_status = call(["makeindex", "-l", "-s", input_ist, "-o", input_gls, input_glo])
else:
    makeindex_status = 0

# The exit status used to be discarded; surface failures without aborting so
# the rest of the script behaves as before.
if makeindex_status != 0:
    logging.warning("makeindex exited with status %d", makeindex_status)

# Pass 2: in the sorted makeindex output, turn every " 00" (a space followed
# by "00") into a space followed by the U+FB66 marker character (64102).
# NOTE: any literal " 00" in that file is converted as well, exactly as the
# previous character-by-character scan did.
if args.ind:
    makeindex_output = input_tmp_ind
# "-gls/--glossary" is stored by argparse as args.glossary, not args.gls.
elif args.glossary:
    # NOTE(review): the glossary makeindex call writes to input_gls via "-o";
    # confirm that input_tmp_gls is really the file meant to be read here.
    makeindex_output = input_tmp_gls

with open(makeindex_output, encoding='utf-8') as file, \
        open(input_tmp_bin, 'w', encoding='utf-8') as file_object:
    # str.replace performs the same left-to-right, non-overlapping
    # substitution as the old loop, without re-slicing the line for every
    # character.  `linei` stays the loop variable because the closing report
    # at the end of the script prints linei + 1.
    for linei, str1 in enumerate(file):
        file_object.write(str1.replace(" 00", " " + chr(64102)))

"""
/* see cwmkidx4b20.c */
/* remove line {\edx{Symbol} */
/* convert {\edx{ to \edx{ */
"""

# print("\nprocessing cwindexg4.tmp to file.ind ...")

# Character classes used by this pass; cjk_c is also used later in the script
# when the index is split into Chinese and English parts.
az = re.compile(r'\w')                                            # any word character
cjk = re.compile(u'[\u3041-\u30ff\u4e00-\u9fff]+', re.UNICODE)    # kana and CJK unified ideographs
cjk_c = re.compile(u'[\u4e00-\u9fff]+', re.UNICODE)               # CJK unified ideographs only

if args.ind:
    sorted_output = input_sorted_ind
# "-gls/--glossary" is stored by argparse as args.glossary, not args.gls.
elif args.glossary:
    sorted_output = input_sorted_gls

# Pass 3: copy the file, post-processing everything that follows a U+FB66
# marker (code point 64102) on a line.  The marker itself is never written.
# State kept in found64102 while scanning the rest of such a line:
#   1 - just after a marker: word characters and '?' are dropped
#   0 - a CJK character or a non-word character was seen: copy normally
#   2 - the first character after the marker was '0': copy the remainder of
#       the line unchanged (that '0' itself is dropped)
with open(input_tmp_bin, encoding='utf-8') as file, \
        open(sorted_output, 'w', encoding='utf-8') as file_object:
    # `linei` stays the loop variable because the closing report at the end
    # of the script prints linei + 1.
    for linei, str1 in enumerate(file):
        i = 0
        found64102 = 0
        linelength = len(str1)
        # "<" instead of "!=": i can step past the end when a marker line has
        # no trailing newline (see the bounds checks below).
        while i < linelength:
            if ord(str1[i]) == 64102:
                found64102 = 1
                i = i + 1
                d = 0       # offset of the current character from the marker
                # The bounds check keeps a final line without '\n' from
                # raising IndexError.
                while i < linelength and str1[i] != '\n':
                    if cjk.match(str1[i]) :
                        file_object.write(str1[i])
                        found64102 = 0
                    elif found64102 == 2:                       # bug 2018/08/12
                        file_object.write(str1[i])
                    elif az.match(str1[i]) or  str1[i]=='?' :   # bug 2016/06/20
                        if found64102 == 0:
                            file_object.write(str1[i])
                        elif str1[i] == '0' and d == 0:         # bug 2018/08/12
                            found64102 = 2
                    elif ord(str1[i]) == 64102:
                        found64102 = 1
                    else :
                        file_object.write(str1[i])
                        found64102 = 0
                    i = i + 1
                    d = d + 1
                # Write the terminating newline, if the line has one.
                if i < linelength:
                    file_object.write(str1[i])
            else:
                file_object.write(str1[i])
            i = i + 1


##
## So far, we have generated a sorted .ind file for the index.
## For glossaries, the output is a .gls file instead.
## Next, we need to add a header for the CJK section, and also for the strokes.
##

# Split the sorted index into two scratch files: lines routed to the Chinese
# part (with section headers inserted in front of some entries) and lines
# routed to the English part.
# NOTE(review): input_sorted_ind is only assigned in the "-ind" branch of the
# file-name setup, so this step cannot run in glossary mode -- confirm intent.
file = open(input_sorted_ind, encoding='utf-8')
chifile_object = open("cwindexs_chinese.x0x1x", 'w', encoding='utf-8')
engfile_object = open("cwindexs_english.x0x1x", 'w', encoding='utf-8')
# Line classifiers.  Each one is applied with .match() to str1[i:] while
# i == 0, i.e. anchored at the very start of the line.
sitem = re.compile(r'  \\item ')
bindex = re.compile(r'\\begin{theindex}')
eindex = re.compile(r'\\end{theindex}')
indexspace = re.compile(r'  \\indexspace')
## Sample line: {\edx{Symbols}\nopagebreak
## NOTE(review): the sample starts with "{", which an anchored .match() of the
## two patterns below would not accept -- confirm the real makeindex output.
edxsym = re.compile(r'\\edx{Symbols}\\nopagebreak')
edxnotsym = re.compile(r'\\edx{[A-Z]}\\nopagebreak')

# Second argument handed to uni2utf8: 1 by default, 2 with -cwp/--phonetic.
sorton = 0            # 2016/4/9
if not args.phonetic:
    sorton = 1
else :
    sorton = 2

aprej = -2      # last positive j returned by uni2utf8 (-2: none yet)
fchinese = 1    # 1 routes unclassified lines to the Chinese file while engon == 0
engon =0        # becomes 1 once an \edx{A-Z} header line has been seen
k=0             # becomes 1 once a header for a positive j has been written
lines = file.readlines()
for linei in range(len(lines)):
    str1 = lines[linei]
    i = 0
    # ith char in current line
    linelength = len(str1)
    # Every path through the body ends at the `break` below, so the body runs
    # at most once per line, with i == 0.
    while i != linelength:
        if sitem.match(str1[i:]) and engon == 0:
            # Skip the 8 characters of "  \item " to reach the entry text.
            i = i + 8
            # engon == 0 is already guaranteed by the enclosing condition, so
            # this test is always true and the else branch below never runs.
            if cjk_c.match(str1[i]) or  engon == 0:
                # uni2utf8 is not defined in this file (presumably provided by
                # the cwfont_decode star import); only j is used here.
                # NOTE(review): j looks like a stroke count / phonetic group
                # number -- confirm against uni2utf8.
                utf8,j = uni2utf8(ord(str1[i]),sorton)  # 2016/4/9
                fchinese = 1

                # A header is considered when j differs from the last positive
                # j, or when j is -1.
                if j != aprej or j == -1:
                    if j > 0:        # 2017/06/27
                        aprej = j

                    if j > 0:
                        k = 1
#                        if phonetic_on == 0 :
                        if not args.phonetic:
#                            chifile_object.write("\n  \\ctxidxstroke{" + str(j) + "}\n")
                            chifile_object.write("\n  \\ctxidxstroke{" + str(j) + "}\\nopagebreak\n")
                        else :
                            # Remap j before using it as the \cH number:
                            # up to 21 unchanged, 22-24 shifted by +13,
                            # anything larger shifted by -3.
                            if j <= 21:
                                j = j
                            else :
                                if j <= 24:
                                    j = j + 13
                                else :
                                    j = j - 3
                            chifile_object.write("  \\par\\bigskip \\noindent{{\\MbfQ\\cH" + str(j) + "}} \\par\\medskip\n")  # 2016/4/9  str(j-1 )

# Problems:
#
# 1. Need to add  MbfQ: fontno=57  to xcinput.tex,
# 2. sorting is incorrect.
# 3. need to add \\MdQ\\cH149(劃) to xcinput.tex?
#
                    else :
                        # j <= 0: write a fixed header, but only if no header
                        # for a positive j has been written yet and the entry
                        # starts with a CJK unified ideograph.
                        if k == 0 and cjk_c.match(str1[i]):
                            chifile_object.write("  \\par\\bigskip \\noindent{{\\MaQ\\cH46}\\z{\\MiQ\\cH204}\\z{\\MhQ\\cH176}\\z{\\MdQ\\cH149}} \\par\\medskip\n")
                chifile_object.write(str1)
            else :
                # Unreachable (see the note on the condition above).
                fchinese = 0
                engfile_object.write(str1)
        elif bindex.match(str1[i:]) or eindex.match(str1[i:]) :
            engfile_object.write("") # do nothing for head and tail
        elif indexspace.match(str1[i:]) :
            # Written to the English file prefixed with "%".
            engfile_object.write("%" + str1)
        elif edxsym.match(str1[i:]) :
            # The "Symbols" header is likewise written prefixed with "%".
            engfile_object.write("%" + str1)
        elif edxnotsym.match(str1[i:]):
            # First letter header: from here on every line goes to the
            # English file.
            engon = 1
            engfile_object.write(str1)
        else :
            # Any other line (including "  \item " lines once engon == 1).
            if fchinese == 1 and engon == 0:
                chifile_object.write(str1)
            else :
                engfile_object.write(str1)
        break

file.close()
chifile_object.close()
engfile_object.close()

#file = open(input_sorted_ind, encoding='utf-8')
#chifile_object = open("cwindexs_chinese.x0x1x", 'w', encoding='utf-8')
#engfile_object = open("cwindexs_english.x0x1x", 'w', encoding='utf-8')

# file_object = open(realname, 'w', encoding='utf-8')
# Assemble the final .ind file: preamble, then the Chinese and English scratch
# files, each preceded by its title line, in the order chosen by
# -cwe/--cjklast.  The title strings are reproduced exactly (note the single
# trailing newline after \ctxcindextitle in the Chinese-first layout).
if not args.cjklast:                        #  Chinese first, then English
    index_parts = [
        ("\n\\ctxcindextitle\n", "cwindexs_chinese.x0x1x"),
        ("\n\\ctxeindextitle\n\n", "cwindexs_english.x0x1x"),
    ]
else:                                       # English first, then Chinese
    index_parts = [
        ("\n\\ctxeindextitle\n\n", "cwindexs_english.x0x1x"),
        ("\n\\ctxcindextitle\n\n", "cwindexs_chinese.x0x1x"),
    ]

with open(input_ind, 'w', encoding='utf-8') as file_object:
    file_object.write("\\begin{theindex}\n")

    # NOTE(review): the next line is written twice, as it always has been; the
    # second copy may have been meant for the English title -- confirm.
    file_object.write("\\ifdefined\\ctxcidx\\let\\ctxcindextitle\\ctxcindex\\fi\n")
    file_object.write("\\ifdefined\\ctxcidx\\let\\ctxcindextitle\\ctxcindex\\fi\n")
    file_object.write("\\providecommand{\\ctxcindextitle}{}\n")
    file_object.write("\\providecommand{\\ctxeindextitle}{}\n")
    file_object.write("\\providecommand{\\ctxidxstroke}{}\n")
    file_object.write("\\providecommand{\\edx}{}\n")

    for part_title, part_path in index_parts:
        file_object.write(part_title)
        # Each scratch file is opened only when needed and always closed; the
        # old code opened the Chinese file up front and never closed that
        # handle in the English-first layout.
        with open(part_path, encoding='utf-8') as file:
            # `linei` stays the loop variable because the closing report at
            # the end of the script prints linei + 1.
            for linei, str1 in enumerate(file):
                file_object.write(str1)

    file_object.write("\\end{theindex}\n")

# 2016/4/9
# 2016/4/9
# Make sure xcinput.tex declares the font macro for the current mode
# (\MbfQ with -cwp/--phonetic, \MdQ otherwise), appending the declaration only
# when the file does not contain it yet.
if args.phonetic:
    font_marker = '{\\MbfQ}{\\fontfamily{cwM}\\fontseries{57}'
    font_declaration = "\\providecommand{\\MbfQ}{\\fontfamily{cwM}\\fontseries{57}\\selectfont}\n"
else :
    font_marker = '{\\MdQ}{\\fontfamily{cwM}\\fontseries{3}'
    font_declaration = "\\providecommand{\\MdQ}{\\fontfamily{cwM}\\fontseries{3}\\selectfont}\n"

# The append handle is opened first, as before: that creates xcinput.tex when
# it is missing, so the read below cannot fail with FileNotFoundError.
with open('xcinput.tex', 'a', newline="\n") as file_cinput:
    # The read handle used to be left open; close it as soon as the content
    # has been checked.
    with open('xcinput.tex') as existing_cinput:
        already_declared = font_marker in existing_cinput.read()
    if not already_declared:
        file_cinput.write(font_declaration)

# Best-effort cleanup of the scratch files: a file that is missing or cannot
# be removed is skipped silently.  input_tmp_ind is left on disk (its removal
# is disabled).
for scratch_path in (
    "cwindexs_chinese.x0x1x",
    "cwindexs_english.x0x1x",
    input_sorted_ind,
    input_tmp,
    input_tmp_bin,
):
    try:
        os.remove(scratch_path)
    except OSError:
        pass

# Recreate the _tmp.tmp file as a plain copy of the .xtc input (change log
# 20220809: "do not delete _tmp.tmp"); a failed copy is ignored as well.
try:
    shutil.copyfile(input_xtc, input_tmp)
except OSError:
    pass


# try:     
#     shutil.move("cwindexg.ilg", ilgname)    
# except OSError:
#     pass

print("cwmkidx processing done.")

# 2017.3.24
# Report one more than the last line index left behind by the processing
# loops; `linei` is unbound when none of those loops ever iterated.
try:
    processed_count = linei + 1
except NameError:
    print('Nothing to process.')
else:
    print("[{}]".format(processed_count))


