# -*- coding: utf-8 -*-

"""
cwtex 4.1
(cwtex41.py, tex2xtc41.py, cwbiblatex41.py, cwmkidx41.py)

Copyright (C) 2016 Tsong-Min Wu and Tsong-Huey Wu
  with support of ...

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, see <http://www.gnu.org/licenses/>
"""


import os, sys, logging, unicodedata, argparse, re
import shutil
from cwfont_decode import *

"""  # 2016/4/9
def replace_str(source, str_ori, str_new):
    f = open(source, 'r', encoding='utf-8')
    filedata = f.read()
    f.close()
    newdata = filedata.replace(str_ori, str_new)
    f = open(source, 'w', encoding='utf-8')
    f.write(newdata)
    f.close()
"""
# Command-line interface: one required positional input file plus three
# optional switches.  The parsed result is kept in the module-level `args`.
parser = argparse.ArgumentParser(description="cwmkidx processor version 4.1")
parser.add_argument("infile")

# Options from earlier versions, kept for reference only (not active):
# parser.add_argument("-d", "--addctxc", action="store_true", help="add \\ctxcindextitle  in index")
# parser.add_argument("-e", "--addctxe", action="store_true", help="add \\ctxeindextitle  in index")
# parser.add_argument("-m", "--optmkndx", action="store_true", help="options for makeindex, e.g. -mc -mssty -moind")
# parser.add_argument("-zcs", "--stroke", action="store_true", help="option for font of stroke")
# parser.add_argument("-y", "--zhuyin", action="store_true", help="Sort cjk by phonetic characters")

parser.add_argument("-s", "--style", help="specify style filename")

parser.add_argument("-cwe", "--cjklast", action="store_true", help="Print English index before Chinese")
parser.add_argument("-cwp", "--phonetic", action="store_true", help="Sort cjk index by phonetic characters")

args = parser.parse_args()

# Derive every working filename from the input path with its extension
# removed.  The input is always read from "<base>.xtc", whatever extension
# (if any) was typed on the command line.
#
# os.path.splitext strips only the final extension of the last path
# component, so "./book", "dir.v2/book.xtc" and "my.book.xtc" keep their
# full base name instead of being cut at the first dot in the path.
temp = os.path.splitext(args.infile)[0]

inputfile = temp + ".xtc"
ilgname = temp + ".ilg"
realname = temp + ".ind"

# NOTE(review): stylefile is only defined when --style is given and is not
# read anywhere else in this script; makeindex is run with a fixed style name.
if args.style:
    stylefile = args.style

logging.basicConfig(format='%(lineno)d %(message)s')

# slashc = 0
# slashd = 0
# slashe = 0
# makendxno = 0
# ctrokeon = 0
#if args.optmkndx:
#   makendxno=1
#   makendxoption = ""
# if args.stroke:
# ctrokeon = 1


# phonetic_on = 0
# slashc = 0         # cjk first
# if args.cjklast:
#     slashc = 0     # cjk last
# else:
#     slashc = 1     # cjk first
# if args.phonetic:
#     phonetic_on = 1


print("processing xtcfile to cwindexg.tmp ... ")

# Pass 1: copy the .xtc file to cwindexg.tmp, replacing every U+FA66
# character (code point 64102) with the two ASCII characters "00".
# Both files are closed by the `with` statement even if a read or write fails.
with open(inputfile, encoding='utf-8') as file, \
        open("cwindexg.tmp", 'w', encoding='utf-8') as file_object:
    lines = file.readlines()
    # linei is kept as a module-level name because the status print at the
    # end of the script reads it.
    for linei, str1 in enumerate(lines):
        file_object.write(str1.replace(chr(64102), "00"))

#if (makendxno==1)   spawnlp(P_WAIT,"makeindex.exe","makeindex.exe",makendxoption,"cwindexg.tmp",NULL);  /* 2.2g*/
#else  spawnlp(P_WAIT,"makeindex.exe","makeindex.exe","cwindexg.tmp",NULL);
#
# -c means blank compression. see LaTeX Companion, 2nd ed., p.655
# -l means "seal" before "seal lion"

from subprocess import call
# call(["makeindex", "-c", "-s", "cwtex", "cwindexg.tmp"])
# Run makeindex on the temporary file; it is expected to leave its result in
# cwindexg.ind, which the next pass reads.
# NOTE(review): the style name is fixed to "cwtex"; the --style command-line
# option is not consulted here.
makeindex_status = call(["makeindex", "-l", "-c", "-s", "cwtex", "cwindexg.tmp"])
if makeindex_status != 0:
    # Report the failure but carry on, as before; if no .ind file was
    # produced the open() below will raise.
    print("warning: makeindex exited with status " + str(makeindex_status),
          file=sys.stderr)

print("processing  cwindexg.ind to cwindexg4.tmp ... ")

# Pass 2: copy cwindexg.ind to cwindexg4.tmp, turning each " 00" (space plus
# the two-character placeholder written in pass 1) back into a space followed
# by U+FA66.  Matches are taken left to right without overlapping.
with open("cwindexg.ind", encoding='utf-8') as file, \
        open("cwindexg4.tmp", 'w', encoding='utf-8') as file_object:
    lines = file.readlines()
    # linei is kept as a module-level name because the status print at the
    # end of the script reads it.
    for linei, str1 in enumerate(lines):
        file_object.write(str1.replace(" 00", " " + chr(64102)))

"""
/* see cwmkidx4b20.c */
/* remove line {\edx{Symbol} */
/* convert {\edx{ to \edx{ */
"""

print("\nprocessing cwindexg4.tmp to file.ind ...")

az = re.compile(r'\w')
cjk = re.compile(u'[\u3041-\u30ff\u4e00-\u9fff]+', re.UNICODE)


def _strip_sort_key(line):
    """Return *line* with U+FA66 markers and the key text after them removed.

    Text before the first marker is copied unchanged.  From the first marker
    up to the end of the line the characters are filtered:

    * a marker is dropped and switches skipping on;
    * a character matching ``cjk`` is kept and switches skipping off;
    * a character matching ``az`` (``\\w``) is kept only while skipping is off;
    * any other character is kept and switches skipping off.

    The newline that ends the line is always kept.  A line without a trailing
    newline is handled as well (the original loop indexed past the end of
    such a line).
    """
    marker = chr(64102)
    kept = []
    after_marker = False   # True from the first marker until the newline
    skipping = False       # True while key characters are being dropped
    for ch in line:
        if not after_marker:
            if ch == marker:
                after_marker = True
                skipping = True
            else:
                kept.append(ch)
        elif ch == '\n':
            kept.append(ch)
            after_marker = False
        elif cjk.match(ch):
            kept.append(ch)
            skipping = False
        elif ch == marker:
            # Tested before the \w check: Python's Unicode \w is expected to
            # match U+FA66 itself, which would make a later marker test
            # unreachable and let a second marker through to the output.
            skipping = True
        elif az.match(ch):
            if not skipping:
                kept.append(ch)
        else:
            kept.append(ch)
            skipping = False
    return "".join(kept)


# Pass 3: copy cwindexg4.tmp to cwindexs.ind with the markers and key text
# filtered out line by line.
with open("cwindexg4.tmp", encoding='utf-8') as file, \
        open("cwindexs.ind", 'w', encoding='utf-8') as file_object:
    lines = file.readlines()
    # linei is kept as a module-level name because the status print at the
    # end of the script reads it.
    for linei, str1 in enumerate(lines):
        file_object.write(_strip_sort_key(str1))

# Pass 4: split cwindexs.ind into two scratch files.  An "  \item " line whose
# next character matches the cjk pattern goes to the Chinese file; any other
# "  \item " line goes to the English file.  Lines that match none of the
# patterns below follow whichever file the most recent "  \item " line went to
# (tracked by fchinese).
file = open("cwindexs.ind", encoding='utf-8')
chifile_object = open("cwindexs_chinese.x0x1x", 'w', encoding='utf-8')
engfile_object = open("cwindexs_english.x0x1x", 'w', encoding='utf-8')
# Every pattern is applied with match() to the line from position 0, so each
# one only recognises text at the very start of a line.
sitem = re.compile(r'  \\item ')
bindex = re.compile(r'\\begin{theindex}')
eindex = re.compile(r'\\end{theindex}')
indexspace = re.compile(r'  \\indexspace')
## {\edx{Symbols}\nopagebreak
edxsym = re.compile(r'\\edx{Symbols}\\nopagebreak')
edxnotsym = re.compile(r'\\edx{')

# sorton is handed to uni2utf8: 1 without --phonetic, 2 with it.
sorton = 0            # 2016/4/9
if not args.phonetic:
    sorton = 1
else :
    sorton = 2

# aprej holds the group number of the previous CJK entry; it starts at -2,
# presumably a value uni2utf8 never returns, so the first entry gets a heading.
aprej = -2
# fchinese == 1 means "the last \item went to the Chinese file"; it starts at
# 1, so unmatched lines seen before any \item go to the Chinese file.
fchinese = 1
lines = file.readlines()
for linei in range(len(lines)):
    str1 = lines[linei]
    i = 0
    # ith char in current line
    linelength = len(str1)
    # The body always ends in `break`, so this loop runs at most once per
    # line; i only changes inside the \item branch.
    while i != linelength:
        if sitem.match(str1[i:]) :
            # Step over the 8-character "  \item " prefix.
            i = i + 8
            if cjk.match(str1[i]) :
                # uni2utf8 comes from cwfont_decode (not visible here).
                # NOTE(review): j is used below as a group number for
                # headings, presumably a stroke count when sorton == 1 and a
                # phonetic-symbol index when sorton == 2 -- confirm.
                # The first return value (utf8) is not used.
                utf8,j = uni2utf8(ord(str1[i]),sorton)  # 2016/4/9
                fchinese = 1

                # Write a group heading whenever the group number changes.
                if j != aprej :
                    aprej = j

                    if j > 0:
#                        if phonetic_on == 0 :
                        if not args.phonetic:
                            chifile_object.write("\n  \\ctxidxstroke{" + str(j) + "}\n")
                        else :
                            # j is remapped before being used as a \cH glyph
                            # number: up to 21 unchanged, 22-24 shifted by
                            # +13, anything larger shifted by -3.
                            # NOTE(review): the reason for this mapping is
                            # not visible in this file.
                            if j <= 21:
                                j = j
                            else :
                                if j <= 24:
                                    j = j + 13
                                else :
                                    j = j - 3
                            chifile_object.write("  \\par\\bigskip \\noindent{{\\MbfQ\\cH" + str(j) + "}} \\par\\medskip\n")  # 2016/4/9  str(j-1 )

# Problems:
#
# 1. Need to add  MbfQ: fontno=57  to cinput.tex,
# 2. sorting is incorrect.
# 3. need to add \\MdQ\\cH149(劃) to cinput.tex?
#
                    else :
                        # Fixed heading for entries whose group number is
                        # zero or negative.
                        chifile_object.write("  \\par\\bigskip \\noindent{{\\MaQ\\cH46}\\z{\\MiQ\\cH204}\\z{\\MhQ\\cH176}\\z{\\MdQ\\cH149}} \\par\\medskip\n")
                chifile_object.write(str1)
            else :
                fchinese = 0
                engfile_object.write(str1)
        elif bindex.match(str1[i:]) or eindex.match(str1[i:]) :
            # Writing "" emits nothing, so the \begin/\end lines are dropped.
            engfile_object.write("") # do nothing for head and tail
        elif indexspace.match(str1[i:]) :
            # Kept in the English file, but commented out with "%".
            engfile_object.write("%" + str1)
        elif edxsym.match(str1[i:]) :
            # Kept in the English file, but commented out with "%".
            engfile_object.write("%" + str1)
        elif edxnotsym.match(str1[i:]) :
            engfile_object.write(str1)
        else :
            # Any other line follows the most recent \item.
            if fchinese == 1:
                chifile_object.write(str1)
            else :
                engfile_object.write(str1)
        break

file.close()
chifile_object.close()
engfile_object.close()

#file = open("cwindexs.ind", encoding='utf-8')
#chifile_object = open("cwindexs_chinese.x0x1x", 'w', encoding='utf-8')
#engfile_object = open("cwindexs_english.x0x1x", 'w', encoding='utf-8')

# Final assembly: write <base>.ind as
#   \begin{theindex}, a fixed preamble, the two parts (each preceded by its
#   title macro), \end{theindex}.
# --cjklast selects English first; the default is Chinese first.  The title
# strings differ slightly between the two orders (one vs. two trailing
# newlines after \ctxcindextitle) and are reproduced exactly.
if not args.cjklast:                        #  Chinese first, then English
    index_parts = [
        ("\n\\ctxcindextitle\n", "cwindexs_chinese.x0x1x"),
        ("\n\\ctxeindextitle\n\n", "cwindexs_english.x0x1x"),
    ]
else:                                         # English first, then Chinese
    index_parts = [
        ("\n\\ctxeindextitle\n\n", "cwindexs_english.x0x1x"),
        ("\n\\ctxcindextitle\n\n", "cwindexs_chinese.x0x1x"),
    ]

with open(realname, 'w', encoding='utf-8') as file_object:
    file_object.write("\\begin{theindex}\n")

    # NOTE(review): the next two lines are identical in the original script;
    # the second may have been meant for the English title -- confirm before
    # changing.  Both are kept so the generated file is unchanged.
    file_object.write("\\ifdefined\\ctxcidx\\let\\ctxcindextitle\\ctxcindex\\fi\n")
    file_object.write("\\ifdefined\\ctxcidx\\let\\ctxcindextitle\\ctxcindex\\fi\n")
    file_object.write("\\providecommand{\\ctxcindextitle}{}\n")
    file_object.write("\\providecommand{\\ctxeindextitle}{}\n")
    file_object.write("\\providecommand{\\ctxidxstroke}{}\n")

    for part_title, part_name in index_parts:
        file_object.write(part_title)
        # Each part file is closed as soon as it has been read.
        with open(part_name, encoding='utf-8') as file:
            lines = file.readlines()
        # linei is kept as a module-level name because the status print at
        # the end of the script reads it.
        for linei, str1 in enumerate(lines):
            file_object.write(str1)

    file_object.write("\\end{theindex}\n")

# 2016/4/9
# Append one font-selection macro to cinput.tex unless the file already
# contains it: \MbfQ when --phonetic is given, \MdQ otherwise.
if args.phonetic:
    cinput_probe = '{\\MbfQ}{\\fontfamily{cwM}\\fontseries{57}'
    cinput_line = "\\providecommand{\\MbfQ}{\\fontfamily{cwM}\\fontseries{57}\\selectfont}\n"
else :
    cinput_probe = '{\\MdQ}{\\fontfamily{cwM}\\fontseries{3}'
    cinput_line = "\\providecommand{\\MdQ}{\\fontfamily{cwM}\\fontseries{3}\\selectfont}\n"

# Opening in append mode first creates cinput.tex when it does not exist, so
# the read below cannot fail on a missing file.
with open('cinput.tex', 'a', newline="\n") as file_cinput:
    with open('cinput.tex') as cinput_reader:
        cinput_has_macro = cinput_probe in cinput_reader.read()
    if not cinput_has_macro:
        file_cinput.write(cinput_line)

# Remove the scratch files; a file that is missing or cannot be removed is
# ignored, as before.
for scratch_name in (
        "cwindexs_chinese.x0x1x",
        "cwindexs_english.x0x1x",
        "cwindexs.ind",
        "cwindexg.ind",
        "cwindexg.tmp",
        "cwindexg4.tmp",
):
    try:
        os.remove(scratch_name)
    except OSError:
        pass

# linei is the index left behind by the last line loop that ran earlier in
# the script, so this prints that loop's line count in brackets.
print('[' + str(linei+1) + ']')




