# -*- coding: utf-8 -*-

# Change log
# 2019.7.8
#  * bug fixed: \newcommand{\bibcwfont}
#
# 2019.2.13
#  * Bopomofo (Zhuyin) tone marks changed to Unicode (cwTeXm57)
#
# 2019.1.21
#  * bug fixed: Under lualatex, test.CTX will cause error of not being able to 
#    find the main file. 
#
# 2018.3.4
#  * Default output is utf8, with option to latin-1
#       only affect \verb and verbatim environment
#  * Support Verbatim (from fancyvrb package)
#  * If document contains no verbatim, then xcinput.tex 
#  *   will not contain definition of \catcode+252=1 ... (waiting)
#

"""
cwtex 5.1 system
(cwtex.py, tex2xtc.py, cwbiblatex.py, cwmkidx.py, cwmkgls.py, cwcjksort, cwhtml)

Copyright (C) 2018 Tsong-Min Wu and Tsong-Huey Wu
  with support of ...

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, see <http://www.gnu.org/licenses/>
"""

###
#
# 1. read .ctx
# 2. read initial values (for main file initial values are 0 or "")
# 3. translate char to latex font command
# 4. write out .tex file when done
# 5. returns value for variables
# 6. If this is an \input or \includeonly file, read initial values from the previous run
#
# If main file, write \input{xcinput.tex}
###


import os
import sys
import shutil
import logging
import unicodedata
import inspect
import configparser
import re

import time
# Wall-clock timestamp taken while this module is being loaded.
# NOTE(review): presumably used further down to report elapsed run time --
# confirm against the rest of the file.
start_time = time.time()

from cwfont_encode import *

# Both patterns and strings to be searched can be Unicode strings as well as 8-bit strings.
# However, Unicode strings and 8-bit strings cannot be mixed: that is,
# you cannot match an Unicode string with a byte pattern or vice-versa.

# Character classes used to classify single input characters.  The \uXXXX
# escapes are resolved by the Python string literal, so each compiled pattern
# contains the real characters.

# Kana (U+3041-U+30FF), Bopomofo letters (U+3105-U+3129), the basic CJK
# ideograph range (U+4E00-U+9FFF) and, since 2019/02/11, the Bopomofo tone
# marks in U+02C7-U+02D9.
cjk = re.compile(u'[\u02c7-\u02d9\u3041-\u30ff\u3105-\u3129\u4e00-\u9fff]+', re.UNICODE)

# CJK compatibility ideographs; ctx_tex() maps these to ordinary characters
# through dic_cjk_compatible.
cjk_comp = re.compile(u'[\uf900-\ufad9]+', re.UNICODE)
# Fullwidth forms U+FF01-U+FF5E (added 03/03) plus U+EEB8-U+F302
# (2016/06/29: added full blank).
fullwf = re.compile(u'[\uff01-\uff5e\ueeb8-\uf302]+', re.UNICODE)
# CJK symbols and punctuation block.
cjk_punc = re.compile(u'[\u3000-\u301f]+', re.UNICODE)
# Opening CJK brackets/quotes: 《, 「 ...
punc_lq = re.compile(u'(\u3008|\u300a|\u300c|\u300e|\u3010|\u3014|\u3016|\u3018|\u301a|\u301d)', re.UNICODE)
# Closing CJK brackets/quotes (》, 」, ...) plus U+3001 and U+3003.
# Fix: the first entry used to be written as "u3001" (no backslash), which put
# the literal characters 'u', '3', '0' and '1' into the class instead of U+3001.
punc_rq = re.compile(u'[\u3001\u3003\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e]+', re.UNICODE)
# ASCII letters and closing punctuation.  Raw string so that \. \) \] are
# regex escapes rather than invalid string escapes; the pattern text itself is
# unchanged.
ascii_rq = re.compile(r'[a-zA-Z,\.\)\]!:?]')
# General punctuation through miscellaneous symbols (range widened from
# U+2044-U+2B4C on 2016/7/11).
math_symbol = re.compile(u'[\u2000-\u2bef]+', re.UNICODE)

# \begin{name} and \end{name}; group 1 is the environment name
# (ASCII letters only).
begin = re.compile(r'\\begin{([a-zA-Z]+)}')
end = re.compile(r'\\end{([a-zA-Z]+)}')

# Opening of a bibliography-resource command; group 1 is the command name.
sbib = re.compile(r'\\(bibliography|addbibresource|addglobalbib|addsectionbib){')
# \loadglsentries{file}; group 1 is the file name without extension
# (ctx_tex() appends ".ctx" / ".tex" to it).
gloss = re.compile(r'\\loadglsentries{([-_a-zA-Z\d]+)}')

# Single characters as in \$ \# and so on (presumably tested against the
# character that follows a backslash -- confirm at the call site).
# Cannot handle \\, [ and ].
backslash_char = re.compile(r'(#|%|&|\$|\*|\'|\`|\"|\{|\}|\(|\)|\||\+|\-|,|\.|;|!|_|\^)')

# ctxf font name
# me?s? = m, me, ms, mes
# ctxfs = ctxf + [space], ctxf is used in c_macro, and ctxfs is used in text
#
# NB. {...\ctxfr{\large ...}} does not work, {...\large{\ctxfr ...}} should be OK, 2016.5.15
# NB. To be expanded to include all fonts
# NB: double check for legitimate file name
#     no cjk allowed in \include and \input

# ctxf = re.compile(r'\\ctxf(rue?s?|mue?s?|me?s?|mbe?s?|bbe?s?|be?s?|rbe?s?|re?s?|fe?s?|ke?s?)')
# ctxfs = re.compile(r'\\ctxf(rue?s?|mue?s?|me?s?|mbe?s?|bbe?s?|be?s?|rbe?s?|re?s?|fe?s?|ke?s?)\s')

# Font codes accepted after "\ctxf".  Within each family the longer codes come
# first so that, e.g., "rl" is tried before the bare "r".
#
# The pattern is assembled from adjacent raw-string pieces.  It used to be one
# raw string split over several source lines with a trailing backslash; in a
# raw string that backslash and the newline are kept as part of the literal,
# so the pattern contained " \<newline>    " in front of the "rl..." and
# "fl..." alternatives.  Those alternatives could never match, and \ctxfrl /
# \ctxffl were reported as "r" / "f".
_CTXF_FONT_CODES = (
    r'mle?s?|mbe?s?|mue?s?|mxe?s?|me?s?|'
    r'ble?s?|bbe?s?|bue?s?|bxe?s?|be?s?|'
    r'rle?s?|rbe?s?|rue?s?|rxe?s?|re?s?|'
    r'kle?s?|kue?s?|kxe?s?|ke?s?|'
    r'fle?s?|fe?s?|lbe?s?|le?s?'
)

# \ctxf<code>; group 1 is the font code (e.g. "m", "bb", "rles").
ctxf = re.compile(r'\\ctxf(' + _CTXF_FONT_CODES + r')')

# NOTE(review): the header comment describes ctxfs as "ctxf + [space]", and the
# older commented-out definition ended in \s, but this definition has no
# trailing-whitespace requirement and is identical to ctxf.  Left as is;
# confirm which behaviour is intended.
ctxfs = re.compile(r'\\ctxf(' + _CTXF_FONT_CODES + r')')

# One or more digits.  Raw string: "\d" inside a plain literal is an invalid
# string escape that current Python versions warn about; the pattern text is
# unchanged.
digit = re.compile(r'[\d]+')
# One or more '%' characters (TeX comment marker).
comment = re.compile('[%]+')
# \texorpdfstring{ or \bkmark{ ; to be expanded to include all fonts
texorpdf = re.compile(r'\\(texorpdfstring|bkmark)\{')

# \input{file}; group 1 is the file name without extension.
# NOTE: this name shadows the builtin input(), but ctx_tex() refers to the
# pattern by this name, so it is kept.
input = re.compile(r'\\input{([-_a-zA-Z\d]+)}')
# Single-line \includeonly{a,b,c}; group 1 is the comma-separated list.
includeonly = re.compile(r'\\includeonly{([a-z,]+)}')
# Opening of \includeonly{ ; group 1 is the command name.
filelist = re.compile(r'\\(includeonly){')

# for \verb or verbatim env
# charcode+252 is escape for {, 253 for }, 254 for \, and 251 is for + or # or |
## NB: if xcinput.tex exists in the disk, should use it
##

# Algorithm for processing Chinese font
#
# fontlist = [["M", n], ["K", n], ...]
#    n = number of bgn before the next font change (\ctxf),  bgn = {, \begin{.}
#    if n = -1, the last font in the list is removed, and the n of the new last font is decreased by 1
#      [ only occurs in, eg., \section[.]{.}, and is not included
#      bgn_s = 1, when entering [, and = 0 when leaving
#
# For example,
#
#   \ctxfm
#   ...
#   \ctxfk
#   ...
#   {\ctxfbb ... {\ctxfr ...} ...} ..
#
# On the third line, after \ctxfr is removed, ["BB", 1] will be changed to ["BB", 0]
# And if the immediately next char is } or \end{...}, then n -1 again, see the following case:
#
#   {\ctxfbb ... {\ctxfr ...}} ...  or  {\ctxfbb ... \ctxfr ...} ...
#

# Algorithm for processing \ctxfdef
#
# If \ctxfdef found, then do the following algorithm.
#
#      For example, \ctxfdef{\section}[\ctxfr]{\ctxfk}{\ctxfbb}
#
#      separate (a) \section, (b) \ctxfr (optional), (c) \ctxfk, \ctxfbb
#      for (c), there at most 2 arguments. For part (b) and (c), generate font name: r, k, bb
#
#        example:
#        \ctxfdef{\section}[\ctxff]{\ctxfk}{ctxfbb} =>
#          \section, 1, 1, 1, m, k, bb
#        \ctxfdef{\section}{\ctxfk}{ctxfbb} =>
#          \section, 0, 1, 1, m, k, bb
#        \ctxfdef{\section}{\ctxfk} =>
#          \section, 0, 1, 0, m, k, m
#
#      Search for command name from (a) in .ctx file, eg., c_macro[0] (= \section).
#      If found, check if \section*, goto the char position after [ or {, and change font name.
#          Here, we use the same algorithm as the \ctxf portion, but only need to
#          (a) re-define  curf, and (b) fontlist.append
#          For example,  {\ctxfm ,,. \bfig[.]{.}{.}  ...}  or  ... \st{.} ...
#
#
# c_macro is a list of group of 7 elements
#
#   c_macro["\section", 0, 1, 0, "m", "bb", "m", "\bfig", 0, 1, 0, "m", "r", "m"]
#
# the first element of each group is the latex macro
#
#   cur_macro > 0 means there is ctxf macro
#   curf is current font, default is  \ctxfm
#
# NB. \ctxfdef must be on a line by itself

# Definitions of variables
#
# fontlist = [["M", 0]]
# biblist=[]                     # list for .bib files
# cur_macro = 0                  # if cur_macro = 1, \ctxfdef{.}{} is defined, use it to define current font
# cur_env = 0                    # if cur_env = 1, \ctxfdef{.}{} is defined, use it to define current font
# cur_verb = 0                   # cur_verb = 2 means that we are in verbatim or \verb+.+
# bgn = []                       # delimiter:  {, [, + (for \verb), and x in \begin{x}
#                                #   if end{.} is not the same as bgn, then issue warning
# c_env = []                     # list for \ctxfdef{env}
# c_macro = []                   # list for \ctxfdef{\cmacro}
# beg = ""                       # argument for \begin{.}, eg., minipage or document
# en = ""                        # argument for \end{.}, eg., minipage
# filelevel = 0                  # filelevel = 1 means \input{.} or \includeonly{.} files
# flist_level = 0                # flist_level = 1 means still collecting file list from \includeonly{.}
# flist_str = ""                 # original str of filelist
# flist_str_add = ""             # added str of filelist

# ctx_tex function parameter:
#   source, output, fontlist, cur_macro, cur_env, cur_verb, bgn, c_env, c_macro, beg, en, filelevel, flist_str, flist_level, fc, cjk_ascii, cjk_cjk
# initial value
#   file_ctx, file_tex, [["M", 0]],   0    , 0      , 0,        [],  [],    [],      "",  "", 0, "", 0, fcinput.tex, 0, 1
#
# all of the above will be passed to \input and \includeonly #

# pdfstr is used in \texorpdfstring
#
# if texorpdfstring.match :
#    octa=str(oct(int(ord(str1[i])/256)))      # str1[i] 為第2個括號內中文

def pdfstr(cjk_str, bkfile):
    """Write one character to *bkfile* as two backslash-prefixed octal numbers.

    The code point of *cjk_str* is split into a high part (code point // 256)
    and a low part (code point % 256).  Each part is written in octal,
    zero-padded to at least three digits, with a leading backslash, e.g.
    U+4E2D is written as ``\\116\\055``.  Used for the text of
    \\texorpdfstring.

    cjk_str -- a single character (``ord()`` is applied to it directly)
    bkfile  -- any object with a ``write(str)`` method
    """
    high_part, low_part = divmod(ord(cjk_str), 256)
    escaped = "\\" + format(high_part, "03o") + "\\" + format(low_part, "03o")
    bkfile.write(escaped)

# lineno() is not used anymore?
#

def lineno():
    """Return the source line number of the statement that called lineno()."""
    caller_frame = inspect.currentframe().f_back
    return caller_frame.f_lineno

# TEXINPUTS was defined in cwpdf.py,
# including subdirectories in environment, cwtex.ini, and current directory
#
# Mac OS and windows use different notations, post-processing here
# But sometimes, we want to run cwtex41b.py directly, so the following 2 lines 
# take care of this situation.

# Make sure TEXINPUTS exists (empty when unset) so that a standalone run works.
os.environ.setdefault('TEXINPUTS', "")
new_env = os.environ['TEXINPUTS']

# Turn the search path into a list of directories:
#   ".:"  is removed,
#   "//"  is removed,
#   ":/"  becomes ";/" so that the single split on ";" below also separates
#         entries that were joined with ":".
# NOTE(review): ":/" would also match a drive spec written as "C:/..." --
# confirm that such values never reach this point.
without_dot_entry = new_env.replace(".:", "")
without_double_slash = without_dot_entry.replace("//", "")
semicolon_separated = without_double_slash.replace(":/", ";/")
texinputs_list = semicolon_separated.split(";")

# Working directory at load time; search_ctx() copies located files here.
curdir = os.getcwd()

# Search for .ctx, if found copy it to the current directory, and convert to .tex
#
def search_ctx(source_ctx):
    """Copy *source_ctx* into the working directory if it is not already there.

    Nothing happens when ``curdir/source_ctx`` already exists.  Otherwise each
    directory in ``texinputs_list`` is checked in order; the first one that
    holds the file supplies two copies in ``curdir``: the file itself and a
    second copy named ``<source_ctx>_xbackup``.  The ``_xbackup`` copy marks
    the file as having been copied in, so both can be removed afterwards.

    source_ctx -- file name (e.g. "chapter1.ctx") to look for
    """
    local_copy = os.path.join(curdir, source_ctx)
    if os.path.exists(local_copy):
        return

    for directory in texinputs_list:
        candidate = os.path.join(directory, source_ctx)
        if not os.path.exists(candidate):
            continue
        # curdir may itself be in the list, and an earlier directory may
        # already have supplied the file; never copy a file onto itself.
        if os.path.exists(local_copy):
            continue
        shutil.copy(candidate, local_copy)
        marker_copy = os.path.join(curdir, source_ctx + "_xbackup")
        shutil.copy(candidate, marker_copy)



# biblist = []   # moved to main()

# Output-encoding switch read by ctx_tex() when it opens the .tex file for an
# \input / \includeonly / \loadglsentries target:
#   luatex = 1 (default): the .tex output is opened as utf-8
#   luatex = 0:           the .tex output is opened as latin-1
# Per the original notes, the choice only affects \verb and verbatim material
# (and htlatex / pdflatex runs).  The value 1 is the setup for a standalone
# run; when called from cwpdf.py or cwhtml.py the -l option has to be added to
# get latin-1.
luatex = 1

def ctx_tex(source, output, fontlist, biblist, cur_macro, cur_env, cur_verb, bgn, c_env, c_macro, beg, en, filelevel, flist_str, flist_level, fc, cjk_ascii, cjk_cjk):

    verb_delimiter = ""                   # \verb delimiter, eg., # ! |
    verbatim_name = ""
    curf = fontlist[len(fontlist)-1][0]   # current font
    copy_ctx = 0
    tps = [[0, 0]]                        # for \texorpdfstring{}{}.  [1][0] means that we are in the first
    net_br = 0                            # net number: # of { - # of }, if net_br < 0, issuing error message
    bgn_s = 0                             # for \section[.]{.}, = 1 when entering after [, = 0 after ]

    if filelevel == 0:
        output.write('\\input{xcinput.tex}')

#    lines = source.readlines()

    file_name = re.findall(r'name=([\'\w\-\.]+)\s', str(source))
#    logging.info('processing %s', file_name[0])

    try:
        lines = source.readlines()
    except UnicodeDecodeError as error:
        logging.info("\nWarning:\n Your file is probably not a utf8 file.")
        logging.info(" Please try to convert it to utf8, and try again.\n")
        try:
            os.system('pause')  #windows, doesn't require enter
        except whatever_it_is:
            os.system('read -p "Press any key to continue"') #linux


    for linei in range(len(lines)):
        str1 = lines[linei]

        if linei == 0:                      # 2017.1.12: Removing BOM char
#            print("This is line 0.")
            str1 = str1.replace(u'\ufeff', '')

        i = 0                             # ith char in current line
        k = 0                             # kth set (7 elements) in \ctxfdef
        linelength = len(str1)

        if flist_level == 1:
            flist_str = flist_str + ''.join(str1[0:]).lstrip().rstrip()
            if comment.findall(flist_str):                            # there are % in s_add
                flist_str = flist_str.split('%')[0].rstrip()

### 2016.9.19
#            if flist_str.endswith("}"):

            if re.findall(r'\}', flist_str):                          # str may end with, eg., ...}}{}
                flist_str = flist_str.split('}')[0].rstrip()
                flist_level = 0                                       # find complete file list
#                flist_str = flist_str.rstrip("}")                     # generate file list and then processing all the files
                filelist = flist_str.split(',')
                for k in range(len(filelist)):
                    inputed_ctx = filelist[k] + ".ctx"
                    inputed_ctx_xbackup = filelist[k] + ".ctx_xbackup"
                    inputed_tex = filelist[k] + ".tex"
                    search_ctx(inputed_ctx)
                    if os.path.exists(os.path.join(curdir, inputed_ctx)):
                        filelevel = 1
                        in_ctx = open(inputed_ctx, encoding='utf-8')
                        if luatex == 1:
                            in_tex = open(inputed_tex, 'w', encoding='utf-8')
                        elif luatex == 0:                                                    
                            in_tex = open(inputed_tex, 'w', encoding='latin-1')                        

                        ctx_tex(in_ctx, in_tex, fontlist, biblist, cur_macro, cur_env, cur_verb, bgn, c_env, c_macro, beg, en, filelevel, flist_str, flist_level, fc, cjk_ascii, cjk_cjk)

        while i != linelength:

            if str1[i] == "%":
                if cur_verb == 2:
                    output.write("%")
                elif cur_verb < 2:
                    if i == 0 and linelength == 1:                              # a line with a single %
                        output.write("%")
                        i = 1                                                   # will minus 1 at the end of the block
                    elif i == 0 and linelength > 1:                             # % at the beginning of line
                        while i < linelength:
                            if cjk.match(str1[i]) or cjk_punc.match(str1[i]):
                                c1, charno, kfno = readn2(str1[i])
                                y = "{\\" + curf + c1 + "Q" + "\\" + "cH" + str(charno) + "}"
                            elif fullwf.match(str1[i]):                         # unicode: 65281 - 65384
                                str1 = f2h(i,str1)                              # fullwidth chars convert to halfwidth chars
                                continue
                            elif cjk_comp.match(str1[i]):                       # disable following line 522-533
                                dvalue = ord(str1[i])- 0xf900                   # cjk chars used in Korean document has alternative codings
                                if dic_cjk_compatible[dvalue] == 0x3000:        # convert them to ordinary chars
                                    logging.info("\nLine %d (file %s): %s contains a non-standard coding of cjk char,"  % (linei+1, file_name[0]))
                                    logging.info("we will convert it to ordinary cjk char.\n")
                                    try:
                                        os.system('pause')  #windows, doesn't require enter
                                    except whatever_it_is:
                                        os.system('read -p "Press any key to continue"') #linux
                                rchar=chr(dic_cjk_compatible[dvalue])
                                str2 = list(str1)
                                str2[i] = rchar
                                str1 = ''.join(str2)
                                continue
                            else:
                                y = str1[i]

                                strord=ord(str1[i])
                                if strord > 256:                               # non-cjk char in comment mode, write two ~
                                    y = "~~"                                   # does no check math_symbol.match in comment mode

                            output.write(y)
                            i += 1

                    elif i > 0 and str1[i-1] != "\\":                           # % in the middle of line
                        while i < linelength:
                            if cjk.match(str1[i]) or cjk_punc.match(str1[i]):
                                c1, charno, kfno = readn2(str1[i])
                                y = "{\\" + curf + c1 + "Q" + "\\" + "cH" + str(charno) + "}"
                            elif fullwf.match(str1[i]):                         # unicode : 65281 - 65384
                                str1 = f2h(i,str1)                              # fullwidth chars convert to halfwidth char
                                continue
                            elif cjk_comp.match(str1[i]):                       # disable following line 522-533
                                dvalue = ord(str1[i])- 0xf900                   # cjk chars used in Korean document has alternative codings
                                if dic_cjk_compatible[dvalue] == 0x3000:        #   convert them to ordinary chars
                                    logging.info("\nLine %d (file %s): %s contains a non-standard coding of cjk char,"  % (linei+1, file_name[0]))
                                    logging.info("we will convert it to ordinary cjk char .\n")
                                    try:
                                        os.system('pause')  #windows, doesn't require enter
                                    except whatever_it_is:
                                        os.system('read -p "Press any key to continue"') #linux
                                rchar=chr(dic_cjk_compatible[dvalue])
                                str2 = list(str1)
                                str2[i] = rchar
                                str1 = ''.join(str2)
                                continue
                            else:
                                y = str1[i]

                                strord=ord(str1[i])
                                if strord > 256:                               # non-cjk char in comment mode, write two ~
                                    y = "~~"                                   # does no check math_symbol.match in comment mode

                            output.write(y)
                            i += 1
                    i = i - 1                                                   # move to the end of line

# Some python documents:
#
# http://www.bogotobogo.com/python/python_subprocess_module.php
# https://thesesergio.wordpress.com/2013/06/18/subprocess-for-dummies/
# http://blog.petrzemek.net/2014/03/23/restarting-a-python-script-within-itself/
# http://sharats.me/the-ever-useful-and-neat-subprocess-module.html
#
#

# Important: Make sure that
# \ctxfdef defined in \input{.} and \includeonly{.}
# has to be passed over to the main file and the following file. OK?
# counting of \begin{.} and \end{.} should continue from main file to inputed file, OK now?
# search for \inputed file, OK now.
#
# NB. Latex only allow \input{test}, TeX also allow \input test
#     here we only allow for  \input{test}

            elif re.match(r'\\input', str1[i:]):
                output.write("\\")
                if cur_verb < 2:
                    inputed = input.findall(str1[i:])                  # inputed is a list, eg., inputed[0] = E7-macro
                    if inputed:
                        inputed_ctx = inputed[0] + ".ctx"
                        inputed_ctx_xbackup = inputed[0] + ".ctx_xbackup"
                        inputed_tex = inputed[0] + ".tex"
                        search_ctx(inputed_ctx)
                        if os.path.exists(os.path.join(curdir, inputed_ctx)):
                            filelevel = 1
                            in_ctx = open(inputed_ctx, encoding='utf-8')
                            if luatex == 1:
                                in_tex = open(inputed_tex, 'w', encoding='utf-8')
                            elif luatex == 0:                                
                                in_tex = open(inputed_tex, 'w', encoding='latin-1')                        

                            ctx_tex(in_ctx, in_tex, fontlist, biblist, cur_macro, cur_env, cur_verb, bgn, c_env, c_macro, beg, en, filelevel, flist_str, flist_level, fc, cjk_ascii, cjk_cjk)
                            filelevel = 0
#
# for glossaries
# \loadglsentries{glo-defns}
# gloss = re.compile(r'\\loadglsentries{([-_a-zA-Z\d]+)}')
#

            elif re.match(r'\\loadgls', str1[i:]):
                output.write("\\")
                if cur_verb < 2:
                    inputed = gloss.findall(str1[i:])                  # inputed is a list, eg., inputed[0] = E7-macro
                    if inputed:
                        inputed_ctx = inputed[0] + ".ctx"
                        inputed_ctx_xbackup = inputed[0] + ".ctx_xbackup"
                        inputed_tex = inputed[0] + ".tex"
                        search_ctx(inputed_ctx)
                        if os.path.exists(os.path.join(curdir, inputed_ctx)):
                            filelevel = 1
                            in_ctx = open(inputed_ctx, encoding='utf-8')
                            if luatex == 1:
                                in_tex = open(inputed_tex, 'w', encoding='utf-8')
                            elif luatex == 0:                                
                                in_tex = open(inputed_tex, 'w', encoding='latin-1')                        

                            ctx_tex(in_ctx, in_tex, fontlist, biblist, cur_macro, cur_env, cur_verb, bgn, c_env, c_macro, beg, en, filelevel, flist_str, flist_level, fc, cjk_ascii, cjk_cjk)
                            filelevel = 0

#
# \includeonly{.}
# May be a list of files, and may span over two or more lines
#
            elif re.match(r'\\includeonly{', str1[i:]):
                output.write("\\")
                if cur_verb < 2:
                    k = i + 13
                    flist_str = ''.join(str1[k:])
                    if flist_str:
                        output.write("includeonly")
                        i = i + 11                                           # or i = k - 2
                        if comment.findall(flist_str):                       # there are % in flist_str
                            flist_str = flist_str.split('%')[0].rstrip()
                        else:
                            flist_str = flist_str.rstrip()

                        flist_str_a = re.findall(r'{([-_,a-zA-Z\d\s]+)}', str1[i:])
                        if not flist_str_a:
                            flist_level = 1
                        else:
                            flist_level = 0                                   # we have a list of file except the last one
                            flist_str = flist_str_a[0]
                            flist_str = re.sub('[\s+]', '', flist_str)        # strip all white space in middle
                            flist_str = flist_str.rstrip("}")

                            filelist = flist_str.split(',')
                            for k in range(len(filelist)):
                                inputed_ctx = filelist[k] + ".ctx"
                                inputed_ctx_xbackup = filelist[k] + ".ctx_xbackup"
                                inputed_tex = filelist[k] + ".tex"
                                search_ctx(inputed_ctx)
                                if os.path.exists(os.path.join(curdir, inputed_ctx)):
                                    filelevel = 1
                                    in_ctx = open(inputed_ctx, encoding='utf-8')
                                    if luatex == 1:
                                        in_tex = open(inputed_tex, 'w', encoding='utf-8')                                    
                                    elif luatex == 0:
                                        in_tex = open(inputed_tex, 'w', encoding='latin-1')                        

                                    ctx_tex(in_ctx, in_tex, fontlist, biblist, cur_macro, cur_env, cur_verb, bgn, c_env, c_macro, beg, en, filelevel, flist_str, flist_level, fc, cjk_ascii, cjk_cjk)
                                    filelevel = 0

            elif re.match(r'\\ctxfdef', str1[i:]):        # NB. use re.findall will have error
                if cur_verb == 2:
                    output.write("\\")                    # if cur_verb = 2: we are in \verb or verbatim, so do nothing
                elif cur_verb < 2:
                    str2 = ' '.join(['%', str1])
                    output.write(str2)

                    cm_a = re.findall(r'\{[\\a-zA-Z]+\}', str1[i:])
                    option = re.findall(r'\[\\[a-zA-Z]+\]', str1[i:])
                    cm_str = ''.join(cm_a[0]).strip("{").strip("}")

                    if cm_str[0] == "\\":
                        c_macro.append(cm_str)               # getting  \section
                        if not option:
                            c_macro.append(0)                # if option, 1; else  0
                        else:
                            c_macro.append(1)                # no. of commands include \section and option
                        c_macro.append(1)
                        if len(cm_a) == 3:                   # if no option, there is at lease one font command
                            c_macro.append(1)                # cm_a does not count option
                        elif len(cm_a) == 2:
                            c_macro.append(0)
                        n = ctxf.findall(str1[i:])           # next find cw-font in ctxfdef
                        if not option:
                            c_macro.append("m")              # if no option, \ctxfm
                        for j in range(0,len(n)):
                            c_macro.append(n[j])
                        if not option and len(n) == 1:
                            c_macro.append("m")              # if no option, \ctxfm
                        elif len(n) == 2:
                            c_macro.append("m")              # if no option, \ctxfm
                        i = linelength - 1
                    else:
                        c_env.append(cm_a[0].strip("{").strip("}"))          # getting  {abstract}, stripping ( and }
                        c_env.append(1)                      # first argument of environment is mandatory
                        if not option:
                            c_env.append(0)                  # if option, 1; else  0
                        else:
                            c_env.append(1)                  #
                        if len(cm_a) == 3:                   # if no option, there is at lease one font command
                            c_env.append(1)                  # cm_a does not count option
                        elif len(cm_a) == 2:
                            c_env.append(0)
                        e = ctxf.findall(str1[i:])           # next find cw-font in ctxfdef
                        if option:
                            for j in range(0,len(e)):
                                c_env.append(e[j])
                        if not option and len(e)== 1:
                            c_env.append(e[0])
                            c_env.append("m")
                            c_env.append("m")
                        if not option and len(e)== 2:
                            c_env.append(e[0])
                            c_env.append("m")
                            c_env.append(e[1])
                        i = linelength - 1

            elif fullwf.match(str1[i]):                              # unicode : 65281 - 65384
                str1 = f2h(i,str1)                                   # convert fullwidth chars to halfwidth
                continue

# Some python documents:
#
# http://stackoverflow.com/questions/7372974/write-ing-an-encoded-string-in-python-3-x
# https://blog.luminoso.com/2012/08/20/fix-unicode-mistakes-with-python/
# "N{Latin Small Letter Thorn}".encode('latin-1')
#   if this problem solved, then
#   file_tex = open(outputfile, 'w', encoding='utf-8')
# Processing \verb+...+
#

#             if cjk.match(str1[i]) and cjk_cjk == 1 and i == (linelength -2) and linei < (len(lines)-1):
#                     str2 = lines[linei+1]
#                     if cjk.match(str2[0]):
#                         output.write("\z%")

            elif cjk.match(str1[i]):
                c1, charno, kfno = readn2(str1[i])                   # new, kfno_2016_0210
                searchfont(curf,kfno)                                # new 2016_0129
                if cur_verb == 2:                                    # default is R, may be redefined by user
                    y = "ü" + "þ" + curf + c1 + "Q" + "þ" + "cH" + str(charno) + "ý"
                    output.write(y)
                else:
                    if tps[0][0] == 0 and tps[0][1] > 0:
                        pdfstr(str1[i], output)                                      # write texorpdfstring
                    else:
                        y = "{\\" + curf + c1 + "Q" + "\\" + "cH" + str(charno) + "}"
                        if i > 0 and ascii_rq.match(str1[i-1]) and cjk_ascii == 1:
                                y = " " + y

                        if cjk_cjk == 1:                                          # additional spacing adjustments are done below
                            if i > 0 and digit.match(str1[i-1]):
                                y = "\\Z" + y

                            if i > 1 and str1[i-1] == "}" and cjk.match(str1[i-2]):
                                y = "\\z" + y

#                            while cjk.match(str1[i+1]):
                            while i < (linelength -2) and cjk.match(str1[i+1]):
                                y = y + "\\z"
                                c1, charno, kfno = readn2(str1[i+1])
                                searchfont(curf,kfno)
                                y = y + "{\\" + curf + c1 + "Q" + "\\" + "cH" + str(charno) + "}"
                                i = i + 1
                            output.write(y)

                            if i == (linelength -2) and linei < (len(lines)-1):
                                str2 = lines[linei+1]
                                if cjk.match(str2[0]):
                                    output.write("\z%")

                        elif cjk_cjk == 0:
                            while cjk.match(str1[i+1]):
                                c1, charno, kfno = readn2(str1[i+1])
                                searchfont(curf,kfno)
                                y = y + "{\\" + curf + c1 + "Q" + "\\" + "cH" + str(charno) + "}"
                                i = i + 1
                            output.write(y)

            elif digit.match(str1[i]):
                y = str1[i]
                if cjk_cjk == 1 and cur_verb < 2:
                    if i > 0 and cjk.match(str1[i-1]):
                        y = "\\Z" + y
                    if i == (linelength -2) and linei < (len(lines)-1):
                        str2 = lines[linei+1]
                        if cjk.match(str2[0]):
                            y = y + "\Z%"
                output.write(y)

            elif math_symbol.match(str1[i]):   # disable following line 522-533
                msarray = ["-","-","-","--", "---","---","`","\'",",","`","``","\'\'"]  # 7/11 line 514- 542
                strord=ord(str1[i])
                if strord >= 8208 and strord <= 8213:
                    strpos = strord - 8208
                    output.write(msarray[strpos])
                elif strord >= 8216 and strord <= 8221:
                    strpos = strord - 8210
                    output.write(msarray[strpos])
                elif strord == 8231:    #font0 char42  center dot
                    hchar = chr(12539)
                    str2 = list(str1)
                    str2[i] = hchar
                    str1 = ''.join(str2)
                    continue
                elif strord == 8260:    # /
                    output.write("/")
                elif strord == 8270:    # *
                    output.write("*")
                elif strord == 8274:    # \%
                    output.write("%")
                elif strord == 8275:    # \u2053 font0 char24  ~
                    hchar = chr(12316)
                    str2 = list(str1)
                    str2[i] = hchar
                    str1 = ''.join(str2)
                    continue
                elif strord == 8291:    # ,
                    output.write
                elif strord == 9675:    # ○ : unicode 25cb
                    hchar = chr(12295)  # convert to \u3007
                    str2 = list(str1)
                    str2[i] = hchar
                    str1 = ''.join(str2)
                    continue
                else:
                    y = " ~~ "
                    output.write(y)
                    logging.info("\nLine %d (file %s): Character position %d is a symbol."  % (linei+1, file_name[0], i+1))
                    logging.info("You have to use LaTeX command to typeset the symbol.")
                    logging.info("For example, to typeset %s, you should use LaTeX command  \\neq ." % ('\u2260'))
                    logging.info("For now I will replace it with blank space.\n")
                    try:
                        os.system('pause')  #windows, doesn't require enter
                    except whatever_it_is:
                        os.system('read -p "Press any key to continue"') #linux

            elif cjk_comp.match(str1[i]):   # disable following line 522-533
                dvalue = ord(str1[i])- 0xf900
                if dic_cjk_compatible[dvalue] == 0x3000:                       # this is obsolete, we now do conversion
                    logging.info("\nLine %d (file %s): Character position %d is a non-standard coding of cjk char," % (linei+1, file_name[0], i+1))
                    logging.info("it will be replaced by a blank.\n")
                    try:
                        os.system('pause')  #windows, doesn't require enter
                    except whatever_it_is:
                        os.system('read -p "Press any key to continue"') #linux
                rchar=chr(dic_cjk_compatible[dvalue])
                str2 = list(str1)
                str2[i] = rchar
                str1 = ''.join(str2)
                continue

            elif cjk_punc.match(str1[i]):
                c1, charno, kfno=readn2(str1[i])                   # new, kfno_2016_0210
                searchfont(curf,kfno)                              # new 2016_0129
                if cur_verb == 2:                                  # default is R, may be redefined by user
                    y = "ü" + "þ" + curf + c1 + "Q" + "þ" + "cH" + str(charno) + "ý"
                    output.write(y)
                else:
                    if tps[0][0] == 0 and tps[0][1] > 0:           # punctuation could be in texorpdfstring
                        pdfstr(str1[i], output)                    # write texorpdfstring
                    else:    
                        y = "{\\" + curf + c1 + "Q" + "\\" + "cH" + str(charno) + "}"                          
                        if cjk_cjk == 1:                                                                       
                            if str1[i] == "。":                                                                 
                                if i == linelength - 2:                # Chinese period at the end of a line   
                                    y = y + "\\zZ"                     # punc_rq does not include "。"          
                                elif i < linelength - 2:               # Chinese period in the middle of a line
                                    if str1[i+1] == " " or str1[i+1] == "(" or str1[i+1] == "}":               
                                        y = y + "\\zZ"                                                         
                                    elif cjk.match(str1[i+1]) or punc_lq.match(str1[i+1]):                     
                                        y = y + "\\zZ "                                                        
                            elif str1[i] == "、":                                                               
                                if i == linelength - 2:                # Short period at the end of a line     
                                    y = y + "\\zz"                     #                                       
                                elif i < linelength - 2:               # in the middle of a line               
                                    if str1[i+1] == " ":                                                       
                                        y = y + "\\zz"                                                         
                                    elif cjk.match(str1[i+1]) or punc_lq.match(str1[i+1]):                     
                                        y = y + "\\zz "                                                        
                                                                                                               
                            elif punc_rq.match(str1[i]) and str1[i] != "、":                                    
                                if i < linelength - 2:                 # in the middle of a line               
                                    if cjk.match(str1[i+1]) or str1[i+1] == "(":                               
                                        y = y + " "                                                            
                            elif punc_lq.match(str1[i]) and i > 0:                                             
                                if cjk.match(str1[i-1]) or ascii_rq.match(str1[i-1]):                          
                                    y = " " + y                                                                
                        output.write(y)                                                                        

#
# \verb+.+:  any char except * and [space] can be used as delimiter
#
            elif re.match(r'verb', str1[i:]):
                output.write("v")
                if i > 0 and str1[i-1] == "\\":
                    if cur_verb == 0:
                        if str1[i+4] == ("{" or " "):           # some pacackage, eg., biblatex, define \verb{.}
                            output.write("erb")                 #   \verb{.}, \verb[space], and \endverb
                            i = i + 3                           #   do nothing under this situation

                        elif str1[i+4] != "*":                  # cur_verb = 2: we are already in  \verb or verbatim
                            verb_delimiter = str1[i+4]          # cur_verb = 1: finding \verb, but before delimiter
                            output.write("erb")                 # \verb+.+ is different from \section{.}
                            cur_verb = 1                        # so checking c_macro is different
                            if c_macro != []:
                                for k in range(0,len(c_macro)//7):
                                    if re.match(r'(' + '\\' + str(c_macro[k*7]) + r')', str1[i-1:]):
                                        j = len(c_macro[k*7])
                                        if str1[i + j - 1] == str1[i+4]:
                                            cur_macro = k + 1
                            i = i + 3

                        elif str1[i+4] == "*":
                            verb_delimiter = str1[i+5]
                            output.write("erb*")
                            cur_verb = 1
                            if c_macro != []:
                                for k in range(0,len(c_macro)//7):
                                    if re.match(r'(' + '\\' + str(c_macro[k*7]) + r')', str1[i-1:]):
                                        j = len(c_macro[k*7])
                                        if str1[i + j - 1] == str1[i+4]:
                                            cur_macro = k + 1
                            i = i + 4

            elif str1[i] == verb_delimiter:
                output.write(verb_delimiter)
                if cur_verb == 1:
                    bgn.append(verb_delimiter)              # entering \verb, add + to bgn list
                    if cur_macro > 0:
                        fontlist.append([c_macro[7 * (cur_macro - 1) + 5].upper(), 0])
                        curf = fontlist[len(fontlist)-1][0]
                        cur_macro = 0                        # to be revised, need to handle 3 arguments
                    else:
                        fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1
                    cur_verb = 2                            # this whole chunk also applies  verbatim
                elif cur_verb == 2:                         # Already been in \verb+.+, and leaving
                    if bgn[len(bgn)-1] != verb_delimiter:
                        logging.info("\nLine %d (file %s): The second delimiter of \\verb does not match the first in line %d" % (linei+1, file_name[0]))
                        try:
                            os.system('pause')                                #windows, doesn't require enter
                        except whatever_it_is:
                            os.system('read -p "Press any key to continue"')  #linux
                    else:
                        del (bgn[len(bgn)-1])
                        fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                        if fontlist[len(fontlist)-1][1] < 0:                # If [r, 0] becomes [r, -1]
                            del fontlist[-1]                                # so delete from font list
                            curf = fontlist[len(fontlist)-1][0]
                    verb_delimiter = ""                     # reset to nul, added 20160831
                    cur_verb = 0                            # leave verbatim

#
# The order is important.
# \begin{.} and \end{.} have to be placed before {

            elif re.match(r'\\begin', str1[i:]):
                output.write("\\")
                if cur_verb < 2:
                    beg = begin.findall(str1[i:])                                  # beg is a list, the first element is  document
                    if beg:
                        output.write("begin{" + beg[0] + "}")
                        bgn.append(beg[0])                                         # eg, bgn = "abstract"  or  "verbatim"

                        fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1
                        fontlist.append([fontlist[len(fontlist)-1][0], 0])                        # from [M, n] to [M, n+1], [M, 0]

                        if c_env != []:                                             # c_env = [abstract, 1, 0, 0, r, m, m, verbatim, ...]
                            for k in range(0,len(c_env)//7):
                                if re.findall(r'(' + str(c_env[k*7]) + r')', str1[i:]):
                                    cur_env = k + 1
                                    del fontlist[-1]
                                    fontlist.append([c_env[7 * (cur_env - 1) + 4].upper(), 0])    # from [M, n+1], [M, 0] to [M, n+1], [K, 0]
                                    curf = fontlist[len(fontlist)-1][0]

###   What is going on here?
# For document, len(c_env)//7 = 2, so we tried twice and found nothing;
# as a result, fontlist[len(fontlist)-1][1] = 2.
# But what we actually need is: if all tries fail, fontlist[len(fontlist)-1][1] = 1.
#

                        if beg[len(beg)-1] in ['verbatim', 'Verbatim']:                         # Verbatim  from fancyvrb package
#                            i = linelength - 2                                                  # verbatim must be an independent line,
                            cur_verb = 2                                                        # but Verbatim can have [...] options
                            verbatim_name = beg[0]
                            i = i + 7 + len(beg[0])                            
                        else:
                            i = i + 7 + len(beg[0])

            elif re.match(r'\\end', str1[i:]):
                output.write("\\")
                en = end.findall(str1[i:])                     
                                                               
                if en:                                                                          # en[0] = "abstract"  in  .\end{abstract}                       
                    output.write("end{" + en[0] + "}")                                          #   .match returns only yes/no, not match list                  
                    i = i + 5 + len(en[0])                                                       
                    if cur_verb == 2:                                                           # \begin{verbatim} ... \end{verbatim} allows "\begin{verbatim}" 
                       if en[len(en)-1] == verbatim_name:                                       # in the argument, but "\end{verbatim}" not allowed.
                            del (bgn[len(bgn)-1])                                               # so, if \end{verbatim} found, we are about to leave  verbatim environment  
                            fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                            if fontlist[len(fontlist)-1][1] < 0:
                                del fontlist[-1]
                                curf = fontlist[len(fontlist)-1][0]
                            cur_verb = 0                                                        # \end{verbatim}  it is possible \end{verbatim}}   

                    elif cur_verb < 2:
                        if not bgn or (len(bgn) - 1 < 0):                                       # the case of \newenvironment
                            if en[len(en)-1] != "document":                                     
                                logging.info("\nLine %d (file %s): Too many }'s.\n"  % (linei+1, file_name[0]))
                                try:
                                    os.system('pause')                                          # windows, doesn't require [enter]
                                except whatever_it_is:
                                    os.system('read -p "Press any key to continue"')            

                        if en[len(en)-1] == "document":                                         # user may add  \end{document}  in the middle for testing purpose
                            if len(bgn) == 1 and linei < (len(lines)-1):                        # we add a pseudo  document
                                bgn.append("document")                                           
                                fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1
                        else:
                            del (bgn[len(bgn)-1])
                            fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                            if fontlist[len(fontlist)-1][1] < 0:                                # from [M, n+1], [R, 0] to [M, n]
                                del fontlist[-1]
                                fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                                curf = fontlist[len(fontlist)-1][0]
                            if cur_env > 0:                                                    # cur_env = 1, means environment font is defined by \ctxfdef
                                cur_env = 0                                                    # to be revised, need to handle 3 arguments
#                             else:
#                                 i = i + 5 + len(en[0])

            elif str1[i] == "[":                                                               # 2016.10.10, the case of \section[.] is handled elsewhere
                output.write("[")                                                              # it is legitimate to have ] before [, but not to have } before {

#                 if cur_verb < 2:                                                             # similar to {.} -- not ture. Need rewrite this part.
#                     bgn.append("[")                                                          # Only need to count [ and ] when \ctxfdef is defined.
#                     if cur_macro == 0:
#                         fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1
#                         fontlist.append([fontlist[len(fontlist)-1][0], 0])                      # from [M, n] to [M, n+1], [M, 0]
#                     elif cur_macro > 0:
#                         fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1         # from [M, n] to [M, n+1], [R, 0]
#                         fontlist.append([c_macro[7 * (cur_macro - 1) + 4].upper(), 0])
#                         curf = fontlist[len(fontlist)-1][0]

            elif str1[i] == "]":
                output.write("]")
                if cur_verb < 2 and bgn_s == 1:
                    bgn_s = 0
                    fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                    if fontlist[len(fontlist)-1][1] < 0:                 # from [M, n+1], [R, 0] to [M, n]
                        del fontlist[-1]
                        fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                        curf = fontlist[len(fontlist)-1][0]

#                 if cur_verb < 2 and not re.search(r'\\kill\s$', str1[i:]):                        # \kill is used in tabbing env for example line
#                     if bgn[len(bgn)-1] != "[":                                                    # $ means search only "end of the string"
#                         print("\nLine %d (file %s): Too many ]'s.\n"  % (linei+1, file_name[0]))  # \s means any white space
#                         try:
#                             os.system('pause')                                                #windows, doesn't require enter
#                         except whatever_it_is:
#                             os.system('read -p "Press any key to continue"')                  #linux
#
#                     else:
#                         del (bgn[len(bgn)-1])
#                     fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
#                     if fontlist[len(fontlist)-1][1] < 0:                 # from [M, n+1], [R, 0] to [M, n]
#                         del fontlist[-1]
#                         fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
#                         curf = fontlist[len(fontlist)-1][0]


            elif str1[i] == "{":
                output.write("{")
                if cur_verb < 2:
                    if tps[0][1] < 1:                                                    # not in texorpdfstring
                        bgn.append("{")
                        net_br = net_br + 1

                        if cur_macro == 0:
                            fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1
                            fontlist.append([fontlist[len(fontlist)-1][0], 0])                      # from [M, n] to [M, n+1], [M, 0]
                        elif cur_macro > 0:
                            fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1         # from [M, n] to [M, n+1], [R, 0]
                            fontlist.append([c_macro[7 * (cur_macro - 1) + 5].upper(), 0])
                            curf = fontlist[len(fontlist)-1][0]
                            cur_macro = 0                           # to be revised, need to handle 3 arguments

                    if tps[0][0] > 0 and tps[0][1] == 0:            # if tps[[0, 0]] do nothing
                        tps[0][0] = tps[0][0] + 1
                    elif tps[0][0] == 0 and tps[0][1] == -1:
                        tps[0][1] = tps[0][1] + 2                   # from [[0, -1]] to [[0, 1]]
                    elif tps[0][0] == 0 and tps[0][1] > 0:
                        tps[0][1] = tps[0][1] + 1

#            tps = [[0, 0]]  not in texorpdfstring
#            tps = [[1, 0]]  in the first pair of braces of \texorpdfstring, and net number of braces is 1 (#{ - #})
#            tps = [[0, -1]]  leaving the first pair of braces, and preparing to enter the second pair of braces
#            tps = [[0, 1]]  in the second pair of braces, net number of braces is 1 (#{ - #})
#
#            if tps[0][1] > 0: use pdfstr to convert cjk
#

            elif str1[i] == "}":                                         # Check if  "}"  matches with  "{"
                output.write("}")                                        # "\}" does not count as delimiter
                if cur_verb < 2:
                    if tps[0][1] < 1:                                    # not in texorpdfstring

                        if net_br == 0:
                            logging.info("\nLine %d (file %s): Too many }'s.\n"  % (linei+1, file_name[0]))
                            try:
                                os.system('pause')                                #windows, doesn't require enter
                            except whatever_it_is:
                                os.system('read -p "Press any key to continue"')  #linux
                        else:
                            net_br = net_br - 1
                            del (bgn[len(bgn)-1])

#                         if not bgn or len(bgn)-1 < 0:                    # the case of \newenvironment
#                             logging.warning("brackets does not match for {.} in line %d" % (linei+1))
#                         else:
#                             del (bgn[len(bgn)-1])

                        fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                        if fontlist[len(fontlist)-1][1] < 0:                 # from [M, n+1], [R, 0] to [M, n]
                            del fontlist[-1]
                            if len(fontlist) == 0:
#                                print("Too many }'s in line %d" % (linei+1))
                                logging.info("\nLine %d (file %s): Too many }'s.\n"  % (linei+1, file_name[0]))
                                try:
                                    os.system('pause')                                #windows, doesn't require enter
                                except whatever_it_is:
                                    os.system('read -p "Press any key to continue"')  #linux

                        fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] - 1
                        curf = fontlist[len(fontlist)-1][0]

                    if tps[0][0] > 0 and tps[0][1] == 0:         # if tps[[0, 0]] do nothing
                        tps[0][0] = tps[0][0] - 1
                        if tps[0][0] == 0:                       # from [[0, 0]] to [[0, -1]]
                            tps[0][1] = -1                       # just about to enter the second {.}
                    elif tps[0][0] == 0 and tps[0][1] > 0:
                        tps[0][1] = tps[0][1] - 1

            elif texorpdf.match(str1[i:]):
                tps = [[1, 0]]
                if str1[i+1] == "t" and str1[i+2] == "e" and str1[i+3] == "x":
                    output.write("\\texorpdfstring{")
                    i = i + 15
                elif str1[i+1] == "b" and str1[i+2] == "k" and str1[i+3] == "m":
                    output.write("\\bkmark{")
                    i = i + 7

#
#   Since macro checking starts by matching "\\", "\\ctxfx" must be checked first, where "x" is the font name.
#

            elif str1[i] == "\\" and str1[i+1] == "c" and str1[i+2] == "t" and str1[i+3] == "x" and str1[i+4] == "f" and str1[i+5] != "d" and str1[i+6] != "e":
                if cur_verb == 2:
                    output.write("\\")
                elif cur_verb < 2:
                    n = ctxfs.findall(str1[i:])
                    if n:
                        del fontlist[-1]
                        fontlist.append([n[0].rstrip().upper(), 0])              # from [M, n+1], [R, 0] to [M, n+1], [K, 0]
                        curf = fontlist[len(fontlist)-1][0]
                        if i > 1 and cjk_cjk == 1 and cjk.match(str1[i-2]):
                            output.write("\z")                             # insert \z
                        i = i + 5 + len(n[0]) -1
                        while str1[i+1] == " ":
                            i = i + 1
                        if str1[i+1] == "\n":                              # why is this?
                            output.write("%")

#
#   If c_macro is defined:
#   First of all, we need to check that  c_macro != [].
#   c_macro != []  means \ctxfdef exists, so the entries of c_macro exist (7 entries per command):
#     c_macro[0]  is the first command, e.g., \section
#     c_macro[7]  is the second command, e.g., \bfig
#     c_macro = ["\section", 0, 1, 0, "m", "bb", "m", "\bfig", 0, 1, 0, "m", "r", "m"]

            elif str1[i] == "\\":
                y = str1[i]
                if cur_verb < 2:
                    if backslash_char.match(str1[i+1]):                          # typeset special chars, eg., \#
                        y = "\\" + str1[i+1]
                        i = i + 1
                    elif c_macro != []:
                        for k in range(0,len(c_macro)//7):
                            if re.match(r'(' + '\\' + str(c_macro[k*7]) + r')', str1[i:]):
                                j = len(c_macro[k*7])                            # eg., j = 8 for \section[.]{.}
                                if str1[i + j] == "{" or str1[i + j] == "[":
                                    cur_macro = k + 1
                                    y = str(c_macro[k*7])
                                    if cjk_ascii == 1 and i > 0 and cjk.match(str1[i-1]):
                                        y = "\\z" + y
                                    if str1[i + j] == "[":                                                  # { and \begin{.} is handled elsewhere
                                        fontlist[len(fontlist)-1][1] = fontlist[len(fontlist)-1][1] + 1     # from [M, n] to [M, n+1], [R, 0]
                                        fontlist.append([c_macro[7 * (cur_macro - 1) + 4].upper(), 0])
                                        curf = fontlist[len(fontlist)-1][0]
                                        y = y + "["
                                        bgn_s = 1
                                        i = i + 1
                                    i = i + j - 1

                                elif str1[i + j] == "*" and (str1[i + j + 1] == "{" or str1[i + j + 1] == "["):
                                    cur_macro = k + 1
                                    if cjk_ascii == 1 and i > 0 and cjk.match(str1[i-1]):
                                        y = "\\z" + y
                output.write(y)

#
# When we find a macro that is defined in \ctxfdef, cur_macro > 0,
# and at the next "{" we use the font defined in \ctxfdef.
# For \verb+.+ the approach is slightly different: we use verb_delimiter.
#
# Some python documents:
#
# http://stackoverflow.com/questions/196345/how-to-check-if-a-string-in-python-is-in-ascii
# http://stackoverflow.com/questions/447107/what-is-the-difference-between-encode-decode/449281#449281
# http://stackoverflow.com/questions/7372974/write-ing-an-encoded-string-in-python-3-x

            else:
                y = str1[i]
                strord=ord(str1[i])
                if strord > 256:
                    y = "~~"
                    logging.info("\nLine %d (file %s): Character position %d is a symbol."  % (linei+1, file_name[0], i+1))
                    logging.info("You have to use LaTeX command to typeset the symbol.")
                    logging.info("For example, to typeset %s, you should use LaTeX command  \\neq ." % ('\u2260'))
                    logging.info("For now I will replace it with blank space.\n")
                    try:
                        os.system('pause')  #windows, doesn't require enter
                    except whatever_it_is:
                        os.system('read -p "Press any key to continue"') #linux
                output.write(y)

# moved to front
#             if cjk_ascii == 1 and cjk.match(str1[i]) and i == (linelength -2) and linei < (len(lines)-1):
#                     str2 = lines[linei+1]
#                     if cjk.match(str2[0]):
#                         output.write("\z%")

            i += 1
    writefont(fc)

#     file_name = re.findall(r'name=([\'\w\-\.]+)\s', str(source))
    logging.info('%s processed [%s]', file_name[0], str(linei+1))

    source.close()
    output.close()

# end of ctx_tex function


# Default: inputfile has file extension .ctx,
# May need to handle other cases here
# http://stackoverflow.com/questions/541390/extracting-extension-from-filename-in-python

def _pause_for_keypress():
    """Keep the console open until the user acknowledges a message.

    On Windows this runs the shell's ``pause`` command. On other systems it
    waits for Enter, but only when stdin is an interactive terminal, so that
    batch or editor-driven runs are never blocked.
    """
    if os.name == "nt":
        os.system("pause")
        return
    if sys.stdin is not None and sys.stdin.isatty():
        try:
            input("Press Enter to continue")
        except (EOFError, OSError):
            # stdin went away or cannot be read; nothing to wait for.
            pass


def _dedupe_file_lines(path):
    """Rewrite *path* keeping only the first occurrence of each line.

    Line order is preserved and LF line endings are written, matching the
    way xcinput.tex is written in the first place.
    """
    with open(path, "r") as src:
        unique_lines = list(dict.fromkeys(src))
    with open(path, "w", newline="\n") as dst:
        dst.writelines(unique_lines)


def main():
    """Command-line entry point of the cwtex preprocessor.

    Parses the command line, derives the output (.tex) and log (.xlg) names
    from the input file name, writes the preamble of xcinput.tex, configures
    logging to both the log file and the console, and then hands the opened
    files to ctx_tex().

    Raises:
        SystemExit: when the input file has a .tex extension (the output
            would overwrite the input), or when argparse rejects the
            command line.
    """
    import argparse
    parser = argparse.ArgumentParser(description="cwtex preprocessor version 5.1")
    parser.add_argument("infile")

    parser.add_argument("-a", "--append", action="store_true", help="append xcinput.tex")
    parser.add_argument("-c", "--cjk_ascii", action="store_true", help="revise spacing between cjk and ascii")
    parser.add_argument("-l", "--latin", action="store_true", help="encoding option for latin-1")
    parser.add_argument("-x", "--no_xcinput", action="store_true", help="do not insert xcinput.tex")
    parser.add_argument("-z", "--cjk_cjk", action="store_true", help="insert space between cjk's")
    parser.add_argument("-o", "--outputname", help="specify output filename")

    args = parser.parse_args()

    inputfile = args.infile

    # 20180304: output is utf-8 by default; -l switches to latin-1.
    output_encoding = 'latin-1' if args.latin else 'utf-8'

    # Allow the .ctx extension to be omitted on the command line.
    if os.path.isfile(inputfile + ".ctx"):
        inputfile = inputfile + ".ctx"

    # Split off only the final extension, so that dots in directory names
    # ("./doc.ctx", "../my.docs/doc") do not confuse the name derivation.
    basename, extension = os.path.splitext(inputfile)

    if extension == ".tex":
        # No logging configuration is visible in main() before this point, so
        # an INFO record could be dropped; write to stderr instead.
        sys.stderr.write("\nError:\n Your file extension is .tex,\n")
        sys.stderr.write(" Please rename it to .ctx, and try again.\n\n")
        _pause_for_keypress()
        sys.exit()

    outputfile = basename + ".tex"
    xlogfile = basename + ".xlg"

    fontlist_ini = [["M", 0]]

    # 2019.7.7
    # If the output name has the extension .bbl, we are in the middle of
    # processing biblatex (the input is a temporary file such as
    # tmp21-tmp.bbl).  In that case look into the original .ctx file, named
    # after the output file, for
    #
    #  \newcommand{\bibcwfont}{r}
    #
    # and if it is present start with fontlist_ini = [["R", 0]].
    # For now, only \ctxfr is supported.
    if args.outputname:
        outputfile = args.outputname
        out_base, out_ext = os.path.splitext(outputfile)
        if out_ext == ".bbl":      ### processing cwbiblatex
            ctx_file = out_base + ".ctx"
            # A missing .ctx file is treated like a file without the
            # declaration: the default font is kept.
            if os.path.isfile(ctx_file):
                with open(ctx_file, 'rb') as ctx_handle:
                    ctx_bytes = ctx_handle.read()
                if re.search(br'\\newcommand{\\bibcwfont}{r}', ctx_bytes):
                    fontlist_ini = [["R", 0]]

    # Flags are passed on to ctx_tex() as 0/1 integers.
    xcinput_add = 1 if args.no_xcinput else 0
    cjk_ascii = 1 if args.cjk_ascii else 0
    cjk_cjk = 0 if args.cjk_cjk else 1      # -z switches cjk_cjk off
    xcinput_mode = 'a' if args.append else 'w'

    # ctx_tex() closes the source and output files itself; the with-block
    # additionally guarantees that all three files are closed on any error.
    with open(inputfile, encoding='utf-8') as file_ctx, \
            open(outputfile, 'w', encoding=output_encoding) as file_tex, \
            open('xcinput.tex', xcinput_mode, newline="\n") as file_xcinput:

        file_xcinput.write('\\catcode+252=1 \\catcode+253=2 \\catcode+254=0 \\catcode+251=4\n')
        file_xcinput.write('\\providecommand{\\cH}{\\char}\n')
        if cjk_cjk == 1:
            file_xcinput.write('\\providecommand{\\z}{\\hskip 0.0pt plus0.2pt minus0.1pt}\n')
            file_xcinput.write('\\providecommand{\\zz}{\\hskip 0.6pt plus0.2pt minus0.1pt\\ignorespaces}\n')
            file_xcinput.write('\\providecommand{\\Z}{\\hskip 1.2pt plus0.4pt minus0.2pt}\n')
            file_xcinput.write('\\providecommand{\\zZ}{\\hskip 3.6pt plus1.2pt minus0.8pt}\n')

        # set up logging to file
        logging.basicConfig(
            filename=xlogfile,
            level=logging.INFO,
            format='%(message)s',
            datefmt='%H:%M:%S'
        )

        # set up logging to console, with a bare message format
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.setFormatter(logging.Formatter('%(message)s'))

        # add the handler to the root logger
        logging.getLogger('').addHandler(console)

        logging.info('This is cwtex5, version 5.1 beta')
        logging.info('Copyright (c) Tsong-Min Wu and Tsong-Huey Wu')

        ctx_tex(file_ctx, file_tex, fontlist_ini, [], 0, 0, 0, [], [], [], "", "",
                xcinput_add, "", 0, file_xcinput, cjk_ascii, cjk_cjk)

        logging.info("Done.\n")
        logging.info("%f seconds" % (time.time() - start_time))

    # In append mode the same definitions may have been written several
    # times; keep the first occurrence of each line.
    if args.append:
        _dedupe_file_lines('xcinput.tex')
# Script entry point: main() runs unconditionally at module level, so it is
# also executed if this file is imported.
# NOTE(review): consider an `if __name__ == "__main__":` guard once it is
# confirmed that nothing relies on the import-time run.
main()


# Reminder for ctx_tex function parameters:
#
#   source, output, fontlist, biblist, cur_macro, cur_env, cur_verb, bgn, c_env, c_macro, beg, en, filelevel, flist_str, flist_level, fc, cjk_ascii
#   (main() passes one more trailing argument after cjk_ascii: cjk_cjk)
#
# initial values (fc is the file handle opened on xcinput.tex)
#   file_ctx, file_tex, [["M", 0]], [],  0    , 0      , 0,        [],  [],    [],      "",  "", 0, "", 0, xcinput.tex, 0


