212 lines
13 KiB
Python
Executable File
212 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# MIT License
|
|
#
|
|
# Copyright (c) 2016 Nathanaël Restori
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in all
|
|
# copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
# SOFTWARE.
|
|
|
|
import argparse
|
|
import re
|
|
|
|
# Command-line interface: one or more input files. argparse opens each of
# them for reading; the main loop later reads the handle and rewrites the
# file found at the handle's ``name``.
parser = argparse.ArgumentParser(
    description='Normalize spacing and common macros in LaTeX files, in place.',
)
parser.add_argument(
    'infile',
    nargs='+',
    type=argparse.FileType('r'),
    help='LaTeX file(s) to check and rewrite in place',
)
args = parser.parse_args()
|
|
|
|
#TODO: list forbidden characters (|)
|
|
# \\\!|\\\;|\\\:|\\\,
|
|
# hphantom ?, hspace ?
|
|
# e^
|
|
# cte, text{, }, t.q.
|
|
|
|
# Rules applied to the whole file before it is split into math and text parts.
#
# Every rule is a dict with a 'symbol' regex plus optional action keys:
#   '+<'  literal string that must be present right before the symbol
#   '+>'  literal string that must be present right after the symbol
#   '-<'  regex removed when found right before the symbol
#   '->'  regex removed when found right after the symbol
#   'w!<' regex whose absence before the symbol triggers a warning
#   'w!>' regex whose absence after the symbol triggers a warning
rules_bef = [
    # Add space before and after $ unless at the beginning or the end of a
    # line, after a { or a ( and before a } or a )
    {'symbol': r'(?<!^)\$', '+<': ' '},
    {'symbol': r'\$(?!$)', '+>': ' '},
    {'symbol': r'(?<=\( |{ )\$', '-<': r' '},
    {'symbol': r'\$(?= \)| })', '->': r' '},
    # No space after \text
    {'symbol': r'\\text', '->': r' '},
]
|
|
|
|
# Rules applied inside math parts ($...$), outside of \text{...} groups.
#
# Every rule is a dict with a 'symbol' regex plus optional action keys:
#   '+<'  literal string that must be present right before the symbol
#   '+>'  literal string that must be present right after the symbol
#   '-<'  regex removed when found right before the symbol
#   '->'  regex removed when found right after the symbol
#   'w!<' regex whose absence before the symbol triggers a warning
#   'w!>' regex whose absence after the symbol triggers a warning
rules_math = [
    # Add space around
    {'symbol': r'=', '+<': ' ', '+>': ' '},
    {'symbol': r'\\cdot', '+<': ' ', '+>': ' '},
    {'symbol': r'\\quad', '+<': ' ', '+>': ' '},
    {'symbol': r'\\leftrightarrow', '+<': '\\quad ', '+>': ' \\quad'},
    {'symbol': r'\\Leftrightarrow', '+<': '\\quad ', '+>': ' \\quad'},
    {'symbol': r'\\Leftarrow', '+<': '\\quad ', '+>': ' \\quad'},
    {'symbol': r'\\Rightarrow', '+<': '\\quad ', '+>': ' \\quad'},
    # Standard functions
    {'symbol': '(arc)?sinh?', '+<': '\\', '+>': ' ', 'w!>': r'h? ?(?:\^(?:{.*}|.))? \\left'},
    {'symbol': '(arc)?cosh?', '+<': '\\', '+>': ' ', 'w!>': r'h? ?(?:\^(?:{.*}|.))? \\left'},
    {'symbol': '(arc)?tanh?', '+<': '\\', '+>': ' ', 'w!>': r'h? ?(?:\^(?:{.*}|.))? \\left'},
    {'symbol': '(?<!{)min(?!})', '+<': '\\', '+>': ' '},
    {'symbol': '(?<!{)max(?!})', '+<': '\\', '+>': ' '},
    {'symbol': 'ln', '+<': '\\', '+>': ' ', 'w!>': r' ?(?:_(?:{.*}|.))? \\left| \\abs'},
    {'symbol': 'log', '+<': '\\', '+>': ' ', 'w!>': r' ?(?:_(?:{.*}|.))? \\left| \\abs'},
    {'symbol': 'lim(?!its)', '+<': '\\', '+>': ' '},
    # \left or \right before delimiter and space after.
    # The 'w!<' value ends up inside a negative lookbehind, which needs
    # fixed-width alternatives: '\\left' and 'right' both match five
    # characters, and 'right' matches the tail of '\right'.
    {'symbol': r'\(', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\[', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\\{', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\\langle', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\)', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\]', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\\}', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\\rangle', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'\\\|', '+>': ' ', 'w!<': r'\\left|right'},
    {'symbol': r'(?<!\\)\|', '+>': ' ', 'w!<': r'\\left|right'},
    # Space before \left or \right but not after
    {'symbol': r'\\left', '+<': ' ', '->': r' '},
    {'symbol': r'\\right', '+<': ' ', '->': r' '},
    # No space before ^, _ and !
    {'symbol': r'\^', '-<': r' '},
    {'symbol': r'_', '-<': r' '},
    {'symbol': r'!', '-<': r' '},
    # No space after { and before } (but keep after \{ and after \})
    {'symbol': r'(?<!\\){', '->': r' '},
    {'symbol': r'(?<!\\)}', '-<': r' '},
]
|
|
|
|
# Rules applied to the text parts of the file (everything outside $...$).
# '+<' is a literal string enforced before the symbol; '-<' and '->' are
# regexes removed before and after the symbol.
rules_text = [
    # Use non-breaking space before :
    {'symbol': r':', '+<': '~'},  # Add space ?
    {'symbol': r'~', '-<': r' ', '->': r' '},
]
|
|
|
|
# Rules applied to the whole file once math and text parts are joined again.
# '+>' is a literal string enforced after the symbol; '-<' is a regex removed
# before the symbol.
rules_end = [
    # Correct spacing around punctuation.
    {'symbol': r',', '+>': ' ', '-<': r' '},
    {'symbol': r';', '-<': r' '},  # Do not add space, cause problems in [a;b]
    # Remove trailing whitespaces
    {'symbol': r'$', '-<': r'[ \t]*'},
]
|
|
|
|
# {} after ^ and _ ?
|
|
# \text{, } vs something else ?
|
|
# Ensure no cdot after partial frac ( frac{\partial U}{\partial \phi} \cdot)
|
|
|
|
def _warning_source(source_name):
    """Return the file label used in warnings.

    Falls back to the ``name`` of the module-level ``file_current`` handle
    (set by the main loop) and finally to ``'<unknown>'`` so that the
    function also works when called on its own.
    """
    if source_name is not None:
        return str(source_name)
    return getattr(globals().get('file_current'), 'name', '<unknown>')


def _match_context(text, match, radius=40):
    """Return the text around ``match``, clamped to the bounds of ``text``."""
    first = max(0, match.start() - radius)
    last = min(len(text), match.end() + radius)
    return text[first:last]


def apply_rules(text, rules, source_name=None):
    """Apply formatting rules to ``text`` and return the rewritten text.

    Each rule is a dict holding a ``'symbol'`` regex and any of the optional
    keys below, handled in this order for every rule:

    * ``'+<'``: literal string enforced right before the symbol.
    * ``'+>'``: literal string enforced right after the symbol.
    * ``'-<'``: regex removed when it occurs right before the symbol.
    * ``'->'``: regex removed when it occurs right after the symbol.
    * ``'w!<'``: regex expected before the symbol; a warning is printed for
      the first occurrence of the symbol that is not preceded by it.
    * ``'w!>'``: regex expected after the symbol; a warning is printed for
      the first occurrence of the symbol that is not followed by it.

    Keys with an empty or missing value are skipped. The warning rules never
    modify the text.

    Args:
        text: the string to process.
        rules: iterable of rule dicts as described above.
        source_name: optional file label for warnings. When omitted, the
            name of the module-level ``file_current`` handle is used if it
            exists, otherwise ``'<unknown>'``.

    Returns:
        The processed string.
    """
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    for rule in rules:
        symbol = rule.get('symbol')

        prefix = rule.get('+<')
        if prefix:
            # Swallow an already present prefix so the rule is idempotent.
            regex = r'(?:' + re.escape(prefix) + r')?(' + symbol + r')'
            # A callable replacement avoids escaping backslashes by hand.
            text = re.sub(regex, lambda m, p=prefix: p + m.group(1), text, flags=flags)

        suffix = rule.get('+>')
        if suffix:
            regex = r'(' + symbol + r')(?:' + re.escape(suffix) + r')?'
            text = re.sub(regex, lambda m, p=suffix: m.group(1) + p, text, flags=flags)

        strip_before = rule.get('-<')
        if strip_before:
            regex = r'(?:' + strip_before + r')(' + symbol + r')'
            text = re.sub(regex, lambda m: m.group(1), text, flags=flags)

        strip_after = rule.get('->')
        if strip_after:
            regex = r'(' + symbol + r')(?:' + strip_after + r')'
            text = re.sub(regex, lambda m: m.group(1), text, flags=flags)

        want_before = rule.get('w!<')
        if want_before:
            regex = r'(?<!' + want_before + r')(' + symbol + r')'
            # Only the first offending occurrence is reported.
            result = re.search(regex, text, flags=flags)
            if result:
                print("In file " + _warning_source(source_name) + ": missing " + want_before + " before " + symbol + " (regex: " + regex + ")")
                print("    near: " + repr(_match_context(text, result)))

        want_after = rule.get('w!>')
        if want_after:
            regex = r'(' + symbol + r')(?!' + want_after + r')'
            # Only the first offending occurrence is reported.
            result = re.search(regex, text, flags=flags)
            if result:
                print("In file " + _warning_source(source_name) + ": missing " + want_after + " after " + symbol + " (regex: " + regex + ")")
                print("    near: " + repr(_match_context(text, result)))

    return text
|
|
|
|
def _format_latex(content):
    """Run every formatting pass over ``content`` and return the result."""
    split_flags = re.MULTILINE | re.DOTALL | re.UNICODE

    #TODO: add other cases (\$ for example)
    # NOTE: a comment containing an odd number of $ shifts the math/text
    # split below; such files are currently not detected.

    content = apply_rules(content, rules_bef)

    # The capturing group makes re.split keep the delimiters: odd indices
    # hold the $...$ math parts, even indices hold the surrounding text.
    chunks = re.split(r'(\$.*?\$)', content, flags=split_flags)

    for i in range(1, len(chunks), 2):
        # Inside a math part, odd indices are \text{...} groups; the math
        # rules are only applied to the pieces around them.
        pieces = re.split(r'(\\text{.*?})', chunks[i], flags=split_flags)
        for j in range(0, len(pieces), 2):
            pieces[j] = apply_rules(pieces[j], rules_math)
        chunks[i] = ''.join(pieces)

    for i in range(0, len(chunks), 2):
        chunks[i] = apply_rules(chunks[i], rules_text)

    content = ''.join(chunks)

    content = apply_rules(content, rules_end)

    # Replace explicit delimiter pairs with macros. \| must be handled
    # before | so that norms are not turned into absolute values.
    content = re.sub(r'\\left\\\| (.*?) \\right\\\|', r'\\norm{\1}', content, flags=re.MULTILINE)
    content = re.sub(r'\\left\| (.*?) \\right\|', r'\\abs{\1}', content, flags=re.MULTILINE)
    # No closing brace here: the replacement opens none.
    content = re.sub(r'\\left< (.*?) \\right>', r'\\left\\langle \1 \\right\\rangle', content, flags=re.MULTILINE)

    content = re.sub(r'\.\.\.', r'\\dots', content, flags=re.MULTILINE)

    # Ensure no newline at the end of a cell
    content = re.sub(r' \\newline\n&', r'\n&', content, flags=re.MULTILINE)
    content = re.sub(r' \\\\\n&', r'\n&', content, flags=re.MULTILINE)
    content = re.sub(r' \\newline\n\\\\', r'\n\\\\', content, flags=re.MULTILINE)
    content = re.sub(r' \\\\\n\\\\', r'\n\\\\', content, flags=re.MULTILINE)
    content = re.sub(r' \\\\\n( *)\\end\{tabu\}', r'\n\1\\end{tabu}', content, flags=re.MULTILINE)

    # Remove spaces between \\ and \hline
    content = re.sub(r'\\\\ +\\hline', r'\\\\\\hline', content, flags=re.MULTILINE)

    # The substitutions above may leave new trailing whitespace behind.
    content = apply_rules(content, rules_end)

    return content


# ``file_current`` stays a module-level name: apply_rules reads it to label
# its warnings.
for file_current in args.infile:
    # Close the handle opened by argparse before the file is rewritten.
    with file_current:
        file_original = file_current.read()

    file_content = _format_latex(file_original)

    # Save only if needed
    if file_original == file_content:
        print("File untouched: " + file_current.name)
    else:
        print("File modified: " + file_current.name)
        # Mode "w" already truncates the file; no seek/truncate needed.
        with open(file_current.name, "w") as f:
            f.write(file_content)
|
|
|