Formulaires/Rules.py

212 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2016 Nathanaël Restori
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import argparse
import re
# Command line interface: one or more input files are required. argparse opens
# each of them for reading while the arguments are parsed.
parser = argparse.ArgumentParser()
parser.add_argument(
    'infile',
    type=argparse.FileType('r'),
    nargs='+',
)
args = parser.parse_args()
#TODO: list forbidden characters (|)
# \\\!|\\\;|\\\:|\\\,
# hphantom ?, hspace ?
# e^
# cte, text{, }, t.q.
# Rules applied to the whole file content before it is split into math and
# text parts.
rules_bef = [
    # Put a space before every $ that is not at the beginning of a line ...
    {
        'symbol': r'(?<!^)\$',
        '+<': ' ',
    },
    # ... and a space after every $ that is not at the end of a line.
    {
        'symbol': r'\$(?!$)',
        '+>': ' ',
    },
    # Exception: no space between an opening "( " or "{ " and the $ ...
    {
        'symbol': r'(?<=\( |{ )\$',
        '-<': r' ',
    },
    # ... and no space between the $ and a closing " )" or " }".
    {
        'symbol': r'\$(?= \)| })',
        '->': r' ',
    },
    # No space after \text
    {
        'symbol': r'\\text',
        '->': r' ',
    },
]
# Rules applied to the math parts of a file ($...$), outside of \text{...}.
# The rules are grouped by shape; the resulting list keeps the order in which
# they have to be applied.
rules_math = (
    # A single space on both sides.
    [
        {'symbol': operator, '+<': ' ', '+>': ' '}
        for operator in (r'=', r'\\cdot', r'\\quad')
    ]
    # Arrows are surrounded by a \quad on both sides.
    + [
        {'symbol': arrow, '+<': '\\quad ', '+>': ' \\quad'}
        for arrow in (
            r'\\leftrightarrow',
            r'\\Leftrightarrow',
            r'\\Leftarrow',
            r'\\Rightarrow',
        )
    ]
    # Standard functions: prefix with a backslash, follow with a space.
    # Trigonometric functions should additionally be followed by \left
    # (optionally after an exponent), otherwise a warning is printed.
    + [
        {
            'symbol': trig,
            '+<': '\\',
            '+>': ' ',
            'w!>': r'h? ?(?:\^(?:{.*}|.))? \\left',
        }
        for trig in ('(arc)?sinh?', '(arc)?cosh?', '(arc)?tanh?')
    ]
    # min/max, unless written directly between braces.
    + [
        {'symbol': extremum, '+<': '\\', '+>': ' '}
        for extremum in ('(?<!{)min(?!})', '(?<!{)max(?!})')
    ]
    # Logarithms should be followed by \left (optionally after a subscript)
    # or by \abs, otherwise a warning is printed.
    + [
        {
            'symbol': logarithm,
            '+<': '\\',
            '+>': ' ',
            'w!>': r' ?(?:_(?:{.*}|.))? \\left| \\abs',
        }
        for logarithm in ('ln', 'log')
    ]
    # lim, but not \limits.
    + [
        {'symbol': 'lim(?!its)', '+<': '\\', '+>': ' '},
    ]
    # Delimiters: a space after, and a warning when neither \left nor \right
    # comes right before.
    + [
        {'symbol': delimiter, '+>': ' ', 'w!<': r'\\left|right'}
        for delimiter in (
            r'\(',
            r'\[',
            r'\\{',
            r'\\langle',
            r'\)',
            r'\]',
            r'\\}',
            r'\\rangle',
            r'\\\|',
            r'(?<!\\)\|',
        )
    ]
    # Space before \left or \right but not after.
    + [
        {'symbol': sizing, '+<': ' ', '->': r' '}
        for sizing in (r'\\left', r'\\right')
    ]
    # No space before ^, _ and !
    + [
        {'symbol': attached, '-<': r' '}
        for attached in (r'\^', r'_', r'!')
    ]
    # No space after { and none before } (the escaped braces \{ and \} are
    # left alone).
    + [
        {'symbol': r'(?<!\\){', '->': r' '},
        {'symbol': r'(?<!\\)}', '-<': r' '},
    ]
)
# Rules applied to the parts of a file that are outside of math mode.
rules_text = [
    # Use a non-breaking space (~) before ":".
    # Open question from the original author: also add a space?
    {
        'symbol': r':',
        '+<': '~',
    },
    # No regular space on either side of a non-breaking space.
    {
        'symbol': r'~',
        '-<': r' ',
        '->': r' ',
    },
]
# Rules applied to the whole file content once the math and text parts have
# been processed and joined again.
rules_end = [
    # Correct spacing around punctuation: no space before a comma, one after.
    {
        'symbol': r',',
        '-<': r' ',
        '+>': ' ',
    },
    # No space before a semicolon. No space is added after it, because that
    # causes problems in intervals such as [a;b].
    {
        'symbol': r';',
        '-<': r' ',
    },
    # Remove trailing whitespace at the end of every line.
    {
        'symbol': r'$',
        '-<': r'[ \t]*',
    },
]
# {} after ^ and _ ?
# \text{, } vs something else ?
# Ensure no cdot after partial frac ( frac{\partial U}{\partial \phi} \cdot)
def apply_rules(text, rules, source_name=None):
    """Apply a list of spacing rules to ``text`` and return the result.

    Every rule is a dict holding a ``'symbol'`` regular expression plus any of
    the optional keys below, which are processed in this order:

    * ``'+<'`` / ``'+>'``: literal string that has to stand right before /
      after the symbol; it is inserted where it is missing.
    * ``'-<'`` / ``'->'``: regular expression that is removed when it stands
      right before / after the symbol.
    * ``'w!<'`` / ``'w!>'``: regular expression expected right before / after
      the symbol. The text is not changed; a warning is printed when the
      symbol occurs without it (only the first occurrence is reported).

    All patterns are used with ``re.MULTILINE | re.DOTALL | re.UNICODE``.

    ``source_name`` is the file name shown in warnings. When it is omitted the
    name of the module-level ``file_current`` (set by the main loop) is used,
    or ``'<unknown>'`` if no such global exists.
    """
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    def reported_name():
        # Resolved lazily: only warnings need a name, so plain substitutions
        # work even when no file is being processed.
        if source_name is not None:
            return source_name
        return getattr(globals().get('file_current'), 'name', '<unknown>')

    for rule in rules:
        symbol = rule.get('symbol')

        # Insert the required text before the symbol. An already present
        # copy is consumed by the optional group, so it is never doubled.
        required_before = rule.get('+<')
        if required_before:
            pattern = r'(?:' + re.escape(required_before) + r')?(' + symbol + r')'
            text = re.sub(
                pattern,
                lambda m, prefix=required_before: prefix + m.group(1),
                text,
                flags=flags,
            )

        # Insert the required text after the symbol.
        required_after = rule.get('+>')
        if required_after:
            pattern = r'(' + symbol + r')(?:' + re.escape(required_after) + r')?'
            text = re.sub(
                pattern,
                lambda m, suffix=required_after: m.group(1) + suffix,
                text,
                flags=flags,
            )

        # Remove unwanted text before the symbol.
        unwanted_before = rule.get('-<')
        if unwanted_before:
            pattern = r'(?:' + unwanted_before + r')(' + symbol + r')'
            text = re.sub(pattern, lambda m: m.group(1), text, flags=flags)

        # Remove unwanted text after the symbol.
        unwanted_after = rule.get('->')
        if unwanted_after:
            pattern = r'(' + symbol + r')(?:' + unwanted_after + r')'
            text = re.sub(pattern, lambda m: m.group(1), text, flags=flags)

        # Warn when the symbol is not preceded by the expected text.
        expected_before = rule.get('w!<')
        if expected_before:
            pattern = r'(?<!' + expected_before + r')(' + symbol + r')'
            # TODO: use findall to report every occurrence, not only the first.
            if re.search(pattern, text, flags=flags):
                print("In file " + reported_name() + ": missing " + expected_before + " before " + symbol + " (regex: " + pattern + ")")

        # Warn when the symbol is not followed by the expected text.
        expected_after = rule.get('w!>')
        if expected_after:
            pattern = r'(' + symbol + r')(?!' + expected_after + r')'
            # TODO: use findall to report every occurrence, not only the first.
            found = re.search(pattern, text, flags=flags)
            if found:
                print("In file " + reported_name() + ": missing " + expected_after + " after " + symbol + " (regex: " + pattern + ")")
                # Print the whole text, then the match with several amounts
                # of surrounding context. Slice starts are clamped at 0: a
                # negative start would count from the end of the text and
                # show an unrelated (or empty) excerpt.
                begin, end = found.start(), found.end()
                print(text)
                print(text[max(0, begin - 250):end + 250])
                print(text[begin:end])
                print(text[max(0, begin - 1):end + 1])
                print(text[max(0, begin - 10):end + 10])
    return text
# Whole-file substitutions performed after the rule passes, in this order.
# Each entry is (pattern, replacement) and is applied with re.MULTILINE.
final_substitutions = (
    # \left\| x \right\|  ->  \norm{x}
    (r'\\left\\\| (.*?) \\right\\\|', r'\\norm{\1}'),
    # \left| x \right|  ->  \abs{x}
    (r'\\left\| (.*?) \\right\|', r'\\abs{\1}'),
    # \left< x \right>  ->  \left\langle x \right\rangle
    # (the replacement used to end with a stray "}" that unbalanced braces)
    (r'\\left< (.*?) \\right>', r'\\left\\langle \1 \\right\\rangle'),
    # ...  ->  \dots
    (r'\.\.\.', r'\\dots'),
    # Ensure no newline command (\newline or \\) at the end of a cell
    (r' \\newline\n&', r'\n&'),
    (r' \\\\\n&', r'\n&'),
    (r' \\newline\n\\\\', r'\n\\\\'),
    (r' \\\\\n\\\\', r'\n\\\\'),
    (r' \\\\\n( *)\\end\{tabu\}', r'\n\1\\end{tabu}'),
    # Remove spaces between \\ and \hline
    (r'\\\\ +\\hline', r'\\\\\\hline'),
)

# Reformat every input file in place. A file whose content does not change is
# not rewritten.
for file_current in args.infile:
    file_content = file_current.read()
    # Release the read handle now; the same path may be reopened for writing
    # below. The name stays available on the closed file object.
    file_current.close()
    file_original = file_content

    # TODO: add other cases (\$ for example)
    # Disabled check for $ in comments (an odd number of $ inside a comment
    # would throw off the math/text split below):
    # if re.search(r'%.*\$', file_content, flags=re.MULTILINE | re.UNICODE):
    #     print("Warning, file " + file_current.name + " contain $ in comments, ignoring file")
    #     continue

    file_content = apply_rules(file_content, rules_bef)

    # Split the file content into normal parts and math parts. Because the
    # pattern is a capturing group, the $...$ parts end up at the odd indices
    # and the surrounding text at the even ones.
    splited = re.split(r'(\$.*?\$)', file_content, flags=re.MULTILINE | re.DOTALL | re.UNICODE)
    for i in range(1, len(splited), 2):
        # Split each math part again so that \text{...} groups (odd indices)
        # are left untouched by the math rules.
        splited_b = re.split(r'(\\text{.*?})', splited[i], flags=re.MULTILINE | re.DOTALL | re.UNICODE)
        for j in range(0, len(splited_b), 2):
            splited_b[j] = apply_rules(splited_b[j], rules_math)
        splited[i] = ''.join(splited_b)
    for i in range(0, len(splited), 2):
        splited[i] = apply_rules(splited[i], rules_text)
    file_content = ''.join(splited)

    file_content = apply_rules(file_content, rules_end)
    for pattern, replacement in final_substitutions:
        file_content = re.sub(pattern, replacement, file_content, flags=re.MULTILINE)
    # The substitutions above may leave punctuation spacing or trailing
    # whitespace behind, so the final rules are run a second time.
    file_content = apply_rules(file_content, rules_end)

    # Save only if needed
    if file_original == file_content:
        print("File untouched: " + file_current.name)
    else:
        print("File modified: " + file_current.name)
        # Mode "w" already truncates the file, so no seek/truncate is needed.
        with open(file_current.name, "w") as f:
            f.write(file_content)