#!/usr/bin/env python3

# MIT License
#
# Copyright (c) 2016 Nathanaël Restori
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Normalise spacing and a few notations in the LaTeX files given as arguments.

Each file is read, rewritten in memory by a sequence of regex rule tables
(``rules_bef``, ``rules_math``, ``rules_text``, ``rules_end``) plus a list of
fixed substitutions, and written back only when the content changed.

A rule is a dict with a mandatory ``'symbol'`` regex and optional actions:

* ``'+<'`` / ``'+>'``: literal text that must be present before / after the
  symbol (inserted when missing).
* ``'-<'`` / ``'->'``: regex that is removed before / after the symbol.
* ``'w!<'`` / ``'w!>'``: regex that is expected before / after the symbol; a
  warning is printed when it is missing (the text is not modified).
"""

import argparse
import re

#TODO: list forbidden characters (|)
# \\\!|\\\;|\\\:|\\\,
# hphantom ?, hspace ?
# e^
# cte, text{, }, t.q.

# Flags shared by every rule-driven regex.
_RULE_FLAGS = re.MULTILINE | re.DOTALL | re.UNICODE

rules_bef = [
    # Add space before and after $ unless at the beginning or the end of a line, after a { or a ( and before a } or a )
    # NOTE(review): the two patterns below were truncated in the reviewed
    # source (only "(?" survived); they are rebuilt from the comment above.
    # Please confirm against the upstream file.
    {'symbol': r'(?<!^)(?<![{(])\$', '+<': ' ', },
    {'symbol': r'\$(?!$)(?![})])', '+>': ' ', },
    {'symbol': r'(?<=\( |{ )\$', '-<': r' ', },
    {'symbol': r'\$(?= \)| })', '->': r' ', },

    # No space after \text
    {'symbol': r'\\text', '->': r' ', },
]

rules_math = [
    # Add space around
    {'symbol': r'=', '+<': ' ', '+>': ' ', },
    {'symbol': r'\\cdot', '+<': ' ', '+>': ' ', },
    {'symbol': r'\\quad', '+<': ' ', '+>': ' ', },
    {'symbol': r'\\leftrightarrow', '+<': '\\quad ', '+>': ' \\quad', },
    {'symbol': r'\\Leftrightarrow', '+<': '\\quad ', '+>': ' \\quad', },
    {'symbol': r'\\Leftarrow', '+<': '\\quad ', '+>': ' \\quad', },
    {'symbol': r'\\Rightarrow', '+<': '\\quad ', '+>': ' \\quad', },

    # Standard functions
    {'symbol': '(arc)?sinh?', '+<': '\\', '+>': ' ', 'w!>': r'h? ?(?:\^(?:{.*}|.))? \\left',},
    {'symbol': '(arc)?cosh?', '+<': '\\', '+>': ' ', 'w!>': r'h? ?(?:\^(?:{.*}|.))? \\left',},
    {'symbol': '(arc)?tanh?', '+<': '\\', '+>': ' ', 'w!>': r'h? ?(?:\^(?:{.*}|.))? \\left',},
    # NOTE(review): two more function rules stood here in the reviewed source
    # but their patterns were truncated beyond recovery (only "(?" survived).
    # Restore them from the upstream file.
    {'symbol': 'ln', '+<': '\\', '+>': ' ', 'w!>': r' ?(?:_(?:{.*}|.))? \\left| \\abs',},
    {'symbol': 'log', '+<': '\\', '+>': ' ', 'w!>': r' ?(?:_(?:{.*}|.))? \\left| \\abs',},
    {'symbol': 'lim(?!its)', '+<': '\\', '+>': ' ', },

    # \left or \right before delimiter and space after
    # ("right" is written without its backslash so that both lookbehind
    # alternatives have the same width, as Python's re module requires.)
    {'symbol': r'\(', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\[', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\\{', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\\langle', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\)', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\]', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\\}', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\\rangle', '+>': ' ', 'w!<': r'\\left|right', },
    {'symbol': r'\\\|', '+>': ' ', 'w!<': r'\\left|right', },
    # NOTE(review): pattern truncated in the reviewed source; presumably the
    # bare "|" delimiter (not preceded by a backslash) -- confirm.
    {'symbol': r'(?<!\\)\|', '+>': ' ', 'w!<': r'\\left|right', },

    # Space before \left or \right but not after
    {'symbol': r'\\left', '+<': ' ', '->': r' ', },
    {'symbol': r'\\right', '+<': ' ', '->': r' ', },

    # No space before ^, _ and !
    {'symbol': r'\^', '-<': r' ', },
    {'symbol': r'_', '-<': r' ', },
    {'symbol': r'!', '-<': r' ', },

    # No space after { and before } (but keep after \{ and after \})
    # NOTE(review): both patterns were truncated in the reviewed source and
    # are rebuilt from the comment above -- confirm.
    {'symbol': r'(?<!\\){', '->': r' ', },
    {'symbol': r'(?<!\\)}', '-<': r' ', },
]

# NOTE(review): rules_text is used for the non-math parts of each file but its
# definition was not present in the reviewed source (it appears to have been
# lost together with the truncated patterns). It is defined empty here so the
# script runs; restore the real rules from the upstream file.
rules_text = []

rules_end = [
    # Correct spacing around punctuation.
    {'symbol': r',', '+>': ' ', '-<': r' ', },
    {'symbol': r';', '-<': r' ', },  # Do not add space, cause problems in [a;b]

    # Remove trailing whitespaces
    {'symbol': r'$', '-<': r'[ \t]*', },
]

# {} after ^ and _ ?
# \text{, } vs something else ?
# Ensure no cdot after partial frac ( frac{\partial U}{\partial \phi} \cdot)

# Fixed (pattern, replacement) substitutions applied to the whole file after
# the rule tables, in this order.
_CLEANUPS = [
    (r'\\left\\\| (.*?) \\right\\\|', r'\\norm{\1}'),
    (r'\\left\| (.*?) \\right\|', r'\\abs{\1}'),
    # The replacement used to end with a stray "}" that unbalanced the braces.
    (r'\\left< (.*?) \\right>', r'\\left\\langle \1 \\right\\rangle'),
    (r'\.\.\.', r'\\dots'),
    # Ensure no newline at the end of a cell
    (r' \\newline\n&', r'\n&'),
    (r' \\\\\n&', r'\n&'),
    (r' \\newline\n\\\\', r'\n\\\\'),
    (r' \\\\\n\\\\', r'\n\\\\'),
    (r' \\\\\n( *)\\end\{tabu\}', r'\n\1\\end{tabu}'),
    # Remove spaces between \\ and \hline
    (r'\\\\ +\\hline', r'\\\\\\hline'),
]


def _report_missing(text, match, expected, position, symbol, regex, source_name):
    """Print a warning and some context for a symbol lacking its neighbour."""
    print("In file " + str(source_name) + ": missing " + expected + " "
          + position + " " + symbol + " (regex: " + regex + ")")
    # Print what's around match
    print(text)
    start, end = match.start(), match.end()
    for margin in (250, 0, 1, 10):
        # Clamp the lower bound: a negative start would wrap around to the
        # end of the text and print an empty or unrelated slice.
        print(text[max(0, start - margin):end + margin])


def apply_rules(text, rules, source_name='<input>'):
    """Apply every rule of *rules* to *text*, in order, and return the result.

    Args:
        text: The text to rewrite.
        rules: Iterable of rule dicts (see the module docstring).
        source_name: Name shown in ``w!<`` / ``w!>`` warnings. Optional, so
            existing two-argument calls keep working.

    Returns:
        The rewritten text. ``w!<`` / ``w!>`` only print warnings (first
        offending match per rule) and never modify the text.
    """
    for rule in rules:
        symbol = rule['symbol']

        add_before = rule.get('+<')
        if add_before:
            # Swallow an already-present prefix so the rule is idempotent.
            regex = r'(?:' + re.escape(add_before) + r')?(' + symbol + r')'
            subst = add_before.replace('\\', '\\\\') + r'\1'
            text = re.sub(regex, subst, text, flags=_RULE_FLAGS)

        add_after = rule.get('+>')
        if add_after:
            regex = r'(' + symbol + r')(?:' + re.escape(add_after) + r')?'
            subst = r'\1' + add_after.replace('\\', '\\\\')
            text = re.sub(regex, subst, text, flags=_RULE_FLAGS)

        remove_before = rule.get('-<')
        if remove_before:
            regex = r'(?:' + remove_before + r')(' + symbol + r')'
            text = re.sub(regex, r'\1', text, flags=_RULE_FLAGS)

        remove_after = rule.get('->')
        if remove_after:
            regex = r'(' + symbol + r')(?:' + remove_after + r')'
            text = re.sub(regex, r'\1', text, flags=_RULE_FLAGS)

        want_before = rule.get('w!<')
        if want_before:
            # NOTE(review): this branch was truncated in the reviewed source
            # after "regex = r'(?"; it is rebuilt as the mirror image of the
            # 'w!>' branch below -- confirm against the upstream file.
            regex = r'(?<!' + want_before + r')(' + symbol + r')'
            result = re.search(regex, text, flags=_RULE_FLAGS)
            if result:
                _report_missing(text, result, want_before, "before",
                                symbol, regex, source_name)

        want_after = rule.get('w!>')
        if want_after:
            regex = r'(' + symbol + r')(?!' + want_after + r')'
            # use findall
            result = re.search(regex, text, flags=_RULE_FLAGS)
            if result:
                _report_missing(text, result, want_after, "after",
                                symbol, regex, source_name)

    return text


def apply_cleanups(text):
    """Apply the fixed substitutions of ``_CLEANUPS`` to *text*, in order."""
    for pattern, replacement in _CLEANUPS:
        text = re.sub(pattern, replacement, text, flags=re.MULTILINE)
    return text


def format_content(file_content, source_name='<input>'):
    """Return *file_content* with all rule tables and cleanups applied.

    The text is split on ``$...$``; odd chunks are math, even chunks are
    normal text. Inside math, ``\\text{...}`` groups are left untouched.
    """
    #TODO: add other cases (\$ for example)
    ## Check for $ in comments (we will have troubles if a comment contain an odd number of $)

    file_content = apply_rules(file_content, rules_bef, source_name)

    # Split file content in math parts (odd indexes) and normal parts (even).
    chunks = re.split(r'(\$.*?\$)', file_content, flags=_RULE_FLAGS)
    for i in range(1, len(chunks), 2):
        # Split the math part in \text{...} groups (odd) and real math (even).
        pieces = re.split(r'(\\text{.*?})', chunks[i], flags=_RULE_FLAGS)
        for j in range(0, len(pieces), 2):
            pieces[j] = apply_rules(pieces[j], rules_math, source_name)
        chunks[i] = ''.join(pieces)
    for i in range(0, len(chunks), 2):
        chunks[i] = apply_rules(chunks[i], rules_text, source_name)
    file_content = ''.join(chunks)

    file_content = apply_rules(file_content, rules_end, source_name)
    file_content = apply_cleanups(file_content)
    # The cleanups may leave new trailing whitespace; run rules_end again.
    file_content = apply_rules(file_content, rules_end, source_name)
    return file_content


def main(argv=None):
    """Format every file named on the command line, rewriting changed ones."""
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', nargs='+', type=argparse.FileType('r'))
    args = parser.parse_args(argv)

    for file_current in args.infile:
        # Close the read handle before the same path is reopened for writing.
        with file_current:
            file_original = file_current.read()

        file_content = format_content(file_original, file_current.name)

        # Save only if needed
        if file_original == file_content:
            print("File untouched: " + file_current.name)
        else:
            print("File modified: " + file_current.name)
            # Mode "w" already truncates the file.
            with open(file_current.name, "w") as f:
                f.write(file_content)


if __name__ == "__main__":
    main()