Validate Office XML
Validate Office document XML files against XSD schemas and tracked changes
Source Code
#!/usr/bin/env python3
"""
Command line tool to validate Office document XML files against XSD schemas and tracked changes.

Usage:
    python validate.py <dir> --original <original_file> [-v]
"""
# Descriptor for this script: identity, human-readable summary, and the
# command line arguments it declares.
metadata = {
    # Identity and summary
    "id": "code:document.ooxml.validate",
    "name": "Validate Office XML",
    "description": "Validate Office document XML files against XSD schemas and tracked changes",
    "language": "python",
    # No extra packages are declared for this script.
    "packages": [],
    # Declared command line arguments, in positional order.
    "args": [
        {
            "name": "unpacked_dir",
            "type": "string",
            "description": "Path to unpacked Office document directory",
            "position": 0,
        },
        {
            "name": "--original",
            "type": "string",
            "description": "Path to original file (.docx/.pptx/.xlsx)",
            "position": 1,
        },
    ],
}
import argparse
import sys
from pathlib import Path
# Validation module bullets (resolved to sibling files in scripts/ at runtime)
# code:document.validation.init, code:document.validation.base,
# code:document.validation.docx, code:document.validation.pptx, code:document.validation.redlining
from validation import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
def main():
parser = argparse.ArgumentParser(description="Validate Office document XML files")
parser.add_argument(
"unpacked_dir",
help="Path to unpacked Office document directory",
)
parser.add_argument(
"--original",
required=True,
help="Path to original file (.docx/.pptx/.xlsx)",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="Enable verbose output",
)
args = parser.parse_args()
# Validate paths
unpacked_dir = Path(args.unpacked_dir)
original_file = Path(args.original)
file_extension = original_file.suffix.lower()
assert unpacked_dir.is_dir(), f"Error: {unpacked_dir} is not a directory"
assert original_file.is_file(), f"Error: {original_file} is not a file"
assert file_extension in [".docx", ".pptx", ".xlsx"], (
f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
)
# Run validations
match file_extension:
case ".docx":
validators = [DOCXSchemaValidator, RedliningValidator]
case ".pptx":
validators = [PPTXSchemaValidator]
case _:
print(f"Error: Validation not supported for file type {file_extension}")
sys.exit(1)
# Run validators
success = True
for V in validators:
validator = V(unpacked_dir, original_file, verbose=args.verbose)
if not validator.validate():
success = False
if success:
print("All validations PASSED!")
sys.exit(0 if success else 1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()