Python: Extract All Custom Code Into a Documentation File

This is some Python code that reads through a PHPMaker .pmp file, extracts all of the custom server/client code in the project, and documents it in Markdown. I placed it in a file called “extract_custom_scripts.py”. Usage:

python extract_custom_scripts.py MyPHPMakerProj.pmp

You will get a “MyPHPMakerProj_custom_scripts.md” file as a result:

import html
import os
import re
import sys
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape

# Matches one attribute (name="value" or name='value') inside an opening tag.
_ATTRIBUTE_RE = re.compile(r'([\w:.-]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')')

# Matches a whole CustomScript element. Attributes are consumed as quoted
# name/value pairs so a '>' inside a value cannot end the tag early, and the
# self-closing form is recognised so its (empty) body cannot swallow the next
# element's opening tag.
_CUSTOM_SCRIPT_RE = re.compile(
    r'<CustomScript\b'
    r'(?P<attrs>(?:\s+[\w:.-]+\s*=\s*(?:"[^"]*"|\'[^\']*\'))*)\s*'
    r'(?:/>|>(?P<body>.*?)</CustomScript\s*>)',
    re.DOTALL,
)

_CDATA_OPEN = "<![CDATA["
_CDATA_CLOSE = "]]>"


def _parse_attributes(attr_text):
    """Return a dict of attribute name -> entity-decoded value for one tag."""
    attributes = {}
    for match in _ATTRIBUTE_RE.finditer(attr_text):
        name, double_quoted, single_quoted = match.groups()
        raw_value = double_quoted if double_quoted is not None else single_quoted
        # Keep the first occurrence, mirroring a left-to-right search.
        attributes.setdefault(name, html.unescape(raw_value))
    return attributes


def _clean_script_body(raw_body):
    """Strip CDATA markers, or decode XML entities when the body is plain text."""
    body = raw_body.strip()
    if body.startswith(_CDATA_OPEN):
        body = body[len(_CDATA_OPEN):]
        if body.endswith(_CDATA_CLOSE):
            body = body[:-len(_CDATA_CLOSE)]
        # CDATA content is literal; it must not be entity-decoded.
        return body
    return html.unescape(body)


def _code_fence(code):
    """Return a backtick fence longer than any backtick run inside *code*."""
    longest_run = max((len(run) for run in re.findall(r"`+", code)), default=0)
    return "`" * max(3, longest_run + 1)


def extract_custom_scripts(phpmaker_file, output_file):
    """Extract every CustomScript element of a project file into Markdown.

    The project file is scanned textually for ``<CustomScript ...>`` elements
    (including the self-closing form). For each one, a numbered section is
    written to *output_file* listing the ScriptType, ScriptCodeType and
    ScriptName attributes ("N/A" when absent), the optional ScriptCtrlID and
    ScriptL1Key attributes when present, and the script body inside a fenced
    code block so Markdown renderers show it verbatim.

    Args:
        phpmaker_file: Path of the UTF-8 encoded project file to read.
        output_file: Path of the Markdown file to create or overwrite.

    Returns:
        True when at least one element was found and the documentation was
        written; False when no element was found (no file is written) or
        when reading or writing failed. Failures are reported on stdout.
    """
    try:
        with open(phpmaker_file, 'r', encoding='utf-8') as file:
            content = file.read()
    except (OSError, UnicodeError) as e:
        print(f"Error: {e}")
        return False

    matches = list(_CUSTOM_SCRIPT_RE.finditer(content))
    if not matches:
        print(f"No CustomScript elements found in {phpmaker_file}")
        return False

    # Build the whole document first so the output is written in one call.
    parts = [f"# Custom Scripts from {os.path.basename(phpmaker_file)}\n\n"]
    for index, match in enumerate(matches, 1):
        attributes = _parse_attributes(match.group("attrs"))
        # A self-closing element has no body group at all.
        code = _clean_script_body(match.group("body") or "")

        script_name = attributes.get("ScriptName", "N/A")
        parts.append(f"## {index}. {script_name}\n\n")
        parts.append(f"- **Script Type:** {attributes.get('ScriptType', 'N/A')}\n")
        parts.append(
            f"- **Script Code Type:** {attributes.get('ScriptCodeType', 'N/A')}\n"
        )
        parts.append(f"- **Script Name:** {script_name}\n")

        # Only include these attributes if they're present
        ctrl_id = attributes.get("ScriptCtrlID")
        if ctrl_id is not None:
            parts.append(f"- **Script Ctrl ID:** {ctrl_id}\n")
        l1_key = attributes.get("ScriptL1Key")
        if l1_key is not None:
            parts.append(f"- **Script L1 Key:** {l1_key}\n")

        fence = _code_fence(code)
        parts.append("\n### Code\n\n")
        parts.append(f"{fence}\n{code}")
        if not code.endswith("\n"):
            parts.append("\n")
        parts.append(f"{fence}\n\n------------\n\n")

    try:
        with open(output_file, 'w', encoding='utf-8') as out:
            out.write("".join(parts))
    except (OSError, UnicodeError) as e:
        print(f"Error: {e}")
        return False

    print(f"Successfully extracted {len(matches)} CustomScript elements to {output_file}")
    return True

def main():
    """Command-line entry point: document the project file named in argv.

    Expects exactly one argument, the project file path. The Markdown
    output path is the input path with its extension replaced by
    "_custom_scripts.md". Exits with status 1 on a usage error, a missing
    input file, or a failed extraction.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python extract_custom_scripts.py <phpmaker_file>")
        sys.exit(1)

    (source_path,) = cli_args
    stem, _extension = os.path.splitext(source_path)
    target_path = stem + "_custom_scripts.md"

    if not os.path.exists(source_path):
        print(f"Error: File '{source_path}' not found.")
        sys.exit(1)

    # Guard clause: bail out on failure, report success otherwise.
    if not extract_custom_scripts(source_path, target_path):
        print("Failed to create documentation file.")
        sys.exit(1)

    print(f"Documentation file created: {target_path}")

# Run the command-line entry point only when this file is executed as a
# script, so importing the module does not trigger argument parsing.
if __name__ == "__main__":
    main()
2 Likes