Saturday, May 31, 2025

JSON Schema to Excel Metadata with PK infor as well

 import json

import pandas as pd


def flatten_schema(schema, group="", parent_key=""):

    """Recursively flatten the schema to extract column metadata."""

    rows = []

    properties = schema.get("properties", {})

    required = schema.get("required", [])


    for prop, details in properties.items():

        full_key = f"{parent_key}.{prop}" if parent_key else prop

        entry = {

            "Column Name": full_key,

            "Type": details.get("type", ""),

            "Title": details.get("title", ""),

            "Description": details.get("description", ""),

            "MaxLength": details.get("maxLength", ""),

            "Enum": ", ".join(details.get("enum", [])) if "enum" in details else "",

            "Group": group,

            "Primary Key": "Yes" if details.get("primaryKey", False) else "No"

        }


        if details.get("type") == "object":

            nested_group = full_key

            rows += flatten_schema(details, group=nested_group, parent_key=full_key)

        elif details.get("type") == "array" and "items" in details and details["items"].get("type") == "object":

            nested_group = full_key

            rows += flatten_schema(details["items"], group=nested_group, parent_key=full_key)

        else:

            rows.append(entry)


    return rows


def json_schema_to_excel(schema_path, output_excel="schema_metadata.xlsx"):

    with open(schema_path, "r") as f:

        schema = json.load(f)


    rows = flatten_schema(schema)

    df = pd.DataFrame(rows)

    df.to_excel(output_excel, index=False)

    print(f"✅ Metadata written to Excel: {output_excel}")


if __name__ == "__main__":

    json_schema_to_excel("Output_Schema.json")


No comments:

Post a Comment