Saturday, May 31, 2025

Excel to JSON Schema with Primary Key Indicator

 import pandas as pd

import json

from collections import defaultdict


def excel_to_json_schema_with_primary_key(excel_path, output_schema_path="Output_Schema.json"):

    df = pd.read_excel(excel_path)


    # Fill NaNs

    df = df.fillna("")


    # Normalize keys

    def normalize(s):

        return s.strip() if isinstance(s, str) else s


    df.columns = [normalize(col) for col in df.columns]


    root = {

        "type": "object",

        "properties": {},

        "required": []

    }


    nested_objects = defaultdict(dict)


    for _, row in df.iterrows():

        column_name = row.get("Column Name", "").strip()

        if not column_name:

            continue


        parts = column_name.split(".")

        group = row.get("Group", "").strip()

        current = root

        props = current["properties"]


        # Traverse/create nested groups

        for i, part in enumerate(parts):

            if i == len(parts) - 1:

                # Final leaf

                schema_prop = {

                    "type": row.get("Type", "").strip().lower()

                }


                if row.get("Title"):

                    schema_prop["title"] = row["Title"]


                if row.get("Description"):

                    schema_prop["description"] = row["Description"]


                if row.get("MaxLength") and schema_prop["type"] == "string":

                    schema_prop["maxLength"] = int(row["MaxLength"])


                if row.get("Enum"):

                    schema_prop["enum"] = [e.strip() for e in str(row["Enum"]).split(",")]


                if str(row.get("Primary Key", "")).strip().lower() == "yes":

                    schema_prop["primaryKey"] = True


                # Add required field

                if str(row.get("Required", "")).strip().lower() == "yes":

                    if "required" not in current:

                        current["required"] = []

                    current["required"].append(part)


                props[part] = schema_prop


            else:

                # Intermediate nested object

                if part not in props:

                    props[part] = {

                        "type": "object",

                        "properties": {}

                    }

                current = props[part]

                props = current["properties"]


    with open(output_schema_path, "w") as f:

        json.dump(root, f, indent=2)


    print(f"✅ JSON Schema written to {output_schema_path}")


if __name__ == "__main__":

    excel_to_json_schema_with_primary_key("schema_metadata.xlsx")


No comments:

Post a Comment