import pandas as pd
import json
from collections import defaultdict
def excel_to_json_schema_with_primary_key(excel_path, output_schema_path="Output_Schema.json"):
df = pd.read_excel(excel_path)
# Fill NaNs
df = df.fillna("")
# Normalize keys
def normalize(s):
return s.strip() if isinstance(s, str) else s
df.columns = [normalize(col) for col in df.columns]
root = {
"type": "object",
"properties": {},
"required": []
}
nested_objects = defaultdict(dict)
for _, row in df.iterrows():
column_name = row.get("Column Name", "").strip()
if not column_name:
continue
parts = column_name.split(".")
group = row.get("Group", "").strip()
current = root
props = current["properties"]
# Traverse/create nested groups
for i, part in enumerate(parts):
if i == len(parts) - 1:
# Final leaf
schema_prop = {
"type": row.get("Type", "").strip().lower()
}
if row.get("Title"):
schema_prop["title"] = row["Title"]
if row.get("Description"):
schema_prop["description"] = row["Description"]
if row.get("MaxLength") and schema_prop["type"] == "string":
schema_prop["maxLength"] = int(row["MaxLength"])
if row.get("Enum"):
schema_prop["enum"] = [e.strip() for e in str(row["Enum"]).split(",")]
if str(row.get("Primary Key", "")).strip().lower() == "yes":
schema_prop["primaryKey"] = True
# Add required field
if str(row.get("Required", "")).strip().lower() == "yes":
if "required" not in current:
current["required"] = []
current["required"].append(part)
props[part] = schema_prop
else:
# Intermediate nested object
if part not in props:
props[part] = {
"type": "object",
"properties": {}
}
current = props[part]
props = current["properties"]
with open(output_schema_path, "w") as f:
json.dump(root, f, indent=2)
print(f"✅ JSON Schema written to {output_schema_path}")
if __name__ == "__main__":
    # Example invocation: convert the default metadata workbook in the
    # working directory. (Indentation restored — the pasted source had the
    # call outside the guard, which is a syntax/behavior error.)
    excel_to_json_schema_with_primary_key("schema_metadata.xlsx")
# NOTE: removed trailing blog-page footer text ("No comments:" / "Post a
# Comment") — scraping residue, not part of the script, and invalid Python.