Friday, June 20, 2025

CI/CD

# Pipeline stages, run in the order listed. The key sits at column 0 so it is
# a sibling of the other top-level keys (default, variables, the jobs).
stages:
  # NOTE(review): no job visible in this file is assigned to `prepare`.
  - prepare
  - generate_json
  - test_json
  - data_dictionary
  - metadata
  - finalize


# Settings inherited by every job that does not override them.
default:
  # Container image the jobs run in.
  image: "python:3.11"
  # Commands run before each job's own `script`.
  before_script:
    - pip install -r requirements.txt
    # Ensure the output directory used by the artifact paths exists.
    - mkdir -p artifacts


# File locations shared by the jobs below, relative to the project checkout.
variables:
  # Files under data/ that the scripts are given to read.
  EXCEL_INPUT: 'data/source_metadata.xlsx'
  SAMPLE_JSON: 'data/sample_data.json'
  # Files under artifacts/ that the jobs write and publish.
  OUTPUT_SCHEMA: 'artifacts/generated_schema.json'
  DATA_DICTIONARY: 'artifacts/data_dictionary.xlsx'
  STTM_OUTPUT: 'artifacts/sttm.xlsx'
  CODE_COLUMNS_OUTPUT: 'artifacts/code_columns.xlsx'


# Step 1: Convert Excel to JSON Schema
generate_schema:
  stage: generate_json
  script:
    # Expansions are quoted so an overridden path containing spaces or glob
    # characters still reaches the script as a single argument.
    - python scripts/excel_to_json_schema.py "$EXCEL_INPUT" "$OUTPUT_SCHEMA"
  artifacts:
    paths:
      # Published so the jobs that list generate_schema under `dependencies`
      # can download the schema.
      - $OUTPUT_SCHEMA


# Step 2: Validate sample data against generated JSON Schema
test_schema_with_data:
  stage: test_json
  script:
    # Expansions are quoted so an overridden path containing spaces or glob
    # characters still reaches the script as a single argument.
    - python scripts/validate_json_sample.py "$OUTPUT_SCHEMA" "$SAMPLE_JSON"
  # Download artifacts only from the job that produced the schema.
  dependencies:
    - generate_schema


# Step 3: Generate Data Dictionary from JSON Schema
generate_data_dictionary:
  stage: data_dictionary
  script:
    # Expansions are quoted so an overridden path containing spaces or glob
    # characters still reaches the script as a single argument.
    - python scripts/json_to_data_dictionary.py "$OUTPUT_SCHEMA" "$DATA_DICTIONARY"
  # Download artifacts only from the job that produced the schema.
  dependencies:
    - generate_schema
  artifacts:
    paths:
      # Published for the jobs that list generate_data_dictionary under
      # `dependencies`.
      - $DATA_DICTIONARY


# Step 4: Generate STTM from Data Dictionary
generate_sttm:
  stage: metadata
  script:
    # Expansions are quoted so an overridden path containing spaces or glob
    # characters still reaches the script as a single argument.
    - python scripts/data_dictionary_to_sttm.py "$DATA_DICTIONARY" "$STTM_OUTPUT"
  # Download artifacts only from the job that produced the data dictionary.
  dependencies:
    - generate_data_dictionary
  artifacts:
    paths:
      - $STTM_OUTPUT


# Step 5: Identify code-value columns
identify_code_columns:
  stage: finalize
  script:
    # Expansions are quoted so an overridden path containing spaces or glob
    # characters still reaches the script as a single argument.
    - python scripts/identify_code_columns.py "$DATA_DICTIONARY" "$CODE_COLUMNS_OUTPUT"
  # Download artifacts only from the job that produced the data dictionary.
  dependencies:
    - generate_data_dictionary
  artifacts:
    paths:
      - $CODE_COLUMNS_OUTPUT


No comments:

Post a Comment