diff --git a/src/agentready/assessors/__init__.py b/src/agentready/assessors/__init__.py index d68ea971..6d0124c5 100644 --- a/src/agentready/assessors/__init__.py +++ b/src/agentready/assessors/__init__.py @@ -13,6 +13,12 @@ TypeAnnotationsAssessor, ) from .containers import ContainerSetupAssessor +from .dbt import ( + DbtDataTestsAssessor, + DbtModelDocumentationAssessor, + DbtProjectConfigAssessor, + DbtProjectStructureAssessor, +) from .documentation import ( ArchitectureDecisionsAssessor, CLAUDEmdAssessor, @@ -47,7 +53,7 @@ def create_all_assessors() -> list[BaseAssessor]: - """Create all 25 assessors for assessment. + """Create all assessors for assessment. Centralized factory function to eliminate duplication across CLI commands. Returns all implemented and stub assessors. @@ -56,14 +62,16 @@ def create_all_assessors() -> list[BaseAssessor]: List of all assessor instances """ assessors = [ - # Tier 1 Essential (6 assessors - up from 5) + # Tier 1 Essential (10 assessors - up from 6) CLAUDEmdAssessor(), READMEAssessor(), TypeAnnotationsAssessor(), StandardLayoutAssessor(), DependencyPinningAssessor(), # Renamed from LockFilesAssessor DependencySecurityAssessor(), # NEW: Merged dependency_freshness + security_scanning - # Tier 2 Critical (10 assessors - 7 implemented, 3 stubs) + DbtProjectConfigAssessor(), # NEW: dbt project configuration + DbtModelDocumentationAssessor(), # NEW: dbt model documentation + # Tier 2 Critical (12 assessors - up from 10) TestCoverageAssessor(), PreCommitHooksAssessor(), ConventionalCommitsAssessor(), @@ -74,6 +82,8 @@ def create_all_assessors() -> list[BaseAssessor]: ConciseDocumentationAssessor(), InlineDocumentationAssessor(), CyclomaticComplexityAssessor(), # Actually Tier 3, but including here + DbtDataTestsAssessor(), # NEW: dbt data tests + DbtProjectStructureAssessor(), # NEW: dbt project structure # Tier 3 Important (7 implemented) ArchitectureDecisionsAssessor(), IssuePRTemplatesAssessor(), diff --git 
a/src/agentready/assessors/dbt.py b/src/agentready/assessors/dbt.py new file mode 100644 index 00000000..a319378c --- /dev/null +++ b/src/agentready/assessors/dbt.py @@ -0,0 +1,779 @@ +"""dbt (data build tool) SQL repository assessors. + +Evaluates dbt projects against core best practices for AI-assisted development. +Covers project configuration, documentation, testing, and structure. +""" + +from pathlib import Path + +import yaml + +from ..models.attribute import Attribute +from ..models.finding import Citation, Finding, Remediation +from ..models.repository import Repository +from .base import BaseAssessor + +# ============================================================================ +# Shared Utility Functions +# ============================================================================ + + +def _is_dbt_project(repository: Repository) -> bool: + """Check if repository is a dbt project. + + Args: + repository: Repository entity + + Returns: + True if dbt_project.yml exists at repository root + """ + return (repository.path / "dbt_project.yml").exists() + + +def _find_yaml_files(directory: Path, pattern: str = "*.yml") -> list[Path]: + """Find YAML files matching pattern recursively. + + Args: + directory: Directory to search + pattern: Glob pattern (default: "*.yml") + + Returns: + List of matching .yml and .yaml file paths + """ + yml_files = list(directory.rglob(pattern)) + yaml_files = list(directory.rglob(pattern.replace("yml", "yaml"))) + return yml_files + yaml_files + + +def _parse_yaml_safe(path: Path) -> dict: + """Parse YAML file with error handling. 
class DbtProjectConfigAssessor(BaseAssessor):
    """Assesses dbt_project.yml configuration validity.

    Tier 1 Essential (10% weight) - Without valid configuration, dbt won't run.
    """

    # Keys that must be present with a truthy value for the check to pass.
    _REQUIRED_FIELDS = ("name", "config-version", "profile")

    # Example shown in the remediation; kept as lines so the YAML indentation
    # is explicit and independent of the surrounding Python indentation.
    _EXAMPLE_CONFIG = "\n".join(
        [
            "# Minimal valid dbt_project.yml",
            "name: 'my_dbt_project'",
            "config-version: 2",
            "profile: 'default'",
            "",
            'model-paths: ["models"]',
            'analysis-paths: ["analyses"]',
            'test-paths: ["tests"]',
            'seed-paths: ["seeds"]',
            'macro-paths: ["macros"]',
            "",
            "# Reproducibility (recommended)",
            'require-dbt-version: ">=1.0.0"',
            "",
            "# Model configuration",
            "models:",
            "  my_dbt_project:",
            "    materialized: view",
        ]
    )

    @property
    def attribute_id(self) -> str:
        return "dbt_project_config"

    @property
    def tier(self) -> int:
        return 1  # Essential

    @property
    def attribute(self) -> Attribute:
        return Attribute(
            id=self.attribute_id,
            name="dbt Project Configuration",
            category="dbt SQL Projects",
            tier=self.tier,
            description="Valid dbt_project.yml with required fields",
            criteria="dbt_project.yml exists with name, config-version, profile",
            default_weight=0.10,
        )

    def is_applicable(self, repository: Repository) -> bool:
        """Applicable only to dbt projects."""
        return _is_dbt_project(repository)

    def assess(self, repository: Repository) -> Finding:
        """Check for valid dbt_project.yml configuration.

        Pass criteria:
        - dbt_project.yml exists at repository root
        - It parses to a non-empty YAML mapping
        - Required fields are present and truthy: name, config-version, profile

        model-paths and require-dbt-version are optional; when present they
        are only echoed in the evidence and never affect the score.

        Scoring: Binary (100 if valid, 0 if missing/invalid). Unparseable
        content is reported through ``Finding.error``.

        Args:
            repository: Repository entity whose ``path`` is the project root.

        Returns:
            Finding describing the configuration state.
        """
        dbt_project_path = repository.path / "dbt_project.yml"

        # Check file exists
        if not dbt_project_path.exists():
            return Finding(
                attribute=self.attribute,
                status="fail",
                score=0.0,
                measured_value="missing",
                threshold="valid dbt_project.yml",
                evidence=["dbt_project.yml not found at repository root"],
                remediation=self._create_remediation(),
                error_message=None,
            )

        config = _parse_yaml_safe(dbt_project_path)

        # The parser hands back whatever the YAML top level is; a non-empty
        # list or scalar has no .get(), so reject it before any field lookup.
        if not isinstance(config, dict):
            return Finding.error(
                self.attribute,
                reason=(
                    "Could not parse dbt_project.yml "
                    "(top level is not a YAML mapping)"
                ),
            )

        # Empty dict: the parser's result for unreadable or empty content.
        if not config:
            return Finding.error(
                self.attribute,
                reason="Could not parse dbt_project.yml (invalid YAML syntax)",
            )

        missing_fields = [
            field for field in self._REQUIRED_FIELDS if not config.get(field)
        ]

        if missing_fields:
            return Finding(
                attribute=self.attribute,
                status="fail",
                score=0.0,
                measured_value=f"missing fields: {', '.join(missing_fields)}",
                threshold="all required fields present",
                evidence=[
                    "dbt_project.yml found but missing required fields:",
                    *[f" - {field}: ✗" for field in missing_fields],
                ],
                remediation=self._create_remediation(),
                error_message=None,
            )

        # Valid configuration
        evidence = [
            "dbt_project.yml found with all required fields:",
            f" - name: {config.get('name')}",
            f" - config-version: {config.get('config-version')}",
            f" - profile: {config.get('profile')}",
        ]

        # Optional but recommended fields are reported only when set.
        model_paths = config.get("model-paths")
        if model_paths:
            evidence.append(f" - model-paths: {model_paths}")

        required_dbt_version = config.get("require-dbt-version")
        if required_dbt_version:
            evidence.append(
                f" - require-dbt-version: {required_dbt_version} (reproducibility)"
            )

        return Finding(
            attribute=self.attribute,
            status="pass",
            score=100.0,
            measured_value="valid configuration",
            threshold="all required fields present",
            evidence=evidence,
            remediation=None,
            error_message=None,
        )

    def _create_remediation(self) -> Remediation:
        """Create remediation guidance for invalid dbt_project.yml."""
        return Remediation(
            summary="Create valid dbt_project.yml with required fields",
            steps=[
                "Create dbt_project.yml in repository root",
                "Add required fields: name, config-version, profile",
                "Configure model-paths (recommended: ['models'])",
                "Add require-dbt-version for reproducibility",
                "Run 'dbt debug' to validate configuration",
            ],
            tools=["dbt-core"],
            commands=[
                "dbt init # Create new dbt project",
                "dbt debug # Validate dbt_project.yml configuration",
            ],
            examples=[self._EXAMPLE_CONFIG],
            citations=[
                Citation(
                    source="dbt Labs Documentation",
                    title="dbt_project.yml Reference",
                    url="https://docs.getdbt.com/reference/dbt_project.yml",
                    relevance="Official dbt configuration reference",
                )
            ],
        )
def assess(self, repository: Repository) -> Finding:
    """Check dbt model documentation coverage.

    A model counts as documented when a YAML file under models/ lists it
    under ``models:`` with a ``description`` that is a non-empty string and
    is not placeholder text. Only entries whose ``name`` matches the stem of
    a ``.sql`` file under models/ are counted, so stale YAML entries cannot
    push coverage above 100%.

    A description is treated as a placeholder when it starts with a marker
    word (todo / tbd / fixme) or consists solely of placeholder words (for
    example "Description" or "placeholder"). Real sentences that merely
    contain one of those words are accepted.

    Scoring: coverage percentage is passed to ``calculate_proportional_score``
    with a threshold of 80; the finding passes when that score is >= 75.

    Args:
        repository: Repository entity whose ``path`` is the project root.

    Returns:
        Finding with coverage evidence; not-applicable when models/ contains
        no SQL files.
    """
    models_dir = repository.path / "models"

    if not models_dir.exists():
        return Finding(
            attribute=self.attribute,
            status="fail",
            score=0.0,
            measured_value="no models/ directory",
            threshold="≥80% models documented",
            evidence=["models/ directory not found"],
            remediation=self._create_remediation(),
            error_message=None,
        )

    # Count total SQL models
    sql_files = list(models_dir.rglob("*.sql"))
    total_models = len(sql_files)

    if total_models == 0:
        return Finding.not_applicable(
            self.attribute, reason="No SQL models found in models/"
        )

    # YAML entries are matched against the SQL file stems found above.
    known_model_names = {sql_file.stem for sql_file in sql_files}

    # Any .yml/.yaml file in models/ may carry model properties
    # (schema.yml, _models.yml, or one file per model).
    schema_files = _find_yaml_files(models_dir, "*.yml")

    placeholder_texts = {"todo", "tbd", "fixme", "placeholder", "description"}
    marker_words = {"todo", "tbd", "fixme"}

    def is_meaningful(description: object) -> bool:
        # YAML yields None for `description:` with no value, and may yield
        # non-string scalars; neither is documentation.
        if not isinstance(description, str):
            return False
        words = [
            word.strip(".,:;!?-()[]") for word in description.lower().split()
        ]
        words = [word for word in words if word]
        if not words:
            return False
        if words[0] in marker_words:
            return False
        return not all(word in placeholder_texts for word in words)

    documented_models = set()

    for schema_file in schema_files:
        schema_data = _parse_yaml_safe(schema_file)
        if not isinstance(schema_data, dict):
            continue

        # `models:` with no value parses to None; anything but a list is
        # not a model listing.
        models_list = schema_data.get("models")
        if not isinstance(models_list, list):
            continue

        for model in models_list:
            if not isinstance(model, dict):
                continue

            model_name = model.get("name")
            if not isinstance(model_name, str):
                continue
            if model_name not in known_model_names:
                continue

            if is_meaningful(model.get("description")):
                documented_models.add(model_name)

    documented_count = len(documented_models)
    coverage_percent = (documented_count / total_models) * 100

    # Calculate proportional score
    score = self.calculate_proportional_score(
        measured_value=coverage_percent,
        threshold=80.0,
        higher_is_better=True,
    )

    status = "pass" if score >= 75 else "fail"

    evidence = [
        f"Documented models: {documented_count}/{total_models}",
        f"Coverage: {coverage_percent:.1f}%",
        f"Schema files found: {len(schema_files)}",
    ]

    return Finding(
        attribute=self.attribute,
        status=status,
        score=score,
        measured_value=f"{coverage_percent:.1f}%",
        threshold="≥80%",
        evidence=evidence,
        remediation=self._create_remediation() if status == "fail" else None,
        error_message=None,
    )
def assess(self, repository: Repository) -> Finding:
    """Check dbt data test coverage.

    A model counts as tested when a YAML file under models/ lists it under
    ``models:`` and at least one of its columns declares both a ``unique``
    and a ``not_null`` test. Tests are read from the ``data_tests`` key and
    from the legacy ``tests`` key. Model-level tests are not inspected.
    Only entries whose ``name`` matches the stem of a ``.sql`` file under
    models/ are counted, so coverage cannot exceed 100%.

    Singular tests (``.sql`` files under tests/) are reported in the
    evidence but do not change the score.

    Scoring: coverage percentage is passed to ``calculate_proportional_score``
    with a threshold of 80; the finding passes when that score is >= 75.

    Args:
        repository: Repository entity whose ``path`` is the project root.

    Returns:
        Finding with coverage evidence; not-applicable when models/ contains
        no SQL files.
    """
    models_dir = repository.path / "models"

    if not models_dir.exists():
        return Finding(
            attribute=self.attribute,
            status="fail",
            score=0.0,
            measured_value="no models/ directory",
            threshold="≥80% models with PK tests",
            evidence=["models/ directory not found"],
            remediation=self._create_remediation(),
            error_message=None,
        )

    # Count total SQL models
    sql_files = list(models_dir.rglob("*.sql"))
    total_models = len(sql_files)

    if total_models == 0:
        return Finding.not_applicable(
            self.attribute, reason="No SQL models found in models/"
        )

    # YAML entries are matched against the SQL file stems found above.
    known_model_names = {sql_file.stem for sql_file in sql_files}

    # Any .yml/.yaml file in models/ may carry model properties
    # (schema.yml, _models.yml, or one file per model).
    schema_files = _find_yaml_files(models_dir, "*.yml")

    def column_test_names(column: dict) -> set:
        """Collect test names declared on one column."""
        names = set()
        for key in ("data_tests", "tests"):
            declared = column.get(key)
            # A key with no value parses to None; only lists hold tests.
            if not isinstance(declared, list):
                continue
            for test in declared:
                if isinstance(test, str):
                    names.add(test)
                elif isinstance(test, dict):
                    # Configured tests are one-key mappings: {name: args}
                    names.update(test.keys())
        return names

    pk_tests = {"unique", "not_null"}
    models_with_pk_tests = set()

    for schema_file in schema_files:
        schema_data = _parse_yaml_safe(schema_file)
        if not isinstance(schema_data, dict):
            continue

        models_list = schema_data.get("models")
        if not isinstance(models_list, list):
            continue

        for model in models_list:
            if not isinstance(model, dict):
                continue

            model_name = model.get("name")
            if not isinstance(model_name, str):
                continue
            if model_name not in known_model_names:
                continue

            columns = model.get("columns")
            if not isinstance(columns, list):
                continue

            # Both tests must sit on the same column to count as a PK check.
            if any(
                isinstance(column, dict)
                and pk_tests <= column_test_names(column)
                for column in columns
            ):
                models_with_pk_tests.add(model_name)

    tested_count = len(models_with_pk_tests)
    coverage_percent = (tested_count / total_models) * 100

    # Check for singular tests (bonus)
    tests_dir = repository.path / "tests"
    singular_tests = list(tests_dir.rglob("*.sql")) if tests_dir.exists() else []

    # Calculate proportional score
    score = self.calculate_proportional_score(
        measured_value=coverage_percent,
        threshold=80.0,
        higher_is_better=True,
    )

    status = "pass" if score >= 75 else "fail"

    evidence = [
        f"Models with PK tests: {tested_count}/{total_models}",
        f"Coverage: {coverage_percent:.1f}%",
        f"Schema files found: {len(schema_files)}",
    ]

    if singular_tests:
        evidence.append(f"Singular tests: {len(singular_tests)} (bonus)")

    return Finding(
        attribute=self.attribute,
        status=status,
        score=score,
        measured_value=f"{coverage_percent:.1f}%",
        threshold="≥80%",
        evidence=evidence,
        remediation=self._create_remediation() if status == "fail" else None,
        error_message=None,
    )
def assess(self, repository: Repository) -> Finding:
    """Score the layout of a dbt project's directories.

    Fails immediately with score 0 when models/ does not exist. Otherwise
    the score is the sum of three components:
    - 40 when models/ has fewer than 50 ``.sql`` files directly in its root
    - 30 when both models/staging and models/marts exist (15 for one)
    - 30 when both tests/ and macros/ exist at the root (15 for one)

    models/intermediate is reported in the evidence when present but earns
    no points. The finding passes when the total is >= 75.
    """
    models_root = repository.path / "models"

    if not models_root.exists():
        return Finding(
            attribute=self.attribute,
            status="fail",
            score=0.0,
            measured_value="no models/ directory",
            threshold="organized structure",
            evidence=["models/ directory not found"],
            remediation=self._create_remediation(),
            error_message=None,
        )

    def mark(present: bool) -> str:
        return "✓" if present else "✗"

    def pair_credit(first: bool, second: bool) -> float:
        # Full credit for both, half credit for exactly one.
        if first and second:
            return 30.0
        if first or second:
            return 15.0
        return 0.0

    # SQL files sitting directly in models/ (subdirectories excluded).
    top_level_sql_count = len(list(models_root.glob("*.sql")))
    flat_layout = top_level_sql_count >= 50

    staging_present = (models_root / "staging").exists()
    marts_present = (models_root / "marts").exists()
    intermediate_present = (models_root / "intermediate").exists()

    tests_present = (repository.path / "tests").exists()
    macros_present = (repository.path / "macros").exists()

    structure_score = 0.0 if flat_layout else 40.0
    structure_score += pair_credit(staging_present, marts_present)
    structure_score += pair_credit(tests_present, macros_present)

    passed = structure_score >= 75

    evidence = [
        f"Structure score: {structure_score:.0f}/100",
        " - models/ directory: ✓",
        f" - staging/ layer: {mark(staging_present)}",
        f" - marts/ layer: {mark(marts_present)}",
    ]
    if intermediate_present:
        evidence.append(" - intermediate/ layer: ✓ (bonus)")
    evidence.extend(
        [
            f" - tests/ directory: {mark(tests_present)}",
            f" - macros/ directory: {mark(macros_present)}",
        ]
    )
    if flat_layout:
        evidence.append(
            f" - ⚠ Flat structure: {top_level_sql_count} files in models/ root"
        )

    return Finding(
        attribute=self.attribute,
        status="pass" if passed else "fail",
        score=structure_score,
        measured_value=f"{structure_score:.0f}/100",
        threshold="≥75/100",
        evidence=evidence,
        remediation=None if passed else self._create_remediation(),
        error_message=None,
    )
customer_ltv.sql +├── tests/ # Singular tests +│ └── assert_positive_order_totals.sql +├── macros/ # Reusable SQL logic +│ └── cents_to_dollars.sql +├── seeds/ # CSV reference data +└── analyses/ # Ad-hoc analyses"""], + citations=[ + Citation( + source="dbt Labs Best Practices", + title="How we structure our dbt projects", + url="https://docs.getdbt.com/best-practices/how-we-structure/1-guide-overview", + relevance="Official guide for organizing dbt projects", + ) + ], + ) diff --git a/src/agentready/services/language_detector.py b/src/agentready/services/language_detector.py index 82b5fc6f..ccb03861 100644 --- a/src/agentready/services/language_detector.py +++ b/src/agentready/services/language_detector.py @@ -49,6 +49,7 @@ class LanguageDetector: ".sh": "Shell", ".bash": "Shell", ".zsh": "Shell", + ".sql": "SQL", ".md": "Markdown", ".yaml": "YAML", ".yml": "YAML", diff --git a/tests/fixtures/dbt_projects/flat_structure/dbt_project.yml b/tests/fixtures/dbt_projects/flat_structure/dbt_project.yml new file mode 100644 index 00000000..40a8e5de --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/dbt_project.yml @@ -0,0 +1,5 @@ +name: 'flat_structure' +config-version: 2 +profile: 'default' + +model-paths: ["models"] diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_1.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_1.sql new file mode 100644 index 00000000..43258a71 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_1.sql @@ -0,0 +1 @@ +select 1 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_10.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_10.sql new file mode 100644 index 00000000..333eef9f --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_10.sql @@ -0,0 +1 @@ +select 10 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_11.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_11.sql new file mode 
100644 index 00000000..d3a6701a --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_11.sql @@ -0,0 +1 @@ +select 11 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_12.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_12.sql new file mode 100644 index 00000000..a41c2aab --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_12.sql @@ -0,0 +1 @@ +select 12 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_13.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_13.sql new file mode 100644 index 00000000..36c55354 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_13.sql @@ -0,0 +1 @@ +select 13 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_14.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_14.sql new file mode 100644 index 00000000..fd190c0e --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_14.sql @@ -0,0 +1 @@ +select 14 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_15.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_15.sql new file mode 100644 index 00000000..63399aaf --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_15.sql @@ -0,0 +1 @@ +select 15 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_16.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_16.sql new file mode 100644 index 00000000..2c4d552c --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_16.sql @@ -0,0 +1 @@ +select 16 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_17.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_17.sql new file mode 100644 index 00000000..3191e633 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_17.sql @@ -0,0 +1 @@ +select 17 as id diff --git 
a/tests/fixtures/dbt_projects/flat_structure/models/model_18.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_18.sql new file mode 100644 index 00000000..957b18f4 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_18.sql @@ -0,0 +1 @@ +select 18 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_19.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_19.sql new file mode 100644 index 00000000..fcd3c5f2 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_19.sql @@ -0,0 +1 @@ +select 19 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_2.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_2.sql new file mode 100644 index 00000000..33560d6c --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_2.sql @@ -0,0 +1 @@ +select 2 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_20.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_20.sql new file mode 100644 index 00000000..548190f9 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_20.sql @@ -0,0 +1 @@ +select 20 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_21.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_21.sql new file mode 100644 index 00000000..7b330452 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_21.sql @@ -0,0 +1 @@ +select 21 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_22.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_22.sql new file mode 100644 index 00000000..51c6cb6c --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_22.sql @@ -0,0 +1 @@ +select 22 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_23.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_23.sql new file mode 100644 index 00000000..7458cae3 --- /dev/null +++ 
b/tests/fixtures/dbt_projects/flat_structure/models/model_23.sql @@ -0,0 +1 @@ +select 23 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_24.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_24.sql new file mode 100644 index 00000000..b67fcc66 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_24.sql @@ -0,0 +1 @@ +select 24 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_25.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_25.sql new file mode 100644 index 00000000..17265271 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_25.sql @@ -0,0 +1 @@ +select 25 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_26.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_26.sql new file mode 100644 index 00000000..75670a0d --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_26.sql @@ -0,0 +1 @@ +select 26 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_27.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_27.sql new file mode 100644 index 00000000..2c264078 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_27.sql @@ -0,0 +1 @@ +select 27 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_28.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_28.sql new file mode 100644 index 00000000..0cecb7fa --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_28.sql @@ -0,0 +1 @@ +select 28 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_29.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_29.sql new file mode 100644 index 00000000..0cd5d829 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_29.sql @@ -0,0 +1 @@ +select 29 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_3.sql 
b/tests/fixtures/dbt_projects/flat_structure/models/model_3.sql new file mode 100644 index 00000000..a22afde6 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_3.sql @@ -0,0 +1 @@ +select 3 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_30.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_30.sql new file mode 100644 index 00000000..25985c01 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_30.sql @@ -0,0 +1 @@ +select 30 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_31.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_31.sql new file mode 100644 index 00000000..04f2fc4a --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_31.sql @@ -0,0 +1 @@ +select 31 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_32.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_32.sql new file mode 100644 index 00000000..5eb7b75e --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_32.sql @@ -0,0 +1 @@ +select 32 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_33.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_33.sql new file mode 100644 index 00000000..dff22e1f --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_33.sql @@ -0,0 +1 @@ +select 33 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_34.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_34.sql new file mode 100644 index 00000000..82a346e8 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_34.sql @@ -0,0 +1 @@ +select 34 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_35.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_35.sql new file mode 100644 index 00000000..97e0b84d --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_35.sql @@ 
-0,0 +1 @@ +select 35 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_36.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_36.sql new file mode 100644 index 00000000..108ebaa7 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_36.sql @@ -0,0 +1 @@ +select 36 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_37.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_37.sql new file mode 100644 index 00000000..e89cf8b2 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_37.sql @@ -0,0 +1 @@ +select 37 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_38.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_38.sql new file mode 100644 index 00000000..07ce3e04 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_38.sql @@ -0,0 +1 @@ +select 38 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_39.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_39.sql new file mode 100644 index 00000000..873e7c82 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_39.sql @@ -0,0 +1 @@ +select 39 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_4.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_4.sql new file mode 100644 index 00000000..1f7d87c5 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_4.sql @@ -0,0 +1 @@ +select 4 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_40.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_40.sql new file mode 100644 index 00000000..82cd022b --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_40.sql @@ -0,0 +1 @@ +select 40 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_41.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_41.sql new file mode 100644 index 
00000000..2fe1a0f6 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_41.sql @@ -0,0 +1 @@ +select 41 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_42.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_42.sql new file mode 100644 index 00000000..ccd4b3fa --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_42.sql @@ -0,0 +1 @@ +select 42 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_43.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_43.sql new file mode 100644 index 00000000..fd36ca7d --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_43.sql @@ -0,0 +1 @@ +select 43 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_44.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_44.sql new file mode 100644 index 00000000..69bfc213 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_44.sql @@ -0,0 +1 @@ +select 44 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_45.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_45.sql new file mode 100644 index 00000000..29034124 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_45.sql @@ -0,0 +1 @@ +select 45 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_46.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_46.sql new file mode 100644 index 00000000..c618ee62 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_46.sql @@ -0,0 +1 @@ +select 46 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_47.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_47.sql new file mode 100644 index 00000000..7ff2ab4d --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_47.sql @@ -0,0 +1 @@ +select 47 as id diff --git 
a/tests/fixtures/dbt_projects/flat_structure/models/model_48.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_48.sql new file mode 100644 index 00000000..7a9c75ef --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_48.sql @@ -0,0 +1 @@ +select 48 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_49.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_49.sql new file mode 100644 index 00000000..35fb4166 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_49.sql @@ -0,0 +1 @@ +select 49 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_5.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_5.sql new file mode 100644 index 00000000..c6cbcf41 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_5.sql @@ -0,0 +1 @@ +select 5 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_50.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_50.sql new file mode 100644 index 00000000..f10f806e --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_50.sql @@ -0,0 +1 @@ +select 50 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_51.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_51.sql new file mode 100644 index 00000000..f24cadc8 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_51.sql @@ -0,0 +1 @@ +select 51 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_52.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_52.sql new file mode 100644 index 00000000..287efb07 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_52.sql @@ -0,0 +1 @@ +select 52 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_53.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_53.sql new file mode 100644 index 00000000..85d8af72 --- /dev/null +++ 
b/tests/fixtures/dbt_projects/flat_structure/models/model_53.sql @@ -0,0 +1 @@ +select 53 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_54.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_54.sql new file mode 100644 index 00000000..294f14e5 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_54.sql @@ -0,0 +1 @@ +select 54 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_55.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_55.sql new file mode 100644 index 00000000..4845b187 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_55.sql @@ -0,0 +1 @@ +select 55 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_6.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_6.sql new file mode 100644 index 00000000..ed89a0ba --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_6.sql @@ -0,0 +1 @@ +select 6 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_7.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_7.sql new file mode 100644 index 00000000..ce468818 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_7.sql @@ -0,0 +1 @@ +select 7 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_8.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_8.sql new file mode 100644 index 00000000..ace0d7db --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_8.sql @@ -0,0 +1 @@ +select 8 as id diff --git a/tests/fixtures/dbt_projects/flat_structure/models/model_9.sql b/tests/fixtures/dbt_projects/flat_structure/models/model_9.sql new file mode 100644 index 00000000..aa1baba4 --- /dev/null +++ b/tests/fixtures/dbt_projects/flat_structure/models/model_9.sql @@ -0,0 +1 @@ +select 9 as id diff --git a/tests/fixtures/dbt_projects/minimal_valid/dbt_project.yml 
b/tests/fixtures/dbt_projects/minimal_valid/dbt_project.yml new file mode 100644 index 00000000..f9570768 --- /dev/null +++ b/tests/fixtures/dbt_projects/minimal_valid/dbt_project.yml @@ -0,0 +1,5 @@ +name: 'minimal_valid' +config-version: 2 +profile: 'default' + +model-paths: ["models"] diff --git a/tests/fixtures/dbt_projects/minimal_valid/models/sample_model.sql b/tests/fixtures/dbt_projects/minimal_valid/models/sample_model.sql new file mode 100644 index 00000000..43258a71 --- /dev/null +++ b/tests/fixtures/dbt_projects/minimal_valid/models/sample_model.sql @@ -0,0 +1 @@ +select 1 as id diff --git a/tests/fixtures/dbt_projects/missing_docs/dbt_project.yml b/tests/fixtures/dbt_projects/missing_docs/dbt_project.yml new file mode 100644 index 00000000..3f9712e1 --- /dev/null +++ b/tests/fixtures/dbt_projects/missing_docs/dbt_project.yml @@ -0,0 +1,5 @@ +name: 'missing_docs' +config-version: 2 +profile: 'default' + +model-paths: ["models"] diff --git a/tests/fixtures/dbt_projects/missing_docs/models/model1.sql b/tests/fixtures/dbt_projects/missing_docs/models/model1.sql new file mode 100644 index 00000000..43258a71 --- /dev/null +++ b/tests/fixtures/dbt_projects/missing_docs/models/model1.sql @@ -0,0 +1 @@ +select 1 as id diff --git a/tests/fixtures/dbt_projects/missing_docs/models/model2.sql b/tests/fixtures/dbt_projects/missing_docs/models/model2.sql new file mode 100644 index 00000000..33560d6c --- /dev/null +++ b/tests/fixtures/dbt_projects/missing_docs/models/model2.sql @@ -0,0 +1 @@ +select 2 as id diff --git a/tests/fixtures/dbt_projects/missing_tests/dbt_project.yml b/tests/fixtures/dbt_projects/missing_tests/dbt_project.yml new file mode 100644 index 00000000..77e5a706 --- /dev/null +++ b/tests/fixtures/dbt_projects/missing_tests/dbt_project.yml @@ -0,0 +1,5 @@ +name: 'missing_tests' +config-version: 2 +profile: 'default' + +model-paths: ["models"] diff --git a/tests/fixtures/dbt_projects/missing_tests/models/customers.sql 
b/tests/fixtures/dbt_projects/missing_tests/models/customers.sql new file mode 100644 index 00000000..1f532d3f --- /dev/null +++ b/tests/fixtures/dbt_projects/missing_tests/models/customers.sql @@ -0,0 +1 @@ +select id as customer_id from raw_customers diff --git a/tests/fixtures/dbt_projects/missing_tests/models/schema.yml b/tests/fixtures/dbt_projects/missing_tests/models/schema.yml new file mode 100644 index 00000000..c5c5c740 --- /dev/null +++ b/tests/fixtures/dbt_projects/missing_tests/models/schema.yml @@ -0,0 +1,8 @@ +version: 2 + +models: + - name: customers + description: Customer data without tests + columns: + - name: customer_id + description: Customer identifier diff --git a/tests/fixtures/dbt_projects/non_dbt/README.md b/tests/fixtures/dbt_projects/non_dbt/README.md new file mode 100644 index 00000000..29ca946f --- /dev/null +++ b/tests/fixtures/dbt_projects/non_dbt/README.md @@ -0,0 +1,3 @@ +# Non-dbt Project + +This is a regular Python project without dbt. diff --git a/tests/fixtures/dbt_projects/non_dbt/sample.sql b/tests/fixtures/dbt_projects/non_dbt/sample.sql new file mode 100644 index 00000000..61946da5 --- /dev/null +++ b/tests/fixtures/dbt_projects/non_dbt/sample.sql @@ -0,0 +1,2 @@ +-- Regular SQL file, not a dbt project +SELECT * FROM users; diff --git a/tests/fixtures/dbt_projects/well_structured/dbt_project.yml b/tests/fixtures/dbt_projects/well_structured/dbt_project.yml new file mode 100644 index 00000000..a3fcd910 --- /dev/null +++ b/tests/fixtures/dbt_projects/well_structured/dbt_project.yml @@ -0,0 +1,13 @@ +name: 'well_structured' +config-version: 2 +profile: 'default' + +model-paths: ["models"] +test-paths: ["tests"] +macro-paths: ["macros"] + +require-dbt-version: ">=1.0.0" + +models: + well_structured: + materialized: view diff --git a/tests/fixtures/dbt_projects/well_structured/macros/sample_macro.sql b/tests/fixtures/dbt_projects/well_structured/macros/sample_macro.sql new file mode 100644 index 00000000..56927cb2 --- /dev/null 
+++ b/tests/fixtures/dbt_projects/well_structured/macros/sample_macro.sql @@ -0,0 +1,3 @@ +{% macro cents_to_dollars(column_name) %} + ({{ column_name }} / 100)::decimal(16,2) +{% endmacro %} diff --git a/tests/fixtures/dbt_projects/well_structured/models/marts/dim_customers.sql b/tests/fixtures/dbt_projects/well_structured/models/marts/dim_customers.sql new file mode 100644 index 00000000..43f540a9 --- /dev/null +++ b/tests/fixtures/dbt_projects/well_structured/models/marts/dim_customers.sql @@ -0,0 +1,4 @@ +select + customer_id, + customer_name +from {{ ref('stg_customers') }} diff --git a/tests/fixtures/dbt_projects/well_structured/models/marts/schema.yml b/tests/fixtures/dbt_projects/well_structured/models/marts/schema.yml new file mode 100644 index 00000000..42e1b7cf --- /dev/null +++ b/tests/fixtures/dbt_projects/well_structured/models/marts/schema.yml @@ -0,0 +1,11 @@ +version: 2 + +models: + - name: dim_customers + description: Customer dimension table for analytics + columns: + - name: customer_id + description: Unique identifier for each customer + tests: + - unique + - not_null diff --git a/tests/fixtures/dbt_projects/well_structured/models/staging/schema.yml b/tests/fixtures/dbt_projects/well_structured/models/staging/schema.yml new file mode 100644 index 00000000..e13bf9dd --- /dev/null +++ b/tests/fixtures/dbt_projects/well_structured/models/staging/schema.yml @@ -0,0 +1,13 @@ +version: 2 + +models: + - name: stg_customers + description: Staging table for customer data from the raw CRM system + columns: + - name: customer_id + description: Unique identifier for each customer + tests: + - unique + - not_null + - name: customer_name + description: Full name of the customer diff --git a/tests/fixtures/dbt_projects/well_structured/models/staging/stg_customers.sql b/tests/fixtures/dbt_projects/well_structured/models/staging/stg_customers.sql new file mode 100644 index 00000000..9eb3b31b --- /dev/null +++ 
b/tests/fixtures/dbt_projects/well_structured/models/staging/stg_customers.sql @@ -0,0 +1,4 @@ +select + id as customer_id, + name as customer_name +from raw_customers diff --git a/tests/fixtures/dbt_projects/well_structured/tests/assert_positive_ids.sql b/tests/fixtures/dbt_projects/well_structured/tests/assert_positive_ids.sql new file mode 100644 index 00000000..1a105cc3 --- /dev/null +++ b/tests/fixtures/dbt_projects/well_structured/tests/assert_positive_ids.sql @@ -0,0 +1,2 @@ +select * from {{ ref('stg_customers') }} +where customer_id < 0 diff --git a/tests/unit/test_assessors_dbt.py b/tests/unit/test_assessors_dbt.py new file mode 100644 index 00000000..3ef357aa --- /dev/null +++ b/tests/unit/test_assessors_dbt.py @@ -0,0 +1,727 @@ +"""Unit tests for dbt assessors.""" + +from pathlib import Path + +import pytest + +from agentready.assessors.dbt import ( + DbtDataTestsAssessor, + DbtModelDocumentationAssessor, + DbtProjectConfigAssessor, + DbtProjectStructureAssessor, + _find_yaml_files, + _is_dbt_project, + _parse_yaml_safe, +) +from agentready.models.repository import Repository + +# ============================================================================ +# Test Fixtures +# ============================================================================ + + +@pytest.fixture +def minimal_valid_repo(tmp_path): + """Minimal valid dbt project.""" + fixture_dir = ( + Path(__file__).parent.parent / "fixtures" / "dbt_projects" / "minimal_valid" + ) + return Repository( + path=fixture_dir, + name="minimal_valid", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 1}, + total_files=2, + total_lines=10, + ) + + +@pytest.fixture +def well_structured_repo(tmp_path): + """Well-structured dbt project with best practices.""" + fixture_dir = ( + Path(__file__).parent.parent / "fixtures" / "dbt_projects" / "well_structured" + ) + return Repository( + path=fixture_dir, + name="well_structured", + url=None, + branch="main", + commit_hash="abc123", + 
languages={"SQL": 2}, + total_files=10, + total_lines=50, + ) + + +@pytest.fixture +def missing_docs_repo(tmp_path): + """Valid dbt project but no documentation.""" + fixture_dir = ( + Path(__file__).parent.parent / "fixtures" / "dbt_projects" / "missing_docs" + ) + return Repository( + path=fixture_dir, + name="missing_docs", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 2}, + total_files=3, + total_lines=10, + ) + + +@pytest.fixture +def missing_tests_repo(tmp_path): + """Valid dbt project but no tests.""" + fixture_dir = ( + Path(__file__).parent.parent / "fixtures" / "dbt_projects" / "missing_tests" + ) + return Repository( + path=fixture_dir, + name="missing_tests", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 1}, + total_files=2, + total_lines=10, + ) + + +@pytest.fixture +def flat_structure_repo(tmp_path): + """Valid dbt project but flat structure.""" + fixture_dir = ( + Path(__file__).parent.parent / "fixtures" / "dbt_projects" / "flat_structure" + ) + return Repository( + path=fixture_dir, + name="flat_structure", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 55}, + total_files=56, + total_lines=100, + ) + + +@pytest.fixture +def non_dbt_repo(tmp_path): + """Regular project without dbt.""" + fixture_dir = Path(__file__).parent.parent / "fixtures" / "dbt_projects" / "non_dbt" + return Repository( + path=fixture_dir, + name="non_dbt", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 1}, + total_files=2, + total_lines=5, + ) + + +# ============================================================================ +# Utility Function Tests +# ============================================================================ + + +class TestUtilityFunctions: + """Test shared utility functions.""" + + def test_is_dbt_project_true(self, minimal_valid_repo): + """Test _is_dbt_project returns True for dbt project.""" + assert _is_dbt_project(minimal_valid_repo) is True + + 
def test_is_dbt_project_false(self, non_dbt_repo): + """Test _is_dbt_project returns False for non-dbt project.""" + assert _is_dbt_project(non_dbt_repo) is False + + def test_find_yaml_files(self, well_structured_repo): + """Test _find_yaml_files finds YAML files recursively.""" + models_dir = well_structured_repo.path / "models" + yaml_files = _find_yaml_files(models_dir, "*schema.yml") + + assert len(yaml_files) >= 2 # At least staging and marts schema.yml + assert all(f.suffix in [".yml", ".yaml"] for f in yaml_files) + + def test_parse_yaml_safe_valid(self, minimal_valid_repo): + """Test _parse_yaml_safe parses valid YAML.""" + dbt_project_path = minimal_valid_repo.path / "dbt_project.yml" + data = _parse_yaml_safe(dbt_project_path) + + assert isinstance(data, dict) + assert data["name"] == "minimal_valid" + assert data["config-version"] == 2 + + def test_parse_yaml_safe_invalid(self, tmp_path): + """Test _parse_yaml_safe returns empty dict for invalid YAML.""" + invalid_yaml = tmp_path / "invalid.yml" + invalid_yaml.write_text("invalid: yaml: content: [") + + data = _parse_yaml_safe(invalid_yaml) + assert data == {} + + def test_parse_yaml_safe_nonexistent(self, tmp_path): + """Test _parse_yaml_safe returns empty dict for nonexistent file.""" + nonexistent = tmp_path / "nonexistent.yml" + data = _parse_yaml_safe(nonexistent) + assert data == {} + + +# ============================================================================ +# DbtProjectConfigAssessor Tests +# ============================================================================ + + +class TestDbtProjectConfigAssessor: + """Test DbtProjectConfigAssessor.""" + + @pytest.fixture + def assessor(self): + """Create assessor instance.""" + return DbtProjectConfigAssessor() + + def test_attribute_id(self, assessor): + """Test attribute_id property.""" + assert assessor.attribute_id == "dbt_project_config" + + def test_tier(self, assessor): + """Test tier property.""" + assert assessor.tier == 1 # Essential 
+ + def test_is_applicable_dbt_project(self, assessor, minimal_valid_repo): + """Test is_applicable returns True for dbt project.""" + assert assessor.is_applicable(minimal_valid_repo) is True + + def test_is_applicable_non_dbt(self, assessor, non_dbt_repo): + """Test is_applicable returns False for non-dbt project.""" + assert assessor.is_applicable(non_dbt_repo) is False + + def test_assess_valid_minimal(self, assessor, minimal_valid_repo): + """Test assess with minimal valid configuration.""" + finding = assessor.assess(minimal_valid_repo) + + assert finding.status == "pass" + assert finding.score == 100.0 + assert "name" in str(finding.evidence) + assert "config-version" in str(finding.evidence) + assert finding.remediation is None + + def test_assess_valid_with_version(self, assessor, well_structured_repo): + """Test assess with dbt version specified.""" + finding = assessor.assess(well_structured_repo) + + assert finding.status == "pass" + assert finding.score == 100.0 + assert "require-dbt-version" in str(finding.evidence) + + def test_assess_missing_file(self, assessor, non_dbt_repo): + """Test assess when dbt_project.yml missing.""" + finding = assessor.assess(non_dbt_repo) + + assert finding.status == "fail" + assert finding.score == 0.0 + assert "not found" in str(finding.evidence) + assert finding.remediation is not None + + def test_assess_invalid_yaml(self, assessor, tmp_path): + """Test assess with invalid YAML syntax.""" + # Create .git directory + (tmp_path / ".git").mkdir() + + # Create invalid dbt_project.yml + dbt_project = tmp_path / "dbt_project.yml" + dbt_project.write_text("invalid: yaml: [") + + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 1}, + total_files=1, + total_lines=10, + ) + finding = assessor.assess(repo) + + assert finding.status == "error" + assert "invalid YAML" in finding.error_message + + def test_assess_missing_required_fields(self, assessor, 
tmp_path): + """Test assess with missing required fields.""" + # Create .git directory + (tmp_path / ".git").mkdir() + + # Create incomplete dbt_project.yml + dbt_project = tmp_path / "dbt_project.yml" + dbt_project.write_text("name: 'test'\n") # Missing config-version and profile + + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 1}, + total_files=1, + total_lines=10, + ) + finding = assessor.assess(repo) + + assert finding.status == "fail" + assert finding.score == 0.0 + assert "missing fields" in finding.measured_value + assert finding.remediation is not None + + def test_remediation_content(self, assessor): + """Test remediation has all required components.""" + remediation = assessor._create_remediation() + + assert len(remediation.steps) >= 3 + assert len(remediation.tools) >= 1 + assert "dbt-core" in remediation.tools + assert len(remediation.commands) >= 1 + assert len(remediation.examples) >= 1 + assert len(remediation.citations) >= 1 + + +# ============================================================================ +# DbtModelDocumentationAssessor Tests +# ============================================================================ + + +class TestDbtModelDocumentationAssessor: + """Test DbtModelDocumentationAssessor.""" + + @pytest.fixture + def assessor(self): + """Create assessor instance.""" + return DbtModelDocumentationAssessor() + + def test_attribute_id(self, assessor): + """Test attribute_id property.""" + assert assessor.attribute_id == "dbt_model_documentation" + + def test_tier(self, assessor): + """Test tier property.""" + assert assessor.tier == 1 # Essential + + def test_is_applicable_dbt_project(self, assessor, minimal_valid_repo): + """Test is_applicable returns True for dbt project.""" + assert assessor.is_applicable(minimal_valid_repo) is True + + def test_is_applicable_non_dbt(self, assessor, non_dbt_repo): + """Test is_applicable returns False for non-dbt 
project.""" + assert assessor.is_applicable(non_dbt_repo) is False + + def test_assess_well_documented(self, assessor, well_structured_repo): + """Test assess with well-documented models.""" + finding = assessor.assess(well_structured_repo) + + assert finding.status == "pass" + assert finding.score == 100.0 # Both models documented + assert "100.0%" in finding.measured_value + assert finding.remediation is None + + def test_assess_no_documentation(self, assessor, missing_docs_repo): + """Test assess with no documentation.""" + finding = assessor.assess(missing_docs_repo) + + assert finding.status == "fail" + assert finding.score == 0.0 + assert "0.0%" in finding.measured_value + assert finding.remediation is not None + + def test_assess_partial_documentation(self, assessor, missing_tests_repo): + """Test assess with partial documentation (has schema.yml but documented).""" + finding = assessor.assess(missing_tests_repo) + + # This project has 1 model with description + assert finding.status == "pass" + assert finding.score == 100.0 # 1/1 = 100% + + def test_assess_no_models_directory(self, assessor, non_dbt_repo): + """Test assess when models/ directory missing.""" + finding = assessor.assess(non_dbt_repo) + + assert finding.status == "fail" + assert finding.score == 0.0 + assert "no models/ directory" in finding.measured_value + + def test_assess_no_sql_files(self, assessor, tmp_path): + """Test assess when models/ exists but no SQL files.""" + # Create .git directory + (tmp_path / ".git").mkdir() + + models_dir = tmp_path / "models" + models_dir.mkdir() + + dbt_project = tmp_path / "dbt_project.yml" + dbt_project.write_text("name: 'test'\nconfig-version: 2\nprofile: 'default'") + + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 0}, + total_files=0, + total_lines=0, + ) + finding = assessor.assess(repo) + + assert finding.status == "not_applicable" + assert "No SQL models found" in 
str(finding.evidence) + + def test_proportional_scoring(self, assessor, tmp_path): + """Test proportional scoring for partial documentation.""" + # Create .git directory + (tmp_path / ".git").mkdir() + + # Create 10 models, document 5 + models_dir = tmp_path / "models" + models_dir.mkdir() + + for i in range(10): + (models_dir / f"model_{i}.sql").write_text(f"select {i} as id") + + # Document 5 models (50% coverage) + schema_yml = models_dir / "schema.yml" + schema_yml.write_text("""version: 2 +models: + - name: model_0 + description: This is model zero with data + - name: model_1 + description: This is model one with data + - name: model_2 + description: This is model two with data + - name: model_3 + description: This is model three with data + - name: model_4 + description: This is model four with data +""") + + dbt_project = tmp_path / "dbt_project.yml" + dbt_project.write_text("name: 'test'\nconfig-version: 2\nprofile: 'default'") + + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 10}, + total_files=10, + total_lines=100, + ) + finding = assessor.assess(repo) + + assert finding.status == "fail" + # 50% coverage → 50/80 * 100 = 62.5 score + assert 60.0 <= finding.score <= 65.0 + + def test_remediation_content(self, assessor): + """Test remediation has all required components.""" + remediation = assessor._create_remediation() + + assert len(remediation.steps) >= 3 + assert len(remediation.tools) >= 1 + assert len(remediation.commands) >= 1 + assert "dbt docs generate" in str(remediation.commands) + assert len(remediation.examples) >= 1 + assert len(remediation.citations) >= 1 + + +# ============================================================================ +# DbtDataTestsAssessor Tests +# ============================================================================ + + +class TestDbtDataTestsAssessor: + """Test DbtDataTestsAssessor.""" + + @pytest.fixture + def assessor(self): + 
"""Create assessor instance.""" + return DbtDataTestsAssessor() + + def test_attribute_id(self, assessor): + """Test attribute_id property.""" + assert assessor.attribute_id == "dbt_data_tests" + + def test_tier(self, assessor): + """Test tier property.""" + assert assessor.tier == 2 # Critical + + def test_is_applicable_dbt_project(self, assessor, minimal_valid_repo): + """Test is_applicable returns True for dbt project.""" + assert assessor.is_applicable(minimal_valid_repo) is True + + def test_is_applicable_non_dbt(self, assessor, non_dbt_repo): + """Test is_applicable returns False for non-dbt project.""" + assert assessor.is_applicable(non_dbt_repo) is False + + def test_assess_with_tests(self, assessor, well_structured_repo): + """Test assess with models having PK tests.""" + finding = assessor.assess(well_structured_repo) + + assert finding.status == "pass" + assert finding.score == 100.0 # Both models have unique+not_null + assert "100.0%" in finding.measured_value + assert "Singular tests" in str(finding.evidence) # Has tests/ directory + assert finding.remediation is None + + def test_assess_no_tests(self, assessor, missing_tests_repo): + """Test assess with no tests.""" + finding = assessor.assess(missing_tests_repo) + + assert finding.status == "fail" + assert finding.score == 0.0 + assert "0.0%" in finding.measured_value + assert finding.remediation is not None + + def test_assess_no_models_directory(self, assessor, non_dbt_repo): + """Test assess when models/ directory missing.""" + finding = assessor.assess(non_dbt_repo) + + assert finding.status == "fail" + assert finding.score == 0.0 + assert "no models/ directory" in finding.measured_value + + def test_assess_no_sql_files(self, assessor, tmp_path): + """Test assess when models/ exists but no SQL files.""" + # Create .git directory + (tmp_path / ".git").mkdir() + + models_dir = tmp_path / "models" + models_dir.mkdir() + + dbt_project = tmp_path / "dbt_project.yml" + dbt_project.write_text("name: 
'test'\nconfig-version: 2\nprofile: 'default'") + + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 0}, + total_files=0, + total_lines=0, + ) + finding = assessor.assess(repo) + + assert finding.status == "not_applicable" + assert "No SQL models found" in str(finding.evidence) + + def test_proportional_scoring(self, assessor, tmp_path): + """Test proportional scoring for partial test coverage.""" + # Create .git directory + (tmp_path / ".git").mkdir() + + # Create 10 models, test 5 (50% coverage) + models_dir = tmp_path / "models" + models_dir.mkdir() + + for i in range(10): + (models_dir / f"model_{i}.sql").write_text(f"select {i} as id") + + # Test 5 models (50% coverage → 62.5 score → fail) + schema_yml = models_dir / "schema.yml" + models_yaml = [] + for i in range(5): + models_yaml.append(f""" - name: model_{i} + columns: + - name: id + tests: + - unique + - not_null +""") + + schema_yml.write_text("version: 2\nmodels:\n" + "\n".join(models_yaml)) + + dbt_project = tmp_path / "dbt_project.yml" + dbt_project.write_text("name: 'test'\nconfig-version: 2\nprofile: 'default'") + + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc123", + languages={"SQL": 10}, + total_files=10, + total_lines=100, + ) + finding = assessor.assess(repo) + + assert finding.status == "fail" + # 50% coverage → 50/80 * 100 = 62.5 score + assert 60.0 <= finding.score <= 65.0 + + def test_remediation_content(self, assessor): + """Test remediation has all required components.""" + remediation = assessor._create_remediation() + + assert len(remediation.steps) >= 3 + assert len(remediation.tools) >= 1 + assert "dbt-core" in remediation.tools + assert len(remediation.commands) >= 1 + assert "dbt test" in str(remediation.commands) + assert len(remediation.examples) >= 1 + assert len(remediation.citations) >= 1 + + +# 
# ============================================================================
# DbtProjectStructureAssessor Tests
# ============================================================================


class TestDbtProjectStructureAssessor:
    """Tests covering DbtProjectStructureAssessor metadata, scoring and remediation."""

    @staticmethod
    def _make_repo(root, *, sql_count, file_count, line_count):
        """Write a minimal dbt_project.yml into ``root`` and wrap it in a Repository.

        Only the language/file/line counters vary between the tests that
        build their project tree by hand; everything else is fixed.
        """
        (root / "dbt_project.yml").write_text(
            "name: 'test'\nconfig-version: 2\nprofile: 'default'"
        )
        return Repository(
            path=root,
            name="test",
            url=None,
            branch="main",
            commit_hash="abc123",
            languages={"SQL": sql_count},
            total_files=file_count,
            total_lines=line_count,
        )

    @pytest.fixture
    def assessor(self):
        """Provide the assessor under test."""
        return DbtProjectStructureAssessor()

    def test_attribute_id(self, assessor):
        """The assessor reports the expected attribute identifier."""
        assert assessor.attribute_id == "dbt_project_structure"

    def test_tier(self, assessor):
        """The assessor belongs to tier 2."""
        assert assessor.tier == 2  # Critical

    def test_is_applicable_dbt_project(self, assessor, minimal_valid_repo):
        """A dbt project is reported as applicable."""
        assert assessor.is_applicable(minimal_valid_repo) is True

    def test_is_applicable_non_dbt(self, assessor, non_dbt_repo):
        """A non-dbt repository is reported as not applicable."""
        assert assessor.is_applicable(non_dbt_repo) is False

    def test_assess_well_structured(self, assessor, well_structured_repo):
        """A well-structured project passes with a perfect score and no remediation."""
        result = assessor.assess(well_structured_repo)

        assert result.status == "pass"
        assert result.score == 100.0  # Has staging, marts, tests, macros

        evidence_text = str(result.evidence)
        expected_markers = (
            "staging/ layer: ✓",
            "marts/ layer: ✓",
            "tests/ directory: ✓",
            "macros/ directory: ✓",
        )
        for marker in expected_markers:
            assert marker in evidence_text

        assert result.remediation is None

    def test_assess_flat_structure(self, assessor, flat_structure_repo):
        """A flat layout (50+ files in root) fails and comes with remediation."""
        result = assessor.assess(flat_structure_repo)

        assert result.status == "fail"
        assert result.score < 75.0
        assert "Flat structure" in str(result.evidence)
        assert result.remediation is not None

    def test_assess_minimal_structure(self, assessor, minimal_valid_repo):
        """A project with nothing but models/ fails with the base score."""
        result = assessor.assess(minimal_valid_repo)

        assert result.status == "fail"
        # Expected breakdown: only models/ exists (not flat), while
        # staging/marts/tests/macros are all missing.
        # 40 (not flat) + 0 (no layers) + 0 (no tests/macros) = 40
        assert result.score == 40.0

    def test_assess_no_models_directory(self, assessor, non_dbt_repo):
        """A repository without models/ fails with a zero score."""
        result = assessor.assess(non_dbt_repo)

        assert result.status == "fail"
        assert result.score == 0.0
        assert "no models/ directory" in result.measured_value

    def test_assess_partial_structure(self, assessor, tmp_path):
        """Only a staging layer (no marts) earns partial layer credit."""
        # Repository scaffolding: a .git directory.
        (tmp_path / ".git").mkdir()

        staging = tmp_path / "models" / "staging"
        staging.mkdir(parents=True)
        (staging / "model.sql").write_text("select 1")

        repo = self._make_repo(tmp_path, sql_count=1, file_count=1, line_count=10)
        result = assessor.assess(repo)

        assert result.status == "fail"
        # 40 (not flat) + 15 (staging only) + 0 (no tests/macros) = 55
        assert result.score == 55.0

    def test_composite_scoring(self, assessor, tmp_path):
        """Staging + marts + tests/ (no macros/) add up to a passing composite score."""
        # Repository scaffolding: a .git directory.
        (tmp_path / ".git").mkdir()

        models = tmp_path / "models"
        models.mkdir()

        # Both model layers, each holding a single model.
        for layer_name in ("staging", "marts"):
            layer = models / layer_name
            layer.mkdir()
            (layer / "model.sql").write_text("select 1")

        # A top-level tests/ directory with one SQL test.
        sql_tests = tmp_path / "tests"
        sql_tests.mkdir()
        (sql_tests / "test.sql").write_text("select 1")

        repo = self._make_repo(tmp_path, sql_count=2, file_count=3, line_count=10)
        result = assessor.assess(repo)

        # 40 (not flat) + 30 (staging+marts) + 15 (tests only) = 85
        assert result.status == "pass"
        assert result.score == 85.0

    def test_remediation_content(self, assessor):
        """The remediation advice carries every required section."""
        plan = assessor._create_remediation()

        assert len(plan.steps) >= 3
        assert len(plan.tools) >= 1
        # Always true for any sized value: commands may legitimately be empty,
        # so this only verifies that the attribute supports len().
        assert len(plan.commands) >= 0  # May not have commands
        assert len(plan.examples) >= 1

        first_example = plan.examples[0]
        assert "staging/" in first_example
        assert "marts/" in first_example

        assert len(plan.citations) >= 1