From ee296efe085431e866907beaf77d4b06b69dd4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Domozi?= Date: Thu, 5 Mar 2026 18:14:35 +0100 Subject: [PATCH] Include skiplist file in the analysis info This patch introduces a new table called AnalysisInfoFile, which is intended to store files related to analysis information. The actual file contents are stored separately in the FileContent table. During CodeChecker analyze, we create a new directory called conf/ in the report directory. The entire conf/ directory will be added to the massStoreRun ZIP file. With this PR, only the skipfile is copied to the conf directory, but this could be extended later. --- analyzer/codechecker_analyzer/cli/analyze.py | 29 ++++++ web/client/codechecker_client/cli/store.py | 7 ++ .../codechecker_server/api/mass_store_run.py | 91 ++++++++++++------- .../codechecker_server/api/report_server.py | 16 +++- .../codechecker_server/database/db_cleanup.py | 8 +- .../database/run_db_model.py | 29 ++++++ web/server/codechecker_server/metadata.py | 5 + ...5047b6513_add_analysis_info_files_table.py | 51 +++++++++++ .../store/test_proj/divide_zero/skipfile | 1 + web/tests/functional/store/test_store.py | 19 ++++ 10 files changed, 218 insertions(+), 38 deletions(-) create mode 100644 web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py create mode 100644 web/tests/functional/store/test_proj/divide_zero/skipfile diff --git a/analyzer/codechecker_analyzer/cli/analyze.py b/analyzer/codechecker_analyzer/cli/analyze.py index 7daae8ffad..8ee5474165 100644 --- a/analyzer/codechecker_analyzer/cli/analyze.py +++ b/analyzer/codechecker_analyzer/cli/analyze.py @@ -1094,6 +1094,34 @@ def __update_review_status_config(args): os.symlink(args.review_status_config, rs_config_to_send) +def __update_analysis_config_files(args): + """ + Copy analysis related configuration files (e.g. skipfile) + to report_dir/conf/. 
+ This directory will be included in the ZIP file, + which will be stored on the server. + """ + conf_dir = os.path.join(args.output_path, "conf") + + # Remove any config files used during previous analysis + if os.path.isdir(conf_dir): + shutil.rmtree(conf_dir) + + # Create a new conf directory + os.makedirs(conf_dir) + + def add_file_to_conf_dir(file_path: str): + if not os.path.isfile(file_path): + return + + file_path = os.path.abspath(file_path) + filename = os.path.basename(file_path) + shutil.copyfile(file_path, os.path.join(conf_dir, filename)) + + if 'skipfile' in args: + add_file_to_conf_dir(args.skipfile) + + def __cleanup_metadata(metadata_prev, metadata): """ Cleanup metadata. @@ -1455,6 +1483,7 @@ def main(args): __update_skip_file(args) __update_review_status_config(args) + __update_analysis_config_files(args) LOG.debug("Cleanup metadata file started.") __cleanup_metadata(metadata_prev, metadata) diff --git a/web/client/codechecker_client/cli/store.py b/web/client/codechecker_client/cli/store.py index afe025b2c3..c835b96d36 100644 --- a/web/client/codechecker_client/cli/store.py +++ b/web/client/codechecker_client/cli/store.py @@ -505,6 +505,13 @@ def assemble_zip(inputs, files_to_compress[os.path.dirname(review_status_file_path)]\ .add(review_status_file_path) + # Add files from report_dir/conf/ directory + conf_dir = os.path.join(dir_path, "conf") + if os.path.isdir(conf_dir): + for file in os.listdir(os.fsencode(conf_dir)): + conf_file = os.path.join(conf_dir, os.fsdecode(file)) + files_to_compress[conf_dir].add(conf_file) + LOG.debug(f"Processing {len(analyzer_result_file_paths)} report files ...") analyzer_result_file_reports = parse_analyzer_result_files( diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py index 64a319991d..e806e198dd 100644 --- a/web/server/codechecker_server/api/mass_store_run.py +++ b/web/server/codechecker_server/api/mass_store_run.py @@ -15,11 +15,13 @@ 
from collections import defaultdict from datetime import datetime, timedelta import fnmatch +import hashlib from hashlib import sha256 import json import os from pathlib import Path import sqlalchemy +from sqlalchemy.orm import Session as SA_Session import tempfile import time from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, \ @@ -46,7 +48,7 @@ from ..database.config_db_model import Product from ..database.database import DBSession from ..database.run_db_model import \ - AnalysisInfo, AnalysisInfoChecker, AnalyzerStatistic, \ + AnalysisInfo, AnalysisInfoChecker, AnalysisInfoFile, AnalyzerStatistic, \ BugPathEvent, BugReportPoint, \ Checker, \ ExtendedReportData, \ @@ -814,8 +816,8 @@ def __add_file_content( self, session: DBSession, source_file_name: str, - content_hash: Optional[str] - ): + content_hash: Optional[str] = None + ) -> str: """ Add the necessary file contents. If content_hash in None then this function calculates the content hash. Or if it's available at the @@ -871,6 +873,8 @@ def __add_file_content( # the meantime. session.rollback() + return content_hash + def __store_checker_identifiers(self, checkers: Set[Tuple[str, str]]): """ Stores the identifiers "(analyzer, checker_name)" in the database into @@ -1000,6 +1004,32 @@ def __store_analysis_statistics( session.add(analyzer_statistics) + def __store_analysis_info_files( + self, + session: SA_Session, + analysis_info_id: int, + report_dir_path: str + ): + """ Store analyzer related config files (e.g. 
skipfile) """ + conf_dir_path = os.path.join(report_dir_path, "conf") + zip_conf_dir = os.path.join( + self._zip_dir, "reports", + hashlib.md5(conf_dir_path.encode('utf-8')).hexdigest()) + + if not os.path.isdir(zip_conf_dir): + return + + for file in os.listdir(os.fsencode(zip_conf_dir)): + conf_file = os.path.join(zip_conf_dir, os.fsdecode(file)) + content_hash = self.__add_file_content(session, conf_file) + + if (not session.get(AnalysisInfoFile, + (analysis_info_id, content_hash))): + session.add(AnalysisInfoFile( + analysis_info_id=analysis_info_id, + filename=os.path.basename(conf_file), + content_hash=content_hash)) + def __store_analysis_info( self, session: DBSession, @@ -1012,37 +1042,30 @@ def __store_analysis_info( analyzer_command.encode("utf-8"), zlib.Z_BEST_COMPRESSION) - analysis_info_rows = session \ - .query(AnalysisInfo) \ - .filter(AnalysisInfo.analyzer_command == cmd) \ - .all() - - if analysis_info_rows: - # It is possible when multiple runs are stored - # simultaneously to the server with the same analysis - # command that multiple entries are stored into the - # database. In this case we will select the first one. - analysis_info = analysis_info_rows[0] - else: - analysis_info = AnalysisInfo(analyzer_command=cmd) - - # Obtain the ID eagerly to be able to use the M-to-N table. - session.add(analysis_info) - session.flush() - session.refresh(analysis_info, ["id"]) - - for analyzer in mip.analyzers: - q = session \ - .query(Checker) \ - .filter(Checker.analyzer_name == analyzer) - db_checkers = {r.checker_name: r for r in q.all()} - - connection_rows = [AnalysisInfoChecker( - analysis_info, db_checkers[chk], is_enabled) - for chk, is_enabled - in mip.checkers.get(analyzer, {}).items()] - for r in connection_rows: - session.add(r) + analysis_info = AnalysisInfo(analyzer_command=cmd) + + # Obtain the ID eagerly to be able to use the M-to-N table. 
+        session.add(analysis_info)
+        session.flush()
+        session.refresh(analysis_info, ["id"])
+
+        for analyzer in mip.analyzers:
+            q = session \
+                .query(Checker) \
+                .filter(Checker.analyzer_name == analyzer)
+            db_checkers = {r.checker_name: r for r in q.all()}
+
+            connection_rows = [AnalysisInfoChecker(
+                analysis_info, db_checkers[chk], is_enabled)
+                for chk, is_enabled
+                in mip.checkers.get(analyzer, {}).items()]
+            for r in connection_rows:
+                session.add(r)
+
+        if mip.report_dir_path:
+            self.__store_analysis_info_files(session,
+                                             analysis_info.id,
+                                             mip.report_dir_path)
 
         run_history.analysis_info.append(analysis_info)
         self.__analysis_info[src_dir_path] = analysis_info
diff --git a/web/server/codechecker_server/api/report_server.py b/web/server/codechecker_server/api/report_server.py
index 373f9f09fa..3dda35053f 100644
--- a/web/server/codechecker_server/api/report_server.py
+++ b/web/server/codechecker_server/api/report_server.py
@@ -63,7 +63,8 @@
 from ..database.config_db_model import Product
 from ..database.database import conv, DBSession, escape_like
 from ..database.run_db_model import \
-    AnalysisInfo, AnalysisInfoChecker as DB_AnalysisInfoChecker, \
+    AnalysisInfo, \
+    AnalysisInfoChecker as DB_AnalysisInfoChecker, AnalysisInfoFile, \
     AnalyzerStatistic, \
     BugPathEvent, BugReportPoint, \
     CleanupPlan, CleanupPlanReportHash, Checker, Comment, \
@@ -1723,6 +1724,19 @@ def getAnalysisInfo(self, analysis_info_filter, limit, offset):
             checkers[analyzer][checker] = API_AnalysisInfoChecker(
                 enabled=enabled)
 
+            analysis_config_files = session \
+                .query(AnalysisInfoFile.filename,
+                       FileContent.content) \
+                .join(FileContent, AnalysisInfoFile.content_hash
+                      == FileContent.content_hash) \
+                .filter(AnalysisInfoFile.analysis_info_id
+                        == cmd.id).all()
+
+            # Append analysis files to the command string
+            for filename, content in analysis_config_files:
+                command += f"\n\n{filename}:\n"
+                command += zlib.decompress(content).decode("utf-8")
+
             res.append(ttypes.AnalysisInfo(
analyzerCommand=html.escape(command), checkers=checkers)) diff --git a/web/server/codechecker_server/database/db_cleanup.py b/web/server/codechecker_server/database/db_cleanup.py index 638e9c9a3b..9569e414c5 100644 --- a/web/server/codechecker_server/database/db_cleanup.py +++ b/web/server/codechecker_server/database/db_cleanup.py @@ -13,6 +13,7 @@ from typing import Dict import sqlalchemy +from sqlalchemy import union from codechecker_api.codeCheckerDBAccess_v6.ttypes import Severity @@ -21,7 +22,7 @@ from .database import DBSession from .run_db_model import \ - AnalysisInfo, \ + AnalysisInfo, AnalysisInfoFile, \ BugPathEvent, BugReportPoint, \ Comment, Checker, \ File, FileContent, \ @@ -108,8 +109,9 @@ def remove_unused_files(product): if total_count: LOG.debug("%d dangling files deleted.", total_count) - files = session.query(File.content_hash) \ - .group_by(File.content_hash) + files = union( + session.query(File.content_hash), + session.query(AnalysisInfoFile.content_hash)) session.query(FileContent) \ .filter(FileContent.content_hash.notin_(files)) \ diff --git a/web/server/codechecker_server/database/run_db_model.py b/web/server/codechecker_server/database/run_db_model.py index 4d346c9513..6884a00a14 100644 --- a/web/server/codechecker_server/database/run_db_model.py +++ b/web/server/codechecker_server/database/run_db_model.py @@ -79,12 +79,41 @@ def __init__(self, self.enabled = is_enabled +class AnalysisInfoFile(Base): + __tablename__ = "analysis_info_files" + + analysis_info_id = Column(Integer, + ForeignKey("analysis_info.id", + deferrable=True, + initially="DEFERRED", + ondelete="CASCADE"), + primary_key=True) + + filename = Column(String, nullable=False) + + content_hash = Column(String, + ForeignKey("file_contents.content_hash", + deferrable=True, + initially="DEFERRED", + ondelete="CASCADE"), + primary_key=True) + + def __init__(self, + analysis_info_id: int, + filename: str, + content_hash: str): + self.analysis_info_id = analysis_info_id + 
self.filename = filename + self.content_hash = content_hash + + class AnalysisInfo(Base): __tablename__ = "analysis_info" id = Column(Integer, autoincrement=True, primary_key=True) analyzer_command = Column(LargeBinary) available_checkers = relationship(AnalysisInfoChecker, uselist=True) + analyzer_files = relationship(AnalysisInfoFile, uselist=True) def __init__(self, analyzer_command: bytes): self.analyzer_command = analyzer_command diff --git a/web/server/codechecker_server/metadata.py b/web/server/codechecker_server/metadata.py index 02128f2805..c78e31b193 100644 --- a/web/server/codechecker_server/metadata.py +++ b/web/server/codechecker_server/metadata.py @@ -66,6 +66,8 @@ def __init__(self, metadata_file_path): self.disabled_checkers: DisabledCheckers = set() self.checker_to_analyzer: CheckerToAnalyzer = {} + self.report_dir_path = None + self.__metadata_dict: Dict[str, Any] = {} if os.path.isfile(metadata_file_path): self.__metadata_dict = cast(Dict[str, Any], @@ -184,6 +186,9 @@ def __process_metadata_info_v2(self): if tool['name'] == 'codechecker' and 'version' in tool: cc_versions.add(tool['version']) + if tool['name'] == 'codechecker': + self.report_dir_path = tool.get('output_path') + if 'command' in tool: check_commands.add(' '.join(tool['command'])) diff --git a/web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py b/web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py new file mode 100644 index 0000000000..96f8b65e4e --- /dev/null +++ b/web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py @@ -0,0 +1,51 @@ +""" +Add analysis_info_files table + +Revision ID: 29e5047b6513 +Revises: 198654dac219 +Create Date: 2026-03-05 17:35:36.286847 +""" + +from logging import getLogger + +from alembic import op +import sqlalchemy as sa + + +# Revision identifiers, used by Alembic. 
+revision = '29e5047b6513' +down_revision = '198654dac219' +branch_labels = None +depends_on = None + + +def upgrade(): + LOG = getLogger("migration/report") + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'analysis_info_files', + sa.Column('analysis_info_id', sa.Integer(), nullable=False), + sa.Column('filename', sa.String(), nullable=False), + sa.Column('content_hash', sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ['analysis_info_id'], ['analysis_info.id'], + name=op.f( + 'fk_analysis_info_files_analysis_info_id_analysis_info'), + ondelete='CASCADE', initially='DEFERRED', deferrable=True), + sa.ForeignKeyConstraint( + ['content_hash'], ['file_contents.content_hash'], + name=op.f( + 'fk_analysis_info_files_content_hash_file_contents'), + ondelete='CASCADE', initially='DEFERRED', deferrable=True), + sa.PrimaryKeyConstraint( + 'analysis_info_id', 'content_hash', + name=op.f('pk_analysis_info_files')) + ) + # ### end Alembic commands ### + + +def downgrade(): + LOG = getLogger("migration/report") + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('analysis_info_files') + # ### end Alembic commands ### diff --git a/web/tests/functional/store/test_proj/divide_zero/skipfile b/web/tests/functional/store/test_proj/divide_zero/skipfile new file mode 100644 index 0000000000..4bf46ff0fd --- /dev/null +++ b/web/tests/functional/store/test_proj/divide_zero/skipfile @@ -0,0 +1 @@ +-*.txt diff --git a/web/tests/functional/store/test_store.py b/web/tests/functional/store/test_store.py index 18d7e42d0d..9718f1a3b0 100644 --- a/web/tests/functional/store/test_store.py +++ b/web/tests/functional/store/test_store.py @@ -264,6 +264,8 @@ def test_store_multiple_report_dirs(self): cfg['reportdir'] = report_dir1 cfg['checkers'] = [ '-d', 'core.DivideZero', '-e', 'deadcode.DeadStores'] + cfg['skip_file'] = os.path.join(self._divide_zero_workspace, + 'skipfile') codechecker.analyze(cfg, self._divide_zero_workspace) with open(os.path.join(report_dir1, 'metadata.json'), 'r+', @@ -278,6 +280,7 @@ def test_store_multiple_report_dirs(self): cfg['reportdir'] = report_dir2 cfg['checkers'] = [ '-e', 'core.DivideZero', '-d', 'deadcode.DeadStores'] + cfg.pop('skip_file') codechecker.analyze(cfg, self._divide_zero_workspace) def store_multiple_report_dirs(report_dirs): @@ -323,12 +326,28 @@ def store_multiple_report_dirs(report_dirs): analysis_info_filter = AnalysisInfoFilter(runId=report['runId']) analysis_info = self._cc_client.getAnalysisInfo( analysis_info_filter, limit, offset) + self.assertEqual(len(analysis_info), 2) self.assertTrue( any(report_dir1 in i.analyzerCommand for i in analysis_info)) self.assertTrue( any(report_dir2 in i.analyzerCommand for i in analysis_info)) + # Skip file content + skip_file_info = "skipfile:\n-*.txt" + + # During the analysis of report_dir1, we used a skipfile, + # and the skipfile content should appear in the analyzer command. 
+ self.assertTrue(all(skip_file_info in + i.analyzerCommand for i in analysis_info + if report_dir1 in i.analyzerCommand)) + + # No skipfile was used during the analysis of report_dir2, + # so we shouldn't see skipfile content in this case. + self.assertFalse(any(skip_file_info in + i.analyzerCommand for i in analysis_info + if report_dir2 in i.analyzerCommand)) + self.assertTrue(all( '<' not in i.analyzerCommand for i in analysis_info)) self.assertTrue(any(