diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index a68f99425cb..8398d0b99c5 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1900,6 +1900,7 @@ version = "0.0.0" dependencies = [ "anyhow", "clap", + "codex-utils-absolute-path", "multimap", "pretty_assertions", "serde", diff --git a/codex-rs/config/src/config_requirements.rs b/codex-rs/config/src/config_requirements.rs index db81b45e355..d895ef07365 100644 --- a/codex-rs/config/src/config_requirements.rs +++ b/codex-rs/config/src/config_requirements.rs @@ -1147,6 +1147,7 @@ mod tests { matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["rm"]), decision: Decision::Forbidden, + resolved_program: None, justification: None, }], } diff --git a/codex-rs/core/src/config_loader/tests.rs b/codex-rs/core/src/config_loader/tests.rs index 3eab05ba183..72c4b287303 100644 --- a/codex-rs/core/src/config_loader/tests.rs +++ b/codex-rs/core/src/config_loader/tests.rs @@ -1390,6 +1390,7 @@ prefix_rules = [ matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["rm"]), decision: Decision::Forbidden, + resolved_program: None, justification: None, }], } @@ -1415,6 +1416,7 @@ prefix_rules = [ matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "status"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }], } @@ -1426,6 +1428,7 @@ prefix_rules = [ matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["hg", "status"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }], } @@ -1509,6 +1512,7 @@ prefix_rules = [] matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["rm".to_string()], decision: Decision::Forbidden, + resolved_program: None, justification: None, }], } @@ -1547,6 +1551,7 @@ prefix_rules = [] matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["rm".to_string()], decision: Decision::Forbidden, + resolved_program: None, justification: None, }], } @@ -1561,6 +1566,7 @@ prefix_rules = [] matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["git".to_string(), "push".to_string()], decision: Decision::Prompt, + resolved_program: None, justification: None, }], } diff --git a/codex-rs/core/src/exec_policy.rs b/codex-rs/core/src/exec_policy.rs index e390996c4e4..b781f0bc7de 100644 --- a/codex-rs/core/src/exec_policy.rs +++ b/codex-rs/core/src/exec_policy.rs @@ -472,17 +472,7 @@ pub async fn load_exec_policy(config_stack: &ConfigLayerStack) -> Result String { + let mut path = if cfg!(windows) { + PathBuf::from(r"C:\") + } else { + PathBuf::from("/") + }; + for segment in segments { + path.push(segment); + } + path.to_string_lossy().into_owned() + } + + fn starlark_string(value: &str) -> String { + value.replace('\\', "\\\\").replace('"', "\\\"") + } + #[tokio::test] async fn returns_empty_policy_when_no_policy_files_exist() { let temp_dir = tempdir().expect("create temp dir"); @@ -949,6 +956,7 @@ mod tests { matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["rm".to_string()], decision: Decision::Forbidden, + resolved_program: None, justification: None, }], }, @@ -991,6 +999,59 @@ mod tests { Ok(()) } + #[tokio::test] + async fn preserves_host_executables_when_requirements_overlay_is_present() -> anyhow::Result<()> + { + let temp_dir = tempdir()?; + let policy_dir = temp_dir.path().join(RULES_DIR_NAME); + fs::create_dir_all(&policy_dir)?; + let git_path = host_absolute_path(&["usr", "bin", "git"]); + let git_path_literal = starlark_string(&git_path); + fs::write( + policy_dir.join("host.rules"), + format!( + r#" +host_executable(name = "git", paths = ["{git_path_literal}"]) +"# + ), + )?; + + let mut requirements_exec_policy = Policy::empty(); + requirements_exec_policy.add_network_rule( + "blocked.example.com", + codex_execpolicy::NetworkRuleProtocol::Https, + Decision::Forbidden, + None, + )?; + + let requirements = ConfigRequirements { + exec_policy: Some(codex_config::Sourced::new( + codex_config::RequirementsExecPolicy::new(requirements_exec_policy), + codex_config::RequirementSource::Unknown, + )), + ..ConfigRequirements::default() + }; + let dot_codex_folder = AbsolutePathBuf::from_absolute_path(temp_dir.path())?; + let layer = ConfigLayerEntry::new( + ConfigLayerSource::Project { dot_codex_folder }, + TomlValue::Table(Default::default()), + ); + let config_stack = + ConfigLayerStack::new(vec![layer], requirements, ConfigRequirementsToml::default())?; + + let policy = load_exec_policy(&config_stack).await?; + + assert_eq!( + policy + .host_executables() + .get("git") + .expect("missing git host executable") + .as_ref(), + [AbsolutePathBuf::try_from(git_path)?] + ); + Ok(()) + } + #[tokio::test] async fn ignores_policies_outside_policy_dir() { let temp_dir = tempdir().expect("create temp dir"); @@ -1106,6 +1167,7 @@ mod tests { matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["rm".to_string()], decision: Decision::Forbidden, + resolved_program: None, justification: None, }], }, @@ -1117,6 +1179,7 @@ mod tests { matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["ls".to_string()], decision: Decision::Prompt, + resolved_program: None, justification: None, }], }, @@ -1983,6 +2046,7 @@ prefix_rule( let matched_rules_prompt = vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["cargo".to_string()], decision: Decision::Prompt, + resolved_program: None, justification: None, }]; assert_eq!( @@ -1996,6 +2060,7 @@ prefix_rule( let matched_rules_allow = vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["cargo".to_string()], decision: Decision::Allow, + resolved_program: None, justification: None, }]; assert_eq!( @@ -2009,6 +2074,7 @@ prefix_rule( let matched_rules_forbidden = vec![RuleMatch::PrefixRuleMatch { matched_prefix: vec!["cargo".to_string()], decision: Decision::Forbidden, + resolved_program: None, justification: None, }]; assert_eq!( diff --git a/codex-rs/execpolicy/Cargo.toml b/codex-rs/execpolicy/Cargo.toml index 890c23faa79..2105ce27d53 100644 --- a/codex-rs/execpolicy/Cargo.toml +++ b/codex-rs/execpolicy/Cargo.toml @@ -19,6 +19,7 @@ workspace = true [dependencies] anyhow = { workspace = true } clap = { workspace = true, features = ["derive"] } +codex-utils-absolute-path = { workspace = true } multimap = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } diff --git a/codex-rs/execpolicy/README.md b/codex-rs/execpolicy/README.md index 30fc57184bb..a57276767a4 100644 --- a/codex-rs/execpolicy/README.md +++ b/codex-rs/execpolicy/README.md @@ -2,8 +2,8 @@ ## Overview -- Policy engine and CLI built around `prefix_rule(pattern=[...], decision?, justification?, match?, not_match?)`. -- This release covers the prefix-rule subset of the execpolicy language; a richer language will follow. +- Policy engine and CLI built around `prefix_rule(pattern=[...], decision?, justification?, match?, not_match?)` plus `host_executable(name=..., paths=[...])`. +- This release covers the prefix-rule subset of the execpolicy language plus host executable metadata; a richer language will follow. - Tokens are matched in order; any `pattern` element may be a list to denote alternatives. `decision` defaults to `allow`; valid values: `allow`, `prompt`, `forbidden`. - `justification` is an optional human-readable rationale for why a rule exists. It can be provided for any `decision` and may be surfaced in different contexts (for example, in approval prompts or rejection messages). When `decision = "forbidden"` is used, include a recommended alternative in the `justification`, when appropriate (e.g., ``"Use `jj` instead of `git`."``). - `match` / `not_match` supply example invocations that are validated at load time (think of them as unit tests); examples can be token arrays or strings (strings are tokenized with `shlex`). @@ -24,6 +24,27 @@ prefix_rule( ) ``` +- Host executable metadata can optionally constrain which absolute paths may + resolve through basename rules: + +```starlark +host_executable( + name = "git", + paths = [ + "/Users/example/.openai/bin/git", + "/opt/homebrew/bin/git", + "/usr/bin/git", + ], +) +``` + +- Matching semantics: + - execpolicy always tries exact first-token matches first. + - With host-executable resolution disabled, `/usr/bin/git status` only matches a rule whose first token is `/usr/bin/git`. + - With host-executable resolution enabled, if no exact rule matches, execpolicy may fall back from `/usr/bin/git` to basename rules for `git`. + - If `host_executable(name="git", ...)` exists, basename fallback is only allowed for listed absolute paths. + - If no `host_executable()` entry exists for a basename, basename fallback is allowed. + ## CLI - From the Codex CLI, run `codex execpolicy check` subcommand with one or more policy files (for example `src/default.rules`) to check a command: @@ -32,6 +53,15 @@ prefix_rule( codex execpolicy check --rules path/to/policy.rules git status ``` +- To opt into basename fallback for absolute program paths, pass `--resolve-host-executables`: + +```bash +codex execpolicy check \ + --rules path/to/policy.rules \ + --resolve-host-executables \ + /usr/bin/git status +``` + - Pass multiple `--rules` flags to merge rules, evaluated in the order provided, and use `--pretty` for formatted JSON. - You can also run the standalone dev binary directly during development: @@ -52,6 +82,7 @@ cargo run -p codex-execpolicy -- check --rules path/to/policy.rules git status "prefixRuleMatch": { "matchedPrefix": ["", "..."], "decision": "allow|prompt|forbidden", + "resolvedProgram": "/absolute/path/to/program", "justification": "..." } } @@ -62,6 +93,7 @@ cargo run -p codex-execpolicy -- check --rules path/to/policy.rules git status - When no rules match, `matchedRules` is an empty array and `decision` is omitted. - `matchedRules` lists every rule whose prefix matched the command; `matchedPrefix` is the exact prefix that matched. +- `resolvedProgram` is omitted unless an absolute executable path matched via basename fallback. - The effective `decision` is the strictest severity across all matches (`forbidden` > `prompt` > `allow`). Note: `execpolicy` commands are still in preview. The API may have breaking changes in the future. diff --git a/codex-rs/execpolicy/src/error.rs b/codex-rs/execpolicy/src/error.rs index 25f9227210d..0f2be753aa6 100644 --- a/codex-rs/execpolicy/src/error.rs +++ b/codex-rs/execpolicy/src/error.rs @@ -38,16 +38,47 @@ pub enum Error { ExampleDidNotMatch { rules: Vec, examples: Vec, + location: Option, }, #[error("expected example to not match rule `{rule}`: {example}")] - ExampleDidMatch { rule: String, example: String }, + ExampleDidMatch { + rule: String, + example: String, + location: Option, + }, #[error("starlark error: {0}")] Starlark(StarlarkError), } impl Error { + pub fn with_location(self, location: ErrorLocation) -> Self { + match self { + Error::ExampleDidNotMatch { + rules, + examples, + location: None, + } => Error::ExampleDidNotMatch { + rules, + examples, + location: Some(location), + }, + Error::ExampleDidMatch { + rule, + example, + location: None, + } => Error::ExampleDidMatch { + rule, + example, + location: Some(location), + }, + other => other, + } + } + pub fn location(&self) -> Option { match self { + Error::ExampleDidNotMatch { location, .. } + | Error::ExampleDidMatch { location, .. } => location.clone(), Error::Starlark(err) => err.span().map(|span| { let resolved = span.resolve_span(); ErrorLocation { diff --git a/codex-rs/execpolicy/src/execpolicycheck.rs b/codex-rs/execpolicy/src/execpolicycheck.rs index f191f4b12e8..1ff8937a5d9 100644 --- a/codex-rs/execpolicy/src/execpolicycheck.rs +++ b/codex-rs/execpolicy/src/execpolicycheck.rs @@ -7,6 +7,7 @@ use clap::Parser; use serde::Serialize; use crate::Decision; +use crate::MatchOptions; use crate::Policy; use crate::PolicyParser; use crate::RuleMatch; @@ -22,6 +23,11 @@ pub struct ExecPolicyCheckCommand { #[arg(long)] pub pretty: bool, + /// Resolve absolute program paths against basename rules, gated by any + /// `host_executable()` definitions in the loaded policy files. + #[arg(long)] + pub resolve_host_executables: bool, + /// Command tokens to check against the policy. #[arg( value_name = "COMMAND", @@ -36,7 +42,13 @@ impl ExecPolicyCheckCommand { /// Load the policies for this command, evaluate the command, and render JSON output. pub fn run(&self) -> Result<()> { let policy = load_policies(&self.rules)?; - let matched_rules = policy.matches_for_command(&self.command, None); + let matched_rules = policy.matches_for_command_with_options( + &self.command, + None, + &MatchOptions { + resolve_host_executables: self.resolve_host_executables, + }, + ); let json = format_matches_json(&matched_rules, self.pretty)?; println!("{json}"); diff --git a/codex-rs/execpolicy/src/executable_name.rs b/codex-rs/execpolicy/src/executable_name.rs new file mode 100644 index 00000000000..bb3e8a536c2 --- /dev/null +++ b/codex-rs/execpolicy/src/executable_name.rs @@ -0,0 +1,29 @@ +use std::path::Path; + +#[cfg(windows)] +const WINDOWS_EXECUTABLE_SUFFIXES: [&str; 4] = [".exe", ".cmd", ".bat", ".com"]; + +pub(crate) fn executable_lookup_key(raw: &str) -> String { + #[cfg(windows)] + { + let raw = raw.to_ascii_lowercase(); + for suffix in WINDOWS_EXECUTABLE_SUFFIXES { + if raw.ends_with(suffix) { + let stripped_len = raw.len() - suffix.len(); + return raw[..stripped_len].to_string(); + } + } + raw + } + + #[cfg(not(windows))] + { + raw.to_string() + } +} + +pub(crate) fn executable_path_lookup_key(path: &Path) -> Option { + path.file_name() + .and_then(|name| name.to_str()) + .map(executable_lookup_key) +} diff --git a/codex-rs/execpolicy/src/lib.rs b/codex-rs/execpolicy/src/lib.rs index a9e9faa56de..8cb89c36e43 100644 --- a/codex-rs/execpolicy/src/lib.rs +++ b/codex-rs/execpolicy/src/lib.rs @@ -2,6 +2,7 @@ pub mod amend; pub mod decision; pub mod error; pub mod execpolicycheck; +mod executable_name; pub mod parser; pub mod policy; pub mod rule; @@ -18,6 +19,7 @@ pub use error::TextRange; pub use execpolicycheck::ExecPolicyCheckCommand; pub use parser::PolicyParser; pub use policy::Evaluation; +pub use policy::MatchOptions; pub use policy::Policy; pub use rule::NetworkRuleProtocol; pub use rule::Rule; diff --git a/codex-rs/execpolicy/src/parser.rs b/codex-rs/execpolicy/src/parser.rs index 8490c4848c7..46764d0e07b 100644 --- a/codex-rs/execpolicy/src/parser.rs +++ b/codex-rs/execpolicy/src/parser.rs @@ -1,6 +1,8 @@ +use codex_utils_absolute_path::AbsolutePathBuf; use multimap::MultiMap; use shlex; use starlark::any::ProvidesStaticType; +use starlark::codemap::FileSpan; use starlark::environment::GlobalsBuilder; use starlark::environment::Module; use starlark::eval::Evaluator; @@ -13,11 +15,18 @@ use starlark::values::list::UnpackList; use starlark::values::none::NoneType; use std::cell::RefCell; use std::cell::RefMut; +use std::collections::HashMap; +use std::path::Path; use std::sync::Arc; use crate::decision::Decision; use crate::error::Error; +use crate::error::ErrorLocation; use crate::error::Result; +use crate::error::TextPosition; +use crate::error::TextRange; +use crate::executable_name::executable_lookup_key; +use crate::executable_name::executable_path_lookup_key; use crate::rule::NetworkRule; use crate::rule::NetworkRuleProtocol; use crate::rule::PatternToken; @@ -47,6 +56,7 @@ impl PolicyParser { /// Parses a policy, tagging parser errors with `policy_identifier` so failures include the /// identifier alongside line numbers. pub fn parse(&mut self, policy_identifier: &str, policy_file_contents: &str) -> Result<()> { + let pending_validation_count = self.builder.borrow().pending_example_validations.len(); let mut dialect = Dialect::Extended.clone(); dialect.enable_f_strings = true; let ast = AstModule::parse( @@ -62,6 +72,9 @@ impl PolicyParser { eval.extra = Some(&self.builder); eval.eval_module(ast, &globals).map_err(Error::Starlark)?; } + self.builder + .borrow() + .validate_pending_examples_from(pending_validation_count)?; Ok(()) } @@ -74,6 +87,8 @@ impl PolicyParser { struct PolicyBuilder { rules_by_program: MultiMap, network_rules: Vec, + host_executables_by_name: HashMap>, + pending_example_validations: Vec, } impl PolicyBuilder { @@ -81,6 +96,8 @@ impl PolicyBuilder { Self { rules_by_program: MultiMap::new(), network_rules: Vec::new(), + host_executables_by_name: HashMap::new(), + pending_example_validations: Vec::new(), } } @@ -93,11 +110,64 @@ impl PolicyBuilder { self.network_rules.push(rule); } + fn add_host_executable(&mut self, name: String, paths: Vec) { + self.host_executables_by_name.insert(name, paths.into()); + } + + fn add_pending_example_validation( + &mut self, + rules: Vec, + matches: Vec>, + not_matches: Vec>, + location: Option, + ) { + self.pending_example_validations + .push(PendingExampleValidation { + rules, + matches, + not_matches, + location, + }); + } + + fn validate_pending_examples_from(&self, start: usize) -> Result<()> { + for validation in &self.pending_example_validations[start..] { + let mut rules_by_program = MultiMap::new(); + for rule in &validation.rules { + rules_by_program.insert(rule.program().to_string(), rule.clone()); + } + + let policy = crate::policy::Policy::from_parts( + rules_by_program, + Vec::new(), + self.host_executables_by_name.clone(), + ); + validate_not_match_examples(&policy, &validation.rules, &validation.not_matches) + .map_err(|error| attach_validation_location(error, validation.location.clone()))?; + validate_match_examples(&policy, &validation.rules, &validation.matches) + .map_err(|error| attach_validation_location(error, validation.location.clone()))?; + } + + Ok(()) + } + fn build(self) -> crate::policy::Policy { - crate::policy::Policy::from_parts(self.rules_by_program, self.network_rules) + crate::policy::Policy::from_parts( + self.rules_by_program, + self.network_rules, + self.host_executables_by_name, + ) } } +#[derive(Debug)] +struct PendingExampleValidation { + rules: Vec, + matches: Vec>, + not_matches: Vec>, + location: Option, +} + fn parse_pattern<'v>(pattern: UnpackList>) -> Result> { let tokens: Vec = pattern .items @@ -150,6 +220,36 @@ fn parse_examples<'v>(examples: UnpackList>) -> Result examples.items.into_iter().map(parse_example).collect() } +fn parse_literal_absolute_path(raw: &str) -> Result { + if !Path::new(raw).is_absolute() { + return Err(Error::InvalidRule(format!( + "host_executable paths must be absolute (got {raw})" + ))); + } + + AbsolutePathBuf::try_from(raw.to_string()) + .map_err(|error| Error::InvalidRule(format!("invalid absolute path `{raw}`: {error}"))) +} + +fn validate_host_executable_name(name: &str) -> Result<()> { + if name.is_empty() { + return Err(Error::InvalidRule( + "host_executable name cannot be empty".to_string(), + )); + } + + let path = Path::new(name); + if path.components().count() != 1 + || path.file_name().and_then(|value| value.to_str()) != Some(name) + { + return Err(Error::InvalidRule(format!( + "host_executable name must be a bare executable name (got {name})" + ))); + } + + Ok(()) +} + fn parse_network_rule_decision(raw: &str) -> Result { match raw { "deny" => Ok(Decision::Forbidden), @@ -157,6 +257,30 @@ fn parse_network_rule_decision(raw: &str) -> Result { } } +fn error_location_from_file_span(span: FileSpan) -> ErrorLocation { + let resolved = span.resolve_span(); + ErrorLocation { + path: span.filename().to_string(), + range: TextRange { + start: TextPosition { + line: resolved.begin.line + 1, + column: resolved.begin.column + 1, + }, + end: TextPosition { + line: resolved.end.line + 1, + column: resolved.end.column + 1, + }, + }, + } +} + +fn attach_validation_location(error: Error, location: Option) -> Error { + match location { + Some(location) => error.with_location(location), + None => error, + } +} + fn parse_example<'v>(value: Value<'v>) -> Result> { if let Some(raw) = value.unpack_str() { parse_string_example(raw) @@ -251,6 +375,9 @@ fn policy_builtins(builder: &mut GlobalsBuilder) { .map(parse_examples) .transpose()? .unwrap_or_default(); + let location = eval + .call_stack_top_location() + .map(error_location_from_file_span); let mut builder = policy_builder(eval); @@ -275,9 +402,7 @@ fn policy_builtins(builder: &mut GlobalsBuilder) { }) .collect(); - validate_not_match_examples(&rules, ¬_matches)?; - validate_match_examples(&rules, &matches)?; - + builder.add_pending_example_validation(rules.clone(), matches, not_matches, location); rules.into_iter().for_each(|rule| builder.add_rule(rule)); Ok(NoneType) } @@ -308,4 +433,41 @@ fn policy_builtins(builder: &mut GlobalsBuilder) { }); Ok(NoneType) } + + fn host_executable<'v>( + name: &'v str, + paths: UnpackList>, + eval: &mut Evaluator<'v, '_, '_>, + ) -> anyhow::Result { + validate_host_executable_name(name)?; + + let mut parsed_paths = Vec::new(); + for value in paths.items { + let raw = value.unpack_str().ok_or_else(|| { + Error::InvalidRule(format!( + "host_executable paths must be strings (got {})", + value.get_type() + )) + })?; + let path = parse_literal_absolute_path(raw)?; + let Some(path_name) = executable_path_lookup_key(path.as_path()) else { + return Err(Error::InvalidRule(format!( + "host_executable path `{raw}` must have basename `{name}`" + )) + .into()); + }; + if path_name != executable_lookup_key(name) { + return Err(Error::InvalidRule(format!( + "host_executable path `{raw}` must have basename `{name}`" + )) + .into()); + } + if !parsed_paths.iter().any(|existing| existing == &path) { + parsed_paths.push(path); + } + } + + policy_builder(eval).add_host_executable(executable_lookup_key(name), parsed_paths); + Ok(NoneType) + } } diff --git a/codex-rs/execpolicy/src/policy.rs b/codex-rs/execpolicy/src/policy.rs index faa74cc517c..3102264e391 100644 --- a/codex-rs/execpolicy/src/policy.rs +++ b/codex-rs/execpolicy/src/policy.rs @@ -1,6 +1,7 @@ use crate::decision::Decision; use crate::error::Error; use crate::error::Result; +use crate::executable_name::executable_path_lookup_key; use crate::rule::NetworkRule; use crate::rule::NetworkRuleProtocol; use crate::rule::PatternToken; @@ -9,31 +10,41 @@ use crate::rule::PrefixRule; use crate::rule::RuleMatch; use crate::rule::RuleRef; use crate::rule::normalize_network_rule_host; +use codex_utils_absolute_path::AbsolutePathBuf; use multimap::MultiMap; use serde::Deserialize; use serde::Serialize; +use std::collections::HashMap; use std::sync::Arc; type HeuristicsFallback<'a> = Option<&'a dyn Fn(&[String]) -> Decision>; +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct MatchOptions { + pub resolve_host_executables: bool, +} + #[derive(Clone, Debug)] pub struct Policy { rules_by_program: MultiMap, network_rules: Vec, + host_executables_by_name: HashMap>, } impl Policy { pub fn new(rules_by_program: MultiMap) -> Self { - Self::from_parts(rules_by_program, Vec::new()) + Self::from_parts(rules_by_program, Vec::new(), HashMap::new()) } pub fn from_parts( rules_by_program: MultiMap, network_rules: Vec, + host_executables_by_name: HashMap>, ) -> Self { Self { rules_by_program, network_rules, + host_executables_by_name, } } @@ -49,6 +60,10 @@ impl Policy { &self.network_rules } + pub fn host_executables(&self) -> &HashMap> { + &self.host_executables_by_name + } + pub fn get_allowed_prefixes(&self) -> Vec> { let mut prefixes = Vec::new(); @@ -119,6 +134,36 @@ impl Policy { Ok(()) } + pub fn set_host_executable_paths(&mut self, name: String, paths: Vec) { + self.host_executables_by_name.insert(name, paths.into()); + } + + pub fn merge_overlay(&self, overlay: &Policy) -> Policy { + let mut combined_rules = self.rules_by_program.clone(); + for (program, rules) in overlay.rules_by_program.iter_all() { + for rule in rules { + combined_rules.insert(program.clone(), rule.clone()); + } + } + + let mut combined_network_rules = self.network_rules.clone(); + combined_network_rules.extend(overlay.network_rules.iter().cloned()); + + let mut host_executables_by_name = self.host_executables_by_name.clone(); + host_executables_by_name.extend( + overlay + .host_executables_by_name + .iter() + .map(|(name, paths)| (name.clone(), paths.clone())), + ); + + Policy::from_parts( + combined_rules, + combined_network_rules, + host_executables_by_name, + ) + } + pub fn compiled_network_domains(&self) -> (Vec, Vec) { let mut allowed = Vec::new(); let mut denied = Vec::new(); @@ -144,7 +189,25 @@ impl Policy { where F: Fn(&[String]) -> Decision, { - let matched_rules = self.matches_for_command(cmd, Some(heuristics_fallback)); + let matched_rules = self.matches_for_command_with_options( + cmd, + Some(heuristics_fallback), + &MatchOptions::default(), + ); + Evaluation::from_matches(matched_rules) + } + + pub fn check_with_options( + &self, + cmd: &[String], + heuristics_fallback: &F, + options: &MatchOptions, + ) -> Evaluation + where + F: Fn(&[String]) -> Decision, + { + let matched_rules = + self.matches_for_command_with_options(cmd, Some(heuristics_fallback), options); Evaluation::from_matches(matched_rules) } @@ -154,6 +217,20 @@ impl Policy { commands: Commands, heuristics_fallback: &F, ) -> Evaluation + where + Commands: IntoIterator, + Commands::Item: AsRef<[String]>, + F: Fn(&[String]) -> Decision, + { + self.check_multiple_with_options(commands, heuristics_fallback, &MatchOptions::default()) + } + + pub fn check_multiple_with_options( + &self, + commands: Commands, + heuristics_fallback: &F, + options: &MatchOptions, + ) -> Evaluation where Commands: IntoIterator, Commands::Item: AsRef<[String]>, @@ -162,7 +239,11 @@ impl Policy { let matched_rules: Vec = commands .into_iter() .flat_map(|command| { - self.matches_for_command(command.as_ref(), Some(heuristics_fallback)) + self.matches_for_command_with_options( + command.as_ref(), + Some(heuristics_fallback), + options, + ) }) .collect(); @@ -181,14 +262,25 @@ impl Policy { cmd: &[String], heuristics_fallback: HeuristicsFallback<'_>, ) -> Vec { - let matched_rules: Vec = match cmd.first() { - Some(first) => self - .rules_by_program - .get_vec(first) - .map(|rules| rules.iter().filter_map(|rule| rule.matches(cmd)).collect()) - .unwrap_or_default(), - None => Vec::new(), - }; + self.matches_for_command_with_options(cmd, heuristics_fallback, &MatchOptions::default()) + } + + pub fn matches_for_command_with_options( + &self, + cmd: &[String], + heuristics_fallback: HeuristicsFallback<'_>, + options: &MatchOptions, + ) -> Vec { + let matched_rules = self + .match_exact_rules(cmd) + .filter(|matched_rules| !matched_rules.is_empty()) + .or_else(|| { + options + .resolve_host_executables + .then(|| self.match_host_executable_rules(cmd)) + .filter(|matched_rules| !matched_rules.is_empty()) + }) + .unwrap_or_default(); if matched_rules.is_empty() && let Some(heuristics_fallback) = heuristics_fallback @@ -201,6 +293,45 @@ impl Policy { matched_rules } } + + fn match_exact_rules(&self, cmd: &[String]) -> Option> { + let first = cmd.first()?; + Some( + self.rules_by_program + .get_vec(first) + .map(|rules| rules.iter().filter_map(|rule| rule.matches(cmd)).collect()) + .unwrap_or_default(), + ) + } + + fn match_host_executable_rules(&self, cmd: &[String]) -> Vec { + let Some(first) = cmd.first() else { + return Vec::new(); + }; + let Ok(program) = AbsolutePathBuf::try_from(first.clone()) else { + return Vec::new(); + }; + let Some(basename) = executable_path_lookup_key(program.as_path()) else { + return Vec::new(); + }; + let Some(rules) = self.rules_by_program.get_vec(&basename) else { + return Vec::new(); + }; + if let Some(paths) = self.host_executables_by_name.get(&basename) + && !paths.iter().any(|path| path == &program) + { + return Vec::new(); + } + + let basename_command = std::iter::once(basename) + .chain(cmd.iter().skip(1).cloned()) + .collect::>(); + rules + .iter() + .filter_map(|rule| rule.matches(&basename_command)) + .map(|rule_match| rule_match.with_resolved_program(&program)) + .collect() + } } fn upsert_domain(entries: &mut Vec, host: &str) { diff --git a/codex-rs/execpolicy/src/rule.rs b/codex-rs/execpolicy/src/rule.rs index 0d8bfc243a9..15401ec878d 100644 --- a/codex-rs/execpolicy/src/rule.rs +++ b/codex-rs/execpolicy/src/rule.rs @@ -1,6 +1,9 @@ use crate::decision::Decision; use crate::error::Error; use crate::error::Result; +use crate::policy::MatchOptions; +use crate::policy::Policy; +use codex_utils_absolute_path::AbsolutePathBuf; use serde::Deserialize; use serde::Serialize; use shlex::try_join; @@ -63,6 +66,8 @@ pub enum RuleMatch { #[serde(rename = "matchedPrefix")] matched_prefix: Vec, decision: Decision, + #[serde(rename = "resolvedProgram", skip_serializing_if = "Option::is_none")] + resolved_program: Option, /// Optional rationale for why this rule exists. /// /// This can be supplied for any decision and may be surfaced in different contexts @@ -83,6 +88,23 @@ impl RuleMatch { Self::HeuristicsRuleMatch { decision, .. } => *decision, } } + + pub fn with_resolved_program(self, resolved_program: &AbsolutePathBuf) -> Self { + match self { + Self::PrefixRuleMatch { + matched_prefix, + decision, + justification, + .. + } => Self::PrefixRuleMatch { + matched_prefix, + decision, + resolved_program: Some(resolved_program.clone()), + justification, + }, + other => other, + } + } } #[derive(Clone, Debug, Eq, PartialEq)] @@ -210,6 +232,7 @@ impl Rule for PrefixRule { .map(|matched_prefix| RuleMatch::PrefixRuleMatch { matched_prefix, decision: self.decision, + resolved_program: None, justification: self.justification.clone(), }) } @@ -220,11 +243,21 @@ impl Rule for PrefixRule { } /// Count how many rules match each provided example and error if any example is unmatched. -pub(crate) fn validate_match_examples(rules: &[RuleRef], matches: &[Vec]) -> Result<()> { +pub(crate) fn validate_match_examples( + policy: &Policy, + rules: &[RuleRef], + matches: &[Vec], +) -> Result<()> { let mut unmatched_examples = Vec::new(); + let options = MatchOptions { + resolve_host_executables: true, + }; for example in matches { - if rules.iter().any(|rule| rule.matches(example).is_some()) { + if !policy + .matches_for_command_with_options(example, None, &options) + .is_empty() + { continue; } @@ -240,21 +273,31 @@ pub(crate) fn validate_match_examples(rules: &[RuleRef], matches: &[Vec] Err(Error::ExampleDidNotMatch { rules: rules.iter().map(|rule| format!("{rule:?}")).collect(), examples: unmatched_examples, + location: None, }) } } /// Ensure that no rule matches any provided negative example. pub(crate) fn validate_not_match_examples( - rules: &[RuleRef], + policy: &Policy, + _rules: &[RuleRef], not_matches: &[Vec], ) -> Result<()> { + let options = MatchOptions { + resolve_host_executables: true, + }; + for example in not_matches { - if let Some(rule) = rules.iter().find(|rule| rule.matches(example).is_some()) { + if let Some(rule) = policy + .matches_for_command_with_options(example, None, &options) + .first() + { return Err(Error::ExampleDidMatch { rule: format!("{rule:?}"), example: try_join(example.iter().map(String::as_str)) .unwrap_or_else(|_| "unable to render example".to_string()), + location: None, }); } } diff --git a/codex-rs/execpolicy/tests/basic.rs b/codex-rs/execpolicy/tests/basic.rs index eec7cec651a..fa0e01d6d75 100644 --- a/codex-rs/execpolicy/tests/basic.rs +++ b/codex-rs/execpolicy/tests/basic.rs @@ -1,5 +1,6 @@ use std::any::Any; use std::fs; +use std::path::PathBuf; use std::sync::Arc; use anyhow::Context; @@ -7,6 +8,7 @@ use anyhow::Result; use codex_execpolicy::Decision; use codex_execpolicy::Error; use codex_execpolicy::Evaluation; +use codex_execpolicy::MatchOptions; use codex_execpolicy::NetworkRuleProtocol; use codex_execpolicy::Policy; use codex_execpolicy::PolicyParser; @@ -16,6 +18,7 @@ use codex_execpolicy::blocking_append_allow_prefix_rule; use codex_execpolicy::rule::PatternToken; use codex_execpolicy::rule::PrefixPattern; use codex_execpolicy::rule::PrefixRule; +use codex_utils_absolute_path::AbsolutePathBuf; use pretty_assertions::assert_eq; use tempfile::tempdir; @@ -31,6 +34,35 @@ fn prompt_all(_: &[String]) -> Decision { Decision::Prompt } +fn absolute_path(path: &str) -> AbsolutePathBuf { + AbsolutePathBuf::try_from(path.to_string()) + .unwrap_or_else(|error| panic!("expected absolute path `{path}`: {error}")) +} + +fn host_absolute_path(segments: &[&str]) -> String { + let mut path = if cfg!(windows) { + PathBuf::from(r"C:\") + } else { + PathBuf::from("/") + }; + for segment in segments { + path.push(segment); + } + path.to_string_lossy().into_owned() +} + +fn host_executable_name(name: &str) -> String { + if cfg!(windows) { + format!("{name}.exe") + } else { + name.to_string() + } +} + +fn starlark_string(value: &str) -> String { + value.replace('\\', "\\\\").replace('"', "\\\"") +} + #[derive(Clone, Debug, Eq, PartialEq)] enum RuleSnapshot { Prefix(PrefixRule), @@ -125,6 +157,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "status"]), decision: Decision::Allow, + resolved_program: None, justification: None, }], }, @@ -156,6 +189,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["rm"]), decision: Decision::Forbidden, + resolved_program: None, justification: Some("destructive command".to_string()), }], }, @@ -184,6 +218,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["ls"]), decision: Decision::Allow, + resolved_program: None, justification: Some("safe and commonly used".to_string()), }], }, @@ -236,6 +271,7 @@ fn add_prefix_rule_extends_policy() -> Result<()> { matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["ls", "-l"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }], }, @@ -305,6 +341,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }], }, @@ -319,11 +356,13 @@ prefix_rule( RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "commit"]), decision: Decision::Forbidden, + resolved_program: None, justification: None, }, ], @@ -381,6 +420,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["bash", "-c"]), decision: Decision::Allow, + resolved_program: None, justification: None, }], }, @@ -394,6 +434,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["sh", "-l"]), decision: Decision::Allow, + resolved_program: None, justification: None, }], }, @@ -440,6 +481,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["npm", "i", "--legacy-peer-deps"]), decision: Decision::Allow, + resolved_program: None, justification: None, }], }, @@ -456,6 +498,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["npm", "install", "--no-save"]), decision: Decision::Allow, + resolved_program: None, justification: None, }], }, @@ -486,6 +529,7 @@ prefix_rule( matched_rules: vec![RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "status"]), decision: Decision::Allow, + resolved_program: None, justification: None, }], }, @@ -533,11 +577,13 @@ prefix_rule( RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "commit"]), decision: Decision::Forbidden, + resolved_program: None, justification: None, }, ], @@ -576,16 +622,19 @@ prefix_rule( RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git"]), decision: Decision::Prompt, + resolved_program: None, justification: None, }, RuleMatch::PrefixRuleMatch { matched_prefix: tokens(&["git", "commit"]), decision: Decision::Forbidden, + resolved_program: None, justification: None, }, ], @@ -612,3 +661,303 @@ fn heuristics_match_is_returned_when_no_policy_matches() { evaluation ); } + +#[test] +fn parses_host_executable_paths() -> Result<()> { + let homebrew_git = host_absolute_path(&["opt", "homebrew", "bin", "git"]); + let usr_git = host_absolute_path(&["usr", "bin", "git"]); + let homebrew_git_literal = starlark_string(&homebrew_git); + let usr_git_literal = starlark_string(&usr_git); + let policy_src = format!( + r#" +host_executable( + name = "git", + paths = [ + "{homebrew_git_literal}", + "{usr_git_literal}", + "{usr_git_literal}", + ], +) +"# + ); + let mut parser = PolicyParser::new(); + parser.parse("test.rules", &policy_src)?; + let policy = parser.build(); + + assert_eq!( + policy + .host_executables() + .get("git") + .expect("missing git host executable") + .as_ref(), + [absolute_path(&homebrew_git), absolute_path(&usr_git)] + ); + Ok(()) +} + +#[test] +fn host_executable_rejects_non_absolute_path() { + let policy_src = r#" +host_executable(name = "git", paths = ["git"]) +"#; + let mut parser = PolicyParser::new(); + let err = parser + .parse("test.rules", policy_src) + .expect_err("expected parse error"); + assert!( + err.to_string() + .contains("host_executable paths must be absolute") + ); +} + +#[test] +fn host_executable_rejects_name_with_path_separator() { + let git_path = host_absolute_path(&["usr", "bin", "git"]); + let git_path_literal = starlark_string(&git_path); + let policy_src = + format!(r#"host_executable(name = "{git_path_literal}", paths = ["{git_path_literal}"])"#); + let mut parser = PolicyParser::new(); + let err = parser + .parse("test.rules", &policy_src) + .expect_err("expected parse error"); + assert!( + err.to_string() + .contains("host_executable name must be a bare executable name") + ); +} + +#[test] +fn host_executable_rejects_path_with_wrong_basename() { + let rg_path = host_absolute_path(&["usr", "bin", "rg"]); + let rg_path_literal = starlark_string(&rg_path); + let policy_src = format!(r#"host_executable(name = "git", paths = ["{rg_path_literal}"])"#); + let mut parser = PolicyParser::new(); + let err = parser + .parse("test.rules", &policy_src) + .expect_err("expected parse error"); + assert!(err.to_string().contains("must have basename `git`")); +} + +#[test] +fn host_executable_last_definition_wins() -> Result<()> { + let usr_git = host_absolute_path(&["usr", "bin", "git"]); + let homebrew_git = host_absolute_path(&["opt", "homebrew", "bin", "git"]); + let usr_git_literal = starlark_string(&usr_git); + let homebrew_git_literal = starlark_string(&homebrew_git); + let mut parser = PolicyParser::new(); + parser.parse( + "shared.rules", + &format!(r#"host_executable(name = "git", paths = ["{usr_git_literal}"])"#), + )?; + parser.parse( + "user.rules", + &format!(r#"host_executable(name = "git", paths = ["{homebrew_git_literal}"])"#), + )?; + let policy = parser.build(); + + assert_eq!( + policy + .host_executables() + .get("git") + .expect("missing git host executable") + .as_ref(), + [absolute_path(&homebrew_git)] + ); + Ok(()) +} + +#[test] +fn host_executable_resolution_uses_basename_rule_when_allowed() -> Result<()> { + let git_name = host_executable_name("git"); + let git_path = host_absolute_path(&["usr", "bin", &git_name]); + let git_path_literal = starlark_string(&git_path); + let policy_src = format!( + r#" +prefix_rule(pattern = ["git", "status"], decision = "prompt") +host_executable(name = "git", paths = ["{git_path_literal}"]) +"# + ); + let mut parser = PolicyParser::new(); + parser.parse("test.rules", &policy_src)?; + let policy = parser.build(); + + let evaluation = policy.check_with_options( + &[git_path.clone(), "status".to_string()], + &allow_all, + &MatchOptions { + resolve_host_executables: true, + }, + ); + assert_eq!( + evaluation, + Evaluation { + decision: Decision::Prompt, + matched_rules: vec![RuleMatch::PrefixRuleMatch { + matched_prefix: tokens(&["git", "status"]), + decision: Decision::Prompt, + resolved_program: Some(absolute_path(&git_path)), + justification: None, + }], + } + ); + Ok(()) +} + +#[test] +fn prefix_rule_examples_honor_host_executable_resolution() -> Result<()> { + let allowed_git_name = host_executable_name("git"); + let allowed_git = host_absolute_path(&["usr", "bin", &allowed_git_name]); + let other_git = host_absolute_path(&["opt", "homebrew", "bin", &allowed_git_name]); + let allowed_git_literal = starlark_string(&allowed_git); + let other_git_literal = starlark_string(&other_git); + let policy_src = format!( + r#" +prefix_rule( + pattern = ["git", "status"], + match = ["{allowed_git_literal} status"], + not_match = ["{other_git_literal} status"], +) +host_executable(name = "git", paths = ["{allowed_git_literal}"]) +"# + ); + + let mut parser = PolicyParser::new(); + parser.parse("test.rules", &policy_src)?; + + Ok(()) +} + +#[test] +fn host_executable_resolution_respects_explicit_empty_allowlist() -> Result<()> { + let policy_src = r#" +prefix_rule(pattern = ["git"], decision = "prompt") +host_executable(name = "git", paths = []) +"#; + let mut parser = PolicyParser::new(); + parser.parse("test.rules", policy_src)?; + let policy = parser.build(); + let git_path = host_absolute_path(&["usr", "bin", "git"]); + + let evaluation = policy.check_with_options( + &[git_path.clone(), "status".to_string()], + &allow_all, + &MatchOptions { + resolve_host_executables: true, + }, + ); + assert_eq!( + evaluation, + Evaluation { + decision: Decision::Allow, + matched_rules: vec![RuleMatch::HeuristicsRuleMatch { + command: vec![git_path, "status".to_string()], + decision: Decision::Allow, + }], + } + ); + Ok(()) +} + +#[test] +fn host_executable_resolution_ignores_path_not_in_allowlist() -> Result<()> { + let allowed_git = host_absolute_path(&["usr", "bin", "git"]); + let other_git = host_absolute_path(&["opt", "homebrew", "bin", "git"]); + let allowed_git_literal = starlark_string(&allowed_git); + let policy_src = format!( + r#" +prefix_rule(pattern = ["git"], decision = "prompt") +host_executable(name = "git", paths = ["{allowed_git_literal}"]) +"# + ); + let mut parser = PolicyParser::new(); + parser.parse("test.rules", &policy_src)?; + let policy = parser.build(); + + let evaluation = policy.check_with_options( + &[other_git.clone(), "status".to_string()], + &allow_all, + &MatchOptions { + resolve_host_executables: true, + }, + ); + assert_eq!( + evaluation, + Evaluation { + decision: Decision::Allow, + matched_rules: vec![RuleMatch::HeuristicsRuleMatch { + command: vec![other_git, "status".to_string()], + decision: Decision::Allow, + }], + } + ); + Ok(()) +} + +#[test] +fn host_executable_resolution_falls_back_without_mapping() -> Result<()> { + let policy_src = r#" +prefix_rule(pattern = ["git"], decision = "prompt") +"#; + let mut parser = PolicyParser::new(); + parser.parse("test.rules", policy_src)?; + let policy = parser.build(); + let git_path = host_absolute_path(&["usr", "bin", "git"]); + + let evaluation = policy.check_with_options( + &[git_path.clone(), "status".to_string()], + &allow_all, + &MatchOptions { + resolve_host_executables: true, + }, + ); + assert_eq!( + evaluation, + Evaluation { + decision: Decision::Prompt, + matched_rules: vec![RuleMatch::PrefixRuleMatch { + matched_prefix: tokens(&["git"]), + decision: Decision::Prompt, + resolved_program: Some(absolute_path(&git_path)), + justification: None, + }], + } + ); + Ok(()) +} + +#[test] +fn host_executable_resolution_does_not_override_exact_match() -> Result<()> { + let git_path = host_absolute_path(&["usr", "bin", "git"]); + let git_path_literal = starlark_string(&git_path); + let policy_src = format!( + r#" +prefix_rule(pattern = ["{git_path_literal}"], decision = "allow") +prefix_rule(pattern = ["git"], decision = "prompt") +host_executable(name = "git", paths = ["{git_path_literal}"]) +"# + ); + let mut parser = PolicyParser::new(); + parser.parse("test.rules", &policy_src)?; + let policy = parser.build(); + + let evaluation = policy.check_with_options( + &[git_path.clone(), "status".to_string()], + &allow_all, + &MatchOptions { + resolve_host_executables: true, + }, + ); + assert_eq!( + evaluation, + Evaluation { + decision: Decision::Allow, + matched_rules: vec![RuleMatch::PrefixRuleMatch { + matched_prefix: vec![git_path], + decision: Decision::Allow, + resolved_program: None, + justification: None, + }], + } + ); + Ok(()) +}