diff --git a/deps/tla2tools.jar/Makefile b/deps/tla2tools.jar/Makefile new file mode 100644 index 00000000..76a80f1f --- /dev/null +++ b/deps/tla2tools.jar/Makefile @@ -0,0 +1,11 @@ +TLA_TOOLS_JAR_URL=https://github.com/tlaplus/tlaplus/releases/download/v1.8.0/tla2tools.jar + +tla2tools.jar: + wget --progress=dot:giga $(TLA_TOOLS_JAR_URL) + +clean: + rm tla2tools.jar + +sany: tla2tools.jar + +.PHONY: sany clean diff --git a/deps/tla2tools.jar/dune b/deps/tla2tools.jar/dune new file mode 100644 index 00000000..1a580bfc --- /dev/null +++ b/deps/tla2tools.jar/dune @@ -0,0 +1,9 @@ +; Download SANY +(rule + (deps "Makefile") + (targets tla2tools.jar) + (action (run "make" "-C" "." "sany"))) + +(install + (section (site (tlapm backends))) + (files (tla2tools.jar as bin/tla2tools.jar))) diff --git a/dune-project b/dune-project index 63304037..7ce0e2f0 100644 --- a/dune-project +++ b/dune-project @@ -51,6 +51,7 @@ ppx_inline_test ppx_assert ppx_deriving + xmlm ounit2) (depopts lsp ; https://github.com/ocaml/ocaml-lsp diff --git a/src/dune b/src/dune index 25ab6ca9..a52f191d 100644 --- a/src/dune +++ b/src/dune @@ -22,6 +22,7 @@ dune-site dune-build-info camlzip ; main deps. + xmlm sexplib) ; for inline tests only (ppx_assert). (foreign_stubs diff --git a/src/params.ml b/src/params.ml index 0894bdea..677f7fb9 100644 --- a/src/params.ml +++ b/src/params.ml @@ -56,6 +56,11 @@ let prefer_stdlib = ref false (* If set to true, the TLAPM will prefer the modules from the STDLIB instead of modules with the same names in the search path. *) +type parser = | Tlapm | Sany +let parser_backend = ref Tlapm + +let module_jar_paths = ref [] + +let noproving = ref false (* Don't send any obligation to the back-ends. 
*) let printallobs = ref false diff --git a/src/params.mli b/src/params.mli index adf1ed3e..ac8344ea 100644 --- a/src/params.mli +++ b/src/params.mli @@ -7,6 +7,9 @@ val toolbox: bool ref val toolbox_vsn: int ref val use_stdin: bool ref val prefer_stdlib: bool ref +type parser = | Tlapm | Sany +val parser_backend: parser ref +val module_jar_paths : string list ref (* expr/fmt.ml *) val debugging: string -> bool diff --git a/src/paths.ml b/src/paths.ml index 7d03a34c..a48fa1e6 100644 --- a/src/paths.ml +++ b/src/paths.ml @@ -15,15 +15,25 @@ let backend_paths = let stdlib_paths = site_paths Setup_paths.Sites.stdlib [ "lib"; "tlapm"; "stdlib" ] +let backend_path_elems = + let site_bin bs = Filename.concat bs "bin" in + let site_isa bs = List.fold_left Filename.concat bs [ "Isabelle"; "bin" ] in + let site_paths bs = [ site_bin bs; site_isa bs ] in + List.concat (List.map site_paths backend_paths) + (** If the backends site is not available ([]), then look for executables in the PATH, otherwise we are in the dune-based build and should look for the backends in the specified site locations. 
*) let backend_path_string = - let site_bin bs = Filename.concat bs "bin" in - let site_isa bs = List.fold_left Filename.concat bs [ "Isabelle"; "bin" ] in - let site_paths bs = [ site_bin bs; site_isa bs ] in - let path_elems = List.concat (List.map site_paths backend_paths) in - Printf.sprintf "%s:%s" (String.concat ":" path_elems) (Sys.getenv "PATH") + let paths = String.concat ":" backend_path_elems in + try Printf.sprintf "%s:%s" paths (Sys.getenv "PATH") + with Not_found -> paths + + +let backend_classpath_string jar_file = + let classpath = List.map (fun path -> Filename.concat path jar_file) backend_path_elems |> String.concat ":" in + try Printf.sprintf "%s:%s" classpath (Sys.getenv "CLASSPATH") + with Not_found -> classpath let find_path_containing paths file = let find_actual path = Sys.file_exists (Filename.concat path file) in diff --git a/src/paths.mli b/src/paths.mli index c3cdab3a..54cc59f2 100644 --- a/src/paths.mli +++ b/src/paths.mli @@ -1,4 +1,5 @@ val backend_path_string : string +val backend_classpath_string : string -> string val backend_paths : string list val stdlib_paths : string list val find_path_containing : string list -> string -> string option diff --git a/src/sany/sany.ml b/src/sany/sany.ml new file mode 100644 index 00000000..dd6f70d5 --- /dev/null +++ b/src/sany/sany.ml @@ -0,0 +1,1410 @@ +(** This module converts SANY's abstract syntax tree (AST) into TLAPM's AST. + The two trees are quite different. SANY makes great use of globally unique + identifiers to reference one entity from another; for example, when the + symbol "x" is declared in a module, it is given a unique integer ID, and + all subsequent references to "x" use that ID. TLAPM, in contrast, uses De + Bruijn indices to represent variable references and has no equivalent to + a global symbol table. 
TLAPM also makes greater use of variants, as would + be expected in OCaml code; SANY is written in Java, so has a greater focus + on abstracting different AST nodes into a single AST node type. Nowhere is + this more apparent than in SANY's OpApplNode type, which is used for + everything from simple expressions like 1 + 3 to complex constructs like + \A x, y, z \in S : P. + + Much of the challenge of this module, in addition to the sheer number of + TLA+ syntax node types it has to convert, is the difficulty in mapping + the information in each SANY AST node to the fields expected in each + TLAPM node type. Often, TLAPM fields have no obvious use at this point in + the parsing process; they are clearly set up to be used later on during + proof elaboration or level-checking. TLAPM also wraps many AST nodes with + a generic key-value map used to store all kinds of things, most + prominently location & level information. SANY does not have an + equivalent, and prefers to store such information directly as fields in + each AST node. Internally, SANY actually has two different AST formats: + a very low-level one with close conformance to TLA+ syntax, and a more + abstract which is presented to us here. Thus the SANY AST has already + been processed significantly, and we are translating it to a form that is + comparatively much rougher & earlier in the parse process. + + There are two places in this conversion code where we revert to actually + just parsing the underlying "raw" TLA+ syntax from SANY's AST: proof names + and references to named instanced modules. We need to parse proof names + because we have to extract the proof level, which TLAPM expects as some + metadata attached to proof objects. SANY could possibly be modified to + export proof level data with its XML, but for now we just extract the + level from the proof name. 
We need to parse references to named instanced + modules because if a module is imported with M == INSTANCE Modname, all + definitions from Modname will be inlined in the importing module while + prefixed like M!Defname. This is mostly fine because TLAPM runs its own + De Bruijn-index-based name resolution algorithm, except for the case where + Defname == OtherDefname. Then when analyzing M!Defname, TLAPM will search + for OtherDefname instead of M!OtherDefname. Thus we need to "undo" the + SANY name resolution process further by filtering out all inlined operators + and breaking references like M!Defname down into [M; Defname] components. + We crudely do this by splitting on !, making allowances for the possibility + that the !! operator is used. + + Given these challenges, much SANY information such as identifier reference + IDs and levels are attached as metadata to TLAPM AST nodes for use later + on: not as the basis for final calculations, but rather to cross-check + them with TLAPM's own internal calculations. In particular, the difference + between UIDs and De Bruijn indices is so large that it is not feasible to + directly translate the logic without significant risk of introducing bugs. + Instead, the conversion to De Bruijn indices is modified to check that the + calculation matches the original UID-based reference, and error if not. + While this does not alleviate TLAPM maintenance costs as much as hoped, + it at least provides a strong safeguard against bugs. Future work can + possibly at least remove level-checking from TLAPM. 
+*) + +open Property;; +open Module.T;; +open Expr.T;; +open Proof.T;; +open Util;; + +type language_feature = + | RecursiveOperator + | InstanceProofStep + | Subexpression + +exception Unsupported_language_feature of Loc.locus option * language_feature + +type conversion_failure_kind = + | NotYetImplemented + | InvalidBoundsOrOperands + +exception Conversion_failure of conversion_failure_kind * string option * string + +(** Utility function constructing & raising an exception for when conversion + of a TLA+ language feature is not yet implemented, although is planned to + be; Unsupported_language_feature is used when support is not currently + planned although could be added in the future. +*) +let todo (category : string) (msg : string) (loc : Xml.location option) : 'a = + raise (Conversion_failure (NotYetImplemented, Option.map Xml.show_location loc, Printf.sprintf "%s not yet implemented: %s" category msg)) + +(** Utility function constructing & raising an exception for when conversion + of a TLA+ language construct fails due to invalid bounds or operands from + SANY's parse output. This can broadly be viewed as a way to account for + the projection from Java's type system to OCaml's variants, in that Java + allows representation of invalid data (for example: more than one arg to + existential quantification) which is occluded by OCaml's variants. +*) +let conversion_failure (msg : string) (loc : Xml.location option) : 'a = + raise (Conversion_failure (InvalidBoundsOrOperands, Option.map Xml.show_location loc, msg)) + +(** A module-global table of SANY AST entities, indexed by UID. +*) +let entries : Xml.entry_kind Coll.Im.t ref = ref Coll.Im.empty + +(** Converts SANY's location format to TLAPM's, for attachment to node + metadata. Since the SANY location does not include data for byte offsets + from beginning of file, we set those to 0 here. 
+*) +let convert_location ({column = (col_start, col_finish); line = (line_start, line_finish); filename} : Xml.location) : Loc.locus = { + start = Actual { + line = line_start; + bol = 0; + col = col_start; + }; + stop = Actual { + line = line_finish; + bol = 0; + col = col_finish; + }; + file = filename ^ ".tla"; +} + +(** An attempt to reduce code duplication between tuple & non-tuple bounds by + wrapping them in a variant. +*) +type bounds_kind = + | Tuply of tuply_bounds + | NonTuply of bounds + +(** Wraps the given proof step with its name in the metadata. +*) +let attach_proof_step_name (proof_name : stepno) (step : 'a) : 'a = + assign step Props.step proof_name + +(** Wrap the given object in location data. + TODO: also wrap with level data. +*) +let attach_props (props : Xml.node) (value : 'a) : 'a wrapped = + match props.location with + | Some loc -> Util.locate value (convert_location loc) + | None -> noprops value + +(** Look up the given ref in the global entries table, failing if not found. +*) +let resolve_ref (node : Xml.node) (uid : int) : Xml.entry = + match Coll.Im.find_opt uid !entries with + | Some kind -> {uid; kind} + | None -> conversion_failure ("Unresolved reference to entry UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for module nodes. +*) +let resolve_module_node (node : Xml.node) (uid : int) : Xml.module_node = + match (resolve_ref node uid).kind with + | ModuleNode mule -> mule + | _ -> conversion_failure ("Expected module node for UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for operator declaration nodes. +*) +let resolve_op_decl_node (node : Xml.node) (uid : int) : Xml.op_decl_node = + match (resolve_ref node uid).kind with + | OpDeclNode odn -> odn + | _ -> conversion_failure ("Expected operator declaration node for UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for user-defined operators. 
+*) +let resolve_user_defined_op_kind (node : Xml.node) (uid : int) : Xml.user_defined_op_kind = + match (resolve_ref node uid).kind with + | UserDefinedOpKind udok -> udok + | _ -> conversion_failure ("Expected user defined operator for UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for operator parameter nodes. +*) +let resolve_formal_param_node (node : Xml.node) (uid : int) : Xml.formal_param_node = + match (resolve_ref node uid).kind with + | Xml.FormalParamNode xml -> xml + | _ -> conversion_failure ("Expected formal parameter node for UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for theorem definition nodes. +*) +let resolve_theorem_def_node (node : Xml.node) (uid : int) : Xml.theorem_def_node = + match (resolve_ref node uid).kind with + | TheoremDefNode xml -> xml + | _ -> conversion_failure ("Expected theorem definition node for UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for assume definition nodes. +*) +let resolve_assume_def_node (node : Xml.node) (uid : int) : Xml.assume_def_node = + match (resolve_ref node uid).kind with + | AssumeDefNode xml -> xml + | _ -> conversion_failure ("Expected assume definition node for UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for theorem nodes. +*) +let resolve_theorem_node (node : Xml.node) (uid : int) : Xml.theorem_node = + match (resolve_ref node uid).kind with + | TheoremNode xml -> xml + | _ -> conversion_failure ("Expected theorem node for UID: " ^ string_of_int uid) node.location + +(** A typed version of resolve_ref for bound symbols. 
+*) +let resolve_bound_symbol (node : Xml.node) (uid : int) : hint = + match (resolve_ref node uid).kind with + | FormalParamNode ({arity = 0} as xml) -> attach_props xml.node xml.name + | FormalParamNode _ -> conversion_failure ("Bound symbol cannot be an operator: " ^ string_of_int uid) node.location + | _ -> conversion_failure ("Unresolved formal parameter node UID: " ^ string_of_int uid) node.location + +(** Resolves definitions referenced in BY proofs or USE/HIDE statements. +*) +let resolve_def (node : Xml.node) (ref : int) : use_def wrapped = + match (resolve_ref node ref).kind with + | UserDefinedOpKind op -> Dvar op.name |> attach_props op.node + | TheoremDefNode thm -> Dvar thm.name |> attach_props thm.node + | other -> conversion_failure ("Invalid definition reference in BY proof: " ^ (Xml.show_entry_kind other)) node.location + +(** Predicate for quickly checking whether a given UID corresponds to the + given built-in operator. +*) +let is_builtin_op (node : Xml.node) (uid : int) (op : Xml.built_in_operator) : bool = + match (resolve_ref node uid).kind with + | BuiltInKind {operator} when operator = op -> true + | _ -> false + +(** Unboxes the Leibniz param into a formal param node and converts it into + the form usually (but not always; see labels) used within TLAPM. +*) +let convert_leibniz_param_node (node : Xml.node) ({ref} : Xml.leibniz_param) : (hint * shape) = + let fpn = resolve_formal_param_node node ref in + attach_props fpn.node fpn.name, + match fpn.arity with + | 0 -> Shape_expr + | n -> Shape_op n + +(** An OpApplNode's operands can be either expressions or operator arguments. + Often we only want them to be expressions. This function coerces the list + items into expressions, raising an error if they are operators. 
+*) +let as_expr_ls (name : string) (loc : Xml.location option) (operands : Xml.expr_or_op_arg list) : Xml.expression list = + let exprs = List.filter_map + (fun (operand : Xml.expr_or_op_arg) -> match operand with Expression e -> Some e | _ -> None) + operands + in if List.length exprs <> List.length operands + then conversion_failure (Format.sprintf "Expected all operands to be expressions in %s" name) loc + else exprs + +(** Several places require special handling of the last element of a list, + for example proof steps which end in a QED and CASE pairs which end + (possibly) in an OTHER statement. This utility function helps with that. +*) +let split_last_ls (node : Xml.node) (ls : 'a list) : 'a list * 'a = + match List.rev ls with + | [] -> conversion_failure "Cannot get last element of empty list" node.location + | hd :: tl -> (List.rev tl, hd) + +(** Utility function to convert a list of operands to a list of expression pairs. +*) +let as_pair (node : Xml.node) (operand : Xml.expr_or_op_arg) : (Xml.expression * Xml.expression) = + match operand with + | Expression OpApplNode {operator; bound_symbols = []; operands = [Expression left; Expression right]} -> ( + match (resolve_ref node operator).kind with + | BuiltInKind {operator = Pair} -> (left, right) + | _ -> conversion_failure "Expected pair of expressions" node.location + ) | _ -> conversion_failure "Expected pair of expressions" node.location + +(** Converts a SANY built-in operator to a TLAPM built-in operator. This is + only defined for a subset of the operators that SANY considers built-in, + and not all operators that TLAPM considers built-in are represented in + the SANY built-in operators. TLAPM also considers all the standard module + operators to be built-in operators. 
+*) +let sany_to_tlapm_builtin (node : Xml.node) (builtin : Xml.built_in_operator) : Builtin.builtin = + match builtin with + (* Reserved words *) + | TRUE -> Builtin.TRUE + | FALSE -> Builtin.FALSE + | BOOLEAN -> Builtin.BOOLEAN + | STRING -> Builtin.STRING + (* Prefix operators *) + | LogicalNegation -> Builtin.Neg + | UNION -> Builtin.UNION + | SUBSET -> Builtin.SUBSET + | DOMAIN -> Builtin.DOMAIN + | ENABLED -> Builtin.ENABLED + | UNCHANGED -> Builtin.UNCHANGED + | Always -> (Builtin.Box false) (* TODO: figure out meaning of false parameter *) + | Eventually -> Builtin.Diamond + (* Postfix operators *) + | Prime -> Builtin.Prime + (* Infix operators *) + | SetIn -> Builtin.Mem + | SetNotIn -> Builtin.Notmem + | Implies -> Builtin.Implies + | Equivalent -> Builtin.Equiv + | Conjunction -> Builtin.Conj + | Disjunction -> Builtin.Disj + | Equals -> Builtin.Eq + | NotEquals -> Builtin.Neq + | SetMinus -> Builtin.Setminus + | Union -> Builtin.Cup + | Intersect -> Builtin.Cap + | SubsetEq -> Builtin.Subseteq + | LeadsTo -> Builtin.Leadsto + | ActionComposition -> Builtin.Cdot + | PlusArrow -> Builtin.Actplus + | _ -> conversion_failure ("SANY built-in operator cannot be translated to TLAPM built-in operator: " ^ Xml.show_built_in_operator builtin) node.location + +(** Conversion of application of all traditional built-in operators like = or + \cup but also things like CHOOSE and \A which one would ordinarily not + view as built-in operators. +*) +let rec convert_built_in_op_appl (apply : Xml.op_appl_node) (op : Xml.built_in_kind) : Expr.T.expr = + (** TLAPM has a specific set of operators it considers "built in" which is + different from the set that SANY consideres "built-in"; this function + constructs operators for the TLAPM built-in operator set. 
+ *) + let mk (builtin : Builtin.builtin) : Expr.T.expr = + match apply.operands with + | [] -> Internal builtin |> attach_props apply.node + | args -> Apply ( + Internal builtin |> attach_props op.node, + args |> as_expr_ls (Builtin.builtin_to_string builtin) apply.node.location |> List.map convert_expression + ) |> attach_props apply.node + in match op.operator with + (* Reserved words *) + | TRUE -> mk Builtin.TRUE + | FALSE -> mk Builtin.FALSE + | BOOLEAN -> mk Builtin.BOOLEAN + | STRING -> mk Builtin.STRING + (* Prefix operators *) + | LogicalNegation -> mk Builtin.Neg + | UNION -> mk Builtin.UNION + | SUBSET -> mk Builtin.SUBSET + | DOMAIN -> mk Builtin.DOMAIN + | ENABLED -> mk Builtin.ENABLED + | UNCHANGED -> mk Builtin.UNCHANGED + | Always -> mk (Builtin.Box false) (* TODO: figure out meaning of false parameter *) + | Eventually -> mk Builtin.Diamond + (* Postfix operators *) + | Prime -> mk Builtin.Prime + (* Infix operators *) + | SetIn -> mk Builtin.Mem + | SetNotIn -> mk Builtin.Notmem + | Implies -> mk Builtin.Implies + | Equivalent -> mk Builtin.Equiv + | Conjunction -> mk Builtin.Conj + | Disjunction -> mk Builtin.Disj + | Equals -> mk Builtin.Eq + | NotEquals -> mk Builtin.Neq + | SetMinus -> mk Builtin.Setminus + | Union -> mk Builtin.Cup + | Intersect -> mk Builtin.Cap + | SubsetEq -> mk Builtin.Subseteq + | LeadsTo -> mk Builtin.Leadsto + | ActionComposition -> mk Builtin.Cdot + | PlusArrow -> mk Builtin.Actplus + (* Language operators *) + | FiniteSetLiteral -> SetEnum ( + apply.operands |> as_expr_ls "FiniteSetLiteral" apply.node.location |> List.map convert_expression + ) |> attach_props apply.node + | TupleLiteral -> Tuple ( + apply.operands |> as_expr_ls "TupleLiteral" apply.node.location |> List.map convert_expression + ) |> attach_props apply.node + | ConjunctionList -> List ( + And, apply.operands |> as_expr_ls "ConjunctionList" apply.node.location |> List.map convert_expression + ) |> attach_props apply.node + | DisjunctionList -> List ( 
+ Or, apply.operands |> as_expr_ls "DisjunctionList" apply.node.location |> List.map convert_expression + ) |> attach_props apply.node + | CartesianProduct -> Product ( + apply.operands |> as_expr_ls "CartesianProduct" apply.node.location |> List.map convert_expression + ) |> attach_props apply.node + | WeakFairness -> convert_fairness Weak apply + | StrongFairness -> convert_fairness Strong apply + | BoundedChoose -> convert_choose apply + | UnboundedChoose -> convert_choose apply + | ActionOrStutter -> convert_action_expr Box apply + | ActionNoStutter -> convert_action_expr Dia apply + | BoundedExists -> convert_quantification Exists apply + | BoundedForAll -> convert_quantification Forall apply + | UnboundedExists -> convert_quantification Exists apply + | UnboundedForAll -> convert_quantification Forall apply + | TemporalExists -> convert_temporal_quantification Exists apply + | TemporalForAll -> convert_temporal_quantification Forall apply + | FiniteSetMap -> convert_set_map apply + | FiniteSetFilter -> convert_set_filter apply + | FunctionSet -> convert_function_set apply + | FunctionConstructor -> convert_function_constructor apply + | FunctionDefinition -> convert_function_constructor apply + | RecursiveFunctionDefinition -> convert_recursive_function_definition apply + | FunctionApplication -> convert_function_application apply + | RecordSet -> convert_record_set apply + | RecordConstructor -> convert_record_constructor apply + | RecordSelect -> convert_record_select apply + | Except -> convert_except apply + | IfThenElse -> convert_if_then_else apply + | Case -> convert_case apply + | Subexpression -> convert_subexpression apply + (* Grouping operators used within other operators *) + | Pair | Sequence + (* Proof step operators *) + | CaseProofStep | PickProofStep | TakeProofStep | HaveProofStep | WitnessProofStep | SufficesProofStep | QedProofStep + -> conversion_failure ("Operator invalid at this location : " ^ Xml.show_built_in_operator op.operator) 
apply.node.location +(** Converts a top-level module node. *) +and convert_module_node (mule : Xml.module_node) : Module.T.mule = + (** Converts an entry, which is an abstract type that can be all sorts of + things; SANY heavily uses GUIDs to reference one entity from another and + those GUIDs are resolved in a global table with no real type information. + Thus in-scope operator parameters coexist alongside entire modules, and + here we branch out to the appropriate conversion method. + *) + let convert_entry (unit : Xml.unit_kind) : Module.T.modunit option = + match unit with + | Instance instance -> Some (convert_unit_instance instance) + | UseOrHide use_or_hide -> Some (convert_use_or_hide use_or_hide) + | Ref uid -> let entry = resolve_ref mule.node uid in + match entry.kind with + | ModuleNode submod -> Some (Submod (convert_module_node submod) |> attach_props submod.node) + | AssumeNode assume -> Some (convert_assume_node assume) + | OpDeclNode op_decl_node -> Some (convert_op_decl_node op_decl_node) + | UserDefinedOpKind user_defined_op_kind -> convert_unit_user_defined_op_kind user_defined_op_kind mule.name + | TheoremNode theorem_node -> Some (convert_theorem_node entry.uid 0 theorem_node) + | ModuleInstanceKind instance -> Some (convert_unit_instance instance) + | BuiltInKind _ -> conversion_failure "BuiltInKind not expected at module top-level" None + | FormalParamNode _ -> conversion_failure "FormalParamNode not expected at module top-level" None + | AssumeDefNode _ -> conversion_failure "AssumeDefNode should not be converted directly" None + | TheoremDefNode _ -> conversion_failure "TheoremDefNode should not be converted directly" None + (** Returns 0 if equal, positive if first is greater, negative if second is + greater; This is used to sort units in increasing order, where order is + given by the unit definition's position in the file. 
This function is + necessary because the XML Exporter does not provide unit definitions in + the same order as they appear in the file. + *) + in let order_unit (first : Module.T.modunit) (second : Module.T.modunit) : int = + let first_loc = Util.get_locus first in + let first_line_start, first_col_start = Loc.line first_loc.start, Loc.column first_loc.start in + let second_loc = Util.get_locus second in + let second_line_start, second_col_start = Loc.line second_loc.start, Loc.column second_loc.start in + let line_order = first_line_start - second_line_start + in if line_order <> 0 then line_order else first_col_start - second_col_start + in { + name = noprops mule.name; + extendees = List.map (fun name -> noprops name) mule.extends; + instancees = []; (* TODO: collate list of instancees from units *) + (* Filter map to skip all operators which were inlined during import. *) + body = mule.units |> List.filter_map convert_entry |> List.sort order_unit; + defdepth = 0; + stage = Parsed; + important = false + } |> attach_props mule.node + +(** Converts M(x, y) == INSTANCE ModuleName WITH op <- x, op2 <- y. Also + converts non-named (anonymous) unit-level instances. + + TODO: SANY can handle named instances accepting an operator as a + parameter, but TLAPM does not seem to be able to represent this; it uses + a simple 'hint' for the parameter name instead of a hint * shape tuple + which would capture arity. This doesn't *necessarily* mean that TLAPM + does not handle operator parameters, but it is odd that arity info is not + captured. For now we will just error in that case. 
+*) +and convert_instance (instance : Xml.instance_node) : Expr.T.instance = + let mk_arg (param : Xml.formal_param_node) : hint = + match param.arity with + | 0 -> attach_props param.node param.name + | _ -> conversion_failure "TLAPM cannot handle operators as instance arguments" param.node.location + in let mk_substitution (sub : Xml.substitution) : (hint * Expr.T.expr) = + let target = resolve_op_decl_node instance.node sub.target_uid in ( + attach_props target.node target.name, + convert_expression_or_operator_argument instance.node sub.substitute + ) + in { + inst_args = instance.parameters |> List.map (resolve_formal_param_node instance.node) |> List.map mk_arg; + inst_mod = instance.module_name; + inst_sub = List.map mk_substitution instance.substitutions; + } + +(** INSTANCE conversion at the module unit level. This just wraps a converted + instance in a Definition or Anoninst variant. +*) +and convert_unit_instance (instance : Xml.instance_node) : Module.T.modunit = ( + let instantiation = convert_instance instance in + match instance.name with + | Some name -> Definition (Instance (noprops name, instantiation) |> noprops, User, Hidden, Export) + | None -> Anoninst (instantiation, if instance.local then Local else Export) +) |> attach_props instance.node + +(** Converts USE x, y, z and HIDE a, b, c statements. These statements will + reveal or conceal the given definitions to all subsequent proof steps. + The USE ONLY x, y, z statement ensures that only the given definitions + will be considered in subsequent proof steps. +*) +and convert_use_or_hide (use_or_hide : Xml.use_or_hide_node) : Module.T.modunit = + let action = if use_or_hide.hide then `Hide else `Use use_or_hide.only in + Mutate (action, convert_usable use_or_hide) |> attach_props use_or_hide.node + +(** Called both from unit-level USE/HIDE conversion and from proof step USE/ + HIDE conversion. De-duplication of shared conversion logic. 
+*) +and convert_usable (use_or_hide : Xml.use_or_hide_node) : Proof.T.usable = { + facts = List.map convert_expression use_or_hide.facts; + defs = List.map (resolve_def use_or_hide.node) use_or_hide.def_refs; +} + +(** Converts an ASSUME unit-level construct. This can be named or unnamed. If + named, this name is given by resolving a reference to an AssumeDefNode, + which is different from an AssumeNode. Probably this duplication will be + removed on the SANY side eventually. +*) +and convert_assume_node (assume : Xml.assume_node) : Module.T.modunit = + Module.T.Axiom ( + Option.map (fun uid -> let def = resolve_assume_def_node assume.node uid in attach_props def.node def.name) assume.definition, + convert_expression assume.body + ) |> attach_props assume.node + +(** Converts operator declarations such as CONSTANTS and VARIABLES. In a + declaration like VARIABLES x, y, z, each of x, y, and z are given as + separate OpDeclNode entries. In contrast, TLAPM wraps all of these in a + single Variables modunit. +*) +and convert_op_decl_node (xml : Xml.op_decl_node) : Module.T.modunit = + match xml.kind with + | Constant -> attach_props xml.node (Constants [attach_props xml.node xml.name, match xml.arity with | 0 -> Shape_expr | n -> Shape_op n]) + | Variable -> attach_props xml.node (Variables [attach_props xml.node xml.name]) + | _ -> todo "Operator declaration kind" (Xml.show_declaration_kind xml.kind) xml.node.location + +(** Converts fairness expressions such as WF_sub(expr) and SF_sub(expr). +*) +and convert_fairness (fairness : fairness_op) (apply : Xml.op_appl_node) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + | [], [Expression sub; Expression expr] -> Fair (fairness, convert_expression sub, convert_expression expr) + | _ -> conversion_failure "Wrong number of operands to fairness expression" apply.node.location +) |> attach_props apply.node + +(** Converts action-level expressions such as [expr]_sub and <>_sub. 
+ TODO: construct the TSub type if this is prefixed with [] or <>. +*) +and convert_action_expr (op : modal_op) (apply : Xml.op_appl_node) : Expr.T.expr = + match apply.operands with + | [Expression expr; Expression sub] -> Sub ( + op, + convert_expression expr, + convert_expression sub + ) |> attach_props apply.node + | _ -> conversion_failure "Wrong number of operands to action expression" apply.node.location + +(** This method handles conversion of four cases: + 1. Bounded non-tuple choice like CHOOSE x \in S : P + 2. Bounded tuple choice like CHOOSE <> \in S : P + 3. Unbounded non-tuple choice like CHOOSE x : P + 4. Unbounded tuple choice like CHOOSE <> : P + + The XML representation of these does not really adhere very well to the + principle of making invalid state unrepresentable, so there is a range of + possible input data that theoretically should never occur but nonetheless + must be handled in OCaml match statements. +*) +and convert_choose (apply : Xml.op_appl_node) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + (* Case 1: Bounded non-tuple CHOOSE expression *) + | [Bound {is_tuple = false; symbol_refs = [param]; expression}], [Expression body] -> + Choose ( + resolve_bound_symbol apply.node param, + Some (convert_expression expression), + convert_expression body + ) + (* Case 2: Bounded tuple CHOOSE expression *) + | [Bound ({is_tuple = true} as symbol)], [Expression body] -> + ChooseTuply ( + List.map (resolve_bound_symbol apply.node) symbol.symbol_refs, + Some (convert_expression symbol.expression), + convert_expression body + ) + (* Case 3: Unbounded non-tuple CHOOSE expression *) + | [Unbound ({is_tuple = false} as symbol)], [Expression body] -> + Choose ( + resolve_bound_symbol apply.node symbol.symbol_ref, + None, + convert_expression body + ) + (* Case 4: Unbounded tuple CHOOSE expression; this is the only place in TLA+ where an unbounded tuple quantifier is valid. 
*) + | Unbound {is_tuple = true} :: _, [Expression body] -> + let symbols = List.filter_map (fun (s : Xml.symbol) -> match s with | Unbound ({is_tuple = true} as u) -> Some u | _ -> None) apply.bound_symbols in + if List.length symbols <> List.length apply.bound_symbols + then conversion_failure "Inconsistent bound/unbound or tuple/non-tuple symbols in CHOOSE" apply.node.location + else ChooseTuply ( + List.map (fun (s : Xml.unbound_symbol) -> resolve_bound_symbol apply.node s.symbol_ref) symbols, + None, + convert_expression body + ) + | _ -> conversion_failure "Invalid number of bounds or operands to CHOOSE" apply.node.location +) |> attach_props apply.node + +(** General utility function to convert the given bound symbol into a non- + tuple bound type. +*) +and convert_non_tuply_bounds (node : Xml.node) (bound : Xml.bound_symbol) : bounds = + if bound.is_tuple then conversion_failure "Tuple bound passed to non-tuple bound conversion" node.location else + match List.map (resolve_bound_symbol node) bound.symbol_refs with + (* TODO: figure out meaning of "Unknown" parameter *) + | hd :: tl -> (hd, Unknown, Domain (convert_expression bound.expression)) + :: List.map (fun s -> (s, Unknown, Ditto)) tl + | [] -> conversion_failure "Bound symbol groups must have at least one symbol" node.location + +(** General utility function to convert the given bound symbol into a tuply + bound type, regardless of whether it is of the form <> \in S. If + even one quantifier bound in a list of quantifier bounds has tuple form, + then all must be put in the tuply_bounds type; see comment on the + convert_quantification function for more info. 
+*) +and convert_tuply_bounds (node : Xml.node) (bound : Xml.bound_symbol) : tuply_bounds = + if bound.is_tuple + then match List.map (resolve_bound_symbol node) bound.symbol_refs with + | (_ :: _ as symbols) -> [(Bound_names symbols, Domain (convert_expression bound.expression))] + | [] -> conversion_failure "Tuple bound symbol groups must have at least one symbol" node.location + else match List.map (resolve_bound_symbol node) bound.symbol_refs with + | hd :: tl -> (Bound_name hd, Domain (convert_expression bound.expression)) + :: List.map (fun s -> (Bound_name s, Ditto)) tl + | [] -> conversion_failure "Bound symbol groups must have at least one symbol" node.location + +(** General utility function to convert a list of quantifier bounds either to + tuple or non-tuple type. If even one quantifier bound in a list of quantifier + bounds has tuple form, then all must be put in the tuply_bounds type; see + comment on the convert_quantification function for more info. This function + requires all bounds to actually have a set bound, and will error if given + an unbounded quantifier bound. +*) +and convert_bounds (node : Xml.node) (all_bound_symbols : Xml.symbol list) : bounds_kind = + let bound_symbols = List.filter_map (fun (s : Xml.symbol) -> match s with | Bound b -> Some b | _ -> None) all_bound_symbols in + if List.length bound_symbols <> List.length all_bound_symbols + then conversion_failure "Cannot have unbound symbols" node.location + else if List.exists (fun (b : Xml.bound_symbol) -> b.is_tuple) bound_symbols + then Tuply (List.map (convert_tuply_bounds node) bound_symbols |> List.flatten) + else NonTuply (List.map (convert_non_tuply_bounds node) bound_symbols |> List.flatten) + +(** General utility function to convert a list of quantifier bounds either to + tuple or non-tuple type. As above, one tuple bound means all are given as + tuple bounds. 
The difference between this and the convert_bounds function + is that unbounded symbols are accepted here, albeit not unbounded tuple + bounds (those are only acceptable within a CHOOSE expression). However, + if one quantifier bound is unbounded, then all must be unbounded. +*) +and convert_bound_or_unbound_symbols (node : Xml.node) (all_symbols : Xml.symbol list) : bounds_kind = + let bound_symbols = List.filter_map (fun (s : Xml.symbol) -> match s with | Bound b -> Some b | _ -> None) all_symbols in + let unbound_symbols = List.filter_map (fun (s : Xml.symbol) -> match s with | Unbound b -> Some b | _ -> None) all_symbols in + if unbound_symbols <> [] + then + if bound_symbols <> [] + then conversion_failure "Cannot mix bound and unbound symbols" node.location + else if List.exists (fun (b : Xml.unbound_symbol) -> b.is_tuple) unbound_symbols + then conversion_failure "Unbounded tuple quantification is not supported" node.location + else let mk_bound (bound : Xml.unbound_symbol) : bound = ( + resolve_bound_symbol node bound.symbol_ref, + Unknown, (* TODO: figure out purpose of this parameter *) + No_domain + ) in NonTuply (List.map mk_bound unbound_symbols) + else if List.exists (fun (b : Xml.bound_symbol) -> b.is_tuple) bound_symbols + (* Bounded, includes at least one tuple *) + then Tuply (List.map (convert_tuply_bounds node) bound_symbols |> List.flatten) + (* Bounded, no tuples *) + else NonTuply (List.map (convert_non_tuply_bounds node) bound_symbols |> List.flatten) + +(** Handles conversion of both bounded & unbounded quantification. Both sides + of the conversion here are fairly weird. The SANY AST has the same issues + as in the CHOOSE conversion where many invalid states are representable + although at least the troublesome unbounded tuple case does not exist. + The TLAPM AST has an artificial distinction between tuple and non-tuple + quantification due to support for tuple quantification being added at a + later date. 
In reality, mixed tuple/non-tuple quantification is a regular + feature of TLA+ so ideally these would be folded into a single variant. + This is perhaps a good target for future refactoring. TLAPM's method of + representing bounds is also very odd (and that oddity is, unfortunately, + made worse by its duplication in the tuply_bounds type). Of particular + note is the bound_domain type, a variant which encompasses an ordinary + domain expression, no domain, and also "ditto". The latter is used to + indicate that the domain of a bound is the same as the previous bound's + domain. At a functional level this is complex to deal with as it means + each bound must be processed in sequence with knowledge of the previous + bound's domain, necessitating use of fold instead of map. The resulting + code never fails to be mind-bending. It also allows representation of + invalid states, as bound & unbound quantification cannot be mixed in + valid TLA⁺ syntax. Ideally quantification would be split at the top level + between bound & unbound, where the bound case has a nonempty list of + bounds, each of which is either tuple or non-tuple and consists of a + nonempty list of symbols and a domain expression. The unbound case would + be a simple nonempty list of symbols. +*) +and convert_quantification (quant : Expr.T.quantifier) (apply : Xml.op_appl_node) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + | _ :: _, [Expression body] -> ( + match convert_bound_or_unbound_symbols apply.node apply.bound_symbols with + | Tuply tuply_bounds -> QuantTuply (quant, tuply_bounds, convert_expression body) + | NonTuply bounds -> Quant (quant, bounds, convert_expression body) + ) + | _ -> conversion_failure "Invalid number of bounds or operands to quantification" apply.node.location +) |> attach_props apply.node + +(** Temporal quantification; these symbols are always unbound. 
+*) +and convert_temporal_quantification (quant : Expr.T.quantifier) (apply : Xml.op_appl_node) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + | _ :: _, [Expression body] -> + let unbound_symbols = List.filter_map (fun (s : Xml.symbol) -> match s with | Unbound b -> Some b | _ -> None) apply.bound_symbols in + if List.length unbound_symbols <> List.length apply.bound_symbols + then conversion_failure "Temporal quantification requires unbound symbols" apply.node.location + else if List.exists (fun (b : Xml.unbound_symbol) -> b.is_tuple) unbound_symbols + then conversion_failure "Unbounded tuple quantification is not supported" apply.node.location + else let bounds = List.map (fun (b : Xml.unbound_symbol) -> resolve_bound_symbol apply.node b.symbol_ref) unbound_symbols in + Tquant (quant, bounds, convert_expression body) + | _ -> conversion_failure "Invalid number of bounds or operands to temporal quantification" apply.node.location +) |> attach_props apply.node + +(** Conversion of expressions of the form {f(x, y) : x \in S, y \in Z} +*) +and convert_set_map (apply : Xml.op_appl_node) : Expr.T.expr = + match apply.bound_symbols, apply.operands with + | _ :: _, [Expression body] -> ( + match convert_bounds apply.node apply.bound_symbols with + | Tuply tuply_bounds -> SetOfTuply (convert_expression body, tuply_bounds) + | NonTuply bounds -> SetOf (convert_expression body, bounds) + ) |> attach_props apply.node + | _ -> conversion_failure "Invalid number of bounds or operands to set mapping" apply.node.location + +(** Conversion of expressions of the form {x \in S : P(x)} or {<> \in S \X T : P(x, y)} +*) +and convert_set_filter (apply : Xml.op_appl_node) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + | [Bound {symbol_refs = [symbol_ref]; is_tuple = false; expression}], [Expression predicate] -> SetSt ( + resolve_bound_symbol apply.node symbol_ref, + convert_expression expression, + convert_expression predicate + ) + | [Bound 
{symbol_refs = (_ :: _) as symbol_refs; is_tuple = true; expression}], [Expression predicate] -> SetStTuply ( + List.map (resolve_bound_symbol apply.node) symbol_refs, + convert_expression expression, + convert_expression predicate + ) + | _ -> conversion_failure "Invalid bounds or operands to set filter" apply.node.location +) |> attach_props apply.node + +(** Conversion of recursive functions where the function body refers to the + function definition, for example f[x \in Nat] == f[x - 1]. Both SANY and + TLAPM represent these as f == [x \in Nat |-> f[x - 1]], and here we + convert the right-hand side of this definition. The function name is + introduced as the first symbol, unbound. +*) +and convert_recursive_function_definition (apply : Xml.op_appl_node) : Expr.T.expr = ( + let bounds, body = match apply.bound_symbols, apply.operands with + | Unbound function_name :: (_ :: _ as all_bound_symbols), [Expression body] -> + all_bound_symbols, convert_expression body + | _ -> conversion_failure "Invalid number of bounds or operands to function definition" apply.node.location + in match convert_bounds apply.node bounds with + | Tuply tuply_bounds -> FcnTuply (tuply_bounds, body) + | NonTuply bounds -> Fcn (bounds, body) +) |> attach_props apply.node + +(** Converts function construction expressions like [x \in S, y \in P |-> x + y] + and also f[x \in S, y \in P] == x + y. 
+*) +and convert_function_constructor (apply : Xml.op_appl_node) : Expr.T.expr = + match apply.bound_symbols, apply.operands with + | _ :: _, [Expression body] -> ( + match convert_bounds apply.node apply.bound_symbols with + | Tuply tuply_bounds -> FcnTuply (tuply_bounds, convert_expression body) + | NonTuply bounds -> Fcn (bounds, convert_expression body) + ) |> attach_props apply.node + | _ -> conversion_failure "Invalid operands to function constructor" apply.node.location + +(** Converts function set expressions of the form [P -> Q] +*) +and convert_function_set (apply : Xml.op_appl_node) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + | [], [Expression domain; Expression range] -> + Arrow (convert_expression domain, convert_expression range) + | _ -> conversion_failure "Invalid operands to function set expression" apply.node.location +) |> attach_props apply.node + +(** Conversion of function application, like f[x, y, z]. Function application + with multiple arguments is represented using a tuple, with the special + case of a tuple with a single element being just that tuple itself as an + argument instead of the argument being destructured from it. The empty + tuple argument is also a weird one which must be handled. 
So:
+    - f[x]      args given as expression x, transformed into list [x]
+    - f[x, y]   args given as a tuple <<x, y>>, transformed into list [x; y]
+    - f[<<x>>]  args given as tuple <<x>>, transformed into list [<<x>>]
+    - f[<<>>]   args given as tuple <<>>, transformed into list [<<>>]
+
+    TODO: validate all of these cases
+*)
+and convert_function_application (apply : Xml.op_appl_node) : Expr.T.expr = (
+  match apply.bound_symbols, apply.operands with
+  | [], [Expression fn; Expression args] -> (
+    let args = match args with
+    | OpApplNode {node; operator; operands} when is_builtin_op apply.node operator TupleLiteral -> (
+      match operands with
+      | [] -> [args] (* Empty tuple: the tuple itself is the sole argument *)
+      (* A single-element tuple is passed as the tuple itself rather than
+         being destructured, so the element binding is intentionally unused. *)
+      | [Expression _] -> [args]
+      | _ -> as_expr_ls __FUNCTION__ node.location operands (* Tuple with multiple elements *)
+    )
+    | _ -> [args] (* Not a tuple; single expression *)
+    in FcnApp (convert_expression fn, List.map convert_expression args)
+  )
+  | _ -> conversion_failure "Invalid operands to function application" apply.node.location
+) |> attach_props apply.node
+
+(** Conversion of record selection expressions like r.fieldName
+*)
+and convert_record_select (apply : Xml.op_appl_node) : Expr.T.expr = (
+  match apply.bound_symbols, apply.operands with
+  | [], [Expression record; Expression (StringNode {value})] -> Dot (convert_expression record, value)
+  | _ -> conversion_failure "Invalid operands to record selection" apply.node.location
+) |> attach_props apply.node
+
+(** Conversion of record set expressions like [field1 : expr1, field2 : expr2, ...]
+*)
+and convert_record_set (apply : Xml.op_appl_node) : Expr.T.expr =
+  convert_record_operator apply (fun arg -> Rect arg)
+
+(** Conversion of record construction expressions like [field1 |-> expr1, field2 |-> expr2, ...] 
+*) +and convert_record_constructor (apply : Xml.op_appl_node) : Expr.T.expr = + convert_record_operator apply (fun arg -> Record arg) + +(** The conversion logic for both record sets and record constructors is + identical except for the wrapping constructor (Rect vs Record). This + method captures that shared logic, taking the constructor as a parameter. +*) +and convert_record_operator (apply : Xml.op_appl_node) (constructor : (string * Expr.T.expr) list -> Expr.T.expr_) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + | [], _ :: _ -> + let mk_field (left, right : Xml.expression * Xml.expression) : (string * Expr.T.expr) = + match left, right with + | StringNode {value}, expr -> (value, convert_expression expr) + | _ -> conversion_failure "Expected field name to be a string" apply.node.location + in apply.operands |> List.map (as_pair apply.node) |> List.map mk_field |> constructor + | _ -> conversion_failure "Invalid operands to record operator" apply.node.location +) |> attach_props apply.node + +(** Converts expressions of the form [f EXCEPT ![1].2[<<3, 4>>] = val, ![2] = val2, ...] + Note that SANY does not distinguish between function and record EXCEPT; + the expression [f EXCEPT !["field"] = val] is the same as [f EXCEPT !.field = val]. + TLAPM *does* distinguish these with the Except_dot and Except_apply variants, + so we make a best-effort attempt to determine which is which. Whether this + leads to buggy behavior is currently unknown (TODO). 
+*)
+and convert_except (apply : Xml.op_appl_node) : Expr.T.expr = (
+  match apply.bound_symbols, apply.operands with
+  | [], Expression base :: (_ :: _ as updates) ->
+    let mk_path (path_item : Expr.T.expr) : Expr.T.expoint =
+      match path_item.core with
+      | String s -> Except_dot s
+      | _ -> Except_apply path_item
+    in let mk_update (operand : Xml.expr_or_op_arg) : (Expr.T.expoint list * Expr.T.expr) option =
+      match operand with
+      (* Each update is a Pair whose first component is a Sequence of path
+         elements and whose second is the new value; anything else is rejected
+         below by the length check. Note the parenthesized constructor
+         applications: [Expression (OpApplNode ...)], which OCaml's pattern
+         grammar requires. *)
+      | Expression (OpApplNode {operator; bound_symbols = []; operands = [Expression (OpApplNode {operator = update_op; bound_symbols = []; operands = update_path}); Expression new_value]}) -> (
+        match (resolve_ref apply.node operator).kind, (resolve_ref apply.node update_op).kind with
+        | BuiltInKind {operator = Pair}, BuiltInKind {operator = Sequence} ->
+          let path = update_path |> as_expr_ls __FUNCTION__ apply.node.location |> List.map convert_expression in
+          Some (List.map mk_path path, convert_expression new_value)
+        | _ -> None
+      ) | _ -> None
+    in let updates_converted = List.filter_map mk_update updates in
+    if List.length updates_converted <> List.length updates
+    then conversion_failure "Invalid operands to EXCEPT; expected pairs of update paths and new values" apply.node.location
+    else Except (convert_expression base, updates_converted)
+  | _ -> conversion_failure "Invalid operands to EXCEPT" apply.node.location
+) |> attach_props apply.node
+
+(** Conversion of expression IF predicate THEN A ELSE B
+*)
+and convert_if_then_else (apply : Xml.op_appl_node) : Expr.T.expr = (
+  match apply.bound_symbols, apply.operands with
+  | [], [Expression cond; Expression then_branch; Expression else_branch] ->
+    If (convert_expression cond, convert_expression then_branch, convert_expression else_branch)
+  | _ -> conversion_failure "Invalid operands to IF/THEN/ELSE" apply.node.location
+) |> attach_props apply.node
+
+(** Conversion of expression CASE p1 -> e1 [] p2 -> e2 [] ... [] OTHER -> e. 
+ Operands are given as a list of (predicate, expression) pairs, with the + optional final OTHER node having its predicate represented as a string + with value "$Other"; this will likely be changed on the SANY side in the + future, as it's equivalent in representation to the plausible syntax + CASE "$Other" -> expr. +*) +and convert_case (apply : Xml.op_appl_node) : Expr.T.expr = ( + match apply.bound_symbols, apply.operands with + | [], _ :: _ -> ( + let cases = List.map (as_pair apply.node) apply.operands in + let mk_case ((predicate, expr) : Xml.expression * Xml.expression) : (Expr.T.expr * Expr.T.expr) = + (convert_expression predicate, convert_expression expr) + in match split_last_ls apply.node cases with + | prefix, (StringNode {value = "$Other"}, other) -> Case (List.map mk_case prefix, Some (convert_expression other)) + | _ -> Case (List.map mk_case cases, None) + ) + | _ -> conversion_failure "Invalid bound symbols or operands to CASE" apply.node.location +) |> attach_props apply.node + +(** Subexpressions like M!N!op(expr)!1. + TODO: SANY currently does not export all the info needed for this. +*) +and convert_subexpression (apply : Xml.op_appl_node) : Expr.T.expr = ( + raise (Unsupported_language_feature (Option.map convert_location apply.node.location, Subexpression))) + +(** SANY gives references like M!op as opaque strings; these are resolved + using the UID system. We need to parse these back into Bang instances. + Probably this could most easily be done on the SANY side then attached + to various references, but we will do it here for now. Note that these + will not contain subexpression elements like :, <<, @, etc. because those + would have been given by SANY as the $Nop operator and thus are handled + in the convert_subexpression method. + TODO: How are things like M!N(a)!op represented? 
+*) +and convert_definition_reference (node : Xml.node) (name : string) (op_or_apply : [ `Op | `Apply of Xml.expr_or_op_arg list]) : Expr.T.expr = + let convert_selector (component : string) : Expr.T.sel = + if String.contains component '(' + then todo "Definition reference" "Function application in selector" node.location + else Sel_lab (component, []) + in let components = String.split_on_char '!' name in + if List.mem "" components then todo "Definition reference" "!!" node.location + else match components with + | [] -> conversion_failure "Unexpected empty definition reference" node.location + | [component] -> ( + match op_or_apply with + | `Op -> Opaque name |> attach_props node + | `Apply [] -> Opaque name |> attach_props node + | `Apply args -> Apply ( + Opaque name |> attach_props node, + List.map (convert_expression_or_operator_argument node) args + ) |> attach_props node + ) + | head :: tail -> + let prefix, last = split_last_ls node tail in + let last = Sel_lab ( + last, + List.map (convert_expression_or_operator_argument node) (match op_or_apply with | `Op -> [] | `Apply args -> args) + ) in Bang ( + Opaque head |> noprops, + List.map convert_selector prefix @ [last] + ) |> attach_props node + +(** Conversion of application of user-defined operators, including operators + defined in the standard modules. +*) +and convert_user_defined_op_appl (apply : Xml.op_appl_node) (op : Xml.user_defined_op_kind) : Expr.T.expr = + convert_definition_reference apply.node op.name (`Apply apply.operands) + +(** Conversion of reference to in-scope operator parameters, such as in + op(a, b, c) == a. This is a case where information is actually lost, + since the reference is converted to a simple string that will be resolved + again later on by turning it into a De Bruijn index (Ix) type. 
+*)
+and convert_formal_param_node_op_appl (apply : Xml.op_appl_node) (param : Xml.formal_param_node) : Expr.T.expr =
+  match param.arity with
+  | 0 -> Opaque param.name |> attach_props param.node
+  (* Arity > 0: a higher-order parameter applied to operands. The arity value
+     itself is not needed here, so bind it with a wildcard. *)
+  | _ -> Apply (
+    Opaque param.name |> attach_props param.node,
+    List.map (convert_expression_or_operator_argument apply.node) apply.operands
+  ) |> attach_props apply.node
+
+(** Conversion of reference to module-level constants or variables. Again
+    information is lost and the string will need to be resolved into a De
+    Bruijn index later on.
+*)
+and convert_op_decl_node_op_appl (apply : Xml.op_appl_node) (decl : Xml.op_decl_node) : Expr.T.expr =
+  convert_definition_reference apply.node decl.name (`Apply apply.operands)
+
+(** OpApplNode is a very general node used by SANY to represent essentially
+    all expression types. Things like \A x \in S : P are represented as an
+    application of the built-in "forall" operator, with operand P and symbol
+    x bound by S. This complicated method de-abstracts this into the more
+    detailed Expr.T.expr variant type used by TLAPS. 
+*) +and convert_op_appl_node (apply : Xml.op_appl_node) : Expr.T.expr = + let op_kind = (resolve_ref apply.node apply.operator).kind in + match op_kind with + (* Operators like = and \cup but also CHOOSE and \A *) + | BuiltInKind op -> convert_built_in_op_appl apply op + (* An operator defined by the user, including operators in the standard modules *) + | UserDefinedOpKind userdef -> convert_user_defined_op_appl apply userdef + (* A reference to an in-scope operator parameter *) + | FormalParamNode param -> convert_formal_param_node_op_appl apply param + (* A reference to a CONSTANT or VARIABLE identifier *) + | OpDeclNode decl -> convert_op_decl_node_op_appl apply decl + (* A reference to a named THEOREM or a proof step *) + | TheoremDefNode thm -> convert_definition_reference thm.node thm.name `Op + (* A reference to a named ASSUME node *) + | AssumeDefNode assume -> convert_definition_reference assume.node assume.name `Op + | _ -> conversion_failure ("Invalid operator reference in OpApplNode : " ^ (Xml.show_entry_kind op_kind)) apply.node.location + +(** Some places in TLA⁺ syntax allow both normal expressions and also + operators. Mainly this occurs when applying an operator that could accept + another operator as a parameter. So any time the user calls an operator + like op(x, y, z), x, y, and z can each be either expressions or operator + references. LAMBDA operators can also appear here. 
+*) +and convert_expression_or_operator_argument (node : Xml.node) (op_expr : Xml.expr_or_op_arg) : Expr.T.expr = + match op_expr with + | Expression expr -> convert_expression expr + | OpArg uid -> match (resolve_ref node uid).kind with + | FormalParamNode param -> Opaque param.name |> attach_props param.node + | UserDefinedOpKind userdef -> + (* The XML export format identifies lambda operators with just the string name LAMBDA *) + if userdef.name = "LAMBDA" then convert_lambda userdef else + convert_definition_reference userdef.node userdef.name `Op + | BuiltInKind builtin -> + let op = sany_to_tlapm_builtin builtin.node builtin.operator in + Internal op |> attach_props builtin.node + | OpDeclNode decl -> convert_definition_reference decl.node decl.name `Op + | ModuleInstanceKind instance -> conversion_failure ("Invalid operator argument reference to module instance: " ^ Option.get instance.name) instance.node.location + | AssumeNode assume -> conversion_failure "Invalid operator argument reference to ASSUME" assume.node.location + | AssumeDefNode assume -> conversion_failure ("Invalid operator argument reference to ASSUME: " ^ assume.name) assume.node.location + | TheoremNode thm -> conversion_failure "Invalid operator argument reference to THEOREM" thm.node.location + | TheoremDefNode thm -> conversion_failure ("Invalid operator argument reference to THEOREM: " ^ thm.name) thm.node.location + | ModuleNode mule -> conversion_failure ("Invalid operator argument reference to MODULE: " ^ mule.name) mule.node.location + +(** Converts a basic expression type, which will be either a primitive value + or an operator application. +*) +and convert_expression (expr : Xml.expression) : Expr.T.expr = + match expr with + (* TODO: true means @ from EXCEPT, false means @ from proof step (???) 
*)
+  | AtNode at_node -> At true |> attach_props at_node.node
+  | DecimalNode {node; integralPart; fractionalPart} -> Num (Int.to_string integralPart, Int.to_string fractionalPart) |> attach_props node
+  | LabelNode label -> convert_label label
+  | LetInNode let_in -> convert_let_in_node let_in
+  | NumeralNode n -> Num (Int.to_string n.value, "") |> attach_props n.node
+  | OpApplNode apply -> convert_op_appl_node apply
+  | StringNode s -> String s.value |> attach_props s.node
+  | SubstInNode subst -> convert_substitution_in subst
+  (* The reference UIDs are not used until these cases are implemented, so
+     bind them with wildcards to avoid unused-variable warnings. *)
+  | TheoremDefRef _ -> todo "Expression" "TheoremDefRef" None
+  | AssumeDefRef _ -> todo "Expression" "AssumeDefRef" None
+
+(** Converts LAMBDA x : x + 1 type operators.
+*)
+and convert_lambda (op : Xml.user_defined_op_kind) : Expr.T.expr =
+  Lambda (
+    List.map (convert_leibniz_param_node op.node) op.params,
+    convert_expression op.body
+  ) |> attach_props op.node
+
+(** When a module has been imported using INSTANCE along with one or more
+    substitutions, and then an expression referencing an operator or definition
+    from that module is used, that reference is given as a subst_in_node by
+    SANY. This provides various details on the substitutions necessary in the
+    given expression to properly evaluate it. Here, we throw away all of that
+    information and let TLAPM re-derive the substitutions later on in the parse
+    process.
+
+    Example:
+
+    M == INSTANCE Mod WITH x <- y
+    op == M!op
+
+    Here, the expression M!op is given as a subst_in_node. Compare this with
+    an INSTANCE import that does not use substitution:
+
+    M == INSTANCE Naturals
+    op == M!Nat
+
+    In this case, M!Nat is actually introduced as a new operator named M!Nat
+    in the importing module, and directly referenced with the usual uid-based
+    resolution mechanism. This might spell trouble for TLAPM as M!Nat is not
+    a valid TLA+ identifier name; TODO check whether this causes trouble. 
+*) +and convert_substitution_in (subst : Xml.subst_in_node) : Expr.T.expr = + convert_expression subst.body + +(** Converts LET/IN definition sets, consisting of one or more definitions + followed by a body expression in which the definitions are available. +*) +and convert_let_in_node ({node; def_refs; body} : Xml.let_in_node) : Expr.T.expr = + let convert_definition (def_ref : int) : Expr.T.defn = + match (resolve_ref node def_ref).kind with + | UserDefinedOpKind op -> convert_user_defined_op_kind op + | _ -> todo "LET/IN definition" "" None + in Let (List.map convert_definition def_refs, convert_expression body) |> attach_props node + +(** Converts user-defined operators defined within LET/IN expressions. +*) +and convert_user_defined_op_kind (op : Xml.user_defined_op_kind) : Expr.T.defn = + let body = convert_expression op.body in + (* TLAPS represents op(x) == expr as op == LAMBDA x : expr *) + let expr = match op.params with + | [] -> body + | params -> + Lambda (List.map (convert_leibniz_param_node op.node) params, body) + |> attach_props op.node + in Operator (attach_props op.node op.name, expr) |> attach_props op.node + +(** Converts user-defined operators defined in a module top-level. If operator + was defined in a different module, return None. +*) +and convert_unit_user_defined_op_kind (xml: Xml.user_defined_op_kind) (enclosing_module_name : string) : Module.T.modunit option = + (* TODO: this comparison does not work for nested modules, since location.filename uses enclosing module name. 
*) + if (Option.get xml.node.location).filename <> enclosing_module_name then None else + match xml.recursive with + | true -> raise (Unsupported_language_feature (Option.map convert_location xml.node.location, RecursiveOperator)) + | false -> Definition ( + convert_user_defined_op_kind xml, + User, + Hidden, (* If Visible, will be auto-included in all BY proofs *) + if xml.local then Local else Export + ) |> attach_props xml.node |> Option.some + +(** This type is redundant with the below TheoremNode type and its conversion + does not need to be handled. Probably the SANY XML exporter should be + refactored to combine these two types. The only difference is that this + type contains the name of the theorem, like in THEOREM thm == expr, while + the other does not. +*) +and convert_theorem_def_node (theorem_def_node : Xml.theorem_def_node) : Module.T.modunit = + todo "TheoremDefNode" "" theorem_def_node.node.location + +(** Converts theorem nodes. Oddly, SANY has two different theorem node types + containing identical information except TheoremDefNode contains the name + and TheoremNode does not. TLAPM's theorem node construction has some + oddities in the form of additional metadata. The proof is stored twice, + as one copy is rewritten during proof elaboration while the other remains + unchanged for error message purposes. +*) +and convert_theorem_node (uid : int) (previous_proof_level : int) (thm : Xml.theorem_node) : Module.T.modunit = + let proof = convert_proof thm.node uid previous_proof_level thm.proof in + Theorem ( + Option.map (fun uid -> let def = resolve_theorem_def_node thm.node uid in attach_props def.node def.name) thm.definition, + convert_sequent thm.body, + 0 (* The purpose of this integer parameter is unknown. 
*),
+    proof,
+    proof,
+    empty_summary
+  ) |> attach_props thm.node
+
+(** Converts ASSUME/PROVE constructs; this method de-duplicates some logic
+    and is called both from the theorem sequent conversion method and also
+    the label conversion method.
+*)
+and convert_assume_prove (ap : Xml.assume_prove_node) : sequent =
+  let convert_hypothesis (hypothesis : Xml.assumption_kind) : Expr.T.hyp =
+    match hypothesis with
+    | Expression expr -> Fact (convert_expression expr, Visible, NotSet) |> attach_props ap.node
+    | NewSymbol ns ->
+      let symbol = resolve_op_decl_node ns.node ns.symbol_ref in
+      let arity = match symbol.arity with | 0 -> Shape_expr | n -> Shape_op n in
+      let kind = match symbol.kind with
+      | NewConstant -> Constant
+      | NewVariable -> State
+      | NewState -> State
+      | NewAction -> Action
+      | NewTemporal -> Temporal
+      | _ -> conversion_failure "Invalid symbol kind in NEW" ns.node.location
+      in let domain = match ns.domain with
+      | Some domain -> Bounded (convert_expression domain, Hidden)
+      | None -> Unbounded
+      in Fresh (noprops symbol.name, arity, kind, domain)
+      |> attach_props ns.node
+    (* Nested constructor patterns require parentheses in OCaml:
+       AssumeProveLike (AssumeProveNode apl), not AssumeProveLike AssumeProveNode apl. *)
+    | AssumeProveLike (AssumeProveNode apl) ->
+      Fact (Sequent (convert_assume_prove apl) |> attach_props apl.node, Visible, NotSet) |> attach_props apl.node
+    | AssumeProveLike (AssumeProveSubstitution apl) -> todo "ASSUME/PROVE" "nested ASSUME/PROVE with substitution" apl.node.location
+  in {
+    context = List.map convert_hypothesis ap.assumptions |> Deque.of_list;
+    active = convert_expression ap.prove;
+  }
+
+(** Sequents are theorem bodies, which are either simple expressions or
+    ASSUME/PROVE constructs.
+    TODO: handle ASSUME/PROVE substitution case (uncertain what this is). 
+*) +and convert_sequent (seq : Xml.expr_or_assume_prove) : sequent = + match seq with + | Expression expr -> {context = Deque.empty; active = convert_expression expr} + | AssumeProveLike AssumeProveNode ap -> convert_assume_prove ap + | AssumeProveLike AssumeProveSubstitution aps -> todo "Sequent" "ASSUME/PROVE with substitution" aps.node.location + +(** Converts lbl(a, b, c) :: expr and lbl(a, b, c) :: ASSUME ... PROVE. TLAPM + treats labels as parentheses subtypes. + TODO: Determine whether labels should be able to handle operators here. +*) +and convert_label (label : Xml.label_node) : Expr.T.expr = ( + let mk_arg (param : Xml.formal_param_node) : hint = + match param.arity with + | 0 -> attach_props param.node param.name + | _ -> conversion_failure "TLAPM cannot handle operators as label arguments" param.node.location + in let parameters = List.map (resolve_formal_param_node label.node) label.parameters |> List.map mk_arg in + let lbl = Nlabel (label.name, parameters) |> attach_props label.node in + match label.body with + | Expression expr -> Parens (convert_expression expr, lbl) + | AssumeProveLike AssumeProveNode ap -> Parens (Sequent (convert_assume_prove ap) |> attach_props ap.node, lbl) + | AssumeProveLike AssumeProveSubstitution aps -> todo "Label" "ASSUME/PROVE with substitution" aps.node.location +) |> attach_props label.node + +(** Converts a proof, which can either be OMITTED, OBVIOUS, BY, or a series + of individual proof steps culminated in a QED step. We need to attach a + proof name to each proof step type, which in most of them is fairly + meaningless but is required by subsequent TLAPM processing. Thus we just + attach the incremented previous proof level and the reference UID. 
+*) +and convert_proof (enclosing_thm : Xml.node) (uid : int) (previous_proof_level : int) (proof : Xml.proof_node_group option) : Proof.T.proof = + let proof_name = Unnamed (previous_proof_level + 1, uid) in + match proof with + | None -> Omitted Implicit |> attach_props enclosing_thm |> attach_proof_step_name proof_name + | Some Omitted node -> Omitted Explicit |> attach_props node |> attach_proof_step_name proof_name + | Some Obvious node -> Obvious |> attach_props node |> attach_proof_step_name proof_name + | Some By proof -> convert_by_proof proof |> attach_proof_step_name proof_name + | Some Steps proof -> convert_proof_steps uid proof + +(** Converts proofs of the form BY x, y, z DEF a, b, c. This is another place + where information is lost, as the facts and definitions are converted to + strings that will need to be resolved to De Bruijn indices later on. +*) +and convert_by_proof ({node; facts; defs; only} : Xml.by_proof_node) : Proof.T.proof = + By ({ + facts = List.map convert_expression facts; + defs = List.map (resolve_def node) defs; + }, + only +) |> attach_props node + +(** Extracts the proof step name from a string like <1>abc. This is done by + taking the substring between the > and either the end of the string or + the first '.' character. Probably this information should be exposed by + SANY. +*) +and convert_proof_step_name (node : Xml.node) (uid : int) (proof_level : int) (theorem_def_ref : int option) : stepno = + match theorem_def_ref with + | Some uid -> + let proof_name = (resolve_theorem_def_node node uid).name in + let name_start = String.index proof_name '>' in + let name_end = match String.index_opt proof_name '.' with | Some n -> n | None -> String.length proof_name in + let name_len = name_end - name_start in + let name = String.sub proof_name name_start name_len in + Named (proof_level, name, false) + | None -> Unnamed (proof_level, uid) + +(** One possible proof form is a series of steps, culminating in a QED step. 
+ This method converts that structure. +*) +and convert_proof_steps (uid : int) ({node; proof_level; steps} : Xml.steps_proof_node) : Proof.T.proof = + let convert_qed_step (qed_proof_step : Xml.proof_step_group) : Proof.T.qed_step = + match qed_proof_step with + | TheoremNodeRef uid -> + let thm = resolve_theorem_node node uid in + let step_name = convert_proof_step_name node uid proof_level thm.definition in + Qed (convert_proof thm.node uid (step_number step_name) thm.proof) |> attach_props thm.node + |> attach_proof_step_name step_name + | _ -> conversion_failure "QED step must be a theorem node" node.location + in let steps, qed = split_last_ls node steps + in Steps ( + List.map (convert_proof_step node proof_level) steps, + convert_qed_step qed + ) |> attach_props node |> attach_proof_step_name (Unnamed (proof_level, uid)) + +(** Converts a specific proof step into the Proof.T.step variant expected by + TLAPM. While TLAPM has thirteen proof variants as of this writing, SANY + bundles everything into only five: DefStepNode (where the user introduces + new operator definitions into scope), UseOrHideNode, InstanceNode (removed + from TLA+; see https://github.com/tlaplus/rfcs/issues/18), TheoremNodeRef, + and TheoremNode. In keeping with the odd duplication of purpose between + TheoremDefNode and TheoremNode, the TheoremNode type is not believed to be + used. TheoremNodeRef is the real workhorse proof step type, as it is used + for all proof step types that can have sub-proofs. The specific proof step + subtype is identified by a special built-in operator as the theorem body. 
+*) +and convert_proof_step (node : Xml.node) (proof_level : int) (step : Xml.proof_step_group) : Proof.T.step = + match step with + | InstanceNode {node} -> raise (Unsupported_language_feature (Option.map convert_location node.location, InstanceProofStep)) + | TheoremNode -> todo "TheoremNode proof step" "" None + | DefStep {node; def_refs} -> convert_definition_proof_step node def_refs + (* TODO: attach name to UseOrHide step *) + | UseOrHide use_or_hide -> Use (convert_usable use_or_hide, use_or_hide.only) |> attach_props use_or_hide.node + | TheoremNodeRef uid -> + let thm = resolve_theorem_node node uid in + let step_name = convert_proof_step_name thm.node uid proof_level thm.definition in + let proof = convert_proof thm.node uid (step_number step_name) thm.proof in + let step = match thm.body with + | Expression OpApplNode ({operator} as apply) when is_builtin_op node operator CaseProofStep -> + convert_case_proof_step apply proof + | Expression OpApplNode ({operator} as apply) when is_builtin_op node operator PickProofStep -> + convert_pick_proof_step apply proof + | Expression OpApplNode ({operator} as apply) when is_builtin_op node operator TakeProofStep -> + convert_take_proof_step apply + | Expression OpApplNode ({operator} as apply) when is_builtin_op node operator HaveProofStep -> + convert_have_proof_step apply + | Expression OpApplNode ({operator} as apply) when is_builtin_op node operator WitnessProofStep -> + convert_witness_proof_step apply + | Expression OpApplNode ({operator} as apply) when is_builtin_op node operator SufficesProofStep -> + convert_suffices_proof_step apply proof + | _ -> Suffices (convert_sequent thm.body, proof) + in step |> attach_props thm.node |> attach_proof_step_name step_name + +(** Converts DEFINE proof steps, like + DEFINE + P(x) == x + Q(y) == y + M(z) == INSTANCE Naturals + + TODO: attach name to DefStep step +*) +and convert_definition_proof_step (node : Xml.node) (def_refs : int list) : Proof.T.step = + let mk_def 
(uid : int) : Expr.T.defn = + match (resolve_ref node uid).kind with + | UserDefinedOpKind op -> convert_user_defined_op_kind op + | ModuleInstanceKind m -> ( + match m.name with + | Some name -> Instance (noprops name, convert_instance m) |> attach_props m.node + | None -> conversion_failure "Unnamed module instance in DEFINE proof step" m.node.location + ) + | _ -> conversion_failure "Invalid reference type in DEFINE proof step" node.location + in Define (List.map mk_def def_refs) |> attach_props node + +(** Converts CASE proof steps, like: <2>7. CASE UNCHANGED vars +*) +and convert_case_proof_step (apply : Xml.op_appl_node) (proof : Proof.T.proof) : Proof.T.step_ = + match apply.bound_symbols, apply.operands with + | [], [Expression expr] -> Pcase (convert_expression expr, proof) + | _ -> conversion_failure "Invalid operands to CASE proof step" apply.node.location + +(** Converts PICK proofs steps, like PICK a, b, c : P +*) +and convert_pick_proof_step (apply : Xml.op_appl_node) (proof : Proof.T.proof) : Proof.T.step_ = + match apply.bound_symbols, apply.operands with + | _ :: _, [Expression body] -> ( + match convert_bound_or_unbound_symbols apply.node apply.bound_symbols with + | Tuply tuply_bounds -> PickTuply (tuply_bounds, convert_expression body, proof) + | NonTuply bounds -> Pick (bounds, convert_expression body, proof) + ) + | _ -> conversion_failure "Invalid number of bounds or operands to PICK proof step" apply.node.location + +(** Converts TAKE a, b, c, d or TAKE a, b \in S, c \in P, <> \in Q + proof step type. 
+*) +and convert_take_proof_step (apply : Xml.op_appl_node) : Proof.T.step_ = + match apply.bound_symbols, apply.operands with + | _ :: _, [] -> ( + match convert_bound_or_unbound_symbols apply.node apply.bound_symbols with + | Tuply tuply_bounds -> TakeTuply tuply_bounds + | NonTuply bounds -> Take bounds + ) + | _ -> conversion_failure "Invalid number of bounds or operands to TAKE proof step" apply.node.location + +(** Converts HAVE P proof steps. +*) +and convert_have_proof_step (apply : Xml.op_appl_node) : Proof.T.step_ = + match apply.bound_symbols, apply.operands with + | [], [Expression expr] -> Have (convert_expression expr) + | _ -> conversion_failure "Invalid bounds or operands to HAVE proof step" apply.node.location + +(** Converts WITNESS x, y, z proof steps. +*) +and convert_witness_proof_step (apply : Xml.op_appl_node) : Proof.T.step_ = + match apply.bound_symbols, apply.operands with + | [], expressions -> + Witness (expressions |> as_expr_ls __FUNCTION__ apply.node.location |> List.map convert_expression) + | _ -> conversion_failure "Invalid bounds or operands to WITNESS proof step" apply.node.location + +(** Converts SUFFICES P PROOF BY x, y, z proof steps. +*) +and convert_suffices_proof_step (apply : Xml.op_appl_node) (proof : Proof.T.proof) : Proof.T.step_ = + match apply.bound_symbols, apply.operands with + | [], [Expression expr] -> Suffices (convert_sequent (Expression expr), proof) + | _ -> conversion_failure "Invalid bounds or operands to SUFFICES proof step" apply.node.location + +(** The top-level method converting the entire SANY AST to TLAPM's AST. SANY + uses a lot of GUIDs for one entity to reference another, so we load those + into a global table for fast lookup. This table would have to be a + parameter to every conversion method in this file; for simplicity we make + it a module-level mutable variable instead. 
This method returns both the
+    converted root module and a context, which is a mapping from module names
+    to module structures for the transitive closure of modules imported from
+    root.
+*)
+let convert_ast (ast : Xml.modules) : (Module.T.modctx * Module.T.mule, (string option * string)) result =
+  entries :=
+    List.fold_left
+      (fun m (e : Xml.entry) -> Coll.Im.add e.uid e.kind m)
+      Coll.Im.empty
+      ast.context;
+  let ctx : Module.T.modctx = List.fold_left
+    (fun (map : Module.T.modctx) (mule_ref : int) ->
+      let toplevel_node : Xml.node = {location = None; level = None} in
+      let mule : Xml.module_node = resolve_module_node toplevel_node mule_ref in
+      match Coll.Sm.find_opt mule.name Module.Standard.initctx with
+      | Some std_mule ->
+        if Params.debugging "sany" then print_endline ("Using built-in standard module " ^ mule.name) else ();
+        Coll.Sm.add mule.name std_mule map
+      | None ->
+        if Params.debugging "sany" then print_endline ("Converting parsed module " ^ mule.name) else ();
+        Coll.Sm.add mule.name (convert_module_node mule) map
+    )
+    Coll.Sm.empty
+    ast.module_refs
+  in let root_module = Coll.Sm.find ast.root_module ctx in
+  root_module.core.important <- true;
+  Ok (ctx, root_module)
+
+(** Calls SANY to parse the given module, then converts SANY's AST into the
+    TLAPM AST format.
+*) +let parse (module_path : string) : (Module.T.modctx * Module.T.mule, (string option * string)) result = + let ( >>= ) = Result.bind in + Option.to_result ~none:(None, "TLAPS standard library cannot be found") Params.stdlib_path + >>= (Xml.get_module_ast_xml module_path) + >>= convert_ast + +open Sexplib;; + +let module_to_sexp (mule : Module.T.mule) : Sexp.t = + mule + |> Translate_syntax_tree.translate_tla_source_file + |> Translate_syntax_tree.ts_node_to_sexpr diff --git a/src/sany/sany.mli b/src/sany/sany.mli new file mode 100644 index 00000000..a76a847a --- /dev/null +++ b/src/sany/sany.mli @@ -0,0 +1,17 @@ +type language_feature = + | RecursiveOperator + | InstanceProofStep + | Subexpression + +exception Unsupported_language_feature of Loc.locus option * language_feature + +type conversion_failure_kind = + | NotYetImplemented + | InvalidBoundsOrOperands + +exception Conversion_failure of conversion_failure_kind * string option * string + +val parse : string -> (Module.T.modctx * Module.T.mule, string option * string) result + +open Sexplib;; +val module_to_sexp : Module.T.mule -> Sexp.t diff --git a/test/parser/translate_syntax_tree.ml b/src/sany/translate_syntax_tree.ml similarity index 99% rename from test/parser/translate_syntax_tree.ml rename to src/sany/translate_syntax_tree.ml index e244d436..c6f7908a 100644 --- a/test/parser/translate_syntax_tree.ml +++ b/src/sany/translate_syntax_tree.ml @@ -4,7 +4,10 @@ *) open Sexplib;; -open Tlapm_lib;; +open Module.T;; +open Expr.T;; +open Proof.T;; +open Util;; type field_or_node = | Field of string * ts_node diff --git a/src/sany/xml.ml b/src/sany/xml.ml new file mode 100644 index 00000000..b3607766 --- /dev/null +++ b/src/sany/xml.ml @@ -0,0 +1,1070 @@ +(** This module provides functions to interact with SANY to parse TLA+ source + files into an XML representation, and then convert that XML representation + into something with a semblance of a type system. 
For example, different
+    node types will have fields of type int or string or even a variant. This
+    makes it much more tractable to convert SANY's XML output into the format
+    expected by TLAPM.
+*)
+
+(** Calls SANY in another process to parse the given TLA+ file, then collects
+    the XML parse tree output.
+*)
+let source_to_sany_xml_str (module_path : string) (stdlib_path : string) : (string, (string option * string)) result =
+  let open Unix in
+  let open Paths in
+  (* Module jars must be appended at the end of the classpath; the reason
+     for this is that some commonly-used jars like Apalache's embed SANY
+     along with the XMLExporter class, so we accidentally use Apalache's
+     (old) embedded version instead of the one from tla2tools.jar if we put
+     Apalache earlier in the classpath. *)
+  let cmd = Printf.sprintf "java -cp %s tla2sany.xml.XMLExporter -I %s -I %s -t %s"
+    (* Cons the base classpath onto the module jars so that entries are
+       joined with ":" only when module jars are present; concatenating
+       with "^" would glue the first module jar directly onto the
+       tla2tools.jar path, producing an invalid classpath entry. *)
+    (String.concat ":" (backend_classpath_string "tla2tools.jar" :: !Params.module_jar_paths))
+    (* Quote the module's directory like the other two paths, so paths
+       containing spaces survive the shell. *)
+    (Filename.quote (Filename.dirname module_path))
+    (Filename.quote stdlib_path)
+    (Filename.quote module_path) in
+  if Params.debugging "sany" then print_endline cmd else ();
+  let (pid, out_fd) = System.launch_process cmd in
+  let in_chan = Unix.in_channel_of_descr out_fd in
+  let output = In_channel.input_all in_chan in
+  In_channel.close in_chan;
+  match Unix.waitpid [] pid with
+  | (_, WEXITED 0) -> Ok output
+  | (_, WEXITED exit_code) -> Error (None, Printf.sprintf "%d\n%s" exit_code output)
+  | _ -> failwith "Process terminated abnormally"
+
+open Xmlm;;
+
+(** This simple XML representation only consists of nodes and values, where
+    node is a tag with a list of children. For example, the XML snippet
+    [<SomeName>value</SomeName>] would be Node ("SomeName", [SValue "value"]).
+    XML can also have attributes on tags, like [<tag attr="value">], but
+    these are not used in SANY's XML format.
+*) +type tree = + | Node of string * tree list + | SValue of string + | IValue of int +[@@deriving show] + +(** Uses the Xmlm library to parse an XML string into the simple XML tree + representation defined above. If SANY's XML output format is ever changed + to make use of attributes or namespaces, this function and the tree type + will both need to be updated accordingly. +*) +let str_to_xml (xml_str: string) : (tree, (string option * string)) result = + try + let xml = Xmlm.make_input (`String (0, xml_str)) in + let el (((_namespace, name), _attributes) : tag) (children : tree list) = Node (name, children) in + let data (s : string) = match int_of_string_opt s with | Some n -> IValue n | None -> SValue s in + let _, tree = Xmlm.input_doc_tree ~el ~data xml in Ok tree + with Xmlm.Error ((line, column), err) -> + Error (Some (Printf.sprintf "Line: %d, Column: %d" line column), "XML parsing failed: " ^ Xmlm.error_message err) + +(** Error method which raises an exception when parsing the SANY XML output + fails. If this is ever triggered it indicates a bug either in this code + (most likely) or in the SANY XML output. It is also possible this could + be triggered if SANY's XML output format changes in a future version. +*) +let conversion_failure (fn_name : string) (xml : tree) : 'a = + let err_msg = Printf.sprintf "%s conversion failure on %s" fn_name (show_tree xml) in + Invalid_argument err_msg |> raise + +(** Error method which raises an exception when parsing a list of XML + children fails. +*) +let ls_conversion_failure (fn_name : string) (children : tree list) : 'a = + let err_msg = Printf.sprintf "%s conversion failure on [%s]" fn_name (children |> List.map show_tree |> String.concat "; ") in + Invalid_argument err_msg |> raise + +(** Utility function most often used with List.find or List.exists to search + for a tag in the children of an XML node. 
+*) +let is_tag (tag_name : string) (node : tree) : bool = + match node with + | Node (name, _) when name = tag_name -> true + | _ -> false + +(** Use this in conjunction with List.filter_map on children of a node to get + all references of various types. +*) +let get_ref_opt (xml : tree) : int option = + match xml with + | Node ("AssumeNodeRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("AssumeDefRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("BuiltInKindRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("FormalParamNodeRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("ModuleInstanceKindRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("ModuleNodeRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("OpDeclNodeRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("TheoremDefRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("TheoremNodeRef", [Node ("UID", [IValue uid])]) -> Some uid + | Node ("UserDefinedOpKindRef", [Node ("UID", [IValue uid])]) -> Some uid + | _ -> None + +(** Use this either on a single node that must have a UID child, or in + conjunction with List.map on children of a node that all must have UID + children. 
+*) +let get_ref (xml : tree) : int = + match get_ref_opt xml with + | Some uid -> uid + | _ -> conversion_failure __FUNCTION__ xml + +type location = { + column : int * int; + line : int * int; + filename : string; +} +[@@deriving show] + +let show_location (loc : location) : string = + Printf.sprintf "Location: %s module, line %d column %d to line %d column %d" + loc.filename + (fst loc.line) (fst loc.column) + (snd loc.line) (snd loc.column) + +let xml_to_location (xml : tree) : location = + match xml with + | Node ("location", [ + Node ("column", [Node ("begin", [IValue column_begin]); Node ("end", [IValue column_end])]); + Node ("line", [Node ("begin", [IValue line_begin]); Node ("end", [IValue line_end])]); + Node ("filename", [SValue filename]) + ]) -> { + column = (column_begin, column_end); + line = (line_begin, line_end); + filename; + } + | _ -> conversion_failure __FUNCTION__ xml + +type node = { + location : location option; + level : int option; +} +[@@deriving show] + +(** Many XML nodes have children that start with some optional "location" and + "level" tags, followed by other tags specific to that node. This function + extracts the location and level information from such a list of children, + then returns the remaining children for further processing. +*) +let extract_inline_node (children : tree list) : (node * tree list) = + match children with + | Node ("location", _) as loc :: Node ("level", [IValue lvl]) :: rest -> {location = Some (xml_to_location loc); level = Some lvl}, rest + | Node ("location", _) as loc :: rest -> {location = Some (xml_to_location loc); level = None}, rest + | Node ("level", [IValue lvl]) :: rest -> {location = None; level = Some lvl}, rest + | rest -> {location = None; level = None}, rest + + +(** A few XML nodes have an inline definition reference as their first child + after the location and level tags. 
This function is meant to be chained
+    after extract_inline_node to extract that definition reference if it
+    exists.
+*)
+let extract_inline_definition_opt (node, children : node * tree list) : (node * int option * tree list) =
+  match children with
+  (* NOTE: no trailing ";" after the arm bodies — a ";" immediately before
+     the next "|" is an OCaml syntax error (";" must be followed by an
+     expression). *)
+  | Node ("definition", [Node ("AssumeDefRef", [Node ("UID", [IValue uid])])]) :: children -> (node, Some uid, children)
+  | Node ("definition", [Node ("TheoremDefRef", [Node ("UID", [IValue uid])])]) :: children -> (node, Some uid, children)
+  | _ -> (node, None, children)
+
+type decimal_node = {
+  node : node;
+  mantissa : int;
+  exponent : int;
+  integralPart : int;
+  fractionalPart : int
+}
+[@@deriving show]
+
+let xml_to_decimal_node (children : tree list) : decimal_node =
+  match extract_inline_node children with
+  | node, [
+      Node ("mantissa", [IValue mantissa]);
+      Node ("exponent", [IValue exponent]);
+      Node ("integralPart", [IValue integralPart]);
+      Node ("fractionalPart", [IValue fractionalPart]);
+    ] -> {
+      node;
+      mantissa;
+      exponent;
+      integralPart;
+      fractionalPart;
+    }
+  | _ -> ls_conversion_failure __FUNCTION__ children
+
+type 'a literal = {
+  node : node;
+  value : 'a
+}
+[@@deriving show]
+
+let xml_to_numeral_node (children : tree list) : int literal =
+  match extract_inline_node children with
+  | node, [Node ("IntValue", [IValue value])] -> {node; value}
+  | _ -> ls_conversion_failure __FUNCTION__ children
+
+let xml_to_string_node (children : tree list) : string literal =
+  match extract_inline_node children with
+  | node, [Node ("StringValue", [SValue value])] -> {node; value}
+  (* In the case of an empty string "", node has no children *)
+  | node, [Node ("StringValue", [])] -> {node; value = ""}
+  (* Sometimes strings can accidentally be converted into integers!
*) + | node, [Node ("StringValue", [IValue value])] -> {node; value = Int.to_string value} + | node, _ -> ls_conversion_failure __FUNCTION__ children + +type leibniz_param = { + ref : int; + is_leibniz : bool; +} +[@@deriving show] + +let xml_to_leibniz_param xml = + match xml with + | Node ("leibnizparam", Node ("FormalParamNodeRef", [Node ("UID", [IValue ref])]) :: is_leibniz_opt) -> { + ref; + is_leibniz = match is_leibniz_opt with | [Node ("leibniz", [])] -> true | _ -> false; + } + | _ -> conversion_failure __FUNCTION__ xml + +type formal_param_node = { + node : node; + name : string; + arity : int; +} +[@@deriving show] + +let xml_to_formal_param_node (children : tree list) : formal_param_node = + match extract_inline_node children with + | node, [Node ("uniquename", [SValue name]); Node ("arity", [IValue arity])] -> {node; name; arity} + | _ -> ls_conversion_failure __FUNCTION__ children + +type unbound_symbol = { + symbol_ref : int; + is_tuple : bool; +} +[@@deriving show] + +let xml_to_unbound_symbol (children : tree list) : unbound_symbol = + match children with + | Node ("FormalParamNodeRef", [Node ("UID", [IValue symbol_ref])]) :: tuple_tag_opt -> { + symbol_ref; + is_tuple = match tuple_tag_opt with | [Node ("tuple", [])] -> true | _ -> false; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type op_appl_node = { + node : node; + operator : int; + operands : expr_or_op_arg list; + bound_symbols : symbol list; +} + +and let_in_node = { + node : node; + def_refs : int list; + body : expression; +} + +and at_node = { + node : node; +} + +and label_node = { + node : node; + name : string; + arity : int; + body : expr_or_assume_prove; + parameters : int list +} + +and subst_in_node = { + node : node; + substitutions : substitution list; + body : expression; + instance_from_mule_ref : int; + instance_to_mule_ref : int; +} + +and substitution = { + target_uid : int; + substitute : expr_or_op_arg; +} + +and expression = + | AtNode of at_node + | 
DecimalNode of decimal_node + | LabelNode of label_node + | LetInNode of let_in_node + | NumeralNode of int literal + | OpApplNode of op_appl_node + | StringNode of string literal + | SubstInNode of subst_in_node + | TheoremDefRef of int + | AssumeDefRef of int + +and expr_or_op_arg = + | Expression of expression + | OpArg of int + +and bound_symbol = { + symbol_refs : int list; + is_tuple : bool; + expression : expression +} + +and symbol = + | Unbound of unbound_symbol + | Bound of bound_symbol + +and user_defined_op_kind = { + node : node; + name : string; + arity : int; + precomments : string option; + body : expression; + params : leibniz_param list; + recursive : bool; + local : bool; +} + +and assume_prove_node = { + node : node; + assumptions : assumption_kind list; + prove : expression; +} + +and assume_prove_like = + | AssumeProveNode of assume_prove_node + | AssumeProveSubstitution of subst_in_node + +and new_symbol_node = { + node : node; + symbol_ref : int; + domain : expression option; +} + +and assumption_kind = + | Expression of expression + | AssumeProveLike of assume_prove_like + | NewSymbol of new_symbol_node + +and expr_or_assume_prove = + | Expression of expression + | AssumeProveLike of assume_prove_like +[@@deriving show] + +let rec xml_to_symbols (xml : tree) : symbol = + match xml with + | Node ("unbound", children) -> Unbound (xml_to_unbound_symbol children) + | Node ("bound", children) -> Bound (xml_to_bound_symbol children) + | _ -> conversion_failure __FUNCTION__ xml + +and xml_to_bound_symbol (children : tree list) : bound_symbol = + let rec consume_symbol_refs (acc : int list) (children : tree list) : int list * tree list = + match children with + | Node ("FormalParamNodeRef", [Node ("UID", [IValue symbol_ref])]) :: rest -> + consume_symbol_refs (symbol_ref :: acc) rest + | _ -> (List.rev acc, children) + in match consume_symbol_refs [] children with + | symbol_refs, [Node ("tuple", _); expression] -> { + symbol_refs; + is_tuple = 
true; + expression = xml_to_expression expression; + } + | symbol_refs, [expression] -> { + symbol_refs; + is_tuple = false; + expression = xml_to_expression expression; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +and xml_to_expr_or_op_arg (xml : tree) : expr_or_op_arg = + match xml with + | Node ("OpArgNode", children) -> ( + match extract_inline_node children with + | node, [Node ("argument", [argument])] -> OpArg (get_ref argument) + | _ -> conversion_failure __FUNCTION__ xml + ) + | _ -> Expression (xml_to_expression xml) + +and xml_to_op_appl_node (children : tree list) : op_appl_node = + match extract_inline_node children with + | node, [Node ("operator", [ref_node]); Node ("operands", operands); Node ("boundSymbols", bound_symbols)] -> { + node; + operator = get_ref ref_node; + operands = List.map xml_to_expr_or_op_arg operands; + bound_symbols = List.map xml_to_symbols bound_symbols; + } + | node, [Node ("operator", [ref_node]); Node ("operands", operands)] -> { + node; + operator = get_ref ref_node; + operands = List.map xml_to_expr_or_op_arg operands; + bound_symbols = [] + } + | _ -> ls_conversion_failure __FUNCTION__ children + +and xml_to_let_in_node (children : tree list) : let_in_node = + match extract_inline_node children with + | node, [Node ("body", [body]); Node ("opDefs", op_defs)]-> { + node; + body = xml_to_expression body; + def_refs = List.map get_ref op_defs; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +(** TODO: there are more fields to parse in this structure +*) +and xml_to_at_node (children : tree list) : at_node = + match extract_inline_node children with + | node, _ -> {node} + +and xml_to_label_node (children : tree list) : label_node = + match extract_inline_node children with + | node, [ + Node ("uniquename", [SValue name]); + Node ("arity", [IValue arity]); + Node ("body", [body]); + Node ("params", parameters) + ] -> { + node; + name; + arity; + body = xml_to_expr_or_assume_prove body; + parameters 
= List.map get_ref parameters; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +and xml_to_assume_prove_node (children : tree list) : assume_prove_node = + match extract_inline_node children with + | node, Node ("assumes", assumptions) :: Node ("prove", [prove]) :: _ -> { + node; + assumptions = List.map xml_to_assumption_kind assumptions; + prove = xml_to_expression prove; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +and xml_to_assumption_kind (xml : tree) : assumption_kind = + match xml with + | Node ("AssumeProveNode", children) -> AssumeProveLike (AssumeProveNode (xml_to_assume_prove_node children)) + | Node ("NewSymbNode", children) -> NewSymbol (xml_to_new_symbol_node children) + | expr -> Expression (xml_to_expression expr) + +and xml_to_new_symbol_node (children : tree list) : new_symbol_node = + match extract_inline_node children with + | node, [Node ("OpDeclNodeRef", [Node ("UID", [IValue symbol_ref])])] -> { + node; + symbol_ref; + domain = None; + } + | node, [Node ("OpDeclNodeRef", [Node ("UID", [IValue symbol_ref])]); domain] -> { + node; + symbol_ref; + domain = Some (xml_to_expression domain); + } + | _ -> ls_conversion_failure __FUNCTION__ children + +and xml_to_expr_or_assume_prove (xml : tree) : expr_or_assume_prove = + match xml with + | Node ("AssumeProveNode", children) -> AssumeProveLike (AssumeProveNode (xml_to_assume_prove_node children)) + | Node ("APSubstInNode", children) -> AssumeProveLike (AssumeProveSubstitution (xml_to_subst_in_node children)) + | expr -> Expression (xml_to_expression expr) + +and xml_to_substitution (xml : tree) : substitution = + match xml with + | Node ("Subst", [Node ("OpDeclNodeRef", [Node ("UID", [IValue target_uid])]); substitute]) -> { + target_uid; + substitute = xml_to_expr_or_op_arg substitute; + } + | _ -> conversion_failure __FUNCTION__ xml + +and xml_to_subst_in_node (children : tree list) : subst_in_node = + match extract_inline_node children with + | node, [ + Node ("substs", 
substitutions); + Node ("body", [body]); + Node ("instFrom", [Node ("ModuleNodeRef", [Node ("UID", [IValue instance_from_mule_ref])])]); + Node ("instTo", [Node ("ModuleNodeRef", [Node ("UID", [IValue instance_to_mule_ref])])])] -> { + node; + substitutions = List.map xml_to_substitution substitutions; + body = xml_to_expression body; + instance_from_mule_ref; + instance_to_mule_ref + } + | _ -> ls_conversion_failure __FUNCTION__ children + +and xml_to_expression (xml : tree) : expression = + match xml with + | Node ("AtNode", children) -> AtNode (xml_to_at_node children) + | Node ("DecimalNode", children) -> DecimalNode (xml_to_decimal_node children) + | Node ("LabelNode", children) -> LabelNode (xml_to_label_node children) + | Node ("LetInNode", children) -> LetInNode (xml_to_let_in_node children) + | Node ("NumeralNode", children) -> NumeralNode (xml_to_numeral_node children) + | Node ("OpApplNode", children) -> OpApplNode (xml_to_op_appl_node children) + | Node ("StringNode", children) -> StringNode (xml_to_string_node children) + | Node ("SubstInNode", children) -> SubstInNode (xml_to_subst_in_node children) + | Node ("TheoremDefRef", [Node ("UID", [IValue uid])]) -> TheoremDefRef uid + | Node ("AssumeDefRef", [Node ("UID", [IValue uid])]) -> AssumeDefRef uid + | _ -> conversion_failure __FUNCTION__ xml + +and xml_to_user_defined_op_kind (children : tree list) : user_defined_op_kind = + let node, name, arity, precomments, children = match extract_inline_node children with + | node, Node ("uniquename", [SValue name]) :: Node ("arity", [IValue arity]) :: Node ("pre-comments", [SValue precomments]) :: children -> + node, name, arity, Some precomments, children + | node, Node ("uniquename", [SValue name]) :: Node ("arity", [IValue arity]) :: children -> + node, name, arity, None, children + | _ -> ls_conversion_failure __FUNCTION__ children + in match children with + | Node ("body", [body]) :: Node ("params", parameters) :: flags -> { + node; + name; + arity; + 
precomments; + body = xml_to_expression body; + params = List.map xml_to_leibniz_param parameters; + recursive = flags |> List.exists (is_tag "recursive"); + local = flags |> List.exists (is_tag "local"); + } + | _ -> ls_conversion_failure __FUNCTION__ children + +[@@deriving show] + +type instance_node = { + node : node; + name : string option; + module_name : string; + substitutions : substitution list; + parameters : int list; + local : bool; +} +[@@deriving show] + +let xml_to_instance_node (children : tree list) : instance_node = + let extract_inline_name_opt (node, children : node * tree list) : (node * string option * tree list) = + match children with + | Node ("uniquename", [SValue name]) :: children -> (node, Some name, children) + | _ -> (node, None, children) + in match children |> extract_inline_node |> extract_inline_name_opt with + | node, name, Node ("module", [SValue module_name]) :: Node ("substs", substitutions) :: Node ("params", params) :: local -> { + node; + name; + module_name; + substitutions = List.map xml_to_substitution substitutions; + parameters = List.map get_ref params; + local = match local with | [Node ("local", _)] -> true | _ -> false; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +(** This is a weird case that is almost definitely just a bug on SANY's side. + For some reason SANY treats DEFINE M == INSTANCE Naturals proof steps + differently from any other INSTANCE node, which always are either + immediately inlined as M!-prefixed operators (in LET/IN blocks) or given + as an InstanceNode type. This is the ModuleInstanceKind node type, which + cannot even represent parameterization or substitution. In fact, it does + not even export the name of the instance at all! Thankfully very few + proofs seem to include DEFINE steps with an INSTANCE. + + TODO: fix this on SANY's side. 
+*) +let xml_to_define_step_instance_node (children : tree list) : instance_node = + match extract_inline_node children with + | node, Node ("uniquename", [SValue name]) :: local -> { + node; + name = Some name; + module_name = ""; + substitutions = []; + parameters = []; + local = match local with | [Node ("local", _)] -> true | _ -> false; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type use_or_hide_node = { + node : node; + facts : expression list; + def_refs : int list; + only : bool; + hide : bool; +} +[@@deriving show] + +let xml_to_use_or_hide_node (children : tree list) : use_or_hide_node = + match extract_inline_node children with + | node, Node ("facts", facts) :: Node ("defs", defs) :: children -> + let (only, hide) = match children with + | [Node ("only", _); Node ("hide", _)] -> (true, true) + | [Node ("only", _)] -> (true, false) + | [Node ("hide", _)] -> (false, true) + | [] -> (false, false) + | _ -> ls_conversion_failure __FUNCTION__ children + in { + node; + facts = List.map xml_to_expression facts; + def_refs = List.map get_ref defs; + only; + hide; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type unit_kind = +| Ref of int +| Instance of instance_node +| UseOrHide of use_or_hide_node +[@@deriving show] + +type module_node = { + node : node; + name : string; + extends : string list; + units : unit_kind list; +} +[@@deriving show] + +let xml_to_module_node (children : tree list) : module_node = + let extract_extends (xml : tree) : string = + match xml with + | Node ("uniquename", [SValue name]) -> name + | _ -> conversion_failure __FUNCTION__ xml + in let ref_child child = + match get_ref_opt child with + | Some uid -> Ref uid + | None -> match child with + | Node ("InstanceNode", children) -> Instance (xml_to_instance_node children) + | Node ("UseOrHideNode", children) -> UseOrHide (xml_to_use_or_hide_node children) + | _ -> conversion_failure __FUNCTION__ child + in match extract_inline_node children with + | node, 
Node ("uniquename", [SValue name]) :: Node ("extends", extends) :: units -> { + node; + name; + extends = List.map extract_extends extends; + units = List.map ref_child units + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type declaration_kind = + | Constant + | Variable + | BoundSymbol + | NewConstant + | NewVariable + | NewState + | NewAction + | NewTemporal +[@@deriving show] + +let xml_to_declaration_kind (kind : int) : declaration_kind = + match kind with + | 2 -> Constant + | 3 -> Variable + | 4 -> BoundSymbol + | 24 -> NewConstant + | 25 -> NewVariable + | 26 -> NewState + | 27 -> NewAction + | 28 -> NewTemporal + | _ -> conversion_failure __FUNCTION__ (IValue kind) + +type op_decl_node = { + node : node; + name : string; + arity : int; + kind : declaration_kind; +} +[@@deriving show] + +let xml_to_op_decl_node (children : tree list) : op_decl_node = + match extract_inline_node children with + | node, [Node ("uniquename", [SValue name]); Node ("arity", [IValue arity]); Node ("kind", [IValue kind])] -> { + node; + name; + arity; + kind = xml_to_declaration_kind kind; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type built_in_operator = + (* Reserved words *) + | TRUE + | FALSE + | BOOLEAN + | STRING + (* Prefix operators *) + | LogicalNegation + | UNION + | SUBSET + | DOMAIN + | ENABLED + | UNCHANGED + | Always + | Eventually + (* Postfix operators *) + | Prime + (* Infix operators *) + | SetIn + | SetNotIn + | Implies + | Equivalent + | Conjunction + | Disjunction + | Equals + | NotEquals + | SetMinus + | Union + | Intersect + | SubsetEq + | LeadsTo + | ActionComposition + | PlusArrow + (* Language operators *) + | FiniteSetLiteral + | TupleLiteral + | ConjunctionList + | DisjunctionList + | CartesianProduct + | WeakFairness + | StrongFairness + | BoundedChoose + | UnboundedChoose + | ActionOrStutter + | ActionNoStutter + | BoundedExists + | BoundedForAll + | UnboundedExists + | UnboundedForAll + | TemporalExists + | 
TemporalForAll + | FiniteSetMap + | FiniteSetFilter + | FunctionSet + | FunctionConstructor + | FunctionDefinition + | RecursiveFunctionDefinition + | FunctionApplication + | RecordSet + | RecordConstructor + | RecordSelect + | Except + | IfThenElse + | Case + | Subexpression + | Pair + | Sequence + | CaseProofStep + | PickProofStep + | TakeProofStep + | HaveProofStep + | WitnessProofStep + | SufficesProofStep + | QedProofStep +[@@deriving show] + +let xml_to_built_in_operator (name : string) : built_in_operator = + match name with + | "TRUE" -> TRUE + | "FALSE" -> FALSE + | "BOOLEAN" -> BOOLEAN + | "STRING" -> STRING + | "\\lnot" -> LogicalNegation + | "UNION" -> UNION + | "SUBSET" -> SUBSET + | "DOMAIN" -> DOMAIN + | "ENABLED" -> ENABLED + | "UNCHANGED" -> UNCHANGED + | "[]" -> Always + | "<>" -> Eventually + | "'" -> Prime + | "\\in" -> SetIn + | "\\notin" -> SetNotIn + | "=>" -> Implies + | "\\equiv" -> Equivalent + | "\\land" -> Conjunction + | "\\lor" -> Disjunction + | "=" -> Equals + | "/=" -> NotEquals + | "\\" -> SetMinus + | "\\union" -> Union + | "\\intersect" -> Intersect + | "\\subseteq" -> SubsetEq + | "~>" -> LeadsTo + | "\\cdot" -> ActionComposition + | "-+->" -> PlusArrow + | "$SetEnumerate" -> FiniteSetLiteral + | "$Tuple" -> TupleLiteral + | "$ConjList" -> ConjunctionList + | "$DisjList" -> DisjunctionList + | "$CartesianProd" -> CartesianProduct + | "$WF" -> WeakFairness + | "$SF" -> StrongFairness + | "$BoundedChoose" -> BoundedChoose + | "$UnboundedChoose" -> UnboundedChoose + | "$SquareAct" -> ActionOrStutter + | "$AngleAct" -> ActionNoStutter + | "$BoundedExists" -> BoundedExists + | "$BoundedForall" -> BoundedForAll + | "$UnboundedExists" -> UnboundedExists + | "$UnboundedForall" -> UnboundedForAll + | "$TemporalExists" -> TemporalExists + | "$TemporalForall" -> TemporalForAll + | "$SetOfAll" -> FiniteSetMap + | "$SubsetOf" -> FiniteSetFilter + | "$SetOfFcns" -> FunctionSet + | "$FcnConstructor" -> FunctionConstructor + | 
"$NonRecursiveFcnSpec" -> FunctionDefinition + | "$RecursiveFcnSpec" -> RecursiveFunctionDefinition + | "$FcnApply" -> FunctionApplication + | "$SetOfRcds" -> RecordSet + | "$RcdConstructor" -> RecordConstructor + | "$RcdSelect" -> RecordSelect + | "$Except" -> Except + | "$IfThenElse" -> IfThenElse + | "$Case" -> Case + | "$Nop" -> Subexpression + | "$Pfcase" -> CaseProofStep + | "$Pick" -> PickProofStep + | "$Take" -> TakeProofStep + | "$Have" -> HaveProofStep + | "$Witness" -> WitnessProofStep + | "$Suffices" -> SufficesProofStep + | "$Qed" -> QedProofStep + | "$Pair" -> Pair + | "$Seq" -> Sequence + | name -> conversion_failure __FUNCTION__ (SValue name) + +type built_in_kind = { + node : node; + operator : built_in_operator; + arity : int; + params : leibniz_param list; +} +[@@deriving show] + +let xml_to_built_in_kind (children : tree list) : built_in_kind = + let node, name, arity, params = match extract_inline_node children with + | node, [Node ("uniquename", [SValue name]); Node ("arity", [IValue arity]); Node ("params", params)] -> + node, name, arity, List.map xml_to_leibniz_param params + | node, [Node ("uniquename", [SValue name]); Node ("arity", [IValue arity])] -> + node, name, arity, [] + | _ -> ls_conversion_failure __FUNCTION__ children + in { + node; + operator = xml_to_built_in_operator name; + arity; + params + } + +type assume_def_node = { + node : node; + name : string; + body : expr_or_assume_prove; +} +[@@deriving show] + +let xml_to_assume_def_node (children : tree list) : assume_def_node = + match extract_inline_node children with + | node, [Node ("uniquename", [SValue name]); body] -> { + node; + name; + body = xml_to_expr_or_assume_prove body; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type assume_node = { + node : node; + definition : int option; + body : expression; +} +[@@deriving show] + +let xml_to_assume_node (children : tree list) : assume_node = + match children |> extract_inline_node |> 
extract_inline_definition_opt with + | node, definition, [Node ("body", [body])] -> { + node; + definition; + body = xml_to_expression body; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type theorem_def_node = { + node : node; + name : string; + body : expr_or_assume_prove; +} +[@@deriving show] + +let xml_to_theorem_def_node (children : tree list) : theorem_def_node = + match extract_inline_node children with + | node, [Node ("uniquename", [SValue name]); body] -> { + node; + name; + body = xml_to_expr_or_assume_prove body + } +| _ -> ls_conversion_failure __FUNCTION__ children + +type by_proof_node = { + node : node; + facts : expression list; + defs : int list; + only : bool; +} +[@@deriving show] + +let xml_to_by_proof_node (children : tree list) : by_proof_node = + match extract_inline_node children with + | node, Node ("facts", facts) :: Node ("defs", defs) :: children -> { + node; + facts = List.map xml_to_expression facts; + defs = List.filter_map get_ref_opt defs; + only = match children with | [Node ("only", _)] -> true | _ -> false + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type def_proof_step = { + node : node; + def_refs : int list; +} +[@@deriving show] + +let xml_to_def_proof_step (children : tree list) : def_proof_step = + match extract_inline_node children with + | node, defs -> { + node; + def_refs = List.map get_ref defs; + } + +type proof_step_group = + | TheoremNodeRef of int + | DefStep of def_proof_step + | UseOrHide of use_or_hide_node + | InstanceNode of instance_node + | TheoremNode +[@@deriving show] + +type steps_proof_node = { + node : node; + proof_level : int; + steps : proof_step_group list; +} +[@@deriving show] + +let xml_to_steps_proof_node (children : tree list) : steps_proof_node = + let xml_to_proof_step_group xml = + match xml with + | Node ("TheoremNodeRef", [Node ("UID", [IValue uid])]) -> TheoremNodeRef uid + | Node ("DefStepNode", children) -> DefStep (xml_to_def_proof_step children) + | 
Node ("UseOrHideNode", children) -> UseOrHide (xml_to_use_or_hide_node children) + | Node ("InstanceNode", children) -> InstanceNode (xml_to_instance_node children) + | _ -> conversion_failure __FUNCTION__ xml + in match extract_inline_node children with + | node, Node ("proofLevel", [IValue proof_level]) :: steps -> { + node; + proof_level; + steps = List.map xml_to_proof_step_group steps + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type proof_node_group = + | Omitted of node + | Obvious of node + | By of by_proof_node + | Steps of steps_proof_node +[@@deriving show] + +type theorem_node = { + node : node; + definition : int option; + body : expr_or_assume_prove; + proof : proof_node_group option; +} +[@@deriving show] + +let xml_to_theorem_node (children : tree list) : theorem_node = + let xml_to_inline_proof_node_group (children : tree list) : proof_node_group option = + match children with + | Node ("omitted", children) :: _ -> let (node, _) = extract_inline_node children in Some (Omitted node) + | Node ("obvious", children) :: _ -> let (node, _) = extract_inline_node children in Some (Obvious node) + | Node ("by", children) :: _ -> Some (By (xml_to_by_proof_node children)) + | Node ("steps", children) :: _ -> Some (Steps (xml_to_steps_proof_node children)) + | [] -> None + | _ -> ls_conversion_failure __FUNCTION__ children + in match children |> extract_inline_node |> extract_inline_definition_opt with + | node, definition, Node ("body", [body]) :: proof -> { + node; + definition; + body = xml_to_expr_or_assume_prove body; + proof = xml_to_inline_proof_node_group proof; + } + | _ -> ls_conversion_failure __FUNCTION__ children + +type entry_kind = + | FormalParamNode of formal_param_node + | ModuleNode of module_node + | OpDeclNode of op_decl_node + | AssumeNode of assume_node + | ModuleInstanceKind of instance_node + | UserDefinedOpKind of user_defined_op_kind + | BuiltInKind of built_in_kind + | TheoremNode of theorem_node + | TheoremDefNode of 
theorem_def_node + | AssumeDefNode of assume_def_node +[@@deriving show] + +let xml_to_entry_kind (xml : tree) : entry_kind = + match xml with + | Node ("ModuleNode", children) -> ModuleNode (xml_to_module_node children) + | Node ("AssumeNode", children) -> AssumeNode (xml_to_assume_node children) + | Node ("AssumeDef", children) -> AssumeDefNode (xml_to_assume_def_node children) + | Node ("OpDeclNode", children) -> OpDeclNode (xml_to_op_decl_node children) + | Node ("UserDefinedOpKind", children) -> UserDefinedOpKind (xml_to_user_defined_op_kind children) + | Node ("BuiltInKind", children) -> BuiltInKind (xml_to_built_in_kind children) + | Node ("FormalParamNode", children) -> FormalParamNode (xml_to_formal_param_node children) + | Node ("ModuleInstanceKind", children) -> ModuleInstanceKind (xml_to_define_step_instance_node children) + | Node ("TheoremDefNode", children) -> TheoremDefNode (xml_to_theorem_def_node children) + | Node ("TheoremNode", children)-> TheoremNode (xml_to_theorem_node children) + | _ -> conversion_failure __FUNCTION__ xml + +type entry = { + uid : int; + kind : entry_kind; +} +[@@deriving show] + +let xml_to_entry (xml : tree) : entry = + match xml with + | Node ("entry", [Node ("UID", [IValue uid]); entry]) -> { + uid; + kind = xml_to_entry_kind entry; + } + | _ -> conversion_failure __FUNCTION__ xml + +type modules = { + root_module: string; + context: entry list; + module_refs : int list; +} +[@@deriving show] + +let xml_to_modules (xml : tree) : modules = + match xml with + | Node ("modules", children) -> ( + match children with + | Node ("RootModule", [SValue root_module]) :: Node ("context", entries) :: modules -> { + root_module; + context = List.map xml_to_entry entries; + module_refs = List.map get_ref modules; + } + | _ -> ls_conversion_failure __FUNCTION__ children) + | _ -> conversion_failure __FUNCTION__ xml + +let xml_to_ast (xml : tree) : (modules, (string option * string)) result = + let prev_backtrace = 
Printexc.backtrace_status () in + if Params.debugging "sany" then Printexc.record_backtrace true; + try + let modules = xml_to_modules xml in + Printexc.record_backtrace prev_backtrace; + Result.ok modules + with Invalid_argument e -> + let trace = Printexc.get_backtrace () in + Printexc.record_backtrace prev_backtrace; + Result.error (None, Printf.sprintf "%s\n%s" e trace) + +let get_module_ast_xml (module_path : string) (stdlib_path : string) : (modules, (string option * string)) result = + let ( >>= ) = Result.bind in + (source_to_sany_xml_str module_path stdlib_path) >>= str_to_xml >>= xml_to_ast diff --git a/src/tlapm_args.ml b/src/tlapm_args.ml index 63acc4a5..f00a441e 100644 --- a/src/tlapm_args.ml +++ b/src/tlapm_args.ml @@ -49,6 +49,14 @@ let set_default_method meth = try set_default_method meth with Failure msg -> raise (Arg.Bad ("--method: " ^ msg)) +let set_parser_backend parser_str = + match String.lowercase_ascii parser_str with + | "sany" -> Params.parser_backend := Sany + | "tlapm" -> Params.parser_backend := Tlapm + | _ -> raise (Arg.Bad ("--parser: " ^ parser_str)) + +let add_module_jar_path jar_path = + Params.module_jar_paths := jar_path :: !Params.module_jar_paths let parse_args executable_name args opts mods usage_fmt err terminate = try @@ -200,6 +208,12 @@ let init ?(out=Format.std_formatter) ?(err=Format.err_formatter) ?(terminate=exi "--prefer-stdlib", Arg.Set prefer_stdlib, " \ prefer built-in standard modules if the module search path \ contains files with the same names as modules in stdlib."; + "--parser", Arg.String set_parser_backend, " \ + Set parser backend to use: TLAPM (default) or SANY."; + "--module-jar", Arg.String add_module_jar_path, " \ + Add a path to a .jar file containing additional TLA+ modules, such + as the community modules. 
Multiple .jar files can be added by using + this option multiple times."; "--noproving", Arg.Set noproving, " do not prove, report fingerprinted results only"; blank; diff --git a/src/tlapm_lib.ml b/src/tlapm_lib.ml index 4bd57a16..1fc61c12 100644 --- a/src/tlapm_lib.ml +++ b/src/tlapm_lib.ml @@ -544,7 +544,22 @@ let setup_loader fs loader_paths = let loader_paths = List.fold_left add_if_new loader_paths fs in Loader.Global.setup loader_paths +let sany_modctx_of_string filename = + let transform (ctx, mule : modctx * Module.T.mule) : (modctx * Module.T.mule, string option * string) result = + Params.input_files := [Filename.basename filename]; + Params.set_search_path [Filename.basename filename]; + let (mule, _) = let open Module.Flatten in flatten ctx mule Ss.empty + in let (ctx, m, _summ) = Module.Elab.normalize ctx Deque.empty mule in Ok (ctx, m) + in Result.bind (Sany.parse filename) transform + let main fs = + match !Params.parser_backend, fs with + | Sany, [root_module_path] -> (match sany_modctx_of_string root_module_path with + | Ok (mcx, mule) -> process_module mcx mule |> ignore + | Error (_, msg) -> failwith ("Error parsing module using Sany backend: " ^ msg) + ) + | Sany, _ -> failwith "When using Sany parser backend, exactly one input file must be provided." + | Tlapm, _ -> setup_loader fs !Params.rev_search_path; Params.input_files := map_paths_to_filenames fs; let () = @@ -616,8 +631,7 @@ let init () = end; exit 3 -(* Access to this function has to be synchronized. 
*) -let modctx_of_string ~(content : string) ~(filename : string) ~loader_paths ~prefer_stdlib : (modctx * Module.T.mule, string option * string) result = +let tlapm_modctx_of_string ~(content : string) ~(filename : string) ~loader_paths ~prefer_stdlib : (modctx * Module.T.mule, string option * string) result = let parse_it () = Errors.reset (); Params.prefer_stdlib := prefer_stdlib; @@ -652,9 +666,18 @@ let modctx_of_string ~(content : string) ~(filename : string) ~loader_paths ~pre | Some l, None -> Error (Some l, Printexc.to_string e) | None, None -> Error (None, Printexc.to_string e)) +(* Access to this function has to be synchronized. *) +let modctx_of_string ~(content : string) ~(filename : string) ~loader_paths ~prefer_stdlib : (modctx * Module.T.mule, string option * string) result = + match !Params.parser_backend with + | Tlapm -> tlapm_modctx_of_string ~content ~filename ~loader_paths ~prefer_stdlib + | Sany -> sany_modctx_of_string filename + let module_of_string module_str = - let hparse = Tla_parser.P.use Module.Parser.parse in - let (flex, _) = Alexer.lex_string module_str in - Tla_parser.P.run hparse ~init:Tla_parser.init ~source:flex + match !Params.parser_backend with + | Tlapm -> + let hparse = Tla_parser.P.use Module.Parser.parse in + let (flex, _) = Alexer.lex_string module_str in + Tla_parser.P.run hparse ~init:Tla_parser.init ~source:flex + | Sany -> failwith "SANY cannot parse modules from a string" let stdlib_search_paths = Params.stdlib_search_paths diff --git a/test/parser/parser_tests.ml b/test/parser/parser_tests.ml index 44439d92..5ebf535e 100644 --- a/test/parser/parser_tests.ml +++ b/test/parser/parser_tests.ml @@ -153,9 +153,9 @@ let tests = "Standardized syntax test corpus" >::: ( | None -> assert_bool "Expected parse success" (expect_parse_failure test) | Some tlapm_output -> skip_if (should_skip_tree_comparison test) "Skipping parse tree comparison"; - let open Translate_syntax_tree in + let open Tlapm_lib__Sany in let open 
Sexplib in - let actual = tlapm_output |> translate_tla_source_file |> ts_node_to_sexpr in + let actual = module_to_sexp tlapm_output in if Sexp.equal expected actual then assert_bool "Expected parse test to fail" (not (expect_tree_comparison_failure test)) else diff --git a/test/sany/Test.tla b/test/sany/Test.tla new file mode 100644 index 00000000..896f7532 --- /dev/null +++ b/test/sany/Test.tla @@ -0,0 +1,4 @@ +---- MODULE Test ---- +EXTENDS Naturals +==== + diff --git a/test/sany/dune b/test/sany/dune new file mode 100644 index 00000000..fe0609ab --- /dev/null +++ b/test/sany/dune @@ -0,0 +1,6 @@ +(test + (name sany_tests) + (modes exe) + (libraries tlapm_lib ounit2 sexplib sexp_diff) + (preprocess (pps ppx_deriving.show)) +) diff --git a/test/sany/sany_tests.ml b/test/sany/sany_tests.ml new file mode 100644 index 00000000..cc6a96f1 --- /dev/null +++ b/test/sany/sany_tests.ml @@ -0,0 +1,69 @@ +let find_tla_files dir = + let cmd = Printf.sprintf "find %s -name '*.tla'" (Filename.quote dir) in + let ic = Unix.open_process_in cmd in + let rec loop acc = + match input_line ic with + | line -> loop (line :: acc) + | exception End_of_file -> + ignore (Unix.close_process_in ic); + List.rev acc + in + loop [] + +let _has_substring needle haystack = + match Str.search_forward (Str.regexp_string needle) haystack 0 with + | _ -> true + | exception Not_found -> false + +let should_run (path : string) : bool = + let preds = [ + String.ends_with ~suffix:"paxos/Paxos.tla"; + String.ends_with ~suffix:"ByzPaxos/BPConProof.tla"; + String.ends_with ~suffix:"GraphTheorem.tla"; + String.ends_with ~suffix:"NegativeOpTest.tla"; + ] in not (List.exists (fun pred -> pred path) preds) + +let _start_at (filename : string) (files : string list) : string list = + let rec drop_until (paths : string list) : string list = + match paths with + | [] -> [] + | hd :: tl -> + if String.ends_with ~suffix:filename hd then paths + else drop_until tl + in drop_until files + +let parse_tla_file 
filename = + let open Tlapm_lib in + let open Tlapm_lib__Sany in + print_endline ("Parsing " ^ filename ^ " ..."); + try match parse filename with + | Error (_, msg) -> failwith msg + | Ok _ -> print_endline (filename ^ " success") + with + | Unsupported_language_feature (location, RecursiveOperator) -> + (* This is okay, we just don't support recursive operators *) + Printf.eprintf "%s:\nUnsupported recursive operator at %s\n" filename (Loc.string_of_locus (Option.get location)) + | Unsupported_language_feature (location, Subexpression) -> + (* This is okay, we just don't support subexpressions *) + Printf.eprintf "%s:\nUnsupported subexpression at %s\n" filename (Loc.string_of_locus (Option.get location)) + (*| Failure (e : string) -> + Printf.eprintf "%s\n" e; + failwith filename*) + +let _ = + let open Tlapm_lib__Params in + parser_backend := Sany; + module_jar_paths := [ + "/mnt/data/ahelwer/src/tlaplus/examples/deps/apalache/lib/apalache.jar"; + "/mnt/data/ahelwer/src/tlaplus/examples/deps/community/modules.jar"; + ]; + add_debug_flag "sany"; + let tla_files = [ + "/mnt/data/ahelwer/src/tlaplus/examples/specifications"; + "/mnt/data/ahelwer/src/tlaplus/proofs/library"; + "/mnt/data/ahelwer/src/tlaplus/proofs/examples"; + "/home/ahelwer/src/tlaplus/java-tools/tlatools/org.lamport.tlatools/test/tla2sany/semantic/corpus"; + ] |> List.map find_tla_files |> List.flatten + |> List.filter should_run + (*|> _start_at "paxos/Paxos.tla"*) + in List.map parse_tla_file tla_files diff --git a/test/semantics/0_ConstantRefTest.tla b/test/semantics/0_ConstantRefTest.tla new file mode 100644 index 00000000..2196b20a --- /dev/null +++ b/test/semantics/0_ConstantRefTest.tla @@ -0,0 +1,6 @@ +---- MODULE 0_ConstantRefTest ---- +CONSTANT x +CONSTANT y +CONSTANT z +op == <> +==== diff --git a/test/semantics/10_BasicStepProofTest.tla b/test/semantics/10_BasicStepProofTest.tla new file mode 100644 index 00000000..5e5d9606 --- /dev/null +++ b/test/semantics/10_BasicStepProofTest.tla 
@@ -0,0 +1,15 @@ +---- MODULE 10_BasicStepProofTest ---- +A == 0 +B == 0 +C == 0 +THEOREM AB == A = B +PROOF BY ONLY DEFS A, B +THEOREM BC == B = C +PROOF BY ONLY DEFS B, C +THEOREM AC == A = C +PROOF +<1>a AB +<1>b BC +<1>c QED + +==== \ No newline at end of file diff --git a/test/semantics/11_BasicAssumeProveTest.tla b/test/semantics/11_BasicAssumeProveTest.tla new file mode 100644 index 00000000..efaf9720 --- /dev/null +++ b/test/semantics/11_BasicAssumeProveTest.tla @@ -0,0 +1,12 @@ +---- MODULE 11_BasicAssumeProveTest ---- +THEOREM + ASSUME + NEW CONSTANT A, + NEW CONSTANT B, + NEW CONSTANT C + PROVE + /\ A = B + /\ B = C + => A = C +PROOF OBVIOUS +==== \ No newline at end of file diff --git a/test/semantics/12_BasicCaseStepProofTest.tla b/test/semantics/12_BasicCaseStepProofTest.tla new file mode 100644 index 00000000..c53b44bc --- /dev/null +++ b/test/semantics/12_BasicCaseStepProofTest.tla @@ -0,0 +1,11 @@ +---- MODULE 12_BasicCaseStepProofTest ---- +A == TRUE +B == TRUE +THEOREM A /\ B +PROOF +<1> CASE A + PROOF BY ONLY DEF A +<1> CASE B + PROOF BY ONLY DEF B +<1> QED +==== \ No newline at end of file diff --git a/test/semantics/1_VariableRefTest.tla b/test/semantics/1_VariableRefTest.tla new file mode 100644 index 00000000..da51c41a --- /dev/null +++ b/test/semantics/1_VariableRefTest.tla @@ -0,0 +1,6 @@ +---- MODULE 1_VariableRefTest ---- +VARIABLE x +VARIABLE y +VARIABLE z +op == <> +==== diff --git a/test/semantics/2_OperatorRefTest.tla b/test/semantics/2_OperatorRefTest.tla new file mode 100644 index 00000000..afc0d918 --- /dev/null +++ b/test/semantics/2_OperatorRefTest.tla @@ -0,0 +1,4 @@ +---- MODULE 2_OperatorRefTest ---- +op == 0 +op2 == op +==== diff --git a/test/semantics/3_AssumeRefTest.tla b/test/semantics/3_AssumeRefTest.tla new file mode 100644 index 00000000..2518dcdf --- /dev/null +++ b/test/semantics/3_AssumeRefTest.tla @@ -0,0 +1,4 @@ +---- MODULE 3_AssumeRefTest ---- +ASSUME P == TRUE +op == P +==== \ No newline at end of file diff 
--git a/test/semantics/4_TheoremRefTest.tla b/test/semantics/4_TheoremRefTest.tla new file mode 100644 index 00000000..596424ac --- /dev/null +++ b/test/semantics/4_TheoremRefTest.tla @@ -0,0 +1,4 @@ +---- MODULE 4_TheoremRefTest ---- +THEOREM thm == TRUE +op == thm +==== \ No newline at end of file diff --git a/test/semantics/5_ConstantOperatorTest.tla b/test/semantics/5_ConstantOperatorTest.tla new file mode 100644 index 00000000..f615df75 --- /dev/null +++ b/test/semantics/5_ConstantOperatorTest.tla @@ -0,0 +1,4 @@ +---- MODULE 5_ConstantOperatorTest ---- +CONSTANT F(_, _) +op == F(1, 2) +==== \ No newline at end of file diff --git a/test/semantics/6_OperatorParameterRefTest.tla b/test/semantics/6_OperatorParameterRefTest.tla new file mode 100644 index 00000000..a54b3a1d --- /dev/null +++ b/test/semantics/6_OperatorParameterRefTest.tla @@ -0,0 +1,3 @@ +---- MODULE 6_OperatorParameterRefTest ---- +op(x, y) == <> +==== \ No newline at end of file diff --git a/test/semantics/7_ExpressionTest.tla b/test/semantics/7_ExpressionTest.tla new file mode 100644 index 00000000..3ba54aee --- /dev/null +++ b/test/semantics/7_ExpressionTest.tla @@ -0,0 +1,3 @@ +---- MODULE 7_ExpressionTest ---- +ITE == IF TRUE THEN 1 ELSE 2 +==== \ No newline at end of file diff --git a/test/semantics/8_ObviousProofTest.tla b/test/semantics/8_ObviousProofTest.tla new file mode 100644 index 00000000..9618222d --- /dev/null +++ b/test/semantics/8_ObviousProofTest.tla @@ -0,0 +1,4 @@ +---- MODULE 8_ObviousProofTest ---- +THEOREM TRUE +PROOF OBVIOUS +==== \ No newline at end of file diff --git a/test/semantics/9_BasicByProofTest.tla b/test/semantics/9_BasicByProofTest.tla new file mode 100644 index 00000000..99f04c87 --- /dev/null +++ b/test/semantics/9_BasicByProofTest.tla @@ -0,0 +1,6 @@ +---- MODULE 9_BasicByProofTest ---- +A == 1 +B == 1 +THEOREM A = B +PROOF BY ONLY DEFS A, B +==== \ No newline at end of file diff --git a/test/semantics/dune b/test/semantics/dune new file mode 100644 index 
00000000..0fd77a68 --- /dev/null +++ b/test/semantics/dune @@ -0,0 +1,7 @@ +(test + (name semantic_tests) + (modes exe) + (libraries tlapm_lib ounit2 sexp_diff) + (deps (glob_files *.tla)) + (preprocess (pps ppx_deriving.show)) +) diff --git a/test/semantics/semantic_tests.ml b/test/semantics/semantic_tests.ml new file mode 100644 index 00000000..2b891958 --- /dev/null +++ b/test/semantics/semantic_tests.ml @@ -0,0 +1,84 @@ + +let find_tla_files dir = + let cmd = Printf.sprintf "find %s -name '*Test.tla'" (Filename.quote dir) in + let ic = Unix.open_process_in cmd in + let rec loop acc = + match input_line ic with + | line -> loop (line :: acc) + | exception End_of_file -> + ignore (Unix.close_process_in ic); + List.rev acc + in + loop [] + +let read_file (filepath : string) : string = + let ic = open_in filepath in + let content = really_input_string ic (in_channel_length ic) in + close_in ic; + content + +open OUnit2;; +open Tlapm_lib;; +open Stdlib;; +open Tlapm_lib__Params;; +open Tlapm_lib__Sany;; + +let compare_syntax_trees (filepath : string) (source_code : string) (is_error : bool) : unit = + parser_backend := Tlapm; + try match module_of_string source_code with + | None -> assert_failure "TLAPM failed to parse the test input syntax" + | Some tlapm_mule -> ( + parser_backend := Sany; + try match parse filepath with + | Error _ -> assert_failure "SANY failed to parse the test input syntax" + | Ok (_, sany_mule) -> + let open Sexplib in + let tlapm_tree = module_to_sexp tlapm_mule in + let sany_tree = module_to_sexp sany_mule in + if Sexp.equal tlapm_tree sany_tree + then assert_bool "Syntax trees equivalent but SANY failed" (not is_error) + else + let open Sexp_diff in + let diff = Algo.diff ~original:tlapm_tree ~updated:sany_tree () in + let options = Display.Display_options.(create Layout.Single_column) in + let text = Display.display_with_ansi_colors options diff in + assert_failure ( + if is_error then (Printf.sprintf "SANY failed and parse trees 
differ:\n%s" text) + else (Printf.sprintf "SANY succeeded but parse trees differ:\n%s" text)) + with Failure _ -> assert_failure "SANY failed to parse the test input syntax" + ) with Failure _ -> assert_failure "TLAPM failed to parse the test input syntax" + +let check (filename : string) (content : string) : (unit, string) result = + try match modctx_of_string ~content ~filename ~loader_paths:[] ~prefer_stdlib:true with + | Error (_, msg) -> Error msg + | Ok _ -> Ok () + with Failure msg -> Error msg + +let check_in_tlapm : string -> string -> (unit, string) result = + parser_backend := Tlapm; + check + +let check_in_sany : string -> string -> (unit, string) result = + parser_backend := Sany; + check + +let run_test (filename : string) (_ctx: test_ctxt) : unit = + (*add_debug_flag "sany";*) + let content = read_file filename in + match check_in_tlapm filename content with + | Error msg -> assert_failure ("TLAPM failed to parse the test input: " ^ msg) + | Ok () -> match check_in_sany filename content with + | Error _ -> compare_syntax_trees filename content true + | Ok () -> compare_syntax_trees filename content false + +let mk_test (filepath : string) : test = + Filename.basename filepath >:: (run_test filepath) + +let tests = "SANY semantic escalation tests" >::: ( + find_tla_files "." + |> List.sort String.compare + |> List.map mk_test +) + +(** The OUnit2 test entrypoint. *) +let () = run_test_tt_main tests diff --git a/tlapm.opam b/tlapm.opam index 872eb93f..b3fcbe68 100644 --- a/tlapm.opam +++ b/tlapm.opam @@ -38,6 +38,7 @@ depends: [ "ppx_inline_test" "ppx_assert" "ppx_deriving" + "xmlm" "ounit2" "odoc" {with-doc} ]