diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..a0a9062c --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,44 @@ +name: Benchmarks +on: + # TODO remove pull_request once it's consistent. + pull_request: + branches: ["*"] + schedule: + - cron: "0 5 * * WED" # Every Wed at 05:00 + +jobs: + run: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup Julia + uses: julia-actions/setup-julia@v2 + - uses: julia-actions/cache@v2 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pip" + - name: Setup optimizers + uses: ./.github/actions/setup_optimizers_linux + with: + GUROBI_WLS: ${{ secrets.GUROBI_WLS }} + COPT_LICENSE_KEY: ${{ secrets.COPT_LICENSE_KEY }} + COPT_LICENSE_DAT: ${{ secrets.COPT_LICENSE_DAT }} + CHECK_LICENSE: true + - name: Install scikit-sparse dependencies + run: | + sudo apt-get update + sudo apt-get install -y libsuitesparse-dev + - name: Install dependencies + run: | + pip install -e . 
+ cd benchmarks + pip install -e .[energy-planning] + - name: Install Julia packages + run: | + julia --project=benchmarks -e 'using Pkg; Pkg.instantiate()' + - name: Run benchmarks + run: | + python benchmarks/test.py diff --git a/.gitignore b/.gitignore index fe0722a4..021bee96 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ site src/pyoframe/_version.py htmlcov/ .snakemake/ +*.html \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 04202b32..f5f11416 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,6 +12,29 @@ "args": [ "serve" ] + }, + { + "name": "Snakemake", + "type": "debugpy", + "request": "launch", + "module": "snakemake", + "cwd": "${workspaceFolder}/benchmarks", + "args": [ + "--cores", + "all" + ], + }, + { + "name": "Python Debugger: Snakemake", + "type": "debugpy", + "request": "launch", + "module": "snakemake", + "cwd": "${workspaceFolder}/benchmarks", + "args": [ + "--cores", + "1", + "--debug" + ] } ] } \ No newline at end of file diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore new file mode 100644 index 00000000..233a37e8 --- /dev/null +++ b/benchmarks/.gitignore @@ -0,0 +1,5 @@ +results/** +src/*/raw_data/downloads/** +src/*/raw_data/preprocessed/** +julia_sysimage.so +julia_precompile_statements.jl \ No newline at end of file diff --git a/benchmarks/Manifest.toml b/benchmarks/Manifest.toml new file mode 100644 index 00000000..742ab0c2 --- /dev/null +++ b/benchmarks/Manifest.toml @@ -0,0 +1,1087 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.7" +manifest_format = "2.0" +project_hash = "4abf3ea264ca42e6c90e00065528b44bade52089" + +[[deps.ASL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6252039f98492252f9e47c312c8ffda0e3b9e78d" +uuid = "ae81ac8f-d209-56e5-92de-9978fef736f9" +version = "0.1.3+0" + +[[deps.AbstractTrees]] +git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" +uuid = 
"1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.5" + +[[deps.Accessors]] +deps = ["CompositionsBase", "ConstructionBase", "Dates", "InverseFunctions", "MacroTools"] +git-tree-sha1 = "2eeb2c9bef11013efc6f8f97f32ee59b146b09fb" +uuid = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +version = "0.1.44" + + [deps.Accessors.extensions] + AxisKeysExt = "AxisKeys" + IntervalSetsExt = "IntervalSets" + LinearAlgebraExt = "LinearAlgebra" + StaticArraysExt = "StaticArrays" + StructArraysExt = "StructArrays" + TestExt = "Test" + UnitfulExt = "Unitful" + + [deps.Accessors.weakdeps] + AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + +[[deps.ArgCheck]] +git-tree-sha1 = "f9e9a66c9b7be1ad7372bbd9b062d9230c30c5ce" +uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" +version = "2.5.0" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.ArrayLayouts]] +deps = ["FillArrays", "LinearAlgebra", "StaticArrays"] +git-tree-sha1 = "e0b47732a192dd59b9d079a06d04235e2f833963" +uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" +version = "1.12.2" +weakdeps = ["SparseArrays"] + + [deps.ArrayLayouts.extensions] + ArrayLayoutsSparseArraysExt = "SparseArrays" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.BangBang]] +deps = ["Accessors", "ConstructionBase", "InitialValues", "LinearAlgebra"] +git-tree-sha1 = "cceb62468025be98d42a5dc581b163c20896b040" +uuid = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" +version = "0.4.9" + + [deps.BangBang.extensions] + BangBangChainRulesCoreExt = "ChainRulesCore" + BangBangDataFramesExt = "DataFrames" + BangBangStaticArraysExt = "StaticArrays" + 
BangBangStructArraysExt = "StructArrays" + BangBangTablesExt = "Tables" + BangBangTypedTablesExt = "TypedTables" + + [deps.BangBang.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" + TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.Baselet]] +git-tree-sha1 = "aebf55e6d7795e02ca500a689d326ac979aaf89e" +uuid = "9718e550-a3fa-408a-8086-8db961cd8217" +version = "0.1.1" + +[[deps.BenchmarkTools]] +deps = ["Compat", "JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"] +git-tree-sha1 = "e38fbc49a620f5d0b660d7f543db1009fe0f8336" +uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +version = "1.6.0" + +[[deps.BitIntegers]] +deps = ["Random"] +git-tree-sha1 = "091d591a060e43df1dd35faab3ca284925c48e46" +uuid = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1" +version = "0.3.7" + +[[deps.Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1b96ea4a01afe0ea4090c5c8039690672dd13f2e" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.9+0" + +[[deps.CEnum]] +git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.5.0" + +[[deps.CSV]] +deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] +git-tree-sha1 = "8d8e0b0f350b8e1c91420b5e64e5de774c2f0f4d" +uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +version = "0.10.16" + +[[deps.CategoricalArrays]] +deps = ["Compat", "DataAPI", "Future", "Missings", "Printf", "Requires", "Statistics", "Unicode"] +git-tree-sha1 = "73acb4ed51b1855e1b5ce5c610334363a98d13f1" +uuid = 
"324d7699-5711-5eae-9e2f-1d82baa6b597" +version = "1.0.2" + + [deps.CategoricalArrays.extensions] + CategoricalArraysArrowExt = "Arrow" + CategoricalArraysJSONExt = "JSON" + CategoricalArraysRecipesBaseExt = "RecipesBase" + CategoricalArraysSentinelArraysExt = "SentinelArrays" + CategoricalArraysStatsBaseExt = "StatsBase" + CategoricalArraysStructTypesExt = "StructTypes" + + [deps.CategoricalArrays.weakdeps] + Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" + JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" + RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" + SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c" + StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" + StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" + +[[deps.ChunkCodecCore]] +git-tree-sha1 = "1a3ad7e16a321667698a19e77362b35a1e94c544" +uuid = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1" +version = "1.0.1" + +[[deps.ChunkCodecLibBrotli]] +deps = ["ChunkCodecCore", "brotli_jll"] +git-tree-sha1 = "45709ad3ba09bdff5e6481d2c1727b1499989997" +uuid = "653b0ff7-85b5-4442-93c1-dcc330d3ec7d" +version = "1.0.0" + +[[deps.ChunkCodecLibLz4]] +deps = ["ChunkCodecCore", "Lz4_jll"] +git-tree-sha1 = "0a4d7695ef98ab714efe5aef26fc35c3b0b4c1ee" +uuid = "7e9cc85e-5614-42a3-ad86-b78f920b38a5" +version = "1.0.0" + +[[deps.ChunkCodecLibSnappy]] +deps = ["ChunkCodecCore", "snappy_jll"] +git-tree-sha1 = "a9e98b8cc7ccdcfcb406773a6c58987daa6eda05" +uuid = "eac87354-86d5-4a5b-ab5f-a6ee56b239b3" +version = "1.0.0" + +[[deps.ChunkCodecLibZlib]] +deps = ["ChunkCodecCore", "Zlib_jll"] +git-tree-sha1 = "cee8104904c53d39eb94fd06cbe60cb5acde7177" +uuid = "4c0bbee4-addc-4d73-81a0-b6caacae83c8" +version = "1.0.0" + +[[deps.ChunkCodecLibZstd]] +deps = ["ChunkCodecCore", "Zstd_jll"] +git-tree-sha1 = "34d9873079e4cb3d0c62926a225136824677073f" +uuid = "55437552-ac27-4d47-9aa3-63184e8fd398" +version = "1.0.0" + +[[deps.CodecBzip2]] +deps = ["Bzip2_jll", "TranscodingStreams"] +git-tree-sha1 = "84990fa864b7f2b4901901ca12736e45ee79068c" +uuid = 
"523fee87-0ab8-5b00-afb7-3ecf72e48cfd" +version = "0.8.5" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "962834c22b66e32aa10f7611c08c8ca4e20749a9" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.8" + +[[deps.CodecZstd]] +deps = ["TranscodingStreams", "Zstd_jll"] +git-tree-sha1 = "da54a6cd93c54950c15adf1d336cfd7d71f51a56" +uuid = "6b39b394-51ab-5f42-8807-6242bab2b4c2" +version = "0.8.7" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools"] +git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.1" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "3a3dfb30697e96a440e4149c8c51bf32f818c0f3" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.17.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.1.1+0" + +[[deps.CompositionsBase]] +git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" +uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b" +version = "0.1.2" +weakdeps = ["InverseFunctions"] + + [deps.CompositionsBase.extensions] + CompositionsBaseInverseFunctionsExt = "InverseFunctions" + +[[deps.ConstructionBase]] +git-tree-sha1 = "b4b092499347b18a015186eae3042f72267106cb" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.6.0" + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseLinearAlgebraExt = "LinearAlgebra" + ConstructionBaseStaticArraysExt = "StaticArrays" + + [deps.ConstructionBase.weakdeps] + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.Crayons]] +git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" +uuid = 
"a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +version = "4.1.1" + +[[deps.DataAPI]] +git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.16.0" + +[[deps.DataFrames]] +deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "d8928e9169ff76c6281f39a659f9bca3a573f24c" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "1.8.1" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "4e1fe97fdaed23e9dc21d4d664bea76b65fc50a0" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.22" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.DecFP]] +deps = ["DecFP_jll", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "3b98337b5b709548754973b9d79f4d5f2038c7cf" +uuid = "55939f99-70c6-5e9b-8bb0-5071ed7d61fd" +version = "1.4.2" + +[[deps.DecFP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "40e4404a0a267a8e75f5c1ce2cc9b5e2ce1ba268" +uuid = "47200ebd-12ce-5be5-abb7-8e082af23329" +version = "2.0.300+0" + +[[deps.Decimals]] +git-tree-sha1 = "e98abef36d02a0ec385d68cd7dadbce9b28cbd88" +uuid = "abce61dc-4473-55a0-ba07-351d65e31d42" +version = "0.4.1" + +[[deps.DefineSingletons]] +git-tree-sha1 = "0fba8b706d0178b4dc7fd44a96a92382c9065c2c" +uuid = "244e2a9f-e319-4986-a169-4d1fe445cd52" +version = "0.1.2" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = 
"163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +version = "1.11.0" + +[[deps.DocStringExtensions]] +git-tree-sha1 = "7442a5dfe1ebb773c29cc2962a8980f47221d76c" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.5" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.FNVHash]] +git-tree-sha1 = "d6de2c735a8bffce9bc481942dfa453cc815357e" +uuid = "5207ad80-27db-4d23-8732-fa0bd339ea89" +version = "0.1.0" + +[[deps.FilePathsBase]] +deps = ["Compat", "Dates"] +git-tree-sha1 = "3bab2c5aa25e7840a4b065805c0cdfc01f3068d2" +uuid = "48062228-2e41-5def-b9a4-89aafe57970f" +version = "0.9.24" +weakdeps = ["Mmap", "Test"] + + [deps.FilePathsBase.extensions] + FilePathsBaseMmapExt = "Mmap" + FilePathsBaseTestExt = "Test" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.FillArrays]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "2f979084d1e13948a3352cf64a25df6bd3b4dca3" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.16.0" + + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStaticArraysExt = "StaticArrays" + FillArraysStatisticsExt = "Statistics" + + [deps.FillArrays.weakdeps] + PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", 
"DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "910febccb28d493032495b7009dce7d7f7aee554" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "1.0.1" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +version = "1.11.0" + +[[deps.Glob]] +git-tree-sha1 = "83cb0092e2792b9e3a865b6655e88f5b862607e2" +uuid = "c27321d9-0574-5035-807b-f59d2c89b15c" +version = "1.4.0" + +[[deps.Gurobi]] +deps = ["Gurobi_jll", "Libdl", "MathOptInterface", "PrecompileTools"] +git-tree-sha1 = "a39349533a3fe9f60cf05e1a56644f804473c46a" +uuid = "2e9cd046-0924-5485-92f1-d5272153d98b" +version = "1.7.5" + +[[deps.Gurobi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "0306382a5f266e08e48ccfd4ea2e05d61155a055" +uuid = "c018c7e6-a5b0-4aea-8f80-9c1ef9991411" +version = "12.0.2" + +[[deps.HiGHS]] +deps = ["HiGHS_jll", "MathOptIIS", "MathOptInterface", "PrecompileTools", "SparseArrays"] +git-tree-sha1 = "3fae2ee8d6ea22009532d5919ff592fcbcab0ad9" +uuid = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" +version = "1.19.0" + +[[deps.HiGHS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "f56d423e3f583e26ceaef08a15a270b28723c89a" +uuid = "8fd58aa0-07eb-5a78-9b36-339c94fd15ea" +version = "1.11.0+1" + +[[deps.Hwloc_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "XML2_jll", "Xorg_libpciaccess_jll"] +git-tree-sha1 = "3d468106a05408f9f7b6f161d9e7715159af247b" +uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +version = "2.12.2+0" + +[[deps.InitialValues]] +git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3" +uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c" +version = "0.3.1" + +[[deps.InlineStrings]] +git-tree-sha1 = "8f3d257792a522b4601c24a577954b0a8cd7334d" +uuid = 
"842dd82b-1e85-43dc-bf29-5d0ee9dffc48" +version = "1.4.5" + + [deps.InlineStrings.extensions] + ArrowTypesExt = "ArrowTypes" + ParsersExt = "Parsers" + + [deps.InlineStrings.weakdeps] + ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.InverseFunctions]] +git-tree-sha1 = "a779299d77cd080bf77b97535acecd73e1c5e5cb" +uuid = "3587e190-3f89-42d0-90ee-14403ec27112" +version = "0.1.17" +weakdeps = ["Dates", "Test"] + + [deps.InverseFunctions.extensions] + InverseFunctionsDatesExt = "Dates" + InverseFunctionsTestExt = "Test" + +[[deps.InvertedIndices]] +git-tree-sha1 = "6da3c4316095de0f5ee2ebd875df8721e7e0bdbe" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.3.1" + +[[deps.Ipopt]] +deps = ["Ipopt_jll", "LinearAlgebra", "OpenBLAS32_jll", "PrecompileTools"] +git-tree-sha1 = "ef90a75a3ee8c2b170f6c177d4d003348dd30f67" +uuid = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +version = "1.11.0" +weakdeps = ["MathOptInterface"] + + [deps.Ipopt.extensions] + IpoptMathOptInterfaceExt = "MathOptInterface" + +[[deps.Ipopt_jll]] +deps = ["ASL_jll", "Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "MUMPS_seq_jll", "SPRAL_jll", "libblastrampoline_jll"] +git-tree-sha1 = "b33cbc78b8d4de87d18fcd705054a82e2999dbac" +uuid = "9cc047cb-c261-5740-88fc-0cf96f7bdcc7" +version = "300.1400.1900+0" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "e2222959fbc6c19554dc15174c81bf7bf3aa691c" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.4" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "a007feb38b422fbdab534406aeca1b86823cb4d6" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.7.0" 
+ +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.JSON3]] +deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"] +git-tree-sha1 = "411eccfe8aba0814ffa0fdf4860913ed09c34975" +uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +version = "1.14.3" + + [deps.JSON3.extensions] + JSON3ArrowExt = ["ArrowTypes"] + + [deps.JSON3.weakdeps] + ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + +[[deps.JuMP]] +deps = ["LinearAlgebra", "MacroTools", "MathOptInterface", "MutableArithmetics", "OrderedCollections", "PrecompileTools", "Printf", "SparseArrays"] +git-tree-sha1 = "90002c976264d2f571c98cd1d12851f4cba403df" +uuid = "4076af6c-e467-56ae-b986-b466b2749572" +version = "1.26.0" + + [deps.JuMP.extensions] + JuMPDimensionalDataExt = "DimensionalData" + + [deps.JuMP.weakdeps] + DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" + +[[deps.LaTeXStrings]] +git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.4.0" + +[[deps.LazyArrays]] +deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra", "MacroTools", "SparseArrays"] +git-tree-sha1 = "41d433e5854d7a67e8ab2b04962a713fcbcffcf1" +uuid = "5078a376-72f3-5289-bfd5-ec5146d43c02" +version = "2.9.5" + + [deps.LazyArrays.extensions] + LazyArraysBandedMatricesExt = "BandedMatrices" + LazyArraysBlockArraysExt = "BlockArrays" + LazyArraysBlockBandedMatricesExt = "BlockBandedMatrices" + LazyArraysStaticArraysExt = "StaticArrays" + + [deps.LazyArrays.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +version = 
"1.11.0" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.6.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.7.2+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "be484f5c92fad0bd8acfef35fe017900b0b73809" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.18.0+0" + +[[deps.LightBSON]] +deps = ["DataStructures", "Dates", "DecFP", "FNVHash", "JSON3", "Sockets", "StructTypes", "Transducers", "UUIDs", "UnsafeArrays", "WeakRefStrings"] +git-tree-sha1 = "a439b31fda0a2a5b9b2fdee299facbdf98edc1ae" +uuid = "a4a7f996-b3a6-4de6-b9db-2fa5f350df41" +version = "1.4.0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" + +[[deps.LittleEndianBase128]] +deps = ["Test"] +git-tree-sha1 = "2cad132b52c86e0ccfc75116362ab57f0047893a" +uuid = "1724a1d5-ab78-548d-94b3-135c294f96cf" +version = "0.3.0" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "13ca9e2586b89836fd20cccf56e57e2b9ae7f38f" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.29" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + 
LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.Lz4_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "191686b1ac1ea9c89fc52e996ad15d1d241d1e33" +uuid = "5ced341a-0733-55b8-9ab6-a4889d929147" +version = "1.10.1+0" + +[[deps.METIS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "2eefa8baa858871ae7770c98c3c2a7e46daba5b4" +uuid = "d00139f3-1899-568f-a2f0-47f597d42d70" +version = "5.1.3+0" + +[[deps.MUMPS_seq_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "METIS_jll", "libblastrampoline_jll"] +git-tree-sha1 = "fc0c8442887b48c15aec2b1787a5fc812a99b2fd" +uuid = "d7ed1dd3-d0ae-5e8e-bfb4-87a502085b8d" +version = "500.800.100+0" + +[[deps.MacroTools]] +git-tree-sha1 = "1e0228a030642014fe5cfe68c2c0a818f9e3f522" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.16" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MathOptIIS]] +deps = ["MathOptInterface"] +git-tree-sha1 = "31d4a6353ea00603104f11384aa44dd8b7162b28" +uuid = "8c4f8055-bd93-4160-a86b-a0c04941dbff" +version = "0.1.1" + +[[deps.MathOptInterface]] +deps = ["BenchmarkTools", "CodecBzip2", "CodecZlib", "DataStructures", "ForwardDiff", "JSON3", "LinearAlgebra", "MutableArithmetics", "NaNMath", "OrderedCollections", "PrecompileTools", "Printf", "SparseArrays", "SpecialFunctions", "Test"] +git-tree-sha1 = "1251fce78b907fe415a2f680291b67cf51360d2a" +uuid = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +version = "1.42.0" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = 
"c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.6+0" + +[[deps.MicroCollections]] +deps = ["Accessors", "BangBang", "InitialValues"] +git-tree-sha1 = "44d32db644e84c75dab479f1bc15ee76a1a3618f" +uuid = "128add7d-3638-4c79-886c-908ea0c25c34" +version = "0.2.0" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.2.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.12.12" + +[[deps.MutableArithmetics]] +deps = ["LinearAlgebra", "SparseArrays", "Test"] +git-tree-sha1 = "491bdcdc943fcbc4c005900d7463c9f216aabf4c" +uuid = "d8a4904e-b15c-11e9-3269-09a3773c0cb0" +version = "1.6.4" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "9b8215b1ee9e78a293f99797cd31375471b2bcae" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.1.3" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OpenBLAS32_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "ece4587683695fe4c5f20e990da0ed7e83c351e7" +uuid = "656ef2d0-ae68-5445-9ca0-591084a874a2" +version = "0.3.29+0" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.27+1" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.5+0" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1346c9208249809840c91b26703912dff463d335" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.6+0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "05868e21324cede2207c6f0f466b4bfef6d5e7ee" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.8.1" + 
+[[deps.PackageCompiler]] +deps = ["Artifacts", "Glob", "LazyArtifacts", "Libdl", "Pkg", "Printf", "RelocatableFolders", "TOML", "UUIDs", "p7zip_jll"] +git-tree-sha1 = "92a5702bc1b7f2c0d3a395d5553fa8ec7342ce09" +uuid = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" +version = "2.2.4" + +[[deps.Parquet]] +deps = ["CategoricalArrays", "CodecZlib", "CodecZstd", "DataAPI", "Dates", "Decimals", "LittleEndianBase128", "Missings", "Mmap", "SentinelArrays", "Snappy", "Tables", "Thrift"] +git-tree-sha1 = "b34432cb917ae43123f1648eeb2a87ae8c5d2aa0" +uuid = "626c502c-15b0-58ad-a749-f091afb673ae" +version = "0.8.6" + +[[deps.Parquet2]] +deps = ["AbstractTrees", "BitIntegers", "ChunkCodecCore", "ChunkCodecLibBrotli", "ChunkCodecLibLz4", "ChunkCodecLibSnappy", "ChunkCodecLibZlib", "ChunkCodecLibZstd", "DataAPI", "Dates", "DecFP", "FilePathsBase", "FillArrays", "JSON3", "LazyArrays", "LightBSON", "Mmap", "OrderedCollections", "PooledArrays", "PrecompileTools", "SentinelArrays", "StaticArrays", "TableOperations", "Tables", "Thrift2", "Transducers", "UUIDs", "WeakRefStrings"] +git-tree-sha1 = "b807642c695d78b1f2d6cc829ea505a7fc6fe81d" +uuid = "98572fba-bba0-415d-956f-fa77e587d26d" +version = "0.2.33" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "7d2f8f21da5db6a806faf7b9b292296da42b2810" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.3" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.11.0" +weakdeps = ["REPL"] + + [deps.Pkg.extensions] + REPLExt = "REPL" + +[[deps.PooledArrays]] +deps = ["DataAPI", "Future"] +git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "1.4.3" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" 
+uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.3" + +[[deps.PrettyTables]] +deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "REPL", "Reexport", "StringManipulation", "Tables"] +git-tree-sha1 = "c5a07210bd060d6a8491b0ccdee2fa0235fc00bf" +uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +version = "3.1.2" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.Profile]] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" +version = "1.11.0" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.RelocatableFolders]] +deps = ["SHA", "Scratch"] +git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864" +uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" +version = "1.0.1" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "62389eeff14780bfe55195b7204c0d8738436d64" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.1" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SPRAL_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "Libdl", "METIS_jll", "libblastrampoline_jll"] +git-tree-sha1 = "4f9833187a65ead66ed1907b44d5f20606282e3f" +uuid = "319450e9-13b8-58e8-aa9f-8fd1420848ab" +version = "2025.5.20+0" + +[[deps.Scratch]] +deps = ["Dates"] +git-tree-sha1 = "9b81b8393e50b7d4e6d0a9f14e192294d3b7c109" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.3.0" + 
+[[deps.SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "ebe7e59b37c400f694f52b58c93d26201387da70" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.4.9" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "c5391c6ace3bc430ca630251d02ea9687169ca68" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.2" + +[[deps.Snappy]] +deps = ["CEnum", "snappy_jll"] +git-tree-sha1 = "098adf970792fd9404788f4558e94958473f7d57" +uuid = "59d4ed8c-697a-5b28-a4c7-fe95c22820f9" +version = "0.4.3" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "64d974c2e6fdf07f8155b5b2ca2ffa9069b608d9" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.2" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.11.0" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "41852b8679f78c8d8961eeadc8f62cef861a52e3" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.5.1" + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + + [deps.SpecialFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + +[[deps.SplittablesBase]] +deps = ["Setfield", "Test"] +git-tree-sha1 = "e08a62abc517eb79667d0a29dc08a3b589516bb5" +uuid = "171d559e-b47b-412a-8079-5efa626c420e" +version = "0.1.15" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "246a8bb2e6667f832eea063c3a56aef96429a3db" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.9.18" + + [deps.StaticArrays.extensions] + 
StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + + [deps.StaticArrays.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.3" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.11.1" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StringManipulation]] +deps = ["PrecompileTools"] +git-tree-sha1 = "a3c1536470bf8c5e02096ad4853606d7c8f62721" +uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" +version = "0.4.2" + +[[deps.StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "159331b30e94d7b11379037feeb9b690950cace8" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.11.0" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.7.0+0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TableOperations]] +deps = ["SentinelArrays", "Tables", "Test"] +git-tree-sha1 = "e383c87cf2a1dc41fa30c093b2a19877c83e1bc1" +uuid = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" +version = "1.2.0" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "f2c1efbc8f3a609aadf318094f8fc5204bdaf344" +uuid = 
"bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.12.1" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.Thrift]] +deps = ["CodecZlib", "CodecZstd", "Distributed", "Sockets", "ThriftJuliaCompiler_jll", "TranscodingStreams"] +git-tree-sha1 = "bc24b4dc9ec4b34bca36659ae7efae9085442d80" +uuid = "8d9c9c80-f77e-5080-9541-c6f69d204e22" +version = "0.8.5" + +[[deps.Thrift2]] +deps = ["MacroTools", "OrderedCollections", "PrecompileTools"] +git-tree-sha1 = "9610f626cf80cf28468edb20ec2dc007f72aacfa" +uuid = "9be31aac-5446-47db-bfeb-416acd2e4415" +version = "0.2.1" + +[[deps.ThriftJuliaCompiler_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "949a51ca85d31b063531eed49e38a6c9b9bae58b" +uuid = "815b9798-8dd0-5549-95cc-3cf7d01bce66" +version = "0.12.1+0" + +[[deps.TranscodingStreams]] +git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.11.3" + +[[deps.Transducers]] +deps = ["Accessors", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "SplittablesBase", "Tables"] +git-tree-sha1 = "4aa1fdf6c1da74661f6f5d3edfd96648321dade9" +uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999" +version = "0.4.85" + + [deps.Transducers.extensions] + TransducersAdaptExt = "Adapt" + TransducersBlockArraysExt = "BlockArrays" + TransducersDataFramesExt = "DataFrames" + TransducersLazyArraysExt = "LazyArrays" + TransducersOnlineStatsBaseExt = "OnlineStatsBase" + TransducersReferenceablesExt = "Referenceables" + + [deps.Transducers.weakdeps] + Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" + BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" + DataFrames = 
"a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" + OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338" + Referenceables = "42d2dcc6-99eb-4e98-b66c-637b7d73030e" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.UnsafeArrays]] +git-tree-sha1 = "c63023bd84f46f9df786c90180d4f79dbfdafa2a" +uuid = "c4a57d5a-5b31-53a6-b365-19f8c011fbd6" +version = "1.0.9" + +[[deps.WeakRefStrings]] +deps = ["DataAPI", "InlineStrings", "Parsers"] +git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" +uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" +version = "1.4.2" + +[[deps.WorkerUtilities]] +git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7" +uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" +version = "1.6.1" + +[[deps.XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] +git-tree-sha1 = "80d3930c6347cfce7ccf96bd3bafdf079d9c0390" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.13.9+0" + +[[deps.Xorg_libpciaccess_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "4909eb8f1cbf6bd4b1c30dd18b2ead9019ef2fad" +uuid = "a65dc6b1-eb27-53a1-bb3e-dea574b5389e" +version = "0.18.1+0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.Zstd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "446b23e73536f84e8037f5dce465e92275f6a308" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.5.7+1" + +[[deps.brotli_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "46fda47f4215c957bc92fd5fbb5ad04fee1e3743" +uuid = "4611771a-a7d2-5e23-8d00-b1becdba1aae" +version = "1.2.0+0" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.11.0+0" + 
+[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.59.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" + +[[deps.snappy_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "ca88363dd41d2547f52118287dd34dbbc14f3eb7" +uuid = "fe1e1685-f7be-5f59-ac9f-4ca204017dfd" +version = "1.2.3+0" diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml new file mode 100644 index 00000000..430efa55 --- /dev/null +++ b/benchmarks/Project.toml @@ -0,0 +1,9 @@ +[deps] +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Gurobi = "2e9cd046-0924-5485-92f1-d5272153d98b" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" +Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +JuMP = "4076af6c-e467-56ae-b986-b466b2749572" +PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" +Parquet2 = "98572fba-bba0-415d-956f-fa77e587d26d" diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000..74700b12 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,30 @@ +# Pyoframe benchmarks + +This folder contains the code and instructions needed to benchmark Pyoframe's performance against other libraries. For benchmarking, we use [`snakemake`](https://snakemake.github.io/) to produce the inputs and run the benchmarks. + +## How to run the benchmarks + +1. First install pyoframe: `pip install --editable .` +1. `cd benchmarks` +2. `pip install --editable .` +3. If running the JuMP benchmark: + a. Install Julia: `curl -fsSL https://install.julialang.org | sh` + b. Install the Julia dependencies: `julia --project=. -e 'using Pkg; Pkg.instantiate()'` +4. If running the AMPL benchmark: + a. Install the AMPL dependencies: `python -m amplpy.modules install highs gurobi` + b. Activate your AMPL license: `python -m amplpy.modules activate <your-license-uuid>` +4. Edit `config.yaml` to your liking (e.g.
specify the problems and libraries to benchmark). +5. Run `python run.py`. This will run all the benchmarks and take a while. +6. Run `python plot.py` to generate the plots. +7. View the plotted results in, for example, `results/facility_location/` + +### Running energy planning benchmark + +You'll need to complete the following additional steps. +1. Install the dependencies for [`scikit-sparse`](https://github.com/scikit-sparse/scikit-sparse), typically `sudo apt-get install libsuitesparse-dev` +2. `pip install --editable .[energy-planning]` + + +## Running energy model benchmark locally + +1. Download the California Test System data. Specifically, place the [load data](https://drive.google.com/file/d/1Sz8st7g4Us6oijy1UYMPUvkA1XeZlIr8/view?usp=drive_link), [generation data](https://drive.google.com/file/d/1CxLlcwAEUy-JvJQdAfVydJ1p9Ecot-4d/view?usp=drive_link), and [line data](https://github.com/staadecker/CATS-CaliforniaTestSystem/blob/master/GIS/CATS_lines.json) in the `/benchmarks/energy_planning/data`.
\ No newline at end of file diff --git a/benchmarks/config.test.yaml b/benchmarks/config.test.yaml new file mode 100644 index 00000000..786a270d --- /dev/null +++ b/benchmarks/config.test.yaml @@ -0,0 +1,45 @@ +# Note: we don't allow timeouts +name: test +save_outputs: true +julia_trace_compile: true +problems: + facility_location: + size: [2] + facility_location_construct_only: + size: [2] + construct_only: true + simple_problem: + size: [100] + inputs: + - "input_100.parquet" + energy_simple: + code_dir: energy_planning + inputs: "*" + size: [2] + args: + security_constrained: false + capacity_expansion: false + yearly_limits: false + variable_capacity_factors: false + solver_args: + Method: 2 + Crossover: false + energy_all: + code_dir: energy_planning + inputs: "*" + size: [1] + +solvers: + - gurobi + # - highs + # - mosek + +libraries: + - pyoframe + - pyomo + - cvxpy + - gurobipy + - jump + - linopy + - pyoptinterface + - ampl \ No newline at end of file diff --git a/benchmarks/config.yaml b/benchmarks/config.yaml new file mode 100644 index 00000000..e1bb6dc5 --- /dev/null +++ b/benchmarks/config.yaml @@ -0,0 +1,55 @@ +repeat: 1 # TODO repeat +timeout: 1200 # seconds, per run +name: "main" +problems: + simple_problem: + label: "Simple Problem" + size: [10000, 100000, 1000000, 10000000, 50000000] + inputs: + - "input_10000.parquet" + - "input_100000.parquet" + - "input_1000000.parquet" + - "input_10000000.parquet" + - "input_50000000.parquet" + facility_location: + label: "Facility Location Problem (from JuMP paper)" + size: [16, 32, 64, 128, 192] + construct_only: true + energy_planning_capacity_expansion: + label: "Electrical Grid Capacity Expansion Problem" + code_dir: energy_planning + inputs: "*" + size: [2, 4, 24, 168, 336] + args: + security_constrained: false + capacity_expansion: true + solver_args: + Method: 2 + Crossover: false + energy_planning_security_constrained_dispatch: + label: "Electrical Grid Dispatch Problem" + code_dir: energy_planning + 
inputs: "*" + size: [2, 4, 24, 48, 72, 168] + args: + security_constrained: true + capacity_expansion: false + solver_args: + Method: 2 + Crossover: false + +solvers: + - gurobi + # - highs + # - mosek + +libraries: + - pyoframe + - pyomo + - cvxpy + - gurobipy + - jump + - linopy + - pyoptinterface + - pulp + - ampl diff --git a/benchmarks/maintainability/.gitignore b/benchmarks/maintainability/.gitignore new file mode 100644 index 00000000..ea1deac0 --- /dev/null +++ b/benchmarks/maintainability/.gitignore @@ -0,0 +1 @@ +downloads/** \ No newline at end of file diff --git a/benchmarks/maintainability/README.md b/benchmarks/maintainability/README.md new file mode 100644 index 00000000..797d0ff0 --- /dev/null +++ b/benchmarks/maintainability/README.md @@ -0,0 +1,3 @@ +# What's in here? + +Code and tools related to measuring the lines of source code of modeling interfaces. \ No newline at end of file diff --git a/benchmarks/maintainability/config.yaml b/benchmarks/maintainability/config.yaml new file mode 100644 index 00000000..4f480701 --- /dev/null +++ b/benchmarks/maintainability/config.yaml @@ -0,0 +1,41 @@ +valid_extensions: ["py", "jl", "cpp", "hpp", "c", "h", "c", "i"] +always_exclude_dirs: ["tests", "test"] +modeling_interfaces: + pyoframe: + github: Bravos-Power/pyoframe + include_paths: ["src"] + linopy: + github: PyPSA/linopy + include_paths: ["linopy"] + pyomo_no_contrib: + github: Pyomo/pyomo + include_paths: ["pyomo"] + exclude_paths: ["pyomo/contrib"] + pyomo: + github: Pyomo/pyomo + include_paths: ["pyomo"] + cvxpy: + github: cvxpy/cvxpy + include_paths: + - "cvxpy" + exclude_paths: + - "cvxpy/cvxcore/include/Eigen" + - "cvxpy/cvxcore/python" # mostly auto-generated or third-party code + jump: + github: jump-dev/JuMP.jl + include_paths: ["src"] + pyoptinterface: + github: metab0t/PyOptInterface + include_paths: + - "include" + - "lib" + - "src" + exclude_paths: + - "third_party" + pulp: + github: coin-or/pulp + include_paths: ["pulp"] + 
pyoframe_benchmarks: + github: Bravos-Power/pyoframe + branch: ms/benchmarks-2 + include_paths: ["benchmarks"] \ No newline at end of file diff --git a/benchmarks/maintainability/results.csv b/benchmarks/maintainability/results.csv new file mode 100644 index 00000000..3383c0c9 --- /dev/null +++ b/benchmarks/maintainability/results.csv @@ -0,0 +1,9 @@ +modeling_interface,Python,Julia,C/C++,total,factor +pyoframe,2614,,0,2614,1.0 +pulp,7793,,0,7793,3.0 +linopy,8095,,0,8095,3.1 +jump,,12258,0,12258,4.7 +pyoptinterface,5338,,17981,23319,8.9 +cvxpy,27456,,1300,28756,11.0 +pyomo_no_contrib,82344,,0,82344,31.5 +pyomo,151503,,6925,158428,60.6 diff --git a/benchmarks/maintainability/run.py b/benchmarks/maintainability/run.py new file mode 100644 index 00000000..5e2be577 --- /dev/null +++ b/benchmarks/maintainability/run.py @@ -0,0 +1,130 @@ +"""Script to download Github repos and measure their lines of code using cloc.""" + +import json +import subprocess +from pathlib import Path + +import polars as pl +import yaml + + +def main(): + cwd = Path(__file__).parent + + with open(cwd / "config.yaml") as file: + config = yaml.safe_load(file) + + results = pl.DataFrame() + + for modeling_interface, mi_config in config["modeling_interfaces"].items(): + github = mi_config["github"].lower() + results = pl.concat( + [ + results, + measure_lines_of_code( + github, + cwd / "downloads" / github.partition("/")[2], + include_paths=mi_config["include_paths"], + exclude_paths=mi_config.get("exclude_paths", []), + include_exts=config["valid_extensions"], + exclude_dirs=config["always_exclude_dirs"], + branch=mi_config.get("branch", None), + ).with_columns(modeling_interface=pl.lit(modeling_interface)), + ], + how="diagonal", + ) + + results = results.with_columns( + ( + pl.col("C++").fill_null(0) + + pl.col("C/C++ Header").fill_null(0) + + pl.col("C").fill_null(0) + ).alias("C/C++") + ).drop("C/C++ Header", "C", "C++") + results = results.select( + "modeling_interface", + *[col for col in 
results.columns if col not in ("modeling_interface", "SUM")], + total="SUM", + ).sort("total") + results = results.with_columns( + factor=( + pl.col("total") + / results.filter(modeling_interface="pyoframe")["total"].item() + ).round(1) + ) + + results = results.sort("factor") + + print(results) + + results.write_csv(cwd / "results.csv") + + +def measure_lines_of_code( + github: str, + downloads_dir: Path, + *, + include_paths: list[str], + exclude_paths: list[str], + exclude_dirs: list[str], + include_exts: list[str], + branch: str | None = None, +) -> pl.DataFrame: + if not downloads_dir.exists() or not any(downloads_dir.iterdir()): + url = "https://github.com/" + github + subprocess.run(["git", "clone", url, downloads_dir], check=True) + + if branch is not None: + subprocess.run(["git", "checkout", branch], check=True, cwd=downloads_dir) + else: + # Checkout the default branch (e.g., main or master) + default_branch = ( + subprocess.check_output( + ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"], + cwd=downloads_dir, + text=True, + ) + .strip() + .replace("origin/", "") + ) + subprocess.run( + ["git", "switch", default_branch], + check=True, + cwd=downloads_dir, + ) + + cmd = ["cloc"] + cmd += include_paths + if exclude_paths: + cmd += ["--fullpath", "--not-match-d=(" + "|".join(exclude_paths) + ")"] + + cmd += [ + "--include-ext=" + ",".join(include_exts), + "--exclude-dir=" + ",".join(exclude_dirs), + "--json", + ] + print(f"{github}: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True, cwd=downloads_dir) + + if result.returncode != 0: + raise RuntimeError( + f"cloc failed with return code {result.returncode}: {result.stderr}" + ) + + try: + cloc_output = json.loads(result.stdout) + except json.JSONDecodeError as e: + raise RuntimeError( + f"Failed to parse cloc output as JSON: {result.stdout}" + ) from e + + df = pl.DataFrame(cloc_output) + df = df.select( + pl.col(c).struct.field("code").alias(c) for c in df.columns if c != 
"header" + ) + + return df + + +if __name__ == "__main__": + main() diff --git a/benchmarks/plot.py b/benchmarks/plot.py new file mode 100644 index 00000000..4e8c02e8 --- /dev/null +++ b/benchmarks/plot.py @@ -0,0 +1,247 @@ +"""Produces plots from benchmark results.""" + +from pathlib import Path + +import altair as alt +import polars as pl +import yaml + + +def get_latest_data(base_path: Path): + df = pl.read_csv(base_path / "benchmark_results.csv") + df = df.filter(pl.col("error").is_null()) + if df.is_empty(): + return df + df = df.group_by(["problem", "library", "solver", "size"]).agg(pl.col("*").last()) + df = df.with_columns((pl.col("max_memory_uss_mb") / 1024).alias("memory_uss_GiB")) + df = df.with_columns( + construct_time_s=pl.col("total_time_s") - pl.col("solve_time_s") + ) + + # Add normalization column + join_cols = ["size", "solver", "problem"] + pyoframe_results = df.filter(library="pyoframe").drop("library") + assert pyoframe_results.height > 0, ( + f"Cannot normalize results: no pyoframe data found\n{df}" + ) + df = df.join( + pyoframe_results, on=join_cols, how="left", validate="m:1", suffix="_pyoframe" + ) + + df = df.with_columns( + (pl.col("construct_time_s") / pl.col("construct_time_s_pyoframe")).alias( + "construct_time_normalized" + ), + (pl.col("memory_uss_GiB") / pl.col("memory_uss_GiB_pyoframe")).alias( + "memory_uss_normalized" + ), + ) + + return df + + +def plot_combined(results: pl.DataFrame, output, config): + panels = [[], []] + results = results.sort("problem") + for (problem,), problem_df in results.group_by("problem", maintain_order=True): + if problem not in config["problems"]: + continue + problem_label = config["problems"][problem].get("label", problem) + chart = alt.Chart(problem_df).encode( + color=alt.condition( + alt.datum.library == "pyoframe", + alt.value("black"), + alt.Color("library", legend=None), + ) + ) + + lib_names = ["construct_time_s_pyoframe", "memory_uss_GiB_pyoframe"] + ys = ["construct_time_normalized", 
"memory_uss_normalized"] + titles = ["Time to construct", "Peak memory usage"] + units = ["sec", "GiB"] + max_ys = [10, 5] + for label, y, title, unit, panel_col, max_y in zip( + lib_names, ys, titles, units, panels, max_ys + ): + max_data = problem_df.select(y).max().item() + y_scale = "linear" + min_y = 0 + if max_data is not None and max_data > max_y: + max_y = max_data * 1.1 + y_scale = "log" + min_y = 0.5 + tick_values = [10**i for i in range(1, 9)] + lines = ( + chart.mark_line(point=True, clip=True) + .encode( + alt.X("num_variables") + .scale(type="log") + .title("Number of variables") + .axis(grid=False, format="~s", values=tick_values), + alt.Y(y) + .axis(labelExpr="datum.value + 'x'", grid=True) + .title(problem_label) + .scale(type=y_scale, domain=[min_y, max_y]), + ) + .properties(title=title) + ) + lib_names = chart.encode( + alt.X("max(num_variables)"), + alt.Y(y, aggregate=alt.ArgmaxDef(argmax=label)), + text="library", + ).mark_text(align="left", dx=4, fontSize=12) + + pyoframe_data = problem_df.filter(library="pyoframe") + pyoframe_data = pyoframe_data.with_columns( + pl.col(label).round_sig_figs(1).map_elements(lambda v: f"{v:g} {unit}") + ) + pf_labels = ( + alt.Chart(pyoframe_data) + .encode(alt.X("num_variables"), alt.Y(y), alt.Text(label)) + .mark_text(align="center", dy=-10, fontSize=12) + ) + + pf_labels_background = pf_labels.mark_text( + align="center", + stroke="white", + strokeWidth=5, + strokeJoin="round", + strokeOpacity=0.6, + dy=-10, + fontSize=12, + ) + facet = lines + pf_labels_background + pf_labels + lib_names + + panel_col.append(facet) + + columns = [alt.vconcat(*panel_col) for panel_col in panels] + + y_labels = [ + alt.Chart(pl.DataFrame({"y": [0.5]})) + .mark_text( + text=title, + angle=270, + align="center", + baseline="middle", + fontSize=16, + dx=10, + ) + .encode(y=alt.Y("y", axis=None).scale(domain=[0, 1])) + .properties(width=20) + for title in [ + "Time relative to Pyoframe", + "Memory relative to Pyoframe", + ] + ] 
+ + plot = y_labels[0] | columns[0] | y_labels[1] | columns[1] + + plot.configure_view(stroke=None).save(output) + + +def plot_memory_usage_over_time(base_path: Path, config): + for problem in config["problems"]: + problem_mem_log_dir = base_path / problem / "mem_log" + if not problem_mem_log_dir.exists(): + continue + + all_data = [] + + for file in problem_mem_log_dir.glob("*.parquet"): + file_terms = file.stem.split("_") + day, time, library, solver, size = ( + file_terms[0], + file_terms[1], + file_terms[2], + file_terms[3], + file_terms[4], + ) + df = pl.read_parquet(file) + df = df.with_columns( + timestamp=pl.lit(f"{day} {time}"), + size=pl.lit(int(size)), + library=pl.lit(library), + solver=pl.lit(solver), + ) + all_data.append(df) + all_data_df = pl.concat(all_data) + most_recent = all_data_df.group_by(["library", "solver", "size"]).agg( + pl.col("timestamp").max() + ) + only_most_recent = all_data_df.join( + most_recent, + on=["library", "solver", "size", "timestamp"], + how="inner", + ) + + plt = None + only_most_recent = only_most_recent.sort("size") + + only_most_recent = only_most_recent.with_columns( + pl.col("uss_MiB", "vms_MiB", "rss_MiB") / 1024 + ) + + for (size, solver), group in only_most_recent.group_by( + ["size", "solver"], maintain_order=True + ): + if group.height == 0: + continue + + if group["process_name"].n_unique() > 1: + group = group.group_by("time_s", "library").agg( + pl.col("uss_MiB", "rss_MiB", "vms_MiB", "num_threads").sum(), + pl.col("marker").first(), + ) + + panel = ( + alt.Chart(group) + .mark_line(strokeWidth=1) + .encode( + x=alt.X("time_s", title="Elapsed time (s)"), + y=alt.Y("uss_MiB", title="Memory usage (GiB, USS)"), + color="library:N", + ) + .properties(title=f"Memory usage over time (N={size}, {solver})") + ) + # add rss as dashed lines + # panel += ( + # alt.Chart(group) + # .mark_line(strokeWidth=1, strokeDash=[2, 2]) + # .encode(x=alt.X("time_s"), y=alt.Y("rss_MiB"), color="library:N") + # ) + keypoints = 
group.filter(pl.col("marker").is_not_null()) + if keypoints.height > 0: + panel += keypoints.plot.scatter( + x="time_s", + y="uss_MiB", + color="library:N", + shape="marker:N", + ) + + if plt is None: + plt = panel + else: + plt |= panel + if plt is not None: + plt.save(base_path / problem / "memory_usage_over_time.svg") + + +def plot_all(config_name="config.yaml"): + cwd = Path(__file__).parent + with open(cwd / config_name) as f: + config = yaml.safe_load(f) + base_path = cwd / "results" / config["name"] + + plot_memory_usage_over_time(base_path, config) + + df = get_latest_data(base_path) + + if df.is_empty(): + return + + for (solver,), solver_df in df.group_by("solver"): + plot_combined(solver_df, base_path / f"results_{solver}.svg", config) + + +if __name__ == "__main__": + plot_all() diff --git a/benchmarks/pyproject.toml b/benchmarks/pyproject.toml new file mode 100644 index 00000000..b2466404 --- /dev/null +++ b/benchmarks/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "pyoframe-benchmarks" +version = "0.0.0" +dependencies = [ + # solvers + "pyomo==6.9.2", + "linopy==0.6.5", + "pyoptinterface==0.5.1", + "cvxpy[GUROBI,MOSEK]==1.7.1", + "amplpy==0.16.0", + "pulp[gurobi,highs,copt]==3.2.1", + # tools + "snakemake==9.6.0", + "gdown==5.2.0", # To download data from Google Drive + "papermill==2.6.0", # To run notebooks + "altair==5.5.0", # For plotting in notebooks + "vegafusion==2.0.3", # For altair with large data + "xarray==2025.7.1", + "numpy==2.3.1", + "vl-convert-python==1.8.0", # For saving plots in altair + "snakeviz==2.2.2", # For viewing profiling results + "memray==1.17.2", # For memory profiling + "pytest==8.4.1", +] + +[project.optional-dependencies] +energy-planning = [ + "scikit-sparse==0.4.16", + "scipy==1.16.0", + "nbformat==5.10.4", + "nbconvert==7.16.6", + "marimo==0.20.2" # For the energy planning notebooks +] diff --git a/benchmarks/run.py 
b/benchmarks/run.py new file mode 100644 index 00000000..9cc8b686 --- /dev/null +++ b/benchmarks/run.py @@ -0,0 +1,813 @@ +"""Script to run the benchmarks specified in config.yaml. + +Saves results to results/benchmark_results.csv. +""" + +import argparse +import itertools +import math +import os +import queue +import re +import signal +import subprocess +import threading +import time +from contextlib import nullcontext +from dataclasses import dataclass +from pathlib import Path +from tempfile import TemporaryDirectory + +import polars as pl +import psutil +import tomllib +import yaml + +POLL_MIN_S, POLL_MAX_S, POLL_TRANSITION_S = 0.01, 1, 30 +LOG_AFTER_S = 30 + +TIMESTAMP = time.strftime("%Y%m%d_%H%M%S") + +CWD = Path(__file__).parent + + +@dataclass +class Benchmark: + name: str + code_dir: str + solver: str + library: str + size: int | None + construct_only: bool + args: dict[str, str] + julia_trace_compile: bool + solver_args: dict | None + + +def run_all_benchmarks(config, ignore_past_results=False): + base_dir = CWD / "results" / config["name"] + base_dir.mkdir(parents=True, exist_ok=True) + + past_results = PastResults(base_dir, ignore_past_results=ignore_past_results) + + timeout = config.get("timeout", None) + + for name, problem_config in config["problems"].items(): + code_dir = problem_config.get("code_dir", name) + prepare_benchmark_problem(name, code_dir, problem_config) + + num_repeats = problem_config.get("repeat", config.get("repeat", 1)) + for size in sorted(problem_config.get("size", [None])): + with get_base_results_dir(config, base_dir, name, size) as base_results_dir: + for solver, library in itertools.product( + config["solvers"], config["libraries"] + ): + benchmark = Benchmark( + name=name, + code_dir=code_dir, + solver=solver, + library=library, + size=size, + construct_only=problem_config.get("construct_only", False), + julia_trace_compile=config.get("julia_trace_compile", False), + args=problem_config.get("args", {}), + 
solver_args=problem_config.get("solver_args", None), + ) + if not get_benchmark_code(benchmark).exists(): + print(f"{name}: Skipping {library} as no benchmark found.") + continue + + if not should_run_benchmark( + benchmark, past_results, timeout, num_repeats + ): + print( + f"{name} (n={size}): Skipping {library}, already benchmarked or timed out." + ) + continue + + input_dir = ( + CWD / "src" / code_dir / "model_data" + if "inputs" in problem_config + else None + ) + try: + for i in range(num_repeats): + print( + f"{name} (n={size}): Running with {library} and {solver} ({i + 1}/{num_repeats})..." + ) + + run_benchmark( + benchmark, + past_results, + timeout=timeout, + input_dir=input_dir, + results_dir=get_results_dir( + base_results_dir, library, solver + ), + ) + except BenchmarkError as e: + print(f"{name}: {e}") + + past_results_df = past_results.read( + problem=name, size=size, ignore_past_results=False + ) + past_results_df = past_results_df.filter(pl.col("error").is_null()) + past_results_df = past_results_df.sort("date") + past_results_df = past_results_df.group_by("library", "solver").last() + + check_results_csv_aligns(past_results_df, problem=name, size=size) + check_results_output_match(name, base_results_dir, past_results_df) + + +def check_results_csv_aligns(df, problem, size): + if df.height <= 1: + print(f"{problem} (n={size}): Not enough successful runs to compare results.") + return + + # Check objective values + df = df.with_columns(pl.col("objective_value").round_sig_figs(3)) + + num_objectives = df.group_by("objective_value").agg( + pl.col("solver", "library").first() + ) + if num_objectives.height <= 1: + print( + f"{problem}: Objective values match across all runs ({df['library'].unique().to_list()})." 
+ ) + else: + raise ValueError( + f"{problem}: Objective values do not match for size {size}, see .csv results:\n{num_objectives}" + ) + + # Check number of variables + df = df.with_columns( + pl.when(library="pyoframe") + .then(pl.col("num_variables") - 1) + .otherwise("num_variables") + ) + + # ampl has a presolve so number of variables should not be compared (instead objective value is sufficient) + df = df.filter(pl.col("library") != "ampl") + + num_variables = df.group_by("num_variables").agg( + pl.col("solver", "library").unique() + ) + if num_variables.height <= 1: + print(f"{problem}: Number of variables match across all runs.") + else: + raise ValueError( + f"{problem}: Number of variables do not match for size {size}, see .csv results:\n{num_variables}" + ) + + +def should_run_benchmark(benchmark: Benchmark, past_results, timeout, num_repeats): + past_results_df = past_results.read( + problem=benchmark.name, library=benchmark.library, solver=benchmark.solver + ) + + # Previously errored on this run, don't try again. + if past_results_df.filter(pl.col("error").is_not_null(), date=TIMESTAMP).height > 0: + return False + + if benchmark.size is not None: + past_results_df = past_results_df.filter(size=benchmark.size) + + # Check if already completed + if past_results_df.filter(pl.col("error").is_null()).height >= num_repeats: + return False + + # Previously timed out at this size, don't try again. 
+ prior_timeouts = past_results_df.filter(error="TIMEOUT") + if ( + timeout is not None + and prior_timeouts.height > 0 + and prior_timeouts["total_time_s"].max() >= timeout + ): + return False + + return True + + +def prepare_benchmark_problem(problem: str, code_dir: str, problem_config): + if "inputs" not in problem_config: + return + + print(f"{problem}: Generating required input files...") + + cmd = ["snakemake", "--cores", "all"] + if problem_config["inputs"] != "*": + input_files = [ + f"./model_data/{input_file}" for input_file in problem_config["inputs"] + ] + cmd.extend(input_files) + + subprocess.run( + cmd, stdout=subprocess.DEVNULL, cwd=CWD / "src" / code_dir, check=True + ) + + +def precompile_julia_benchmarks( + problem: str, *, image_path: Path, trace_compile_path: Path +): + if image_path.exists(): + return + + print(f"{problem}: Creating system image for Julia benchmarks...") + + if not trace_compile_path.exists(): + raise FileNotFoundError( + f"Precompile statements file not found at {trace_compile_path}. Try running python test.py to generate it." 
+ ) + + with open(CWD / "Project.toml", "rb") as f: + project_toml = tomllib.load(f) + dependencies = list(project_toml.get("deps", {}).keys()) + dependencies_str = ", ".join(f'"{dep}"' for dep in dependencies) + + cmd = [ + "julia", + f"--project={CWD}", + "-e", + f'''using PackageCompiler; create_sysimage( + [{dependencies_str}], + sysimage_path="{image_path}", + precompile_statements_file="{trace_compile_path}", + )''', + ] + + subprocess.run(cmd, check=True) + + +def run_benchmark( + benchmark: Benchmark, + past_results: "PastResults", + timeout: int | None = None, + input_dir=None, + results_dir: Path | None = None, +): + def save_result( + total_time: float | None = None, + monitor_result: MonitorResult | None = None, + error=None, + ): + if monitor_result is None: + monitor_result = MonitorResult() + + past_results.append( + { + "date": TIMESTAMP, + "solver": benchmark.solver, + "problem": benchmark.name, + "size": benchmark.size, + "library": benchmark.library, + "num_variables": monitor_result.num_variables, + "num_constraints": monitor_result.num_constraints, + "num_nonzeros": monitor_result.num_nonzeros, + "total_time_s": safe_round(total_time, 3), + "solve_time_s": safe_round(monitor_result.solve_time, 3), + "max_memory_uss_mb": safe_round(monitor_result.max_memory_uss_mb, 3), + "objective_value": monitor_result.objective_value, + "error": error, + } + ) + + using_julia = benchmark.library == "jump" + + if not using_julia: + args = dict(solver=f"'{benchmark.solver}'", emit_benchmarking_logs="True") + if benchmark.size is not None: + args["size"] = str(benchmark.size) + if benchmark.construct_only: + args["block_solver"] = "True" + if benchmark.solver_args is not None: + args["solver_args"] = repr(benchmark.solver_args) + if input_dir is not None: + args["input_dir"] = f"'{input_dir}'" + args["results_dir"] = f"'{results_dir}'" + for arg_name, arg_value in benchmark.args.items(): + if isinstance(arg_value, str): + args[arg_name] = f"'{arg_value}'" + elif 
isinstance(arg_value, bool): + args[arg_name] = "True" if arg_value else "False" + else: + raise NotImplementedError( + f"Unsupported argument type for {arg_name}: {type(arg_value)}" + ) + + args = ", ".join(f"{k}={v}" for k, v in args.items()) + + cmd = [ + "python", + "-c", + f"from {benchmark.code_dir}.bm_{benchmark.library} import Bench; Bench({args}).run()", + ] + else: + problem_dir = CWD / "src" / benchmark.code_dir + image_path = problem_dir / "julia_sysimage.so" + benchmark_path = problem_dir / "bm_jump.jl" + trace_compile_path = problem_dir / "julia_precompile_statements.jl" + + cmd = ["julia", f"--project={CWD}"] + + if benchmark.julia_trace_compile: + cmd += ["--trace-compile", str(trace_compile_path)] + else: + precompile_julia_benchmarks( + benchmark.name, + image_path=image_path, + trace_compile_path=trace_compile_path, + ) + cmd += ["--sysimage", str(image_path)] + + cmd += [ + str(benchmark_path), + f"solver={benchmark.solver}", + f"problem_size={benchmark.size}", + f"results_dir={results_dir}" if results_dir is not None else "", + ] + + if benchmark.construct_only: + cmd.append("block_solver=true") + + for k, v in benchmark.args.items(): + if isinstance(v, bool): + v = "true" if v else "false" + cmd.append(f"{k}={v}") + if benchmark.solver_args is not None: + solver_args = [] + for k, v in benchmark.solver_args.items(): + if isinstance(v, bool): + v = 1 if v else 0 + solver_args.append(f"{k}={v}") + solver_args = ",".join(solver_args) + cmd.append(f"solver_args={solver_args}") + + max_memory_queue = queue.Queue() + + mem_log_dir = past_results.base_dir / benchmark.name / "mem_log" + mem_log_dir.mkdir(parents=True, exist_ok=True) + + # See paper for explanation + env = os.environ.copy() + env["_RJEM_MALLOC_CONF"] = "muzzy_decay_ms:1000" + + start_time = time.time() + + with subprocess.Popen( + cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, text=True, bufsize=1, env=env + ) as benchmark_proc: + memory_thread = threading.Thread( + 
target=monitor_benchmark, + args=( + start_time, + benchmark_proc, + max_memory_queue, + mem_log_dir + / f"{TIMESTAMP}_{benchmark.library}_{benchmark.solver}_{benchmark.size}.parquet", + ), + ) + memory_thread.start() + + try: + return_code = benchmark_proc.wait(timeout=timeout) + total_time = time.time() - start_time + except subprocess.TimeoutExpired: + kill_process(benchmark_proc, using_julia) + save_result(total_time=timeout, error="TIMEOUT") + raise BenchmarkError("Benchmark timed out") + except KeyboardInterrupt as e: + kill_process(benchmark_proc, using_julia) + raise e + + if return_code != 0: + save_result(total_time=total_time, error="ERROR") + raise BenchmarkError("Benchmark failed") + + result = max_memory_queue.get(timeout=10) + memory_thread.join(timeout=10) + + if benchmark.construct_only: + result.solve_time = 0 + else: + if result.objective_value is None: + save_result(total_time=total_time, error="ERROR") + raise BenchmarkError("No objective value found in benchmark output") + + save_result( + total_time=total_time, + monitor_result=result, + ) + + +@dataclass +class MonitorResult: + num_variables: int | None = None + num_constraints: int | None = None + num_nonzeros: int | None = None + solve_time: float | None = None + barrier_solve_time: float | None = None + max_memory_uss_mb: float | None = None + objective_value: float | None = None + + +def monitor_benchmark(start_time, proc, result_queue, output_file): + pid = proc.pid + ps_proc = psutil.Process(pid) + + memory_data = [] + process_names = {pid: "main"} + stdout = proc.stdout + + result = MonitorResult() + + keep_checking = True + + os.set_blocking(stdout.fileno(), False) # Requires Python 3.12 for windows + + while keep_checking: + elapsed_time = time.time() - start_time + + marker = None + + # This setup allows us to get memory one last time after process ends + if not ps_proc.is_running(): + keep_checking = False + + try: + for line in iter(stdout.readline, ""): + if elapsed_time > 
LOG_AFTER_S:
+                    print("\t" + line.strip())
+                if line.startswith("PF_BENCHMARK:"):  # markers are stored alongside the memory samples for later timeline analysis
+                    marker = line.removeprefix("PF_BENCHMARK:").strip()
+                elif line.startswith("Optimize a model with "):  # NOTE(review): the following prefixes match Gurobi's log format — other solvers' stats won't be captured; confirm intended
+                    marker = "3_GUROBI_START"
+                    result.num_variables = int(
+                        re.search(r"(\d+) columns", line).group(1)
+                    )
+                    result.num_constraints = int(
+                        re.search(r"(\d+) rows", line).group(1)
+                    )
+                    result.num_nonzeros = int(
+                        re.search(r"(\d+) nonzeros", line).group(1)
+                    )
+                elif line.startswith("Presolved: "):
+                    marker = "3b_GUROBI_PRESOLVED"
+                elif line.startswith("Solved in "):
+                    assert result.solve_time is None, "Multiple solve times found"
+                    result.solve_time = float(
+                        re.search(r"([\d.]+) seconds", line).group(1)
+                    )
+                    marker = "4_GUROBI_END"
+                elif line.startswith("Barrier solved model in "):
+                    assert result.barrier_solve_time is None, (
+                        "Multiple barrier solve times found"
+                    )
+                    result.barrier_solve_time = float(
+                        re.search(r"([\d.]+) seconds", line).group(1)
+                    )
+                    marker = "4_GUROBI_END"
+                elif line.startswith("Optimal objective "):
+                    # Allow multiple, always take last
+                    result.objective_value = float(line.strip().rpartition(" ")[2])
+                elif line.startswith("Best objective "):
+                    assert result.objective_value is None, (
+                        "Multiple objective values found"
+                    )
+                    result.objective_value = float(line.split(" ")[2].rstrip(","))
+        except ValueError:
+            pass  # NOTE(review): swallows readline-on-closed-stream errors AND any failed float()/int() parse above — confirm both are intended
+
+        # We use USS (Unique Set Size) to measure memory usage because it works
+        # across OSes and represents the memory freed if the process were to end
+        # in my opinion is a good metric. 
+ # https://psutil.readthedocs.io/en/latest/index.html#psutil.Process.memory_full_info + try: + memory_info = ps_proc.memory_full_info() + num_threads = len(ps_proc.threads()) + except psutil.NoSuchProcess: + assert ps_proc.is_running() is not None, "Process disappeared unexpectedly" + break + + memory_data.append( + ( + elapsed_time, + pid, + memory_info.uss, + memory_info.rss, + memory_info.vms, + num_threads, + marker, + ) + ) + + try: + children = ps_proc.children(recursive=True) + except psutil.NoSuchProcess: + children = [] + + for child in children: + try: + if child.pid not in process_names: + process_names[child.pid] = child.name() + memory_info = child.memory_full_info() + num_threads = len(child.threads()) + except psutil.NoSuchProcess: + continue + memory_data.append( + ( + elapsed_time, + child.pid, + memory_info.uss, + memory_info.rss, + memory_info.vms, + num_threads, + None, + ) + ) + + delay = POLL_MAX_S - (POLL_MAX_S - POLL_MIN_S) * math.exp( + -elapsed_time / POLL_TRANSITION_S + ) + time.sleep(delay) + + if result.solve_time is None and result.barrier_solve_time is not None: + result.solve_time = result.barrier_solve_time + + df = pl.DataFrame( + memory_data, + schema={ + "time_s": pl.Float64, + "pid": pl.Int32, + "uss_MiB": pl.Float64, + "rss_MiB": pl.Float64, + "vms_MiB": pl.Float64, + "num_threads": pl.UInt16, + "marker": pl.Utf8, + }, + orient="row", + ) + + if df.height != 0: + df = df.with_columns(pl.col("uss_MiB", "rss_MiB", "vms_MiB") / (1024 * 1024)) + + result.max_memory_uss_mb = ( + df.group_by("time_s") + .agg(pl.col("uss_MiB").sum()) + .get_column("uss_MiB") + .max() + ) # type: ignore + + df = df.with_columns( + pl.col("pid").replace_strict(process_names, return_dtype=pl.Utf8) + ).rename({"pid": "process_name"}) + + df.write_parquet(output_file) + + result_queue.put(result) + + +def check_results_output_match( + problem: str, base_results_dir: Path | str, results: pl.DataFrame +): + if not config.get("save_outputs", False): + 
results = results.filter(date=TIMESTAMP)
+
+    reference = None
+
+    libs_compared = set()
+
+    for (library, solver), group in results.group_by(["library", "solver"]):  # first group seen becomes the comparison reference
+        results_dir = get_results_dir(base_results_dir, library, solver)
+        files = list(f.name for f in results_dir.glob("*"))
+
+        if reference is None:
+            reference = (library, solver, results_dir, files)
+            continue
+
+        ref_lib, ref_solver, ref_dir, files_in_ref = reference
+
+        if set(files) != set(files_in_ref):
+            missing_in_ref = set(files) - set(files_in_ref)
+            missing_in_dir = set(files_in_ref) - set(files)
+            assert len(missing_in_ref) > 0 or len(missing_in_dir) > 0
+            if len(missing_in_dir) > 0:
+                raise BenchmarkError(
+                    f"{problem}: Benchmark ({library}, {solver}) is missing files: {', '.join(missing_in_dir)} compared to {(ref_lib, ref_solver)}."
+                )
+            if len(missing_in_ref) > 0:
+                raise BenchmarkError(
+                    f"{problem}: Benchmark ({ref_lib}, {ref_solver}) is missing files: {', '.join(missing_in_ref)} compared to {(library, solver)}."
+                )
+
+        if len(files_in_ref) == 0:
+            continue
+
+        for filename in files_in_ref:
+            ref = read_dataframe(ref_dir / filename)
+            diff = read_dataframe(results_dir / filename)
+            if ref.shape != diff.shape:
+                raise BenchmarkError(
+                    f"{problem}: Benchmark ({ref_lib}, {ref_solver}) and ({library}, {solver}) have different shapes for file (unknown).\n{ref_lib} shape: {ref.shape}, {library} shape: {diff.shape}"
+                )
+            if set(ref.columns) != set(diff.columns):
+                raise BenchmarkError(
+                    f"{problem}: Benchmark ({ref_lib}, {ref_solver}) and ({library}, {solver}) have different columns for file (unknown).\n{ref_lib} columns: {ref.columns}, {library} columns: {diff.columns}"
+                )
+
+            move_to_back = ["solution", "dual", "ub_dual", "lb_dual"]
+            col_order = [c for c in ref.columns if c not in move_to_back] + [
+                c for c in move_to_back if c in ref.columns
+            ]
+
+            diff = diff.select(col_order)
+            ref = ref.select(col_order)
+            ref = ref.sort(*ref.columns)
+            diff = diff.sort(*diff.columns)
+
+            # We need to round both absolutely and relatively (round and round_sig_figs)
+            ref = ref.with_columns(
+                pl.col(c).round_sig_figs(5).round(6)
+                for c in ref.columns
+                if ref[c].dtype.is_float()
+            )
+            diff = diff.with_columns(
+                pl.col(c).round_sig_figs(5).round(6)
+                for c in diff.columns
+                if diff[c].dtype.is_float()
+            )
+
+            for c in ref.columns:
+                ref_col, diff_col = ref[c], diff[c]
+                if "dual" in c:  # duals can differ by sign convention between libraries; flip if the negated column matches better
+                    if (ref_col == diff_col).sum() < (ref_col == -diff_col).sum():
+                        diff_col = -diff_col
+                if not (ref_col == diff_col).all():
+                    num_conflicts = (ref_col != diff_col).sum()
+                    msg = f"{problem}: {ref_lib} vs {library}: (unknown)[{c}]: {num_conflicts} in {ref.height} rows differ"
+                    if (
+                        num_conflicts / ref.height > 0.05
+                    ):  # if more than 5% of rows differ, raise an error
+                        ref_conflict = ref.filter(ref_col != diff_col)
+                        diff_conflict = diff.filter(ref_col != diff_col)
+                        raise BenchmarkError(
+                            msg
+                            + f"\nReference:\n{ref_conflict}\nDiff:\n{diff_conflict}"
+                        )
+                    else:
+                        print(f"WARNING: {msg}, 
maybe multiple solutions exist?") + + libs_compared.add(ref_lib) + libs_compared.add(library) + + if len(libs_compared) > 1: + print(f"{problem}: Outputs match across {', '.join(libs_compared)}") + + +def read_dataframe(path: Path) -> pl.DataFrame: + if path.suffix == ".csv": + return pl.read_csv(path) + elif path.suffix == ".parquet": + return pl.read_parquet(path) + else: + raise ValueError(f"Unsupported file type: {path.suffix}") + + +def kill_process(proc, using_julia, timeout=2): + if using_julia: + proc.kill() + else: + os.killpg(os.getpgid(proc.pid), signal.SIGTERM) + try: + proc.wait(timeout=timeout) + except subprocess.TimeoutExpired: + proc.kill() + + +def read_config(name="config.yaml") -> dict: + with open(CWD / name) as f: + return yaml.safe_load(f) + + +def get_benchmark_code(benchmark: Benchmark) -> Path: + ext = "jl" if benchmark.library == "jump" else "py" + return CWD / "src" / benchmark.code_dir / f"bm_{benchmark.library}.{ext}" + + +def get_base_results_dir(config, base_dir: Path, problem: str, size: int | None): + if config.get("save_outputs", False): + p: Path = ( + base_dir + / problem + / "outputs" + / (str(size) if size is not None else "default") + ) + p.mkdir(parents=True, exist_ok=True) + return nullcontext(p) + else: + return TemporaryDirectory() + + +def get_results_dir(base_results_dir: Path | str, library: str, solver: str): + results_dir = Path(base_results_dir) / solver / library + results_dir.mkdir(parents=True, exist_ok=True) + return results_dir + + +class PastResults: + BENCHMARK_RESULTS_SCHEMA: dict = { + "date": pl.Utf8, + "problem": pl.Utf8, + "library": pl.Utf8, + "solver": pl.Utf8, + "size": pl.Int64, + "num_variables": pl.Int64, + "num_constraints": pl.Int64, + "num_nonzeros": pl.Int64, + "total_time_s": pl.Float64, + "solve_time_s": pl.Float64, + "max_memory_uss_mb": pl.Float64, + "objective_value": pl.Float64, + "error": pl.Utf8, + } + + FILE_NAME = "benchmark_results.csv" + + def __init__(self, base_dir: Path, 
ignore_past_results: bool = False) -> None: + self.base_dir = base_dir + self._path = base_dir / self.FILE_NAME + self._ignore_past_results = ignore_past_results + + if self._path.exists(): + self._data = pl.read_csv(self._path).cast(self.BENCHMARK_RESULTS_SCHEMA) + else: + self._path.parent.mkdir(exist_ok=True) + self._data = pl.DataFrame(schema=self.BENCHMARK_RESULTS_SCHEMA) + self._data.write_csv(self._path) + + def read( + self, + *, + size=None, + date=None, + library=None, + solver=None, + problem=None, + ignore_past_results=None, + ) -> pl.DataFrame: + df = self._data + if size is not None: + df = df.filter(size=size) + if date is not None: + df = df.filter(date=date) + if library is not None: + df = df.filter(library=library) + if solver is not None: + df = df.filter(solver=solver) + if problem is not None: + df = df.filter(problem=problem) + if ignore_past_results or self._ignore_past_results: + df = df.filter(date=TIMESTAMP) + + return df + + def append(self, row: dict): + self._data = self._data.select(self.BENCHMARK_RESULTS_SCHEMA.keys()).vstack( + pl.DataFrame(row, schema=self.BENCHMARK_RESULTS_SCHEMA) + ) + + self._data.write_csv(self._path) + + +def safe_round(value, ndigits): + if value is None: + return None + return round(value, ndigits) + + +class BenchmarkError(Exception): + pass + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--ignore-cache", action="store_true", help="Reset benchmark results file." + ) + argparser.add_argument( + "--config", type=str, default="config.yaml", help="Path to config YAML file." + ) + argparser.add_argument( + "-l", + "--library", + type=str, + help="Only run benchmarks for this library.", + default=None, + ) + args = argparser.parse_args() + config = read_config(name=args.config) + if args.library is not None: + assert args.library in config["libraries"], ( + f"Library {args.library} not found in config." 
+ ) + config["libraries"] = [args.library] + run_all_benchmarks(config, ignore_past_results=args.ignore_cache) diff --git a/benchmarks/src/benchmark_utils/__init__.py b/benchmarks/src/benchmark_utils/__init__.py new file mode 100644 index 00000000..3b560fd2 --- /dev/null +++ b/benchmarks/src/benchmark_utils/__init__.py @@ -0,0 +1,83 @@ +"""Contains the base classes used for benchmarking. + +Note that this contributes to every benchmark so we try to keep the imports mostly clear. +""" + +from __future__ import annotations + +import contextlib +import shutil +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any + + +class BaseBenchmark(ABC): + def __init__( + self, + solver="gurobi", + block_solver=False, + input_dir=None, + results_dir=None, + size=None, + emit_benchmarking_logs=False, + solver_args=None, + **kwargs, + ): + self.solver = solver + self.block_solver = block_solver + self.input_dir = Path(input_dir) if input_dir else None + self.results_dir = Path(results_dir) if results_dir else None + self.size = size + self.emit_benchmarking_logs = emit_benchmarking_logs + self.kwargs = kwargs + self.solver_args = solver_args if solver_args else {} + + @abstractmethod + def build(self, **kwargs) -> Any: ... + + @abstractmethod + def set_timeout_to_zero(self, model) -> None: ... + + @abstractmethod + def solve(self, model) -> None: ... + + def write_results(self, model, **kwargs) -> None: ... + + def get_objective(self) -> float: + assert not self.block_solver, ( + "Cannot get objective value when block_solver is True." + ) + return self._get_objective(self.model) + + @abstractmethod + def _get_objective(self, model) -> float: ... 
+ + def run(self): + if self.emit_benchmarking_logs: + print("PF_BENCHMARK: 1_START", flush=True) + with ( + contextlib.chdir(self.input_dir) + if self.input_dir + else contextlib.nullcontext() + ): + self.model = self.build(**self.kwargs) + + if self.block_solver: + self.set_timeout_to_zero(self.model) + + if self.emit_benchmarking_logs: + print("PF_BENCHMARK: 2_SOLVE", flush=True) + self.solve(self.model) + if self.emit_benchmarking_logs: + print("PF_BENCHMARK: 5_SOLVE_RETURNED", flush=True) + + if self.results_dir is not None and not self.block_solver: + if self.results_dir.exists(): + shutil.rmtree(self.results_dir) + self.results_dir.mkdir(parents=True) + with contextlib.chdir(self.results_dir): + self.write_results(self.model, **self.kwargs) + if self.emit_benchmarking_logs: + print("PF_BENCHMARK: 6_DONE", flush=True) + return self.model diff --git a/benchmarks/src/benchmark_utils/ampl.py b/benchmarks/src/benchmark_utils/ampl.py new file mode 100644 index 00000000..a5bd1353 --- /dev/null +++ b/benchmarks/src/benchmark_utils/ampl.py @@ -0,0 +1,32 @@ +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._set_timeout_to_zero = False + + def set_timeout_to_zero(self, model) -> None: + self._set_timeout_to_zero = True + + def _get_objective(self, model) -> float: + return model.get_objective("obj").value() + + def solve(self, model): + model.option["solver"] = self.solver + default_options = "outlev=1 " + self._convert_solver_args_to_gurobi_options() + if self._set_timeout_to_zero: + model.setOption("gurobi_options", default_options + " timelimit=0") + else: + model.setOption("gurobi_options", default_options) + model.solve() + + def _convert_solver_args_to_gurobi_options(self): + if self.solver_args is None: + return "" + options = [] + for key, value in self.solver_args.items(): + if isinstance(value, bool): + value = int(value) + 
options.append(f"{key}={value}") + return " ".join(options) diff --git a/benchmarks/src/benchmark_utils/cvxpy.py b/benchmarks/src/benchmark_utils/cvxpy.py new file mode 100644 index 00000000..979ca09b --- /dev/null +++ b/benchmarks/src/benchmark_utils/cvxpy.py @@ -0,0 +1,17 @@ +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._solver_kwargs = {} + + def set_timeout_to_zero(self, model) -> None: + self._solver_kwargs["TimeLimit"] = 0.0 + + def solve(self, model): + self._solver_kwargs["OutputFlag"] = 1 + model.solve(self.solver, **self._solver_kwargs) + + def _get_objective(self, model) -> float: + return model.objective.value diff --git a/benchmarks/src/benchmark_utils/gurobipy.py b/benchmarks/src/benchmark_utils/gurobipy.py new file mode 100644 index 00000000..c16aac2f --- /dev/null +++ b/benchmarks/src/benchmark_utils/gurobipy.py @@ -0,0 +1,12 @@ +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def set_timeout_to_zero(self, model): + model.setParam("TimeLimit", 0.0) + + def solve(self, model): + model.optimize() + + def _get_objective(self, model) -> float: + return model.getObjective().getValue() diff --git a/benchmarks/src/benchmark_utils/jump.jl b/benchmarks/src/benchmark_utils/jump.jl new file mode 100644 index 00000000..649d4138 --- /dev/null +++ b/benchmarks/src/benchmark_utils/jump.jl @@ -0,0 +1,88 @@ +module Benchmark + +export run + +using JuMP + +# Read args from command line +args_dict = Dict() +for arg in ARGS + k, v = split(arg, "=", limit=2) + if v == "true" + v = true + elseif v == "false" + v = false + end + args_dict[Symbol(k)] = v +end +solver = pop!(args_dict, :solver, "gurobi") +solver_args = pop!(args_dict, :solver_args, nothing) +block_solver = pop!(args_dict, :block_solver, false) +output_dir = pop!(args_dict, :results_dir, nothing) +problem_size = parse(Int, pop!(args_dict, :problem_size)) +if 
output_dir !== nothing
+    output_dir = abspath(output_dir)  # resolve before any later cd() invalidates a relative path
+end
+
+# Create model
+if solver == "gurobi"
+    import Gurobi
+
+    # direct_model is faster and more memory efficient (according to tests on July 13, 2025).
+    # We use it to make a fair comparison with the other models.
+    base_model = direct_model(Gurobi.Optimizer())
+elseif solver == "highs"
+    import HiGHS
+    base_model = Model(HiGHS.Optimizer)
+elseif solver == "ipopt"
+    import Ipopt
+    base_model = Model(Ipopt.Optimizer)
+else
+    error("Unsupported solver: $(solver)")
+end
+
+
+function optimize!(model::JuMP.Model)
+    println("PF_BENCHMARK: 2_SOLVE")
+    flush(stdout)
+
+    if block_solver
+        set_time_limit_sec(model, 0.0)
+        set_optimizer_attribute(model, "Presolve", 0)  # NOTE(review): "Presolve" is a Gurobi parameter name — confirm behavior when solver is highs/ipopt
+    end
+
+    if solver_args !== nothing
+        for arg in split(solver_args, ",")
+            k, v = split(arg, "=", limit=2)
+            v = parse(Int, v)  # assumes every solver arg value is an integer — TODO confirm
+            set_optimizer_attribute(model, k, v)
+        end
+    end
+
+    JuMP.optimize!(model)
+
+    println("PF_BENCHMARK: 5_SOLVE_RETURNED")
+    flush(stdout)
+
+    if output_dir !== nothing
+        cd(output_dir)  # NOTE(review): chdir only after the solve — presumably so subsequent result writing lands in results_dir; confirm
+    end
+end
+
+function run(base_directory::String, main::Function)
+    println("PF_BENCHMARK: 1_START")
+    flush(stdout)
+
+    model_data_dir = joinpath(base_directory, "model_data")
+    if isdir(model_data_dir)
+        cd(model_data_dir)  # presumably benchmarks read input files relative to model_data/ — TODO confirm
+    end
+
+    main(base_model, problem_size; args_dict...)
+ + println("PF_BENCHMARK: 6_DONE") + flush(stdout) +end + +end + diff --git a/benchmarks/src/benchmark_utils/linopy.py b/benchmarks/src/benchmark_utils/linopy.py new file mode 100644 index 00000000..234f33f3 --- /dev/null +++ b/benchmarks/src/benchmark_utils/linopy.py @@ -0,0 +1,15 @@ +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def set_timeout_to_zero(self, model) -> None: + self.solver_args["TimeLimit"] = 0.0 + + def solve(self, model): + model.solve(self.solver, **self.solver_args) + + def _get_objective(self, model) -> float: + return model.objective.value diff --git a/benchmarks/src/benchmark_utils/pulp.py b/benchmarks/src/benchmark_utils/pulp.py new file mode 100644 index 00000000..1a91d102 --- /dev/null +++ b/benchmarks/src/benchmark_utils/pulp.py @@ -0,0 +1,24 @@ +import pulp + +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + if self.solver == "gurobi": + self._solver_instance = pulp.GUROBI() + else: + raise NotImplementedError( + f"Solver {self.solver} not implemented in PulpBenchmark." 
+ ) + + def set_timeout_to_zero(self, model) -> None: + self._solver_instance.timeLimit = 0.0 + + def solve(self, model: pulp.LpProblem): + model.solve(self._solver_instance) + + def _get_objective(self, model) -> float: + return model.objective.value() diff --git a/benchmarks/src/benchmark_utils/pyoframe.py b/benchmarks/src/benchmark_utils/pyoframe.py new file mode 100644 index 00000000..3fc20a97 --- /dev/null +++ b/benchmarks/src/benchmark_utils/pyoframe.py @@ -0,0 +1,29 @@ +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def __init__(self, *args, use_var_names=False, **kwargs): + import pyoframe as pf + + super().__init__(*args, **kwargs) + self.use_var_names = use_var_names + pf.Config.default_solver = self.solver + + # if self.block_solver: + # Slightly improves performance + # The bottleneck is still the underlying library + # pf.Config.maintain_order = False + # pf.Config.disable_unmatched_checks = True + + def set_timeout_to_zero(self, model): + model.attr.TimeLimitSec = 0 + + def solve(self, model): + if self.solver_args: + for key, value in self.solver_args.items(): + setattr(model.params, key, value) + model.optimize() + return model + + def _get_objective(self, model) -> float: + return model.objective.value diff --git a/benchmarks/src/benchmark_utils/pyomo.py b/benchmarks/src/benchmark_utils/pyomo.py new file mode 100644 index 00000000..be76c021 --- /dev/null +++ b/benchmarks/src/benchmark_utils/pyomo.py @@ -0,0 +1,28 @@ +from pyomo.environ import value +from pyomo.opt import SolverFactory + +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._solver_factory = SolverFactory(self.solver) + if self.solver_args: + for key, value in self.solver_args.items(): + self._solver_factory.options[key] = value + + def set_timeout_to_zero(self, model) -> None: + self._solver_factory.options["timelimit"] = 0.0 + + def 
solve(self, model): + try: + self._solver_factory.solve(model, tee=True) + except ValueError as e: + if self.block_solver: + pass + else: + raise e + + def _get_objective(self, model) -> float: + return value(model.obj) diff --git a/benchmarks/src/benchmark_utils/pyoptinterface.py b/benchmarks/src/benchmark_utils/pyoptinterface.py new file mode 100644 index 00000000..23994509 --- /dev/null +++ b/benchmarks/src/benchmark_utils/pyoptinterface.py @@ -0,0 +1,37 @@ +import pyoptinterface as poi + +from benchmark_utils import BaseBenchmark + + +class Benchmark(BaseBenchmark): + def build(self, **kwargs): + if self.solver == "gurobi": + import pyoptinterface.gurobi as gurobi + + return gurobi.Model() + elif self.solver == "copt": + import pyoptinterface.copt as copt + + return copt.Model() + elif self.solver == "highs": + import pyoptinterface.highs as highs + + return highs.Model() + elif self.solver == "mosek": + import pyoptinterface.mosek as mosek + + return mosek.Model() + else: + raise ValueError(f"Unknown solver {self.solver}") + + def set_timeout_to_zero(self, model) -> None: + model.set_model_attribute(poi.ModelAttribute.TimeLimitSec, 0.0) + + def solve(self, model): + if self.solver_args is not None: + for key, value in self.solver_args.items(): + model.set_raw_parameter(key, value) + model.optimize() + + def _get_objective(self, model) -> float: + return model.get_model_attribute(poi.ModelAttribute.ObjectiveValue) diff --git a/benchmarks/src/energy_planning/.gitignore b/benchmarks/src/energy_planning/.gitignore new file mode 100644 index 00000000..06a7d19b --- /dev/null +++ b/benchmarks/src/energy_planning/.gitignore @@ -0,0 +1 @@ +results_*/ \ No newline at end of file diff --git a/benchmarks/src/energy_planning/Snakefile b/benchmarks/src/energy_planning/Snakefile new file mode 100644 index 00000000..52d2aa6c --- /dev/null +++ b/benchmarks/src/energy_planning/Snakefile @@ -0,0 +1,272 @@ +import shutil +import gdown +import os +from urllib.request import 
urlretrieve +from pathlib import Path + +CATS_GITHUB_URL = "https://raw.githubusercontent.com/WISPO-POP/CATS-CaliforniaTestSystem/f260d8bd89e68997bf12d24e767475b2f2b88a77/" + +CWD = Path(".") +INPUT_DIR = CWD / "raw_data" +DOWNLOADS = INPUT_DIR / "downloads" +CONSTANTS = INPUT_DIR / "constants" +PROCESSED_INPUT_DIR = INPUT_DIR / "preprocessed" +ANALYSIS_DIR = INPUT_DIR / "analysis" +SCRIPTS_DIR = Path("./scripts") + + +rule generate_all_inputs: + input: + PROCESSED_INPUT_DIR / "loads.parquet", + PROCESSED_INPUT_DIR / "lines_simplified.parquet", + PROCESSED_INPUT_DIR / "generators.parquet", + PROCESSED_INPUT_DIR / "yearly_limits.parquet", + PROCESSED_INPUT_DIR / "variable_capacity_factors.parquet", + PROCESSED_INPUT_DIR / "branch_outage_dist_facts.parquet", + PROCESSED_INPUT_DIR / "capex_costs.csv", + CONSTANTS / "map_type_to_vcf_type.csv", + CONSTANTS / "cost_parameters.csv", + output: + directory(CWD / "model_data"), + run: + output_dir = Path(output[0]) + os.makedirs(output_dir, exist_ok=True) + for input_data in input: + shutil.copy(input_data, output_dir / Path(input_data).name) + + +rule fetch_load_data: + """Downloads the load data from the Google Drive folder hosted by the CATS project (https://drive.google.com/drive/folders/1Zo6ZeZ1OSjHCOWZybbTd6PgO4DQFs8_K)""" + output: + DOWNLOADS / "CATS_loads.csv", + run: + gdown.download(id="1Sz8st7g4Us6oijy1UYMPUvkA1XeZlIr8", output=output[0]) + + +rule fetch_generation_data: + """Downloads the generation data from the Google Drive folder hosted by the CATS project (https://drive.google.com/drive/folders/1Zo6ZeZ1OSjHCOWZybbTd6PgO4DQFs8_K)""" + output: + DOWNLOADS / "CATS_generation.csv", + run: + gdown.download(id="1CxLlcwAEUy-JvJQdAfVydJ1p9Ecot-4d", output=output[0]) + + +rule fetch_line_data: + output: + DOWNLOADS / "CATS_lines.json", + run: + urlretrieve(CATS_GITHUB_URL + "GIS/CATS_lines.json", output[0]) + + +rule fetch_generator_data: + output: + DOWNLOADS / "CATS_generators.csv", + run: + 
urlretrieve(CATS_GITHUB_URL + "GIS/CATS_gens.csv", output[0]) + + +rule fetch_matpower_data: + output: + DOWNLOADS / "CaliforniaTestSystem.m", + run: + urlretrieve(CATS_GITHUB_URL + "MATPOWER/CaliforniaTestSystem.m", output[0]) + + +rule fetch_nrel_atb_data: + output: + DOWNLOADS / "NREL_ATB_data.parquet", + run: + urlretrieve( + "https://oedi-data-lake.s3.amazonaws.com/ATB/electricity/parquet/2024/v3.0.0/ATBe.parquet", + output[0], + ) + + +rule parse_matpower_case: + input: + DOWNLOADS / "CaliforniaTestSystem.m", + output: + bus=PROCESSED_INPUT_DIR / "matpower_bus.parquet", + branch=PROCESSED_INPUT_DIR / "matpower_branch.parquet", + gen=PROCESSED_INPUT_DIR / "matpower_gen.parquet", + run: + from energy_planning.scripts.A_parse_matpower_case import ( + app as parse_matpower_case, + ) + + parse_matpower_case.run( + defs={ + "MATPOWER_CASE": input[0], + "BUS_OUTPUT_PATH": output[0], + "BRANCH_OUTPUT_PATH": output[1], + "GEN_OUTPUT_PATH": output[2], + } + ) + + +rule extract_load_data: + """Convert the load data to narrow format and keep only the active loads.""" + input: + DOWNLOADS / "CATS_loads.csv", + output: + PROCESSED_INPUT_DIR / "loads.parquet", + run: + from energy_planning.scripts.B2_extract_load_data import ( + app as extract_load_data, + ) + + extract_load_data.run( + defs={ + "CATS_DATA": input[0], + "LOAD_DATA_OUT": output[0], + } + ) + + +rule extract_line_data: + input: + DOWNLOADS / "CATS_lines.json", + PROCESSED_INPUT_DIR / "matpower_branch.parquet", + output: + PROCESSED_INPUT_DIR / "lines.parquet", + run: + from energy_planning.scripts.C_extract_line_data import app as extract_line_data + + extract_line_data.run( + defs={ + "CATS_DATA": input[0], + "MATPOWER_DATA": input[1], + "LINE_DATA_OUT": output[0], + } + ) + + +rule extract_generator_data: + """Group the generators by type and bus.""" + input: + DOWNLOADS / "CATS_generators.csv", + PROCESSED_INPUT_DIR / "matpower_gen.parquet", + output: + PROCESSED_INPUT_DIR / "generators.parquet", + run: + 
from energy_planning.scripts.B_extract_generator_data import ( + app as process_generator_data, + ) + + process_generator_data.run( + defs={ + "CATS_DATA": input[0], + "MATPOWER_DATA": input[1], + "OUTPUT_PATH": output[0], + } + ) + + +rule extract_capex_data: + input: + DOWNLOADS / "NREL_ATB_data.parquet", + output: + PROCESSED_INPUT_DIR / "capex_costs.csv", + run: + from energy_planning.scripts.D_extract_capex_data import ( + app as extract_capex_data, + ) + + extract_capex_data.run( + defs={ + "NREL_DATA": input[0], + "OUTPUT_PATH": output[0], + } + ) + + +rule compute_capacity_factors: + """Use the hourly generation data to create capacity factors by fuel type.""" + input: + PROCESSED_INPUT_DIR / "generators.parquet", + DOWNLOADS / "CATS_generation.csv", + CONSTANTS / "map_type_to_vcf_type.csv", + output: + PROCESSED_INPUT_DIR / "yearly_limits.parquet", + PROCESSED_INPUT_DIR / "variable_capacity_factors.parquet", + run: + from energy_planning.scripts.E_compute_capacity_factors import ( + app as compute_capacity_factors, + ) + + compute_capacity_factors.run( + defs={ + "GENERATOR_DATA": input[0], + "HOURLY_GENERATION_DATA": input[1], + "TYPE_MAPPING": input[2], + "YEARLY_LIMIT_OUTPUT": output[0], + "VCF_OUTPUT": output[1], + } + ) + + +rule simplify_network: + input: + lines=PROCESSED_INPUT_DIR / "lines.parquet", + generators=PROCESSED_INPUT_DIR / "generators.parquet", + loads=PROCESSED_INPUT_DIR / "loads.parquet", + output: + PROCESSED_INPUT_DIR / "lines_simplified.parquet", + run: + from energy_planning.scripts.C1_simplify_network import main + + main( + lines_path=input.lines, + gens_path=input.generators, + loads_path=input.loads, + output_path=output[0], + ) + + +rule compute_ptdf: + input: + PROCESSED_INPUT_DIR / "lines_simplified.parquet", + output: + PROCESSED_INPUT_DIR / "power_transfer_dist_facts.parquet", + run: + from energy_planning.scripts.C2_compute_power_transfer_dist_facts import ( + app as compute_ptdf, + ) + + compute_ptdf.run( + defs={ + 
"LINES_DATA": input[0], + "OUTPUT_PATH": output[0], + } + ) + + +rule compute_bodf: + input: + PROCESSED_INPUT_DIR / "lines_simplified.parquet", + PROCESSED_INPUT_DIR / "power_transfer_dist_facts.parquet", + output: + PROCESSED_INPUT_DIR / "branch_outage_dist_facts.parquet", + run: + from energy_planning.scripts.C3_compute_branch_outage_dist_facts import ( + app as compute_bodf, + ) + + compute_bodf.run( + defs={ + "LINES_DATA": input[0], + "PTDF_DATA": input[1], + "OUTPUT_PATH": output[0], + } + ) + + +rule analyze_supply_demand: + input: + gen=PROCESSED_INPUT_DIR / "generators.parquet", + vcf=PROCESSED_INPUT_DIR / "variable_capacity_factors.parquet", + loads=PROCESSED_INPUT_DIR / "loads.parquet", + output: + ANALYSIS_DIR / "supply_demand_analysis.parquet", + notebook: + str(SCRIPTS_DIR / "analyze_supply_demand.ipynb") diff --git a/benchmarks/src/energy_planning/__init__.py b/benchmarks/src/energy_planning/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/src/energy_planning/bm_ampl.py b/benchmarks/src/energy_planning/bm_ampl.py new file mode 100644 index 00000000..833c6cc1 --- /dev/null +++ b/benchmarks/src/energy_planning/bm_ampl.py @@ -0,0 +1,259 @@ +from pathlib import Path + +import pandas as pd +from amplpy import AMPL +from benchmark_utils.ampl import Benchmark + + +class Bench(Benchmark): + def build( + self, + capacity_expansion: bool = True, + security_constrained: bool = True, + yearly_limits: bool = True, + variable_capacity_factors: bool = True, + **kwargs, + ): + ampl = AMPL() + + ampl.eval(""" + set G; + set L; + set B; + set T; + set LOADS within B cross T; + set GEN_TYPES; + + param COST_UNSERVED_LOAD; + param BASE_MW; + param SLACK_BUS; + param gen_bus {G}; + param gen_type {G} symbolic; + param gen_pmax {G}; + param gen_cost {G}; + param gen_overhead {G}; + param line_from {L}; + param line_to {L}; + param line_rating {L}; + param susceptance {L}; + param load {LOADS}; + param capex {GEN_TYPES} default 0; + + + 
var Voltage_Angle {b in B, t in T}; + var Power_Flow {l in L, t in T}; + var Load_Unserved {(b,t) in LOADS} >= 0, <= load[b,t]; + """) + + if capacity_expansion: + ampl.eval(""" + var Build_Out {g in G} >= 0, <= gen_pmax[g]; + var Dispatch {g in G, t in T} >= 0; + s.t. Dispatch_ub {g in G, t in T}: Dispatch[g,t] <= Build_Out[g]; + """) + else: + ampl.eval("var Dispatch {g in G, t in T} >= 0, <= gen_pmax[g];") + + ampl.eval(""" + s.t. Power_Flow_ub {l in L, t in T}: Power_Flow[l,t] <= line_rating[l]; + s.t. Power_Flow_lb {l in L, t in T}: Power_Flow[l,t] >= -line_rating[l]; + s.t. Con_Slack_Bus {t in T}: Voltage_Angle[SLACK_BUS,t] = 0; + s.t. Con_Power_Flow {l in L, t in T}: + Power_Flow[l,t] = BASE_MW * susceptance[l] * (Voltage_Angle[line_to[l],t] - Voltage_Angle[line_from[l],t]); + s.t. Con_Power_Balance {b in B, t in T}: + if (b,t) in LOADS then load[b,t] else 0 = + sum {g in G: gen_bus[g] = b} Dispatch[g,t] + + sum {l in L: line_to[l]=b} Power_Flow[l,t] - + sum {l in L: line_from[l]=b} Power_Flow[l,t] + + if (b,t) in LOADS then Load_Unserved[b,t] else 0; + """) + + if capacity_expansion: + ampl.eval(""" + minimize obj: + sum {(b,t) in LOADS} COST_UNSERVED_LOAD * Load_Unserved[b,t] + + sum {g in G, t in T} gen_cost[g] * Dispatch[g,t] + + sum {g in G} capex[gen_type[g]] * Build_Out[g] + + sum {g in G} gen_overhead[g] * Build_Out[g] * card(T); + """) + else: + ampl.eval(""" + minimize obj: + sum {(b,t) in LOADS} COST_UNSERVED_LOAD * Load_Unserved[b,t] + + sum {g in G, t in T} gen_cost[g] * Dispatch[g,t]; + """) + + gens = pd.read_parquet("generators.parquet").set_index("gen_id") + lines = pd.read_parquet("lines_simplified.parquet").set_index("line_id") + loads_df = pd.read_parquet("loads.parquet") + loads_df["datetime"] = loads_df["datetime"].astype(str) + capex_df = pd.read_csv("capex_costs.csv").set_index("type") + cost_params = pd.read_csv("cost_parameters.csv") + + hours = sorted(loads_df["datetime"].unique()) + if self.size is not None: + hours = hours[: 
self.size] + loads_df = loads_df[loads_df["datetime"].isin(hours)] + + loads_df = loads_df.set_index(["bus", "datetime"]) + + ampl.set["G"] = gens.index + ampl.set["L"] = lines.index + ampl.set["B"] = sorted(set(lines["from_bus"]).union(lines["to_bus"])) + ampl.set["T"] = hours + ampl.set["LOADS"] = loads_df.index + ampl.set["GEN_TYPES"] = sorted(gens["type"].unique()) + + ampl.param["gen_bus"] = gens["bus"] + ampl.param["gen_type"] = gens["type"] + ampl.param["gen_pmax"] = gens["Pmax"] + ampl.param["gen_cost"] = gens["cost_per_MWh_linear"] + ampl.param["gen_overhead"] = gens["hourly_overhead_per_MW_capacity"] + ampl.param["capex"] = capex_df["yearly_capex_cost_per_KW"] + + ampl.param["line_from"] = lines["from_bus"] + ampl.param["line_to"] = lines["to_bus"] + ampl.param["line_rating"] = lines["line_rating_MW"] + ampl.param["susceptance"] = 1 / lines["reactance"] + + ampl.param["load"] = loads_df["active_load"] + ampl.param["BASE_MW"] = 100 + ampl.param["COST_UNSERVED_LOAD"] = cost_params.query( + 'name=="load_unserved_MWh"' + )["cost"].iloc[0] + ampl.param["SLACK_BUS"] = 1 + + if security_constrained: + self.add_security_constraints(ampl) + + if yearly_limits: + self.add_yearly_limits(ampl) + + if variable_capacity_factors: + self.add_vcf(ampl, hours) + + return ampl + + def add_security_constraints(self, ampl): + ampl.eval(""" + set CONTIG within L cross L; + param bodf {CONTIG}; + + s.t. Con_Contingency_Pos {(out,aff) in CONTIG, t in T}: + Power_Flow[aff,t] + bodf[out,aff]*Power_Flow[out,t] <= line_rating[aff]; + s.t. 
Con_Contingency_Neg {(out,aff) in CONTIG, t in T}: + Power_Flow[aff,t] + bodf[out,aff]*Power_Flow[out,t] >= -line_rating[aff]; + """) + + bodf = pd.read_parquet("branch_outage_dist_facts.parquet").set_index( + ["outage_line_id", "affected_line_id"] + ) + ampl.set["CONTIG"] = bodf.index + ampl.param["bodf"] = bodf["factor"] + + def add_vcf(self, ampl, hours): + ampl.eval(""" + set VCF_TYPES; + set VCF_GEN_TYPES within GEN_TYPES; + + param gen_to_vcf_type {VCF_GEN_TYPES} symbolic; + param vcf {vt in VCF_TYPES, t in T}; + + s.t. Con_Variable_Dispatch_Limit {g in G, t in T: gen_type[g] in VCF_GEN_TYPES}: + Dispatch[g,t] <= vcf[gen_to_vcf_type[gen_type[g]], t] * gen_pmax[g]; + """) + + vcf_map = pd.read_csv("map_type_to_vcf_type.csv").set_index("type") + ampl.set["VCF_GEN_TYPES"] = vcf_map.index + ampl.set["VCF_TYPES"] = vcf_map["vcf_type"].unique() + ampl.param["gen_to_vcf_type"] = vcf_map + + vcf_df = pd.read_parquet("variable_capacity_factors.parquet") + vcf_df["datetime"] = vcf_df["datetime"].astype(str) + vcf_df = vcf_df[vcf_df["datetime"].isin(hours)] + vcf_df = vcf_df.set_index(["vcf_type", "datetime"]) + ampl.param["vcf"] = vcf_df + + def add_yearly_limits(self, ampl): + ampl.eval(""" + set TYPES_WITH_LIMIT within GEN_TYPES; + param yearly_limit {TYPES_WITH_LIMIT}; + s.t. 
Con_Yearly_Limits {cat in TYPES_WITH_LIMIT}: + sum {g in G, t in T: gen_type[g]=cat} Dispatch[g,t] <= yearly_limit[cat]*card(T)/(24*365); + """) + + yearly_df = pd.read_parquet("yearly_limits.parquet").set_index("type") + ampl.set["TYPES_WITH_LIMIT"] = yearly_df.index + ampl.param["yearly_limit"] = yearly_df["limit"] + + def write_results( + self, + model, + capacity_expansion: bool = True, + **kwargs, + ): + def _entity_df(entity, key_names, value_kind: str, value_name: str): + df = entity.get_values().to_pandas().reset_index() + value_col = next( + (col for col in df.columns if col.endswith(f".{value_kind}")), None + ) + if value_col is None: + raise ValueError(f"No .{value_kind} column found in AMPL entity values") + + key_cols = [col for col in df.columns if col != value_col] + rename_map = dict(zip(key_cols, key_names)) + rename_map[value_col] = value_name + + df = df.rename(columns=rename_map) + + if "datetime" in df.columns: + df["datetime"] = pd.to_datetime(df["datetime"]).astype("datetime64[us]") + + return df + + power_flow_keys = ["line_id", "datetime"] + power_flow = _entity_df( + model.var["Power_Flow"], power_flow_keys, "val", "solution" + ) + power_flow_ub = _entity_df( + model.con["Power_Flow_ub"], power_flow_keys, "dual", "ub_dual" + ) + power_flow_lb = _entity_df( + model.con["Power_Flow_lb"], power_flow_keys, "dual", "lb_dual" + ) + power_flow.merge(power_flow_ub, on=power_flow_keys, how="left").merge( + power_flow_lb, on=power_flow_keys, how="left" + ).to_parquet("power_flow.parquet", index=False) + + if capacity_expansion: + _entity_df( + model.var["Build_Out"], ["gen_id"], "val", "solution" + ).to_parquet("build_out.parquet", index=False) + + _entity_df( + model.var["Dispatch"], ["gen_id", "datetime"], "val", "solution" + ).to_parquet("dispatch.parquet", index=False) + + _entity_df( + model.var["Load_Unserved"], ["bus", "datetime"], "val", "solution" + ).to_parquet("load_unserved.parquet", index=False) + + _df = _entity_df( + 
model.con["Con_Power_Balance"], ["bus", "datetime"], "dual", "dual" + ) + _df["dual"] *= -1 # Fix sign + _df.to_parquet("power_balance_duals.parquet", index=False) + + +if __name__ == "__main__": + base_dir = Path(__file__).parent + benchmark = Bench( + size=1, + input_dir=base_dir / "model_data", + results_dir=base_dir / "results_ampl", + security_constrained=False, + capacity_expansion=True, + yearly_limits=False, + variable_capacity_factors=False, + ) + m = benchmark.run() diff --git a/benchmarks/src/energy_planning/bm_jump.jl b/benchmarks/src/energy_planning/bm_jump.jl new file mode 100644 index 00000000..60ee3ee6 --- /dev/null +++ b/benchmarks/src/energy_planning/bm_jump.jl @@ -0,0 +1,254 @@ +include("../benchmark_utils/jump.jl") + +using JuMP +using ..Benchmark +using DataFrames +using CSV +using Parquet2 +using Parquet2: Dataset + +function main( + model::JuMP.Model, + problem_size::Int; + capacity_expansion::Bool = true, + security_constrained::Bool = true, + yearly_limits::Bool = true, + variable_capacity_factors::Bool = true, +) + + # ------------------------- + # LOAD DATA + # ------------------------- + gens = DataFrame(Dataset("generators.parquet"); copycols=false) + lines = DataFrame(Dataset("lines_simplified.parquet"); copycols=false) + loads_df = DataFrame(Dataset("loads.parquet"); copycols=false) + capex = CSV.read("capex_costs.csv", DataFrame) + cost_params = CSV.read("cost_parameters.csv", DataFrame) + + BASE_MW = 100.0 + COST_UNSERVED_LOAD = cost_params[cost_params.name .== "load_unserved_MWh", :cost][1] + SLACK_BUS = 1 + + hours = sort(unique(loads_df.datetime)) + if problem_size !== nothing + hours = hours[1:problem_size] + loads_df = loads_df[in.(loads_df.datetime, Ref(hours)), :] + end + + # ------------------------- + # SETS + # ------------------------- + G = gens.gen_id + L = lines.line_id + T = hours + B = sort(unique(vcat(lines.from_bus, lines.to_bus))) + + + # ------------------------- + # INDEX MAPS (para acceso rápido) + # 
------------------------- + gen_bus = Dict(zip(gens.gen_id, gens.bus)) + gen_type = Dict(zip(gens.gen_id, gens.type)) + line_from = Dict(zip(lines.line_id, lines.from_bus)) + line_to = Dict(zip(lines.line_id, lines.to_bus)) + + # ------------------------- + # PARAMETERS como contenedores + # ------------------------- + gen_pmax = JuMP.Containers.DenseAxisArray(gens.Pmax, G) + gen_cost = JuMP.Containers.DenseAxisArray(gens.cost_per_MWh_linear, G) + gen_overhead = JuMP.Containers.DenseAxisArray(gens.hourly_overhead_per_MW_capacity, G) + + line_rating = JuMP.Containers.DenseAxisArray(lines.line_rating_MW, L) + susceptance = JuMP.Containers.DenseAxisArray(1.0 ./ lines.reactance, L) + + gens_at_bus = Dict(b => [g for g in G if gen_bus[g] == b] for b in B) + lines_to_bus = Dict(b => [l for l in L if line_to[l] == b] for b in B) + lines_from_bus = Dict(b => [l for l in L if line_from[l] == b] for b in B) + + # carga como contenedor denso (default 0) + load = JuMP.Containers.DenseAxisArray( + zeros(length(B), length(T)), B, T, + ) + for r in eachrow(loads_df) + load[r.bus, r.datetime] = r.active_load + end + + # capex + capex_dict = Dict(zip(capex.type, capex.yearly_capex_cost_per_KW)) + + # ------------------------- + # VARIABLES (contenedores JuMP) + # ------------------------- + if capacity_expansion + @variable(model, 0 <= Build_Out[g in G] <= gen_pmax[g]) + @variable(model, Dispatch[g in G, t in T] >= 0) + + @constraint(model, [g in G, t in T], Dispatch[g, t] <= Build_Out[g]) + else + @variable(model, 0 <= Dispatch[g in G, t in T] <= gen_pmax[g]) + end + + @variable(model, Voltage_Angle[b in B, t in T]) + @variable(model, Power_Flow[l in L, t in T]) + + @constraint(model, Power_Flow_lower_b[l in L, t in T], -line_rating[l] <= Power_Flow[l, t]) + @constraint(model, Power_Flow_upper_b[l in L, t in T], Power_Flow[l, t] <= line_rating[l]) + + @variable(model, Load_Unserved[b in B, t in T; load[b, t] > 0] >= 0) + @constraint( + model, + [b in B, t in T; load[b, t] > 0], + 
Load_Unserved[b, t] <= load[b, t] + ) + + # ------------------------- + # CONSTRAINTS + # ------------------------- + @constraint(model, [t in T], Voltage_Angle[SLACK_BUS, t] == 0) + + @constraint( + model, + [l in L, t in T], + ( + Power_Flow[l, t] == + BASE_MW * susceptance[l] * + (Voltage_Angle[line_to[l], t] - Voltage_Angle[line_from[l], t]) + ) + ) + + @constraint( + model, + Con_Power_Balance[b in B, t in T], + ( + 0 == + -load[b, t] + + sum(Dispatch[g, t] for g in gens_at_bus[b]) + + sum(Power_Flow[l, t] for l in lines_to_bus[b]) - + sum(Power_Flow[l, t] for l in lines_from_bus[b]) + + ((b, t) in Load_Unserved ? Load_Unserved[b, t] : 0.0) + ) + ) + + # ------------------------- + # OBJECTIVE + # ------------------------- + @objective( + model, + Min, + ( + sum( + COST_UNSERVED_LOAD * Load_Unserved[b, t] + for b in B, t in T if (b, t) in Load_Unserved + ) + + sum(gen_cost[g] * Dispatch[g, t] for g in G, t in T) + + (capacity_expansion ? + ( + sum( + capex_dict[gen_type[g]] * Build_Out[g] + for g in G if haskey(capex_dict, gen_type[g]) + ) + sum( + gen_overhead[g] * Build_Out[g] * length(T) for g in G + ) + ) : 0.0 + ) + ) + ) + + # ------------------------- + # SECURITY CONSTRAINTS + # ------------------------- + if security_constrained + bodf = DataFrame(Dataset("branch_outage_dist_facts.parquet"); copycols=false) + OUT = bodf.outage_line_id + AFF = bodf.affected_line_id + FAC = bodf.factor + + @constraint(model, [i in eachindex(OUT), t in T], + Power_Flow[AFF[i], t] + FAC[i]*Power_Flow[OUT[i], t] + <= + line_rating[AFF[i]]) + + @constraint(model, [i in eachindex(OUT), t in T], + Power_Flow[AFF[i], t] + FAC[i]*Power_Flow[OUT[i], t] + >= + -line_rating[AFF[i]]) + end + + # ------------------------- + # VARIABLE CAPACITY FACTORS + # ------------------------- + if variable_capacity_factors + vcf_df = DataFrame(Dataset("variable_capacity_factors.parquet"); copycols=false) + map_df = CSV.read("map_type_to_vcf_type.csv", DataFrame) + + type_to_vcf = 
Dict(zip(map_df.type, map_df.vcf_type)) + + vcf = Dict((r.vcf_type, r.datetime) => r.capacity_factor + for r in eachrow(vcf_df) if r.datetime in T) + + @constraint(model, [g in G, t in T; + haskey(type_to_vcf, gen_type[g])], + Dispatch[g, t] <= + vcf[(type_to_vcf[gen_type[g]], t)] * gen_pmax[g]) + end + + # ------------------------- + # YEARLY LIMITS + # ------------------------- + if yearly_limits + yl = DataFrame(Dataset("yearly_limits.parquet"); copycols=false) + yearly_limit = Dict(zip(yl.type, yl.limit)) + + @constraint(model, [cat in keys(yearly_limit)], + sum(Dispatch[g, t] for g in G, t in T if gen_type[g] == cat) + <= + yearly_limit[cat] * length(T) / (24*365)) + end + + Benchmark.optimize!(model) + + # WRITE RESULTS + write_container_parquet(value.(Dispatch); filename = "dispatch.parquet", header = [:gen_id, :datetime, :solution]) + write_container_parquet(value.(Load_Unserved); filename = "load_unserved.parquet", header = [:bus, :datetime, :solution]) + write_container_parquet(dual.(Con_Power_Balance); filename = "power_balance_duals.parquet", header = [:bus, :datetime, :dual]) + if capacity_expansion + write_container_parquet(value.(Build_Out); filename = "build_out.parquet", header = [:gen_id, :solution]) + end + + + pf_df = DataFrame(Containers.rowtable( + value.(Power_Flow); + header = [:line_id, :datetime, :solution], + )) + lb_df = DataFrame(Containers.rowtable( + dual.(Power_Flow_lower_b); + header = [:line_id, :datetime, :lb_dual], + )) + + ub_df = DataFrame(Containers.rowtable( + dual.(Power_Flow_upper_b); + header = [:line_id, :datetime, :ub_dual], + )) + + + pf_df = leftjoin(pf_df, ub_df, on=[:line_id, :datetime]) + pf_df = leftjoin(pf_df, lb_df, on=[:line_id, :datetime]) + + Parquet2.writefile("power_flow.parquet", pf_df) +end + +function write_container_parquet( + container; + filename::AbstractString, + header::Vector{Symbol}, +) + df = DataFrame(Containers.rowtable(container; header = header)) + Parquet2.writefile(filename, df) +end + + 
+Benchmark.run(@__DIR__, main) + +# To test run from the benchmarks directory: +# julia --project=. src/energy_planning/bm_jump.jl solver=gurobi problem_size=1000 results_dir=src/energy_planning/results_jump \ No newline at end of file diff --git a/benchmarks/src/energy_planning/bm_linopy.py b/benchmarks/src/energy_planning/bm_linopy.py new file mode 100644 index 00000000..3b03df7b --- /dev/null +++ b/benchmarks/src/energy_planning/bm_linopy.py @@ -0,0 +1,294 @@ +import types + +import linopy as lp +import numpy as np +import pandas as pd +import xarray as xr +from benchmark_utils.linopy import Benchmark + + +class Bench(Benchmark): + def build( + self, + capacity_expansion: bool = True, + security_constrained: bool = True, + yearly_limits: bool = True, + variable_capacity_factors: bool = True, + **kwargs, + ): + assert len(kwargs) == 0, f"Unexpected kwargs: {kwargs}" + assert self.input_dir is not None, "Input directory must be specified." + + m = lp.Model() + + container = types.SimpleNamespace() + self.container = container + + ## LOAD DATA + container.gens = ( + pd.read_parquet("generators.parquet").set_index("gen_id").to_xarray() + ) + lines = ( + pd.read_parquet("lines_simplified.parquet").set_index("line_id").to_xarray() + ) + from_buses = lines["from_bus"].rename("bus") + to_buses = lines["to_bus"].rename("bus") + loads = ( + pd.read_parquet("loads.parquet") + .set_index(["bus", "datetime"])["active_load"] + .to_xarray() + ) + capex = ( + pd.read_csv("capex_costs.csv") + .set_index("type")["yearly_capex_cost_per_KW"] + .to_xarray() + ) + cost_params = pd.read_csv("cost_parameters.csv").set_index("name")["cost"] + + BASE_MW = 100 + COST_UNSERVED_LOAD = cost_params.loc["load_unserved_MWh"] + SLACK_BUS = 1 + + ## DEFINE SETS AND PARAMS ## + buses = ( + xr.concat( + [ + from_buses.drop_vars("line_id").rename({"line_id": "bus"}), + to_buses.drop_vars("line_id").rename({"line_id": "bus"}), + ], + dim="bus", + ) + .reset_coords() + .drop_duplicates(...) 
+ .sortby("bus") + .bus + ) + + # Shrink the time horizon to allow for different model sizes + hours = loads["datetime"].drop_duplicates(...).sortby("datetime") + if self.size is not None: + hours = hours[: self.size] + loads = loads.sel(datetime=hours) + container.hours = hours + + container.line_rating = lines["line_rating_MW"] + susceptance = 1 / lines["reactance"] + + ## DEFINE VARIABLES ## + if capacity_expansion: + container.Build_Out = m.add_variables( + coords=[container.gens.gen_id], + lower=0, + upper=container.gens["Pmax"], + name="Build_Out", + ) + container.Dispatch = m.add_variables( + coords=[container.gens.gen_id, container.hours], + lower=0, + name="Dispatch", + ) + container.Dispatch_ub = m.add_constraints( + container.Dispatch <= container.Build_Out, name="Dispatch_ub" + ) + else: + container.Dispatch = m.add_variables( + coords=[container.gens.gen_id, container.hours], + lower=0, + upper=container.gens["Pmax"], + name="Dispatch", + ) + + container.Voltage_Angle = m.add_variables( + coords=[buses, container.hours], name="Voltage_Angle" + ) + container.Power_Flow = m.add_variables( + coords=[lines.line_id, container.hours], name="Power_Flow" + ) + container.Power_Flow_lb = m.add_constraints( + container.Power_Flow >= -container.line_rating, name="Power_Flow_lb" + ) + container.Power_Flow_ub = m.add_constraints( + container.Power_Flow <= container.line_rating, name="Power_Flow_ub" + ) + container.Load_Unserved = m.add_variables( + lower=0, upper=loads, name="Load_Unserved" + ) + + ## DEFINE CONSTRAINTS ## + container.Con_Slack_Bus = m.add_constraints( + container.Voltage_Angle.sel(bus=SLACK_BUS) == 0, name="Con_Slack_Bus" + ) + + container.Con_Power_Flow = m.add_constraints( + container.Power_Flow + == BASE_MW + * susceptance + * ( + container.Voltage_Angle.sel(bus=to_buses) + - container.Voltage_Angle.sel(bus=from_buses) + ), + name="Con_Power_Flow", + ) + + container.Con_Power_Balance = m.add_constraints( + 0 + == ( + 
container.Dispatch.groupby(container.gens.bus).sum() + - loads + + container.Load_Unserved + - container.Power_Flow.groupby(from_buses).sum() + + container.Power_Flow.groupby(to_buses).sum() + ), + name="Con_Power_Balance", + ) + + ## SET OBJECTIVE ## + m.add_objective(COST_UNSERVED_LOAD * container.Load_Unserved.sum()) + m.objective += ( + container.Dispatch * container.gens["cost_per_MWh_linear"] + ).sum() + + if capacity_expansion: + m.objective += ( + container.Build_Out.groupby(container.gens.type) + .sum() + .reindex(type=capex.type) + * capex + ).sum() + m.objective += ( + container.Build_Out * container.gens["hourly_overhead_per_MW_capacity"] + ).sum() * len(container.hours) + + if security_constrained: + self.add_security_constraints(m, container) + + if yearly_limits: + self.add_yearly_limits(m, container) + + if variable_capacity_factors: + self.add_vcf(m, container) + return m + + def add_security_constraints(self, m, container): + bodf = pd.read_parquet("branch_outage_dist_facts.parquet").rename( + columns={"affected_line_id": "line_id"} + ) + + # must use loop because the data structure is sparse! 
+ # otherwise will create way too many constraints + for outage, bodf_outage in bodf.groupby("outage_line_id"): + bodf_outage = bodf_outage.set_index("line_id")["factor"].to_xarray() + extra_flow = container.Power_Flow.sel(line_id=outage) * bodf_outage + base_flow = container.Power_Flow.sel(line_id=extra_flow.data.line_id) + total_flow = base_flow + extra_flow + + rating_filtered = container.line_rating.sel(line_id=total_flow.data.line_id) + + container.Con_Security_lb = m.add_constraints( + total_flow <= rating_filtered, name=f"Con_Security_lb_{outage}" + ) + container.Con_Security_ub = m.add_constraints( + total_flow >= -rating_filtered, name=f"Con_Security_ub_{outage}" + ) + + def add_vcf(self, m, container): + vcf = pd.read_parquet("variable_capacity_factors.parquet") + vcf = vcf.loc[vcf["datetime"].isin(container.hours.to_numpy())] + vcf = vcf.set_index(["datetime", "vcf_type"])["capacity_factor"].to_xarray() + + vcf_type_to_type = ( + pd.read_csv("map_type_to_vcf_type.csv") + .set_index("type")["vcf_type"] + .to_xarray() + ) + + vcf_by_type = vcf.sel(vcf_type=vcf_type_to_type).drop_vars("vcf_type") + + gens_filtered = container.gens.type + gens_filtered = gens_filtered.sel( + gen_id=np.isin(gens_filtered.values, vcf_by_type.type.values) + ) + + vcf_by_gen = vcf_by_type.sel(type=gens_filtered).drop_vars("type") + + max_dispatch = vcf_by_gen * container.gens["Pmax"] + + filtered_dispatch = container.Dispatch.sel( + gen_id=np.isin( + container.Dispatch.data.gen_id.values, max_dispatch.gen_id.values + ) + ) + + container.Con_Variable_Dispatch_Limit = m.add_constraints( + filtered_dispatch <= max_dispatch, name="Con_Variable_Dispatch_Limit" + ) + + def add_yearly_limits(self, m, container): + yearly_limits = ( + pd.read_parquet("yearly_limits.parquet") + .set_index("type")["limit"] + .to_xarray() + ) + yearly_limits *= len(container.hours) / (24 * 365) + + dispatch_by_type = ( + container.Dispatch.sum("datetime").groupby(container.gens.type).sum() + ) + 
dispatch_by_type = dispatch_by_type.sel( + type=np.isin( + dispatch_by_type.data["type"].values, yearly_limits["type"].values + ) + ) + + container.Con_Yearly_Limits = m.add_constraints( + dispatch_by_type <= yearly_limits, name="Con_Yearly_Limits" + ) + + def write_results( + self, + model, + capacity_expansion: bool = True, + security_constrained: bool = True, + **kwargs, + ): + container = self.container + pf = container.Power_Flow.solution.to_dataframe() + + pf = pf.join( + container.Power_Flow_ub.dual.to_dataframe().rename( + columns={"dual": "ub_dual"} + ), + on=["line_id", "datetime"], + ).join( + container.Power_Flow_lb.dual.to_dataframe().rename( + columns={"dual": "lb_dual"} + ), + on=["line_id", "datetime"], + ) + pf.to_parquet("power_flow.parquet") + + if capacity_expansion: + container.Build_Out.solution.to_dataframe().to_parquet("build_out.parquet") + container.Dispatch.solution.to_dataframe().to_parquet("dispatch.parquet") + container.Load_Unserved.solution.to_dataframe().to_parquet( + "load_unserved.parquet" + ) + container.Con_Power_Balance.dual.to_dataframe().to_parquet( + "power_balance_duals.parquet" + ) + + +if __name__ == "__main__": + from pathlib import Path + + base_dir = Path(__file__).parent + benchmark = Bench( + size=2, + input_dir=base_dir / "model_data", + results_dir=base_dir / "results_linopy", + capacity_expansion=True, + security_constrained=False, + yearly_limits=True, + variable_capacity_factors=True, + ) + m = benchmark.run() diff --git a/benchmarks/src/energy_planning/bm_pyoframe.py b/benchmarks/src/energy_planning/bm_pyoframe.py new file mode 100644 index 00000000..3cecaacc --- /dev/null +++ b/benchmarks/src/energy_planning/bm_pyoframe.py @@ -0,0 +1,165 @@ +"""Pyoframe implementation of the facility location benchmark.""" + +from pathlib import Path + +import polars as pl +from benchmark_utils.pyoframe import Benchmark + +from pyoframe import Model, Param, Set, Variable + + +class Bench(Benchmark): + def build( + self, + 
capacity_expansion: bool = True, + security_constrained: bool = True, + yearly_limits: bool = True, + variable_capacity_factors: bool = True, + verbose: bool = False, + **kwargs, + ): + assert len(kwargs) == 0, f"Unexpected kwargs: {kwargs}" + assert self.input_dir is not None, "Input directory must be specified." + + m = Model(verbose=verbose) + + ## LOAD DATA + m.gens = pl.read_parquet("generators.parquet") + lines = pl.read_parquet("lines_simplified.parquet") + from_buses = lines.select("line_id", bus="from_bus") + to_buses = lines.select("line_id", bus="to_bus") + loads = pl.read_parquet("loads.parquet") + capex = pl.read_csv("capex_costs.csv") + cost_params = pl.read_csv("cost_parameters.csv") + + BASE_MW = 100 + COST_UNSERVED_LOAD = cost_params.filter(name="load_unserved_MWh")["cost"].item() + SLACK_BUS = 1 + + ## DEFINE SETS AND PARAMS ## + buses = Set(from_buses["bus"].unique()) + Set(to_buses["bus"].unique()) + + # Shrink the time horizon to allow for different model sizes + hours = loads.get_column("datetime").unique().sort() + if self.size is not None: + hours = hours.head(self.size) + loads = loads.filter(pl.col("datetime").is_in(hours.implode())) + m.hours = Set(hours) + + m.line_rating = Param(lines["line_id", "line_rating_MW"]) + susceptance = 1 / Param(lines["line_id", "reactance"]) + loads = Param(loads["bus", "datetime", "active_load"]) + + ## DEFINE VARIABLES ## + if capacity_expansion: + m.Build_Out = Variable(m.gens["gen_id"], lb=0, ub=m.gens["gen_id", "Pmax"]) + m.Dispatch = Variable(m.gens["gen_id"], m.hours, lb=0, ub=m.Build_Out) + else: + m.Dispatch = Variable( + m.gens["gen_id"], m.hours, lb=0, ub=m.gens["gen_id", "Pmax"] + ) + m.Voltage_Angle = Variable(buses, m.hours) + m.Power_Flow = Variable( + lines["line_id"], m.hours, lb=-m.line_rating, ub=m.line_rating + ) + m.Load_Unserved = Variable(loads, lb=0, ub=loads) + + ## DEFINE CONSTRAINTS ## + m.Con_Slack_Bus = m.Voltage_Angle.pick(bus=SLACK_BUS) == 0 + + m.Con_Power_Flow = m.Power_Flow 
== BASE_MW * susceptance * ( + m.Voltage_Angle.map(to_buses) - m.Voltage_Angle.map(from_buses) + ) + + m.Con_Power_Balance = 0 == ( + m.Dispatch.map(m.gens[["gen_id", "bus"]]) + | -loads + | m.Load_Unserved + | -m.Power_Flow.map(from_buses) + | m.Power_Flow.map(to_buses) + ) + + ## SET OBJECTIVE ## + m.minimize = COST_UNSERVED_LOAD * m.Load_Unserved.sum() + m.minimize += (m.Dispatch * m.gens["gen_id", "cost_per_MWh_linear"]).sum() + + if capacity_expansion: + m.minimize += (m.Build_Out.map(m.gens[["gen_id", "type"]]) * capex).sum() + m.minimize += ( + m.Build_Out * m.gens["gen_id", "hourly_overhead_per_MW_capacity"] + ).sum() * len(m.hours) + + if security_constrained: + self.add_security_constraints(m) + + if yearly_limits: + self.add_yearly_limits(m) + + if variable_capacity_factors: + self.add_vcf(m) + + return m + + def add_security_constraints(self, m: Model): + bodf = Param("branch_outage_dist_facts.parquet") + + # Power flow on outage_line * factor + affected_line_flow <= affected_line_rating + conting_flow = m.Power_Flow.over( + "outage_line_id" + ).drop_extras() + m.Power_Flow.rename( + {"line_id": "outage_line_id"} + ) * bodf.rename({"affected_line_id": "line_id"}) + line_rating = m.line_rating.over("outage_line_id", "datetime").drop_extras() + m.Con_Contingency_Pos = conting_flow <= line_rating + m.Con_Contingency_Neg = -line_rating <= conting_flow + + def add_vcf(self, m: Model): + vcf = Param("variable_capacity_factors.parquet").within(m.hours) + vcf_type_to_type = pl.read_csv("map_type_to_vcf_type.csv") + + m.Con_Variable_Dispatch_Limit = m.Dispatch.drop_extras() <= ( + vcf.map(vcf_type_to_type).map(m.gens[["gen_id", "type"]]) + * m.gens["gen_id", "Pmax"] + ) + + def add_yearly_limits(self, m: Model): + yearly_limits = Param("yearly_limits.parquet") + m.Con_Yearly_Limits = m.Dispatch.map(m.gens[["gen_id", "type"]]).sum( + "datetime" + ).drop_extras() <= yearly_limits * len(m.hours) / (24 * 365) + + def write_results( + self, + model, + 
capacity_expansion: bool = True, + security_constrained: bool = True, + **kwargs, + ): + model.Power_Flow.solution.join( + model.Power_Flow_ub.dual.rename({"dual": "ub_dual"}), + on=["line_id", "datetime"], + ).join( + model.Power_Flow_lb.dual.rename({"dual": "lb_dual"}), + on=["line_id", "datetime"], + ).write_parquet("power_flow.parquet") + + if capacity_expansion: + model.Build_Out.solution.write_parquet("build_out.parquet") + model.Dispatch.solution.write_parquet("dispatch.parquet") + model.Load_Unserved.solution.write_parquet("load_unserved.parquet") + model.Con_Power_Balance.dual.write_parquet("power_balance_duals.parquet") + + +if __name__ == "__main__": + base_dir = Path(__file__).parent + benchmark = Bench( + size=1, + input_dir=base_dir / "model_data", + results_dir=base_dir / "results_pyoframe", + verbose=True, + capacity_expansion=True, + security_constrained=True, + yearly_limits=True, + variable_capacity_factors=True, + ) + m = benchmark.run() diff --git a/benchmarks/src/energy_planning/bm_pyomo.py b/benchmarks/src/energy_planning/bm_pyomo.py new file mode 100644 index 00000000..1dcdd9ec --- /dev/null +++ b/benchmarks/src/energy_planning/bm_pyomo.py @@ -0,0 +1,307 @@ +from pathlib import Path + +import polars as pl +import pyomo.environ as pyo +from benchmark_utils.pyomo import Benchmark + + +class Bench(Benchmark): + def build( + self, + capacity_expansion: bool = True, + security_constrained: bool = True, + yearly_limits: bool = True, + variable_capacity_factors: bool = True, + **kwargs, + ): + m = pyo.ConcreteModel() + m.dual = pyo.Suffix(direction=pyo.Suffix.IMPORT) + + assert self.input_dir is not None, "Input directory must be specified." 
+ + ## LOAD DATA + gens = pl.read_parquet("generators.parquet") + lines = pl.read_parquet("lines_simplified.parquet") + loads_df = pl.read_parquet("loads.parquet") + capex = pl.read_csv("capex_costs.csv") + cost_params = pl.read_csv("cost_parameters.csv") + + BASE_MW = 100 + COST_UNSERVED_LOAD = cost_params.filter(name="load_unserved_MWh")["cost"].item() + SLACK_BUS = 1 + + # shrink time horizon + hours = loads_df.get_column("datetime").unique().sort() + if self.size is not None: + hours = hours.head(self.size) + loads_df = loads_df.filter(pl.col("datetime").is_in(hours.implode())) + + # ------------------------- + # SETS + # ------------------------- + m.G = pyo.Set(initialize=gens["gen_id"].to_list()) + m.L = pyo.Set(initialize=lines["line_id"].to_list()) + m.T = pyo.Set(initialize=hours.to_list()) + + m.B = pyo.Set( + initialize=sorted( + set(lines["from_bus"].to_list() + lines["to_bus"].to_list()) + ) + ) + + # ------------------------- + # PARAMETERS + # ------------------------- + m.gen_bus = dict(zip(gens["gen_id"], gens["bus"])) + m.gen_type = dict(zip(gens["gen_id"], gens["type"])) + m.gen_pmax = dict(zip(gens["gen_id"], gens["Pmax"])) + m.gen_cost = dict(zip(gens["gen_id"], gens["cost_per_MWh_linear"])) + m.gen_overhead = dict( + zip(gens["gen_id"], gens["hourly_overhead_per_MW_capacity"]) + ) + m.line_from = dict(zip(lines["line_id"], lines["from_bus"])) + m.line_to = dict(zip(lines["line_id"], lines["to_bus"])) + m.line_rating = dict(zip(lines["line_id"], lines["line_rating_MW"])) + m.susceptance = { + lid: 1 / x for lid, x in zip(lines["line_id"], lines["reactance"]) + } + m.load = dict( + zip(zip(loads_df["bus"], loads_df["datetime"]), loads_df["active_load"]) + ) + m.capex = dict(zip(capex["type"], capex["yearly_capex_cost_per_KW"])) + + m.LOADS = pyo.Set(initialize=m.load.keys(), dimen=2) + + m.gens_at_bus = {b: [g for g in m.G if m.gen_bus[g] == b] for b in m.B} + m.lines_to_bus = {b: [l for l in m.L if m.line_to[l] == b] for b in m.B} + 
m.lines_from_bus = {b: [l for l in m.L if m.line_from[l] == b] for b in m.B} + + # ------------------------- + # VARIABLES + # ------------------------- + if capacity_expansion: + m.Build_Out = pyo.Var(m.G, bounds=lambda m, g: (0, m.gen_pmax[g])) + m.Dispatch = pyo.Var(m.G, m.T, within=pyo.NonNegativeReals) + m.Dispatch_ub = pyo.Constraint( + m.G, + m.T, + rule=lambda m, g, t: m.Dispatch[g, t] <= m.Build_Out[g], + ) + else: + m.Dispatch = pyo.Var(m.G, m.T, bounds=lambda m, g, t: (0, m.gen_pmax[g])) + + m.Voltage_Angle = pyo.Var(m.B, m.T) + + m.Power_Flow = pyo.Var(m.L, m.T) + m.Power_Flow_ub = pyo.Constraint( + m.L, m.T, rule=lambda m, l, t: m.Power_Flow[l, t] <= m.line_rating[l] + ) + m.Power_Flow_lb = pyo.Constraint( + m.L, m.T, rule=lambda m, l, t: m.Power_Flow[l, t] >= -m.line_rating[l] + ) + + m.Load_Unserved = pyo.Var(m.LOADS, bounds=lambda m, b, t: (0, m.load[b, t])) + + # ------------------------- + # CONSTRAINTS + # ------------------------- + # Slack bus + m.Con_Slack_Bus = pyo.Constraint( + m.T, rule=lambda m, t: m.Voltage_Angle[SLACK_BUS, t] == 0 + ) + + m.Con_Power_Flow = pyo.Constraint( + m.L, + m.T, + rule=lambda m, l, t: ( + m.Power_Flow[l, t] + == ( + BASE_MW + * m.susceptance[l] + * ( + m.Voltage_Angle[m.line_to[l], t] + - m.Voltage_Angle[m.line_from[l], t] + ) + ) + ), + ) + + m.Con_Power_Balance = pyo.Constraint( + m.B, + m.T, + rule=lambda m, b, t: ( + m.load.get((b, t), 0) + == sum(m.Dispatch[g, t] for g in m.gens_at_bus[b]) + + sum(m.Power_Flow[l, t] for l in m.lines_to_bus[b]) + - sum(m.Power_Flow[l, t] for l in m.lines_from_bus[b]) + + (m.Load_Unserved[b, t] if (b, t) in m.LOADS else 0) + ), + ) + + m.Obj = pyo.Objective( + sense=pyo.minimize, + rule=lambda m: ( + sum(COST_UNSERVED_LOAD * m.Load_Unserved[b, t] for b, t in m.LOADS) + + sum(m.gen_cost[g] * m.Dispatch[g, t] for g in m.G for t in m.T) + + ( + ( + sum( + m.capex[m.gen_type[g]] * m.Build_Out[g] + for g in m.G + if m.gen_type[g] in m.capex + ) + + sum( + m.gen_overhead[g] * 
m.Build_Out[g] * len(m.T) for g in m.G + ) + ) + if capacity_expansion + else 0 + ) + ), + ) + + if security_constrained: + self.add_security_constraints(m) + + if yearly_limits: + self.add_yearly_limits(m) + + if variable_capacity_factors: + self.add_vcf(m) + + return m + + def add_security_constraints(self, m): + bodf = pl.read_parquet("branch_outage_dist_facts.parquet").select( + ["outage_line_id", "affected_line_id", "factor"] + ) + bodf_dict = {(row[0], row[1]): row[2] for row in bodf.iter_rows()} + + m.CONTIG = pyo.Set(initialize=bodf_dict.keys(), dimen=2) + + m.Con_Contingency_Pos = pyo.Constraint( + m.CONTIG, + m.T, + rule=lambda m, out, aff, t: ( + m.Power_Flow[aff, t] + bodf_dict[out, aff] * m.Power_Flow[out, t] + <= m.line_rating[aff] + ), + ) + + m.Con_Contingency_Neg = pyo.Constraint( + m.CONTIG, + m.T, + rule=lambda m, out, aff, t: ( + m.Power_Flow[aff, t] + bodf_dict[out, aff] * m.Power_Flow[out, t] + >= -m.line_rating[aff] + ), + ) + + def add_vcf(self, m): + vcf_df = pl.read_parquet("variable_capacity_factors.parquet") + vcf_type_map = pl.read_csv("map_type_to_vcf_type.csv") + + type_to_vcf_type = dict(zip(vcf_type_map["type"], vcf_type_map["vcf_type"])) + + vcf_dict = { + (row["vcf_type"], row["datetime"]): row["capacity_factor"] + for row in vcf_df.iter_rows(named=True) + if row["datetime"] in m.T + } + m.Con_Variable_Dispatch_Limit = pyo.Constraint( + m.G, + m.T, + rule=lambda m, g, t: ( + m.Dispatch[g, t] + <= vcf_dict[type_to_vcf_type[m.gen_type[g]], t] * m.gen_pmax[g] + ) + if m.gen_type[g] in type_to_vcf_type + else pyo.Constraint.Skip, + ) + + def add_yearly_limits(self, m): + yearly_limits_df = pl.read_parquet("yearly_limits.parquet") + yearly_limit = dict(zip(yearly_limits_df["type"], yearly_limits_df["limit"])) + + m.TYPES_WITH_LIMIT = pyo.Set(initialize=yearly_limit.keys()) + m.Con_Yearly_Limits = pyo.Constraint( + m.TYPES_WITH_LIMIT, + rule=lambda m, cat: ( + sum(m.Dispatch[g, t] for g in m.G for t in m.T if m.gen_type[g] == cat) + <= 
yearly_limit[cat] * len(m.T) / (24 * 365) + ), + ) + + def write_results( + self, + model, + capacity_expansion: bool = True, + security_constrained: bool = True, + **kwargs, + ): + power_flow_keys = ["line_id", "datetime"] + create_result_df_for_var(model.Power_Flow, *power_flow_keys).join( + create_result_df_for_constr( + model, model.Power_Flow_ub, *power_flow_keys + ).rename({"dual": "ub_dual"}), + on=power_flow_keys, + ).join( + create_result_df_for_constr( + model, model.Power_Flow_lb, *power_flow_keys + ).rename({"dual": "lb_dual"}), + on=power_flow_keys, + ).write_parquet("power_flow.parquet") + + if capacity_expansion: + create_result_df_for_var(model.Build_Out, "gen_id").write_parquet( + "build_out.parquet" + ) + create_result_df_for_var(model.Dispatch, "gen_id", "datetime").write_parquet( + "dispatch.parquet" + ) + create_result_df_for_var(model.Load_Unserved, "bus", "datetime").write_parquet( + "load_unserved.parquet" + ) + create_result_df_for_constr( + model, model.Con_Power_Balance, "bus", "datetime" + ).write_parquet("power_balance_duals.parquet") + + +def create_result_df_for_var(var, *key_names): + if len(key_names) > 1: + data = { + key: [key[i] for key in var.index_set()] for i, key in enumerate(key_names) + } + else: + data = {key_names[0]: [key for key in var.index_set()]} + + data["solution"] = [pyo.value(var[key]) for key in var.index_set()] + + return pl.DataFrame(data) + + +def create_result_df_for_constr(m, constr, *key_names): + if len(key_names) > 1: + data = { + key: [key[i] for key in constr.index_set()] + for i, key in enumerate(key_names) + } + else: + data = {key_names[0]: [key for key in constr.index_set()]} + + data["dual"] = [m.dual[constr[key]] for key in constr.index_set()] + return pl.DataFrame(data) + + +if __name__ == "__main__": + base_dir = Path(__file__).parent + benchmark = Bench( + size=1, + input_dir=base_dir / "model_data", + results_dir=base_dir / "results_pyomo", + security_constrained=True, + 
capacity_expansion=False, + yearly_limits=False, + variable_capacity_factors=False, + ) + benchmark.run() diff --git a/benchmarks/src/energy_planning/bm_pyoptinterface.py b/benchmarks/src/energy_planning/bm_pyoptinterface.py new file mode 100644 index 00000000..11ff1dc9 --- /dev/null +++ b/benchmarks/src/energy_planning/bm_pyoptinterface.py @@ -0,0 +1,385 @@ +import types + +import polars as pl +import pyoptinterface as poi +from benchmark_utils.pyoptinterface import Benchmark + + +class Bench(Benchmark): + def build( + self, + capacity_expansion: bool = True, + security_constrained: bool = True, + yearly_limits: bool = True, + variable_capacity_factors: bool = True, + **kwargs, + ): + assert len(kwargs) == 0, f"Unexpected kwargs: {kwargs}" + assert self.input_dir is not None, "Input directory must be specified." + + model = super().build(**kwargs) + + container = types.SimpleNamespace() + self.container = container + + ## LOAD DATA + gens = pl.read_parquet("generators.parquet") + lines = pl.read_parquet("lines_simplified.parquet") + loads_df = pl.read_parquet("loads.parquet") + capex = pl.read_csv("capex_costs.csv") + cost_params = pl.read_csv("cost_parameters.csv") + + BASE_MW = 100 + COST_UNSERVED_LOAD = cost_params.filter(name="load_unserved_MWh")["cost"].item() + SLACK_BUS = 1 + + # shrink time horizon + hours = loads_df.get_column("datetime").unique().sort() + if self.size is not None: + hours = hours.head(self.size) + loads_df = loads_df.filter(pl.col("datetime").is_in(hours.implode())) + + # ------------------------- + # PARAMETERS + # ------------------------- + container.gen_bus = poi.tupledict(dict(zip(gens["gen_id"], gens["bus"]))) + container.gen_type = poi.tupledict(dict(zip(gens["gen_id"], gens["type"]))) + container.gen_pmax = poi.tupledict(dict(zip(gens["gen_id"], gens["Pmax"]))) + container.gen_cost = poi.tupledict( + dict(zip(gens["gen_id"], gens["cost_per_MWh_linear"])) + ) + container.gen_overhead = poi.tupledict( + zip(gens["gen_id"], 
gens["hourly_overhead_per_MW_capacity"]) + ) + container.line_from = poi.tupledict( + dict(zip(lines["line_id"], lines["from_bus"])) + ) + container.line_to = poi.tupledict(dict(zip(lines["line_id"], lines["to_bus"]))) + container.line_rating = poi.tupledict( + dict(zip(lines["line_id"], lines["line_rating_MW"])) + ) + container.susceptance = poi.tupledict( + {lid: 1 / x for lid, x in zip(lines["line_id"], lines["reactance"])} + ) + container.load = poi.tupledict( + zip(zip(loads_df["bus"], loads_df["datetime"]), loads_df["active_load"]) + ) + container.capex = poi.tupledict( + zip(capex["type"], capex["yearly_capex_cost_per_KW"]) + ) + + # ------------------------- + # SETS + # ------------------------- + container.G = gens["gen_id"] + container.L = lines["line_id"] + container.T = hours + + container.B = pl.concat([lines["from_bus"], lines["to_bus"]]).unique() + + container.LOADS = container.load.keys() + + # ------------------------- + # COMPUTED PARAMS + # ------------------------- + container.gens_at_bus = poi.tupledict( + { + b: [g for g in container.G if container.gen_bus[g] == b] + for b in container.B + } + ) + container.lines_to_bus = poi.tupledict( + { + b: [l for l in container.L if container.line_to[l] == b] + for b in container.B + } + ) + + container.lines_from_bus = poi.tupledict( + { + b: [l for l in container.L if container.line_from[l] == b] + for b in container.B + } + ) + + # ------------------------- + # VARIABLES + # ------------------------- + if capacity_expansion: + container.Build_Out = poi.make_tupledict( + container.G, + rule=lambda g: model.add_variable(lb=0, ub=container.gen_pmax[g]), + ) + container.Dispatch = model.add_variables(container.G, container.T, lb=0) + container.Dispatch_ub = poi.make_tupledict( + container.G, + container.T, + rule=lambda g, t: model.add_linear_constraint( + container.Dispatch[g, t] <= container.Build_Out[g] + ), + ) + else: + container.Dispatch = poi.make_tupledict( + container.G, + container.T, + 
rule=lambda g, t: model.add_variable(lb=0, ub=container.gen_pmax[g]), + ) + + container.Voltage_Angle = model.add_variables(container.B, container.T) + + container.Power_Flow = model.add_variables(container.L, container.T) + container.Power_Flow_ub = poi.make_tupledict( + container.L, + container.T, + rule=lambda l, t: model.add_linear_constraint( + container.Power_Flow[l, t] <= container.line_rating[l] + ), + ) + container.Power_Flow_lb = poi.make_tupledict( + container.L, + container.T, + rule=lambda l, t: model.add_linear_constraint( + container.Power_Flow[l, t] >= -container.line_rating[l] + ), + ) + + container.Load_Unserved = poi.make_tupledict( + container.LOADS, + rule=lambda b, t: model.add_variable(lb=0, ub=container.load[b, t]), + ) + + # ------------------------- + # CONSTRAINTS + # ------------------------- + # Slack bus + container.Con_Slack_Bus = poi.make_tupledict( + container.T, + rule=lambda t: model.add_linear_constraint( + container.Voltage_Angle[SLACK_BUS, t] == 0 + ), + ) + + container.Con_Power_Flow = poi.make_tupledict( + container.L, + container.T, + rule=lambda l, t: model.add_linear_constraint( + container.Power_Flow[l, t] + == ( + BASE_MW + * container.susceptance[l] + * ( + container.Voltage_Angle[container.line_to[l], t] + - container.Voltage_Angle[container.line_from[l], t] + ) + ) + ), + ) + + container.Con_Power_Balance = poi.make_tupledict( + container.B, + container.T, + rule=lambda b, t: model.add_linear_constraint( + container.load.get((b, t), 0) + == poi.quicksum( + container.Dispatch[g, t] for g in container.gens_at_bus[b] + ) + + poi.quicksum( + container.Power_Flow[l, t] for l in container.lines_to_bus[b] + ) + - poi.quicksum( + container.Power_Flow[l, t] for l in container.lines_from_bus[b] + ) + + (container.Load_Unserved[b, t] if (b, t) in container.LOADS else 0) + ), + ) + + container.Obj = model.set_objective( + ( + poi.quicksum( + COST_UNSERVED_LOAD * container.Load_Unserved[b, t] + for b, t in container.LOADS + ) + + 
poi.quicksum( + container.gen_cost[g] * container.Dispatch[g, t] + for g in container.G + for t in container.T + ) + + ( + ( + poi.quicksum( + container.capex[container.gen_type[g]] + * container.Build_Out[g] + for g in container.G + if container.gen_type[g] in container.capex + ) + + poi.quicksum( + container.gen_overhead[g] + * container.Build_Out[g] + * len(container.T) + for g in container.G + ) + ) + if capacity_expansion + else 0 + ) + ), + poi.ObjectiveSense.Minimize, + ) + + if security_constrained: + self.add_security_constraints(model, container) + + if yearly_limits: + self.add_yearly_limits(model, container) + + if variable_capacity_factors: + self.add_vcf(model, container) + + return model + + def add_security_constraints(self, model, container): + bodf = pl.read_parquet("branch_outage_dist_facts.parquet").select( + "outage_line_id", "affected_line_id", "factor" + ) + bodf_dict = poi.tupledict( + {(row[0], row[1]): row[2] for row in bodf.iter_rows()} + ) + + conting_flow = poi.make_tupledict( + bodf_dict.keys(), + container.T, + rule=lambda out, aff, t: ( + container.Power_Flow[aff, t] + + bodf_dict[out, aff] * container.Power_Flow[out, t] + ), + ) + + container.Con_Contingency_Pos = poi.make_tupledict( + conting_flow.keys(), + rule=lambda out, aff, t: model.add_linear_constraint( + conting_flow[out, aff, t] <= container.line_rating[aff] + ), + ) + + container.Con_Contingency_Neg = poi.make_tupledict( + conting_flow.keys(), + rule=lambda out, aff, t: model.add_linear_constraint( + -container.line_rating[aff] <= conting_flow[out, aff, t] + ), + ) + + def add_vcf(self, model, container): + vcf_df = pl.read_parquet("variable_capacity_factors.parquet") + vcf_type_map = pl.read_csv("map_type_to_vcf_type.csv") + + type_to_vcf_type = poi.tupledict( + dict(zip(vcf_type_map["type"], vcf_type_map["vcf_type"])) + ) + + vcf_dict = { + (row["vcf_type"], row["datetime"]): row["capacity_factor"] + for row in vcf_df.iter_rows(named=True) + if row["datetime"] in 
container.T + } + container.Con_Variable_Dispatch_Limit = poi.make_tupledict( + container.G, + container.T, + rule=lambda g, t: model.add_linear_constraint( + container.Dispatch[g, t] + <= vcf_dict[type_to_vcf_type[container.gen_type[g]], t] + * container.gen_pmax[g] + ) + if container.gen_type[g] in type_to_vcf_type + else None, + ) + + def add_yearly_limits(self, model, container): + yearly_limits_df = pl.read_parquet("yearly_limits.parquet") + yearly_limit = poi.tupledict( + dict(zip(yearly_limits_df["type"], yearly_limits_df["limit"])) + ) + + container.Con_Yearly_Limits = poi.make_tupledict( + yearly_limit.keys(), + rule=lambda cat: model.add_linear_constraint( + poi.quicksum( + container.Dispatch[g, t] + for g in container.G + for t in container.T + if container.gen_type[g] == cat + ) + <= yearly_limit[cat] * len(container.T) / (24 * 365) + ), + ) + + def write_results( + self, + model, + capacity_expansion: bool = True, + security_constrained: bool = True, + **kwargs, + ): + container = self.container + + power_flow_keys = ["line_id", "datetime"] + _tuple_dict_to_df(model, container.Power_Flow, *power_flow_keys).join( + _tuple_dict_to_df( + model, container.Power_Flow_ub, *power_flow_keys, dual=True + ).rename({"dual": "ub_dual"}), + on=power_flow_keys, + ).join( + _tuple_dict_to_df( + model, container.Power_Flow_lb, *power_flow_keys, dual=True + ).rename({"dual": "lb_dual"}), + on=power_flow_keys, + ).write_parquet("power_flow.parquet") + + if capacity_expansion: + _tuple_dict_to_df(model, container.Build_Out, "gen_id").write_parquet( + "build_out.parquet" + ) + _tuple_dict_to_df( + model, container.Dispatch, "gen_id", "datetime" + ).write_parquet("dispatch.parquet") + _tuple_dict_to_df( + model, container.Load_Unserved, "bus", "datetime" + ).write_parquet("load_unserved.parquet") + + _tuple_dict_to_df( + model, container.Con_Power_Balance, "bus", "datetime", dual=True + ).write_parquet("power_balance_duals.parquet") + + +def _tuple_dict_to_df(model, tdict, 
*key_names, dual=False): + if dual: + tdict = tdict.map( + lambda x: model.get_constraint_attribute(x, poi.ConstraintAttribute.Dual) + ) + else: + tdict = tdict.map(model.get_value) + + if len(key_names) > 1: + gen_expr = ((*key, value) for key, value in tdict.items()) + else: + gen_expr = ((key, value) for key, value in tdict.items()) + return pl.DataFrame( + gen_expr, + orient="row", + schema=list(key_names) + ["dual" if dual else "solution"], + ) + + +if __name__ == "__main__": + from pathlib import Path + + base_dir = Path(__file__).parent + benchmark = Bench( + size=1, + input_dir=base_dir / "model_data", + results_dir=base_dir / "results_pyoptinterface", + security_constrained=True, + capacity_expansion=True, + yearly_limits=True, + variable_capacity_factors=True, + ) + benchmark.run() diff --git a/benchmarks/src/energy_planning/model_data/.snakemake_timestamp b/benchmarks/src/energy_planning/model_data/.snakemake_timestamp new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/src/energy_planning/model_data/branch_outage_dist_facts.parquet b/benchmarks/src/energy_planning/model_data/branch_outage_dist_facts.parquet new file mode 100644 index 00000000..524c8fcd Binary files /dev/null and b/benchmarks/src/energy_planning/model_data/branch_outage_dist_facts.parquet differ diff --git a/benchmarks/src/energy_planning/model_data/capex_costs.csv b/benchmarks/src/energy_planning/model_data/capex_costs.csv new file mode 100644 index 00000000..e2bdc431 --- /dev/null +++ b/benchmarks/src/energy_planning/model_data/capex_costs.csv @@ -0,0 +1,10 @@ +type,yearly_capex_cost_per_KW +Solar PV,59.67404719885268 +Nuclear,380.81794849770006 +Geothermal,324.112942393661 +Coal,195.2797913163305 +Biopower,238.904638837437 +CSP,258.99525386313184 +Hydropower,162.0668222019773 +Wind,70.397661179339 +Natural Gas,72.12940494501015 diff --git a/benchmarks/src/energy_planning/model_data/cost_parameters.csv b/benchmarks/src/energy_planning/model_data/cost_parameters.csv 
new file mode 100644 index 00000000..05d3c29b --- /dev/null +++ b/benchmarks/src/energy_planning/model_data/cost_parameters.csv @@ -0,0 +1,2 @@ +name,cost +load_unserved_MWh,35000 \ No newline at end of file diff --git a/benchmarks/src/energy_planning/model_data/generators.parquet b/benchmarks/src/energy_planning/model_data/generators.parquet new file mode 100644 index 00000000..2d1cf15f Binary files /dev/null and b/benchmarks/src/energy_planning/model_data/generators.parquet differ diff --git a/benchmarks/src/energy_planning/model_data/lines_simplified.parquet b/benchmarks/src/energy_planning/model_data/lines_simplified.parquet new file mode 100644 index 00000000..b38603e1 Binary files /dev/null and b/benchmarks/src/energy_planning/model_data/lines_simplified.parquet differ diff --git a/benchmarks/src/energy_planning/model_data/loads.parquet b/benchmarks/src/energy_planning/model_data/loads.parquet new file mode 100644 index 00000000..7432647b Binary files /dev/null and b/benchmarks/src/energy_planning/model_data/loads.parquet differ diff --git a/benchmarks/src/energy_planning/model_data/map_type_to_vcf_type.csv b/benchmarks/src/energy_planning/model_data/map_type_to_vcf_type.csv new file mode 100644 index 00000000..37db9d5a --- /dev/null +++ b/benchmarks/src/energy_planning/model_data/map_type_to_vcf_type.csv @@ -0,0 +1,4 @@ +type,vcf_type +Solar PV,solar +CSP,solar +Wind,wind \ No newline at end of file diff --git a/benchmarks/src/energy_planning/model_data/variable_capacity_factors.parquet b/benchmarks/src/energy_planning/model_data/variable_capacity_factors.parquet new file mode 100644 index 00000000..ef9bd2e5 Binary files /dev/null and b/benchmarks/src/energy_planning/model_data/variable_capacity_factors.parquet differ diff --git a/benchmarks/src/energy_planning/model_data/yearly_limits.parquet b/benchmarks/src/energy_planning/model_data/yearly_limits.parquet new file mode 100644 index 00000000..d08da99d Binary files /dev/null and 
b/benchmarks/src/energy_planning/model_data/yearly_limits.parquet differ diff --git a/benchmarks/src/energy_planning/raw_data/constants/cost_parameters.csv b/benchmarks/src/energy_planning/raw_data/constants/cost_parameters.csv new file mode 100644 index 00000000..05d3c29b --- /dev/null +++ b/benchmarks/src/energy_planning/raw_data/constants/cost_parameters.csv @@ -0,0 +1,2 @@ +name,cost +load_unserved_MWh,35000 \ No newline at end of file diff --git a/benchmarks/src/energy_planning/raw_data/constants/map_type_to_vcf_type.csv b/benchmarks/src/energy_planning/raw_data/constants/map_type_to_vcf_type.csv new file mode 100644 index 00000000..37db9d5a --- /dev/null +++ b/benchmarks/src/energy_planning/raw_data/constants/map_type_to_vcf_type.csv @@ -0,0 +1,4 @@ +type,vcf_type +Solar PV,solar +CSP,solar +Wind,wind \ No newline at end of file diff --git a/benchmarks/src/energy_planning/scripts/A_parse_matpower_case.py b/benchmarks/src/energy_planning/scripts/A_parse_matpower_case.py new file mode 100644 index 00000000..a46ad8db --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/A_parse_matpower_case.py @@ -0,0 +1,147 @@ +import marimo + +__generated_with = "0.18.4" +app = marimo.App() + + +@app.cell +def _(): + import polars as pl + + return (pl,) + + +@app.cell +def _(): + MATPOWER_CASE = "../raw_data/downloads/CaliforniaTestSystem.m" + BUS_OUTPUT_PATH = "../raw_data/preprocessed/matpower_bus.parquet" + BRANCH_OUTPUT_PATH = "../raw_data/preprocessed/matpower_branch.parquet" + GEN_OUTPUT_PATH = "../raw_data/preprocessed/matpower_gen.parquet" + return BRANCH_OUTPUT_PATH, BUS_OUTPUT_PATH, GEN_OUTPUT_PATH, MATPOWER_CASE + + +@app.cell +def _(MATPOWER_CASE): + # Load the MATPOWER case file + with open(MATPOWER_CASE) as f: + matpower_case = f.read() + matpower_case[:100] + return (matpower_case,) + + +@app.cell +def _(matpower_case, pl): + # Create tables for each section + tables = {} + ignored_sections = ("version", "baseMVA") + header = None + for i, section in 
enumerate(matpower_case.split("\nmpc.")): + if i == 0: + continue + section_name, _, section_content = section.partition("=") + section_content, _, next_header = section_content.partition( + ";" + ) # before first element nothing to do + section_name = section_name.strip() + section_content = section_content.strip("[]\n ") + next_header = next_header.strip() + if section_name in ignored_sections: + header = next_header + print(f"Ignoring: {section_name}") + continue + print(f"Processing: {section_name}") + assert header is not None, f"Last section should not be: {section_name}" + header = header.split("\n") + header = [h.strip() for h in header] + header = [h for h in header if h.startswith("%")] + header = header[-1].strip("%").strip() + header = header.split() + section_content = [ + [val for val in row.strip().split("\t")] + for row in section_content.split("\n") + ] + _table = pl.DataFrame(section_content, schema=header, orient="row") + tables[section_name] = _table + header = next_header + tables + return (tables,) + + +@app.cell +def _(pl, tables): + # Clean tables (we do not expect any strings!) 
+ tables2 = {} + for table_name, _table in tables.items(): + for _col in _table.columns: + _table = _table.with_columns(pl.col(_col).cast(pl.Float64)) + if (_table[_col].round() == _table[_col]).all(): + _table = _table.with_columns(pl.col(_col).cast(pl.Int64)) + if _table[_col].is_in([0, 1]).all(): + _table = _table.with_columns(pl.col(_col).cast(pl.Boolean)) + _table = _table.with_columns(pl.col(_col).shrink_dtype()) + tables2[table_name] = _table + tables2 + return (tables2,) + + +@app.cell +def _(tables2): + assert {"bus", "gen", "branch", "gencost"} == set(tables2.keys()) + assert tables2["gen"].height == tables2["gencost"].height + bus, gen, branch, gencost = ( + tables2["bus"], + tables2["gen"], + tables2["branch"], + tables2["gencost"], + ) + assert (gencost["n"] == 3).all() + assert (gencost["2"] == 2).all() + gencost = gencost.drop("2", "n").rename( + {"c(n-1)": "cost_per_(MW)^2_h", "...": "cost_per_MWh", "c0": "cost_per_h"} + ) + gencost + return branch, bus, gen, gencost + + +@app.cell +def _(gen, gencost, pl): + gen_merged = pl.concat( + [gen.with_row_index(name="gen_id"), gencost], how="horizontal" + ) + gen_merged + return (gen_merged,) + + +@app.cell +def _(branch, bus, gen_merged): + # Drop columns with a single unique value + tables3 = (bus, branch, gen_merged) + tables4 = [] + for _table in tables3: + to_drop = [] + for _col in _table.columns: + if len(_table[_col].unique()) == 1: + to_drop.append(_col) + tables4.append(_table.drop(*to_drop)) + bus_1, branch_1, gen_merged_1 = tables4 + bus_1, branch_1, gen_merged_1 + return branch_1, bus_1, gen_merged_1 + + +@app.cell +def _( + BRANCH_OUTPUT_PATH, + BUS_OUTPUT_PATH, + GEN_OUTPUT_PATH, + branch_1, + bus_1, + gen_merged_1, +): + bus_1.write_parquet(BUS_OUTPUT_PATH) + branch_1.write_parquet(BRANCH_OUTPUT_PATH) + gen_merged_1.write_parquet(GEN_OUTPUT_PATH) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/B2_extract_load_data.py 
b/benchmarks/src/energy_planning/scripts/B2_extract_load_data.py new file mode 100644 index 00000000..0eecb2fb --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/B2_extract_load_data.py @@ -0,0 +1,139 @@ +import marimo + +__generated_with = "0.19.2" +app = marimo.App() + + +@app.cell +def _(): + import polars as pl + + DEBUG = False + return DEBUG, pl + + +@app.cell +def _(): + CATS_DATA = "../raw_data/downloads/CATS_loads.csv" + LOAD_DATA_OUT = "../raw_data/preprocessed/loads.parquet" + return CATS_DATA, LOAD_DATA_OUT + + +@app.cell +def _(CATS_DATA, pl): + # read dataframe + df = pl.scan_csv(CATS_DATA, has_header=False) + df.head().collect() + return (df,) + + +@app.cell +def _(df, pl): + # unpivot + df2 = ( + df.with_row_index(name="bus", offset=1) + .unpivot(index="bus", value_name="load", variable_name="hour") + .with_columns(pl.col("hour").str.strip_prefix("column_").cast(pl.Int32)) + .sort("bus", "hour") + # cache result since .unpivot cannot be streamed yet https://github.com/pola-rs/polars/issues/20947 + .collect() + .lazy() + ) + df2.collect() + return (df2,) + + +@app.cell +def _(DEBUG, df2, pl): + if DEBUG: + issues = df2.filter(~pl.col("load").str.ends_with("i")).collect() + if not issues.is_empty(): + raise ValueError(f"Some loads don't end with 'i' as expected\n: {issues}") + return + + +@app.cell +def _(df2, pl): + # keep only active loads + df3 = df2.with_columns( + pl.col("load") + .str.split_exact("+", 1) + .struct.field("field_0") + .cast(pl.Float64) + .alias("active_load") + ).drop("load") + + df3.head().collect() + return (df3,) + + +@app.cell +def _(df3, pl): + # parse date + df4 = df3.with_columns( + (pl.col("hour") - 1) * pl.duration(hours=1) + pl.datetime(2019, 1, 1) + ).rename({"hour": "datetime"}) + df4.head(25).collect() + return (df4,) + + +@app.cell +def _(DEBUG, df2, df4, pl): + # Confirm loads are always positive (i.e. 
dataset doesn't include generation) and drop zero loads + if DEBUG: + assert df4.filter(pl.col("active_load") < 0).collect().height == 0, ( + "Negative active loads found!" + ) + + df5 = df4.filter(pl.col("active_load") != 0) + + print( + df5.explain(optimized=True) + ) # Check the query is optimal (i.e. filter is done before other operations) + + if DEBUG: + df5 = df5.collect().lazy() # cache to avoid double computation + print( + f"{df5.collect().height / df2.collect().height:.2%} of loads are non-zero." + ) + return (df5,) + + +@app.cell +def _(DEBUG, df5, pl): + _plt = None + if DEBUG: + load_ca = df5.group_by("datetime").sum().drop("bus").collect() + _plt = ( + load_ca.group_by(pl.col("datetime").dt.hour()) + .mean() + .plot.line(x="datetime", y="active_load") + .properties(title="Average Load by Hour in California") + ) + _plt + return (load_ca,) + + +@app.cell +def _(DEBUG, load_ca, pl): + _plt = None + if DEBUG: + _plt = ( + load_ca.group_by(pl.col("datetime").dt.month()) + .mean() + .plot.line(x="datetime", y="active_load") + .properties(title="Average Load by Month in California") + ) + _plt + return + + +@app.cell +def _(LOAD_DATA_OUT, df5): + # Save processed data + df5.sink_parquet(LOAD_DATA_OUT) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/B_extract_generator_data.py b/benchmarks/src/energy_planning/scripts/B_extract_generator_data.py new file mode 100644 index 00000000..39d43129 --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/B_extract_generator_data.py @@ -0,0 +1,385 @@ +import marimo + +__generated_with = "0.20.4" +app = marimo.App() + + +@app.cell +def _(): + import altair as alt + import marimo as mo + import polars as pl + + alt.data_transformers.enable("vegafusion") + return alt, mo, pl + + +@app.cell +def _(mo): + mo.md(r""" + ## Merge CATS and MATPOWER data + """) + return + + +@app.cell +def _(): + CATS_DATA = "../raw_data/downloads/CATS_generators.csv" + MATPOWER_DATA = 
"../raw_data/preprocessed/matpower_gen.parquet" + OUTPUT_PATH = "../raw_data/preprocessed/generators.parquet" + return CATS_DATA, MATPOWER_DATA, OUTPUT_PATH + + +@app.cell +def _(CATS_DATA, pl): + # Load cats data + cats_df = pl.read_csv(CATS_DATA, encoding="iso-8859-1") + cats_df + return (cats_df,) + + +@app.cell +def _(cats_df, pl): + # Clean CATS data + + # Order matters, lets add this before anything else happens + cats_df_clean = cats_df.with_row_index(name="gen_id") + + # file has whitespaces that we must strip + cats_df_clean.columns = [c.strip() for c in cats_df_clean.columns] + cats_df_clean = cats_df_clean.with_columns( + pl.col( + c + for c, t in zip(cats_df_clean.columns, cats_df_clean.dtypes) + if (t == pl.String) + ).str.strip_chars() + ) + + # now that they're removed we can convert to numbers + cats_df_clean = cats_df_clean.with_columns( + pl.col("PlantCode").cast(pl.Int64), + pl.col("Lat").cast(pl.Float64), + pl.col("Lon").cast(pl.Float64), + pl.col("bus").cast(pl.Int64), + ) + cats_df_clean + return (cats_df_clean,) + + +@app.cell +def _(MATPOWER_DATA, cats_df_clean, pl): + # Load matpower data and check that they line up + matpower_df = pl.read_parquet(MATPOWER_DATA) + matpower_df + + from polars.testing import assert_frame_equal + + assert_frame_equal(matpower_df[["gen_id", "bus"]], cats_df_clean[["gen_id", "bus"]]) + return assert_frame_equal, matpower_df + + +@app.cell +def _(assert_frame_equal, cats_df_clean, matpower_df): + # Now let's merge with the matpower data to get costs + joined_df = cats_df_clean.join( + matpower_df, on=["gen_id", "bus"], how="full", coalesce=True, validate="1:1" + ) + + # Check that these columns are identical across both DFs, then remove + duplicate_cols = ["Qmax", "Qmin", "Pg", "Pmax"] + for c in duplicate_cols: + assert_frame_equal( + joined_df[[c]], joined_df[[f"{c}_right"]].rename({f"{c}_right": c}) + ) + joined_df = joined_df.drop(f"{c}_right") + + joined_df + return (joined_df,) + + +@app.cell +def _(mo): + 
mo.md(r""" + ## Analyze and clean data + """) + return + + +@app.cell +def _(joined_df, pl): + # Remove reactive elements + assert (joined_df["Pmin"] == 0).all() + df_clean1 = joined_df.drop("Pmin") + + reactive_elements = df_clean1.filter(pl.col("Pmax") == 0) + assert ( + reactive_elements["FuelType"].is_null().all() + and (reactive_elements["Qmax"] != 0).all() + ), "Expected reactive elements to have Pmax == 0 and Qmax != 0" + df_clean1 = df_clean1.filter(pl.col("Pmax") > 0).drop("Qmax", "Qmin", "Qg") + df_clean1 + return (df_clean1,) + + +@app.cell +def _(df_clean1, pl): + # Plot Pg and ultimately drop it since it only represents an hour (as evident by the constant capacity factor) + _plot = df_clean1.with_columns( + capacity_factor=pl.col("Pg") / pl.col("Pmax") + ).plot.scatter(x="Pmax", y="capacity_factor", color="FuelType") + df_clean2 = df_clean1.drop("Pg") + _plot + return (df_clean2,) + + +@app.cell +def _(mo): + mo.md(r""" + ### Cost analysis + """) + return + + +@app.cell +def _(mo): + mo.md(r""" + Costs were constructed in + > J. M. Snodgrass, "Tractable Algorithms for Constructing Electric Power Network Models," Ph.D. Thesis, The University of Wisconsin - Madison, 2021. https://github.com/WISPO-POP/SyntheticElectricNetworkModels/blob/master/Synthetic%20Generator%20Cost%20Curves/Creating%20synthetic%20generator%20cost%20curves.pdf + + We confirm that often the fixed overhead costs (`cost_per_h`) often scale with capacity so we choose to integrate them into the capacity expansion (CAPEX) costs. + + Meanwhile, the cost curves show little curvature meaning that a linear approximation is good enough (rather than a quadratic cost curve). + """) + return + + +@app.cell +def _(alt, df_clean2): + # Plot the various costs. 
Notice how fixed overheads (cost_per_h) often scale with capacity based on assumptions from data cons + x_axis = alt.X("Pmax", title="Max Capacity (MW)") + ( + df_clean2.plot.scatter(x=x_axis, y="cost_per_h", color="FuelType") + | df_clean2.plot.scatter(x=x_axis, y="cost_per_MWh", color="FuelType") + | df_clean2.plot.scatter(x=x_axis, y="cost_per_(MW)^2_h", color="FuelType") + ) + return + + +@app.cell +def _(alt, df_clean2, pl): + # Now ignoring fixed costs, plot normalized cost curves. Notice how the quadratic component is quite small! + import numpy as np + + x_vals = list(np.linspace(0, 1, 11)) + + assert (df_clean2["cost_per_(MW)^2_h"] >= 0).all() + cost_analysis = ( + df_clean2.with_columns( + cost_Pmax=pl.col("cost_per_(MW)^2_h") * pl.col("Pmax") ** 2 + + pl.col("cost_per_MWh") * pl.col("Pmax"), + ) + .filter(pl.col("cost_Pmax") != 0) + .with_columns( + cost_a=pl.col("cost_per_(MW)^2_h") + * pl.col("Pmax") ** 2 + / pl.col("cost_Pmax"), + cost_b=pl.col("cost_per_MWh") * pl.col("Pmax") / pl.col("cost_Pmax"), + ) + .unique(["cost_a", "cost_b", "cost_per_h", "FuelType"]) + .sort("cost_a", descending=True) + .sample(100) + .with_columns(x=pl.lit(x_vals)) + .explode("x") + .with_columns( + y=pl.col("cost_a") * pl.col("x") ** 2 + pl.col("cost_b") * pl.col("x"), + Px=pl.col("Pmax") * pl.col("x"), + ) + ) + cost_analysis.plot.line( + x=alt.X("x", title="Fraction of Pmax"), + y=alt.Y("y", title="Fraction of max cost"), + detail="gen_id", + color="FuelType", + ).mark_line(strokeWidth=0.2) + return + + +@app.cell +def _(alt, df_clean2, pl): + # Develop linear approximation and hourly overhead per MW capacity + df_clean3 = df_clean2.with_columns( + cost_per_MWh_linear=pl.col("cost_per_MWh") + + pl.col("cost_per_(MW)^2_h") * pl.col("Pmax"), + hourly_overhead_per_MW_capacity=pl.col("cost_per_h") / pl.col("Pmax"), + ).drop("cost_per_h") + ( + df_clean3.plot.scatter(x="Pmax", y="cost_per_MWh_linear", color="FuelType") + | df_clean3.plot.scatter( + x="Pmax", + 
y=alt.Y("hourly_overhead_per_MW_capacity", scale=alt.Scale(type="symlog")), + color="FuelType", + ) + ) + return (df_clean3,) + + +@app.cell +def _(mo): + mo.md(r""" + ### Fuel merging + """) + return + + +@app.cell +def _(alt, df_clean3): + # Inspect fuel types + df_clean3.group_by("FuelType").sum().plot.bar( + x=alt.X("FuelType", sort=alt.SortField(field="Pmax", order="descending")), + y=alt.Y("Pmax", title="Total Capacity (MW)"), + color="FuelType", + ) + return + + +@app.cell +def _(alt, df_clean3, pl): + # Merge fuel types and remove storage as well as 'all other' + + MAPPING = { + "Wood/Wood Waste Biomass": "Biopower", + "Municipal Solid Waste": "Biopower", + "Other Waste Biomass": "Biopower", + "Landfill Gas": "Biopower", + "Solar Photovoltaic": "Solar PV", + "Solar Thermal without Energy Storage": "CSP", + "Conventional Hydroelectric": "Hydropower", + "Onshore Wind Turbine": "Wind", + "Conventional Steam Coal": "Coal", + "Natural Gas Fired Combined Cycle": "Natural Gas", + "Natural Gas Fired Combustion Turbine": "Natural Gas", + "Natural Gas Steam Turbine": "Natural Gas", + "Natural Gas Internal Combustion Engine": "Natural Gas", + } + + DROP_TYPES = [ + "Batteries", + "All Other", + "Hydroelectric Pumped Storage", + "Petroleum Coke", + "Other Gases", + "Petroleum Liquids", + ] + + df_clean4 = df_clean3.with_columns(pl.col("FuelType").replace(MAPPING)).rename( + {"FuelType": "type"} + ) + + # Remove storage to simplify model and 'all other' since it's negligible + prior_capacity = df_clean4["Pmax"].sum() + df_clean4 = df_clean4.filter(~pl.col("type").is_in(DROP_TYPES)) + final_capacity = df_clean4["Pmax"].sum() + all_other_proportion = (prior_capacity - final_capacity) / prior_capacity + + df_clean4.group_by("type").sum().plot.bar( + x=alt.X("type", sort=alt.SortField(field="Pmax", order="descending")), + y=alt.Y("Pmax", title="Total Capacity (MW)"), + color="type", + ) + return all_other_proportion, df_clean4 + + +@app.cell +def _(all_other_proportion): + 
f"Removed {all_other_proportion:.2%} of total capacity" + return + + +@app.cell +def _(df_clean4, pl): + # Remove mBase since it's always 0 + assert (df_clean4["mBase"] == 0).all() + df_clean5 = df_clean4.drop("mBase") + + # Merge PlantCode and GenID + _SEP = "/" + assert df_clean5["GenID"].str.contains(_SEP).sum() == 0, ( + f"Cannot use {_SEP} as separator since GenID contains it" + ) + df_clean5 = df_clean5.with_columns( + pl.concat_str( + [pl.col("PlantCode").cast(pl.Utf8), pl.col("GenID").cast(pl.Utf8)], + separator=_SEP, + ).alias("PlantAndGenID") + ).drop("PlantCode", "GenID") + + df_clean5 + return (df_clean5,) + + +@app.cell +def _(mo): + mo.md(r""" + ### Aggregate like generators + """) + return + + +@app.cell +def _(df_clean5, pl): + # Fuse plants that are identical and have costs + _unique_cols = [ + "bus", + "type", + "cost_per_MWh_linear", + "hourly_overhead_per_MW_capacity", + ] + df_clean6 = df_clean5.group_by(_unique_cols).agg( + pl.col("gen_id").min(), + pl.col("Pmax").sum(), + pl.col("PlantAndGenID").unique(), + pl.col("Lat", "Lon").mean(), + ) + # Since Julia doesn't supported nested schemas + df_clean6 = df_clean6.with_columns(pl.col("PlantAndGenID").list.join(";")) + + ( + f"Reduced number of dispatchable generators by {len(df_clean5) - len(df_clean6)} by fusing identical plants.", + df_clean6, + ) + return (df_clean6,) + + +@app.cell +def _(df_clean6, pl): + # Produce summary table + summary_table = ( + df_clean6.group_by("type") + .agg( + n=pl.len(), + total_capacity_MW=pl.col("Pmax").sum().round(0), + median_cost_per_MWh_linear=pl.col("cost_per_MWh_linear").median().round(0), + meidan_capex_overhead_per_MWh=pl.col("hourly_overhead_per_MW_capacity") + .median() + .round(0), + ) + .sort("total_capacity_MW", descending=True) + ) + summary_table, summary_table.select("n", "total_capacity_MW").sum() + return + + +@app.cell +def _(df_clean6): + assert df_clean6["gen_id"].is_unique().all() + return + + +@app.cell +def _(OUTPUT_PATH, df_clean6): + 
df_clean6.write_parquet(OUTPUT_PATH) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/C1_simplify_network.py b/benchmarks/src/energy_planning/scripts/C1_simplify_network.py new file mode 100644 index 00000000..dedac142 --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/C1_simplify_network.py @@ -0,0 +1,284 @@ +"""Simplifies the network topology using various tricks and identifies leaf nodes. + +Tricks are: + - combining lines in parallel, + - removing lines that lead nowhere, + - and removing intermediary buses with no loads or generators. + +Line ratings and reactances are updated accordingly. +Line IDs are roughly preserved; merged lines take on the IDs of the original lines. +Node IDs are preserved, but the number of nodes is reduced. + +As of Aug 12, 2025, this script reduces the number of lines by 14.5%, mainly due to lines in series. + +Leaf nodes are nodes containing either loads or generators that can be wholly disconnected from the +grid by a single contingency. They are flagged so such contingencies are not considered. 
+""" + +from pathlib import Path + +import polars as pl + +f_bus = pl.col("from_bus") +t_bus = pl.col("to_bus") +voltage = pl.col("voltage_kv") +EXPECTED_COLS = ["line_id", f_bus, t_bus, "reactance", "line_rating_MW", voltage] + + +def get_buses_degree( + lines: pl.DataFrame, degree: int, exclude: pl.Series | None = None +) -> pl.Series: + """Returns a Series of buses with degree `degree` that are not in `exclude`.""" + lines = ( + pl.concat([lines["from_bus"], lines["to_bus"]]) + .value_counts(name="degree") + .filter(degree=degree) + ) + if exclude is not None: + lines = lines.filter(~f_bus.is_in(exclude.implode())) + + return lines.get_column("from_bus") + + +def remove_dead_ends(lines, buses_to_keep): + """Remove lines going from or to a bus with degree 1 (no other connections) since the line serves no purpose.""" + dead_end_buses = get_buses_degree(lines, degree=1, exclude=buses_to_keep) + + return lines.filter( + ~f_bus.is_in(dead_end_buses.implode()) & ~t_bus.is_in(dead_end_buses.implode()) + ), len(dead_end_buses) + + +def combine_parallel_lines(lines: pl.DataFrame): + """Combine lines in parallel (e.g., two lines going from bus 1 to bus 2).""" + initial = lines.height + lines = swap_direction(lines, condition=f_bus > t_bus) + lines = ( + lines.with_columns((1 / pl.col("reactance")).alias("reactance")) + .group_by([f_bus, t_bus, voltage]) + .agg( + pl.col("reactance").sum(), + pl.col("line_id").min(), + pl.col("line_rating_MW").sum(), + ) + .with_columns((1 / pl.col("reactance")).alias("reactance")) + ) + return lines, initial - lines.height + + +def swap_direction( + lines: pl.DataFrame, + condition: pl.Expr, + from_col=f_bus, + to_col=t_bus, +): + """Swap the direction of lines based on a condition.""" + return lines.with_columns( + pl.when(condition) + .then(pl.struct(from_bus=to_col, to_bus=from_col)) + .otherwise(pl.struct(from_col, to_col)) + .struct.unnest() + ) + + +def combine_sequential_line(lines: pl.DataFrame, buses_to_keep: pl.Series): + 
"""Combine lines in series (e.g., a line going from bus 1 to bus 2 with a line going from bus 2 to bus 3). + + This is done by doing two types of combinations: + 1. Combining line series of length 2 (e.g. A-B-C becomes A-C). + 2. Combining the first two lines in a series of length 3 or more (in series of length 3, A-B-C-D becomes A-C-D; in series of length 4 or more A-B-C-D-...-X-Y-Z becomes A-C-D-...-X-Z). + Note that this second type of operation doesn't fully combine the series, but if this function is called repeatedly, eventually all series will be combined. + """ + bus_to_remove = get_buses_degree(lines, degree=2, exclude=buses_to_keep).implode() + + l_edge = lines.filter(f_bus.is_in(bus_to_remove) ^ t_bus.is_in(bus_to_remove)) + l_middle = lines.filter(f_bus.is_in(bus_to_remove) & t_bus.is_in(bus_to_remove)) + l_keep = lines.filter(~f_bus.is_in(bus_to_remove) & ~t_bus.is_in(bus_to_remove)) + + # order such that the series bus is in "to_bus" + l_edge = swap_direction(l_edge, condition=f_bus.is_in(bus_to_remove)) + + l_edge_short = l_edge.filter(t_bus.is_duplicated()) + l_edge_long = l_edge.filter(~t_bus.is_duplicated()) + + l_edge_short = ( + l_edge_short.rename({"to_bus": "mid_bus"}) + .group_by("mid_bus") + .agg( + pl.col("line_id").min(), + f_bus.first(), + to_bus=f_bus.last(), + line_rating_MW=pl.col("line_rating_MW").min(), + reactance=pl.col("reactance").sum(), + # voltage should be identical so we could just use .first(), but if voltage has issue .mean() will reveal the problem + voltage_kv=voltage.mean(), + ) + .drop("mid_bus") + ) + + l_middle = swap_direction( + l_middle, condition=~t_bus.is_in(l_edge_long["to_bus"].implode()) + ) + + l_edge_long_uncombined = l_edge_long.join(l_middle, on=t_bus, how="anti") + l_edge_long = l_edge_long.join(l_middle, on=t_bus, how="inner") + l_middle_uncombined = l_middle.join(l_edge_long, on=t_bus, how="anti") + + l_edge_long = l_edge_long.select( + "line_id", + f_bus, + pl.col("from_bus_right").alias("to_bus"), + 
pl.min_horizontal("line_rating_MW", "line_rating_MW_right").alias( + "line_rating_MW" + ), + pl.sum_horizontal("reactance", "reactance_right").alias("reactance"), + voltage, + ) + + initial = lines.height + lines = pl.concat( + [ + l_keep.select(EXPECTED_COLS), + l_edge_short.select(EXPECTED_COLS), + l_edge_long.select(EXPECTED_COLS), + l_edge_long_uncombined.select(EXPECTED_COLS), + l_middle_uncombined.select(EXPECTED_COLS), + ] + ) + return lines, initial - lines.height + + +def remove_self_connections(lines): + """Remove lines that connect a bus to itself (e.g., a line going from bus 1 to bus 1).""" + initial = lines.height + lines = lines.filter(f_bus != t_bus) + return lines, initial - lines.height + + +def identify_leafs(lines, buses_to_keep): + """Flags lines that are at the very edge of the grid. + + These lines will later be excluded from branch outages since they would effectively split the network. + """ + lines = lines.with_columns(is_leaf=pl.lit(False)) + passes = 0 + + while True: + leaf_buses = get_buses_degree(lines.filter(~pl.col("is_leaf")), 1) + if len(leaf_buses) == 0: + break + if passes == 0: + assert leaf_buses.is_in(buses_to_keep.implode()).all(), ( + "Dead end buses were not properly removed" + ) + + lines = lines.with_columns( + is_leaf=( + pl.col("is_leaf") + | f_bus.is_in(leaf_buses.implode()) + | t_bus.is_in(leaf_buses.implode()) + ) + ) + passes += 1 + print( + f"Detected {lines.filter('is_leaf').height} leaf buses after {passes} passes." 
+ ) + return lines + + +def main_loop(lines, buses_to_keep): + """Simplify the network until there is no more simplification possible.""" + lines = lines.select(*EXPECTED_COLS) + + num_lines_initial = lines.height + lines_removed = 0 + lines_removed_prev = float("-inf") + while lines_removed > lines_removed_prev: + lines_removed_prev = lines_removed + lines, lines_merged = combine_parallel_lines(lines) + if lines_merged != 0: + print(f"Removed {lines_merged} lines in parallel...") + + lines, dead_ends = remove_dead_ends(lines, buses_to_keep) + if dead_ends != 0: + print(f"Removed {dead_ends} dead-end lines...") + + lines_concatenated = 0 + while True: + lines, new_lines_concatenated = combine_sequential_line( + lines, buses_to_keep + ) + if new_lines_concatenated == 0: + break + lines_concatenated += new_lines_concatenated + if lines_concatenated != 0: + print(f"Removed {lines_concatenated} lines in series...") + + lines, self_connections = remove_self_connections(lines) + if self_connections != 0: + print(f"Removed {self_connections} lines connected to themselves...") + + lines_removed += ( + dead_ends + lines_merged + lines_concatenated + self_connections + ) + + print( + f"Removed {lines_removed / num_lines_initial:.2%} of lines in total (l={lines.height})." 
+ ) + + lines = identify_leafs(lines, buses_to_keep) + + return lines + + +def main( + lines_path: Path | str, + gens_path: Path | str, + loads_path: Path | str, + output_path: Path | str, +): + lines = pl.read_parquet(lines_path) + num_buses = len(lines["from_bus"].append(lines["to_bus"]).unique()) + print(f"Network has {lines.height} lines and {num_buses} buses.") + + gens = pl.scan_parquet(gens_path) + loads = pl.scan_parquet(loads_path) + transformers = lines.filter(pl.col("transformer")).lazy() + buses_to_keep = ( + pl.concat( + [ + gens.select("bus"), + loads.select("bus"), + transformers.select(bus=f_bus), + transformers.select(bus=t_bus), + ], + how="vertical_relaxed", + ) + .unique() + .collect() + .to_series() + .sort() + ) + print( + f"{len(buses_to_keep)} buses with loads or generators that should not be removed." + ) + + lines = main_loop(lines, buses_to_keep) + + num_buses_simplified = len(lines["from_bus"].append(lines["to_bus"]).unique()) + + print( + f"Simplified network has {lines.height} lines and {num_buses_simplified} buses." 
+ ) + + lines.write_parquet(output_path) + + +if __name__ == "__main__": + input_dir = Path("../raw_data/") + main( + lines_path=input_dir / "lines.parquet", + gens_path=input_dir / "generators.parquet", + loads_path=input_dir / "loads.parquet", + output_path=input_dir / "simplified_lines.parquet", + ) diff --git a/benchmarks/src/energy_planning/scripts/C2_compute_power_transfer_dist_facts.py b/benchmarks/src/energy_planning/scripts/C2_compute_power_transfer_dist_facts.py new file mode 100644 index 00000000..84d36d60 --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/C2_compute_power_transfer_dist_facts.py @@ -0,0 +1,220 @@ +import marimo + +__generated_with = "0.19.2" +app = marimo.App() + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Compute Power Transfer Distribution Factors + """) + return + + +@app.cell +def _(): + import marimo as mo + import polars as pl + from scipy.sparse import coo_array, diags_array + from sksparse.cholmod import cholesky + + return cholesky, coo_array, diags_array, mo, pl + + +@app.cell +def _(): + DF_CUTOFF = 1e-10 + return (DF_CUTOFF,) + + +@app.cell +def _(): + LINES_DATA = "../raw_data/preprocessed/lines_simplified.parquet" + OUTPUT_PATH = "../raw_data/preprocessed/power_transfer_dist_facts.parquet" + return LINES_DATA, OUTPUT_PATH + + +@app.cell +def _(LINES_DATA, pl): + lines = pl.read_parquet( + LINES_DATA, columns=["line_id", "from_bus", "to_bus", "reactance"] + ) + L = lines.height + return L, lines + + +@app.cell +def _(lines): + # Step 1: Renumber lines from 1 to L contiguously. + lines_1 = lines.sort("line_id") + lines_1 = lines_1.with_row_index(name="line_index") + lines_map = lines_1.select("line_id", "line_index") + lines_1 = lines_1.drop("line_id") + lines_1 + return lines_1, lines_map + + +@app.cell +def _(lines_1, pl): + # Step 2: Renumber buses such that they go from 1 to N contiguously. 
+ bus_map = ( + pl.concat([lines_1["from_bus"], lines_1["to_bus"]]) + .unique() + .sort() + .rename("bus") + .to_frame() + .with_row_index(name="bus_index") + ) + N = bus_map.height + lines_2 = ( + lines_1.join(bus_map, left_on="from_bus", right_on="bus", coalesce=True) + .drop("from_bus") + .rename({"bus_index": "from_bus"}) + .join(bus_map, left_on="to_bus", right_on="bus", coalesce=True) + .drop("to_bus") + .rename({"bus_index": "to_bus"}) + ) + lines_2 + return N, bus_map, lines_2 + + +@app.cell +def _(N, lines_2, pl): + # Step 3: Form an admittance matrix in COO form (row_index (i), col_index (j), value) + lines_3 = lines_2.with_columns(suscep=1 / lines_2["reactance"]) + off_diagonal = pl.concat( + [ + lines_3.select( + pl.col("from_bus").alias("i"), + pl.col("to_bus").alias("j"), + -pl.col("suscep").alias("val"), + ), + lines_3.select( + pl.col("to_bus").alias("i"), + pl.col("from_bus").alias("j"), + -pl.col("suscep").alias("val"), + ), + ] + ) + diagonal = ( + pl.concat( + [ + lines_3.select("suscep", i=pl.col("from_bus")), + lines_3.select("suscep", i=pl.col("to_bus")), + ] + ) + .group_by("i") + .agg(pl.sum("suscep").alias("val")) + .select("i", pl.col("i").alias("j"), "val") + ) + assert len(diagonal) == N, "Diagonal entries should match the number of buses." + admittance_matrix_df = pl.concat([off_diagonal, diagonal]) + admittance_matrix_df + return admittance_matrix_df, lines_3 + + +@app.cell +def _(N, admittance_matrix_df, coo_array, pl): + # Step 4: Drop last row and column (slack bus). 
+ admittance_matrix_df_1 = admittance_matrix_df.filter( + pl.col("i") < N - 1, pl.col("j") < N - 1 + ) + admittance_matrix = coo_array( + ( + admittance_matrix_df_1["val"].to_numpy(), + ( + admittance_matrix_df_1["i"].cast(pl.Float64).to_numpy(), + admittance_matrix_df_1["j"].cast(pl.Float64).to_numpy(), + ), + ), + shape=(N - 1, N - 1), + ) + admittance_matrix + return (admittance_matrix,) + + +@app.cell +def _(admittance_matrix, cholesky): + # Step 5: Inverse the admittance matrix to get voltage angles. + # We use the sparse Cholesky factorization since, based on my tests, this is much faster than using any of the following: + # scipy.linalg.inv, scipy.sparse.linalg.inv, scipy.linag.pinv, scipy.sparse.linalg.spsolve + factor = cholesky(admittance_matrix.tocsc()) + voltage_angles = factor.inv() + voltage_angles + return (voltage_angles,) + + +@app.cell +def _(L, N, coo_array, diags_array, lines_3, pl, voltage_angles): + # Step 6: Calculate the power flow by multiplying the voltage angles by diag(S)*A^T + # A^T is the L by N adjacency matrix + adjacency_matrix_T = pl.concat( + [ + lines_3.select( + i=pl.col("line_index"), j=pl.col("from_bus"), val=pl.lit(1) + ).filter(pl.col("j") < N - 1), + lines_3.select( + i=pl.col("line_index"), j=pl.col("to_bus"), val=pl.lit(-1) + ).filter(pl.col("j") < N - 1), + ] + ) + adjacency_matrix_T = coo_array( + ( + adjacency_matrix_T["val"].to_numpy(), + (adjacency_matrix_T["i"].to_numpy(), adjacency_matrix_T["j"].to_numpy()), + ), + shape=(L, N - 1), + ) + diag_suscep = diags_array(lines_3.sort("line_index")["suscep"].to_numpy()) + power_flow = diag_suscep @ adjacency_matrix_T @ voltage_angles + power_flow = power_flow.tocoo() + power_flow_df = pl.DataFrame( + { + "injection": power_flow.col, + "line_index": power_flow.row, + "factor": power_flow.data, + } + ) + power_flow_df # Exclude slack bus, # Exclude slack bus + return (power_flow_df,) + + +@app.cell +def _(DF_CUTOFF, pl, power_flow_df): + # Step 7: Filter out small values + 
power_flow_df_1 = power_flow_df.filter(pl.col("factor").abs() > DF_CUTOFF) + power_flow_df_1 + return (power_flow_df_1,) + + +@app.cell +def _(bus_map, lines_map, power_flow_df_1): + # Step 8: Unmap buses and lines to original IDs. + power_flow_df_unmapped = ( + power_flow_df_1.join( + bus_map, + left_on="injection", + right_on="bus_index", + coalesce=True, + validate="m:1", + ) + .drop("injection") + .rename({"bus": "injection"}) + .join(lines_map, on="line_index") + .drop("line_index") + .select("injection", "line_id", "factor") + .sort("injection", "line_id") + ) + power_flow_df_unmapped + return (power_flow_df_unmapped,) + + +@app.cell +def _(OUTPUT_PATH, power_flow_df_unmapped): + power_flow_df_unmapped.write_parquet(OUTPUT_PATH) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/C3_compute_branch_outage_dist_facts.py b/benchmarks/src/energy_planning/scripts/C3_compute_branch_outage_dist_facts.py new file mode 100644 index 00000000..f36756ab --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/C3_compute_branch_outage_dist_facts.py @@ -0,0 +1,183 @@ +import marimo + +__generated_with = "0.19.2" +app = marimo.App() + + +@app.cell +def _(): + # See https://pypsa.readthedocs.io/en/latest/user-guide/contingency-analysis.html#branch-outage-distribution-factors-bodf + import altair as alt + import polars as pl + + alt.data_transformers.enable("vegafusion") + return alt, pl + + +@app.cell +def _(): + DF_PERC_CUTOFF = 0.02 + MIN_KV = 115 + ISLAND_SENSITIVITY = 1e-5 + return DF_PERC_CUTOFF, ISLAND_SENSITIVITY, MIN_KV + + +@app.cell +def _(): + LINES_DATA = "../raw_data/preprocessed/lines_simplified.parquet" + PTDF_DATA = "../raw_data/preprocessed/power_transfer_dist_facts.parquet" + OUTPUT_PATH = "../raw_data/preprocessed/branch_outage_dist_facts.parquet" + return LINES_DATA, OUTPUT_PATH, PTDF_DATA + + +@app.cell +def _(PTDF_DATA, pl): + ptdf = pl.read_parquet(PTDF_DATA) + ptdf + return (ptdf,) + + +@app.cell 
+def _(LINES_DATA, pl): + lines = pl.read_parquet( + LINES_DATA, + columns=[ + "line_id", + "from_bus", + "to_bus", + "is_leaf", + "voltage_kv", + "line_rating_MW", + ], + ) + lines + return (lines,) + + +@app.cell +def _(MIN_KV, lines, pl): + # Step 1: Remove leaf lines because a contingency would create an island. + lines_1 = lines.filter(~pl.col("is_leaf")).drop("is_leaf") + # Step 1b: Remove low-voltage lines (distribution system) (and keep transformers) + lines_1 = lines_1.filter( + (pl.col("voltage_kv") >= MIN_KV) | pl.col("voltage_kv").is_null() + ) + lines_1 + return (lines_1,) + + +@app.cell +def _(lines_1, pl, ptdf): + # Step 2: Compute the difference in PTDF for every from and to bus. + bptdf = ( + lines_1.rename({"line_id": "outage_line_id"}) + .join( + ptdf.rename({"injection": "from_bus", "factor": "injection"}), on="from_bus" + ) + .join( + ptdf.rename({"injection": "to_bus", "factor": "withdrawal"}), + on=["to_bus", "line_id"], + ) + .select( + "outage_line_id", + "line_id", + factor=pl.col("injection") - pl.col("withdrawal"), + ) + ) + bptdf + return (bptdf,) + + +@app.cell +def _(bptdf, pl): + # Step 3: Separate diagonal and off-diagonal entries + diagonal_entries = bptdf.filter(pl.col("outage_line_id") == pl.col("line_id")).drop( + "line_id" + ) + offdiagonal_entries = bptdf.filter(pl.col("outage_line_id") != pl.col("line_id")) + diagonal_entries + return diagonal_entries, offdiagonal_entries + + +@app.cell +def _(ISLAND_SENSITIVITY, diagonal_entries, pl): + # Step 4: Confirm that there are no bridge lines (lines that would cause the grid to split into two unconnected networks) + bridge_lines = diagonal_entries.filter(pl.col("factor") >= (1 - ISLAND_SENSITIVITY)) + if bridge_lines.height > 0: + raise ValueError( + f"Grid is not well connected\n{diagonal_entries.filter(pl.col('factor') >= (1 - ISLAND_SENSITIVITY))}" + ) + return + + +@app.cell +def _(diagonal_entries, offdiagonal_entries, pl): + # Step 5: Normalize the factors to account for the 
flow on the line in outage itself + bodf = ( + offdiagonal_entries.join( + diagonal_entries.select("outage_line_id", denominator=1 - pl.col("factor")), + on="outage_line_id", + ) + .with_columns(pl.col("factor") / pl.col("denominator")) + .drop("denominator") + .rename({"line_id": "affected_line_id"}) + ) + bodf + return (bodf,) + + +@app.cell +def _(alt, bodf, lines_1, pl): + # Compute percent increase in line rating + ratings = lines_1[["line_id", "line_rating_MW"]] + bodf_1 = bodf.join( + ratings.rename( + {"line_id": "affected_line_id", "line_rating_MW": "affected_line_rating_MW"} + ), + on="affected_line_id", + ).join( + ratings.rename( + {"line_id": "outage_line_id", "line_rating_MW": "outage_line_rating_MW"} + ), + on="outage_line_id", + ) + bodf_1 = bodf_1.with_columns( + percent_increase=pl.col("outage_line_rating_MW") + * pl.col("factor").abs() + / pl.col("affected_line_rating_MW") + ) + bodf_1 = bodf_1.drop("affected_line_rating_MW", "outage_line_rating_MW") + bodf_1.select(pl.col("percent_increase").log10()).plot.bar( + x=alt.X("percent_increase", bin=True), y="count()" + ) + return (bodf_1,) + + +@app.cell +def _(DF_PERC_CUTOFF, bodf_1, pl): + # Plot distribution of factors + bodf_1.sample(10_000).with_columns( + pl.col("percent_increase", "factor").abs().log10(), + keep=pl.col("percent_increase") > DF_PERC_CUTOFF, + ).plot.scatter(x="factor", y="percent_increase", color="keep") + return + + +@app.cell +def _(DF_PERC_CUTOFF, bodf_1, pl): + # Filter out near zeros + bodf_2 = bodf_1.filter(pl.col("percent_increase") > DF_PERC_CUTOFF) + bodf_2 = bodf_2.drop("percent_increase") + bodf_2 = bodf_2.sort("outage_line_id", "affected_line_id") + bodf_2 + return (bodf_2,) + + +@app.cell +def _(OUTPUT_PATH, bodf_2): + bodf_2.write_parquet(OUTPUT_PATH) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/C_extract_line_data.py b/benchmarks/src/energy_planning/scripts/C_extract_line_data.py new file mode 100644 
index 00000000..f96e1b2d --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/C_extract_line_data.py @@ -0,0 +1,256 @@ +import marimo + +__generated_with = "0.18.4" +app = marimo.App() + + +@app.cell +def _(): + import json + + import altair as alt + import marimo as mo + + alt.data_transformers.enable("vegafusion") + + import pandas as pd + import polars as pl + + return alt, json, mo, pd, pl + + +@app.cell +def _(): + CATS_DATA = "../raw_data/downloads/CATS_lines.json" + MATPOWER_DATA = "../raw_data/preprocessed/matpower_branch.parquet" + LINE_DATA_OUT = "../raw_data/preprocessed/lines.parquet" + return CATS_DATA, LINE_DATA_OUT, MATPOWER_DATA + + +@app.cell +def _(mo): + mo.md(r""" + ## Load CATS data + """) + return + + +@app.cell +def _(CATS_DATA, json, pd): + geojson = json.load(open(CATS_DATA)) + df_cats = pd.json_normalize(geojson, record_path=["features"]) + df_cats + return (df_cats,) + + +@app.cell +def _(df_cats, pl): + # Clean data + + # Don't include geometry since some lines are MultiLineStrings (lines with multiple segments) + # and these 2D arrays cannot be represented in the PyArrow format. 
+ + # Simplify column names + df_cats_clean = pl.from_pandas( + df_cats.drop(columns=["geometry.coordinates", "geometry.type"]) + ) + df_cats_clean.columns = [ + c.replace("properties.", "") for c in df_cats_clean.columns + ] + + # # Remove columns with no data and the coords since they're in the geometry column + df_cats_clean = df_cats_clean.drop( + "Lat1", + "Lon1", + "Lat2", + "Lon2", + "type", + "Structure_Type", + "Type", + "Circuit", + "Structure_Material", + ) + + # # Keep only one set of IDS + assert df_cats_clean["CATS_ID"].is_unique().all() + df_cats_clean = df_cats_clean.drop("id").cast({"CATS_ID": pl.Int64}) + # # Improve naming + col_names = { + "CATS_ID": "line_id", + "f_bus": "from_bus", + "t_bus": "to_bus", + "kV": "voltage_kv", + "rate_a": "line_rating", + "br_r": "resistance", + "br_x": "reactance", + "br_b": "total_capactive_susceptance", + } + df_cats_clean = df_cats_clean.rename(col_names) + df_cats_clean = df_cats_clean.select( + list(col_names.values()) + + [c for c in df_cats_clean.columns if c not in col_names.values()] + ) + + # Reorder from_bus and to_bus so from_bus < to_bus + df_cats_clean = df_cats_clean.with_columns( + pl.when(pl.col("from_bus") < pl.col("to_bus")) + .then(pl.col("from_bus")) + .otherwise(pl.col("to_bus")) + .alias("from_bus"), + pl.when(pl.col("from_bus") < pl.col("to_bus")) + .then(pl.col("to_bus")) + .otherwise(pl.col("from_bus")) + .alias("to_bus"), + ) + df_cats_clean + return (df_cats_clean,) + + +@app.cell +def _(df_cats_clean, pl): + assert df_cats_clean.filter(pl.col("from_bus") == pl.col("to_bus")).height == 0, ( + "Lines must not connect a bus to itself" + ) + + assert (df_cats_clean["reactance"] > 0).all(), ( + "Cannot have zero or negative reactance" + ) + return + + +@app.cell +def _(mo): + mo.md(r""" + ## Load MATPOWER data + """) + return + + +@app.cell +def _(MATPOWER_DATA, pl): + df_matpower = pl.read_parquet(MATPOWER_DATA) + df_matpower + return (df_matpower,) + + +@app.cell +def _(df_matpower, 
pl): + # Clean + df_matpower_clean = df_matpower.with_row_index("line_id", 1) + df_matpower_clean = df_matpower_clean.rename( + { + "f_bus": "from_bus", + "t_bus": "to_bus", + "x": "reactance", + "r": "resistance", + "b": "total_capactive_susceptance", + "rateA": "line_rating", + "ratio": "transformer", + } + ) + + # Reorder from_bus and to_bus so from_bus < to_bus + df_matpower_clean = df_matpower_clean.with_columns( + pl.when(pl.col("from_bus") < pl.col("to_bus")) + .then(pl.col("from_bus")) + .otherwise(pl.col("to_bus")) + .alias("from_bus"), + pl.when(pl.col("from_bus") < pl.col("to_bus")) + .then(pl.col("to_bus")) + .otherwise(pl.col("from_bus")) + .alias("to_bus"), + ) + + df_matpower_clean + return (df_matpower_clean,) + + +@app.cell +def _(mo): + mo.md(r""" + ## Check data matches + """) + return + + +@app.cell +def _(df_cats_clean, df_matpower_clean): + df_joined = df_cats_clean.join( + df_matpower_clean, on=["line_id"], validate="1:1", how="full", coalesce=True + ) + + # Drop columns that are identical + potential_duplicates = [ + c for c in df_joined.columns if f"{c}_right" in df_joined.columns + ] + for c in potential_duplicates: + if df_joined[f"{c}"].equals(df_joined[f"{c}_right"]): + df_joined = df_joined.drop(f"{c}_right") + + df_joined + return (df_joined,) + + +@app.cell +def _(alt, df_joined): + x_axis = alt.X("line_rating", title="Line Rating (?)", scale=alt.Scale(type="log")) + x_axis_right = alt.X( + "line_rating_right", title="Line Rating (MVA)", scale=alt.Scale(type="log") + ) + df_joined.plot.bar( + x=x_axis, y="count()", color="voltage_kv:N" + ) | df_joined.plot.bar(x=x_axis_right, y="count()", color="voltage_kv:N") + return + + +@app.cell +def _(df_joined): + # Matpower values are accurate. For some reason the GIS values were divided by 100. We keep the Matpower values. 
+ assert (df_joined["line_rating"] * 100 == df_joined["line_rating_right"]).all() + df_final = df_joined.drop("line_rating").rename( + {"line_rating_right": "line_rating_MW"} + ) + return (df_final,) + + +@app.cell +def _(LINE_DATA_OUT, df_final): + df_final.write_parquet(LINE_DATA_OUT) + return + + +@app.cell +def _(mo): + mo.md(r""" + # Plot some more + """) + return + + +@app.cell +def _(df_final): + df_final.plot.bar(x="voltage_kv:O", y="count()") + return + + +@app.cell +def _(alt, df_final, pl): + buses_analysis = pl.concat( + [ + df_final.select("voltage_kv", bus=pl.col("from_bus")), + df_final.select("voltage_kv", bus=pl.col("to_bus")), + ] + ) + buses_analysis = buses_analysis.group_by("bus").agg( + pl.col("voltage_kv").mean(), degree=pl.len() + ) + buses_analysis.plot.bar( + x="voltage_kv:O", + y=alt.Y("count()", stack="normalize", title="Distribution of bus degree"), + color="degree:N", + ) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/D_extract_capex_data.py b/benchmarks/src/energy_planning/scripts/D_extract_capex_data.py new file mode 100644 index 00000000..832a2bb7 --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/D_extract_capex_data.py @@ -0,0 +1,107 @@ +import marimo + +__generated_with = "0.18.4" +app = marimo.App() + + +@app.cell +def _(): + import altair as alt + import polars as pl + + alt.data_transformers.enable("vegafusion") + return (pl,) + + +@app.cell +def _(): + PAYBACK_PERIOD_YEARS = 20 + return (PAYBACK_PERIOD_YEARS,) + + +@app.cell +def _(): + NREL_DATA = "../raw_data/downloads/NREL_ATB_data.parquet" + OUTPUT_PATH = "../raw_data/preprocessed/capex_costs.csv" + return NREL_DATA, OUTPUT_PATH + + +@app.cell +def _(NREL_DATA, pl): + df = pl.read_parquet(NREL_DATA).cast({"default": pl.Boolean}) + df + return (df,) + + +@app.cell +def _(PAYBACK_PERIOD_YEARS, df, pl): + df_filter = ( + df.filter( + core_metric_parameter="CAPEX", + core_metric_case="R&D", + maturity="Y", + 
scenario="Moderate", + core_metric_variable=2030, + scale="Utility", + crpyears=str(PAYBACK_PERIOD_YEARS), + ) + .with_columns(has_default=pl.max("default").over("technology").cast(pl.Boolean)) + .filter((pl.col("has_default") & pl.col("default")) | (~pl.col("has_default"))) + ) + + df_filter + return (df_filter,) + + +@app.cell +def _(PAYBACK_PERIOD_YEARS, df_filter, pl): + df_clean = df_filter + for c in df_clean.columns: + if df_clean[c].unique().len() == 1: + df_clean = df_clean.drop(c) + + df_clean = ( + df_clean.group_by("technology_alias") + .agg(pl.col("value").mean()) + .rename({"technology_alias": "type"}) + ) + + df_clean = df_clean.with_columns( + yearly_capex_cost_per_KW=pl.col("value") / PAYBACK_PERIOD_YEARS + ).drop("value") + + df_clean + return (df_clean,) + + +@app.cell +def _(df_clean, pl): + MAPPING = { + "Land-Based Wind": "Wind", + "Utility PV": "Solar PV", + } + + DROP = [ + "Utility-Scale Battery Storage", + "Pumped Storage Hydropower", + "Utility-Scale PV-Plus-Battery", + "Offshore Wind", + "Midsize DW", + "Large DW", + ] + + df_final = df_clean.with_columns( + pl.col("type").replace(MAPPING).str.strip_chars() + ).filter(~pl.col("type").is_in(DROP)) + df_final + return (df_final,) + + +@app.cell +def _(OUTPUT_PATH, df_final): + df_final.write_csv(OUTPUT_PATH) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/E_compute_capacity_factors.py b/benchmarks/src/energy_planning/scripts/E_compute_capacity_factors.py new file mode 100644 index 00000000..4999cb56 --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/E_compute_capacity_factors.py @@ -0,0 +1,161 @@ +import marimo + +__generated_with = "0.18.4" +app = marimo.App() + + +@app.cell +def _(): + import altair as alt + import marimo as mo + import polars as pl + + alt.data_transformers.enable("vegafusion") + return mo, pl + + +@app.cell +def _(): + GENERATOR_DATA = "../raw_data/preprocessed/generators.parquet" + HOURLY_GENERATION_DATA 
= "../raw_data/downloads/CATS_generation.csv" + TYPE_MAPPING = "../raw_data/constants/map_type_to_vcf_type.csv" + YEARLY_LIMIT_OUTPUT = "../raw_data/preprocessed/yearly_limits.parquet" + VCF_OUTPUT = "../raw_data/preprocessed/variable_capacity_factors.parquet" + return ( + GENERATOR_DATA, + HOURLY_GENERATION_DATA, + TYPE_MAPPING, + VCF_OUTPUT, + YEARLY_LIMIT_OUTPUT, + ) + + +@app.cell +def _(TYPE_MAPPING, pl): + df_type_mapping = pl.read_csv(TYPE_MAPPING) + df_type_mapping + VCF_TYPES = df_type_mapping["vcf_type"].unique().to_list() + return VCF_TYPES, df_type_mapping + + +@app.cell +def _(GENERATOR_DATA, pl): + gen = pl.read_parquet(GENERATOR_DATA, columns=["gen_id", "type", "Pmax"]) + gen + return (gen,) + + +@app.cell +def _(HOURLY_GENERATION_DATA, pl): + df = pl.read_csv(HOURLY_GENERATION_DATA, null_values=["NA", "#VALUE!"]) + df = df.with_columns(pl.col("Date").str.to_datetime("%d-%m-%Y %H:%M")) + df = df.rename(lambda c: c.lower()).rename({"date": "datetime"}) + df + return (df,) + + +@app.cell +def _(YEARLY_LIMIT_OUTPUT, df, pl): + # get large hydro upper limit + max_energy_genearation = pl.DataFrame( + {"type": ["Hydropower"], "limit": [df.get_column("large hydro").sum()]} + ) + max_energy_genearation.write_parquet(YEARLY_LIMIT_OUTPUT) + max_energy_genearation + return + + +@app.cell +def _(df_type_mapping, gen, pl): + max_capacity = gen.join(df_type_mapping, on="type", how="inner") + max_capacity = max_capacity.group_by("vcf_type").agg(pl.col("Pmax").sum()) + max_capacity + return (max_capacity,) + + +@app.cell +def _(VCF_TYPES, df): + df2 = df.select(["datetime"] + VCF_TYPES) + df2 + return (df2,) + + +@app.cell +def _(VCF_TYPES, df2, max_capacity, pl): + df3 = df2 + for t in VCF_TYPES: + df3 = df3.with_columns( + pl.col(t) / max_capacity.filter(vcf_type=t).get_column("Pmax") + ) + df3 = df3.unpivot( + index=["datetime"], variable_name="vcf_type", value_name="capacity_factor" + ) + df3 + return (df3,) + + +@app.cell +def _(mo): + mo.md(r""" + # Plot 
capacity factors + """) + return + + +@app.cell +def _(df3, pl): + _cf_by_hour = ( + df3.group_by(pl.col("datetime").dt.hour().alias("hour"), "vcf_type") + .mean() + .sort("hour") + .drop("datetime") + ) + _cf_by_hour.plot.line(x="hour", y="capacity_factor", color="vcf_type").properties( + title="Capacity Factors by Hour" + ) + return + + +@app.cell +def _(df3, pl): + _cf_by_hour = ( + df3.group_by(pl.col("datetime").dt.month().alias("month"), "vcf_type") + .mean() + .sort("month") + .drop("datetime") + ) + _cf_by_hour.plot.line(x="month", y="capacity_factor", color="vcf_type").properties( + title="Capacity Factors by Month" + ) + return + + +@app.cell +def _(VCF_TYPES, df3): + dist_bucket_edges = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] + plot = None + for _t in VCF_TYPES: + subplot = ( + df3.filter(vcf_type=_t) + .get_column("capacity_factor") + .hist(bins=dist_bucket_edges) + .plot.bar(x="breakpoint:O", y="count") + .properties(title=f"{_t.title()} Capacity Factor Distribution") + ) + if plot is None: + plot = subplot + else: + plot |= subplot + plot + return + + +@app.cell +def _(VCF_OUTPUT, df3): + df3.write_parquet(VCF_OUTPUT) + df3 + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/Z_analyze_results.py b/benchmarks/src/energy_planning/scripts/Z_analyze_results.py new file mode 100644 index 00000000..6d2de633 --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/Z_analyze_results.py @@ -0,0 +1,172 @@ +import marimo + +__generated_with = "0.19.11" +app = marimo.App() + + +@app.cell +def _(): + from pathlib import Path + + import altair as alt + import marimo as mo + import polars as pl + + alt.data_transformers.enable("vegafusion") + return Path, alt, mo, pl + + +@app.cell +def _(Path): + RESULTS_DIR = Path("../results/") + INPUT_DIR = Path("../model_data/") + return INPUT_DIR, RESULTS_DIR + + +@app.cell +def _(mo): + mo.md(r""" + ## Analayze buildout + """) + return + + +@app.cell +def 
_(INPUT_DIR, RESULTS_DIR, pl): + buildout = pl.read_parquet(RESULTS_DIR / "build_out.parquet") + gens = pl.read_parquet(INPUT_DIR / "generators.parquet") + gen_data = gens.join(buildout.rename({"solution": "build_mw"}), on="gen_id") + gen_data + return gen_data, gens + + +@app.cell +def _(gen_data, pl): + _plot_data = gen_data.group_by("type").agg(pl.col("build_mw", "Pmax").sum()) + + _plt = _plot_data.plot.bar( + x="type", y="build_mw", color="type" + ) + _plot_data.plot.scatter(x="type", y="Pmax", color="type") + # add title to atlas plot + _plt.properties( + title="Built Capacity Relative to Total Potential Capacity by Generator Type", + # xlabel="Generator Type", + # ylabel="Capacity (MW)", + ) + return + + +@app.cell +def _(mo): + mo.md(r""" + ## Analzye dispatch + """) + return + + +@app.cell +def _(INPUT_DIR, RESULTS_DIR, alt, gens, pl): + dispatch = pl.read_parquet(RESULTS_DIR / "dispatch.parquet").filter( + pl.col("solution") != 0 + ) + + dispatch = dispatch.join(gens.select(["gen_id", "type"]), on="gen_id") + dispatch = dispatch.group_by("type", "datetime").agg(pl.col("solution").sum()) + dispatch = dispatch.join( + dispatch.group_by("type").agg(std=pl.col("solution").std()), on="type" + ).sort("std", "type", "datetime") + _plot = dispatch.plot.area( + x="datetime:T", + y=alt.Y("solution:Q"), + color=alt.Color("type:N", sort=alt.SortField("std", order="descending")), + order=alt.Order("std:Q"), + ) + + load = pl.read_parquet(INPUT_DIR / "loads.parquet") + load = load.group_by("datetime").agg(pl.col("active_load").sum()) + load = load.filter(pl.col("datetime").is_in(dispatch["datetime"].implode())) + _plot += load.plot.line(x="datetime:T", y="active_load", color=alt.value("black")) + _plot + return (load,) + + +@app.cell +def _(mo): + mo.md(r""" + ## Plot congestion and line flow + """) + return + + +@app.cell +def _(RESULTS_DIR, pl): + df_flow = pl.read_parquet(RESULTS_DIR / "power_flow.parquet") + df_flow = df_flow.with_columns( + 
congested=(pl.col("ub_dual").abs() + pl.col("lb_dual").abs()) > 0.01 + ) + df_flow + return (df_flow,) + + +@app.cell +def _(alt, df_flow, pl): + df_flow_agg = df_flow.group_by("datetime").agg( + pl.col("solution").abs().sum().alias("total_flow") + ) + _left = df_flow_agg.plot.line(x="datetime:T", y="total_flow") + + congested_count = df_flow.group_by("datetime").agg( + pl.col("congested").sum().alias("num_congested") + ) + _right = congested_count.plot.line( + x="datetime:T", + y=alt.Y("num_congested", axis=alt.Axis(orient="right")), + color=alt.value("red"), + ) + chart = alt.layer(_left, _right).resolve_scale(y="independent") + chart + return + + +@app.cell +def _(mo): + mo.md(r""" + ## Plot unserved load + """) + return + + +@app.cell +def _(RESULTS_DIR, pl): + df_unserved = pl.read_parquet(RESULTS_DIR / "load_unserved.parquet") + df_unserved = df_unserved.filter(pl.col("solution") >= 1e-6) + df_unserved + return (df_unserved,) + + +@app.cell +def _(df_unserved, pl): + df_unserved.group_by("bus").agg(pl.sum("solution")).plot.bar( + x="bus:O", y="solution" + ) + return + + +@app.cell +def _(df_unserved, load, pl): + unserved_date = df_unserved.group_by("datetime").agg(pl.sum("solution")) + unserved_date = unserved_date.join( + load.group_by("datetime").agg(pl.col("active_load").sum()), on="datetime" + ) + unserved_date = unserved_date.with_columns( + percent_unserved=pl.col("solution") / pl.col("active_load") + ) + ( + unserved_date.plot.line(x="datetime", y="solution") + | unserved_date.plot.line(x="datetime", y="percent_unserved") + ) + return + + +if __name__ == "__main__": + app.run() diff --git a/benchmarks/src/energy_planning/scripts/__init__.py b/benchmarks/src/energy_planning/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/src/energy_planning/scripts/tests/expected_bodfs.csv b/benchmarks/src/energy_planning/scripts/tests/expected_bodfs.csv new file mode 100644 index 00000000..a46f51d4 --- /dev/null +++ 
b/benchmarks/src/energy_planning/scripts/tests/expected_bodfs.csv @@ -0,0 +1,36 @@ +outage_line_id,affected_line_id,factor +1,2,0.5862068965517241 +1,3,0.4137931034482758 +1,4,-0.5517241379310341 +1,5,0.24137931034482754 +1,6,0.10344827586206888 +1,7,-0.4482758620689653 +1,8,0.3448275862068964 +2,1,0.5862068965517241 +2,3,0.4137931034482759 +2,4,0.24137931034482774 +2,5,-0.5517241379310345 +2,6,0.10344827586206903 +2,7,0.3448275862068968 +2,8,-0.4482758620689654 +3,1,0.5 +3,2,0.5000000000000001 +3,4,0.37500000000000006 +3,5,0.375 +3,6,-0.25 +3,7,0.12500000000000008 +3,8,0.125 +4,1,-0.4571428571428571 +4,2,0.2 +4,3,0.2571428571428571 +4,5,0.3142857142857142 +4,6,-0.4285714285714285 +4,7,0.5428571428571428 +4,8,-0.11428571428571424 +5,1,0.2 +5,2,-0.4571428571428571 +5,3,0.2571428571428571 +5,4,0.3142857142857142 +5,6,-0.4285714285714286 +5,7,-0.11428571428571435 +5,8,0.5428571428571427 diff --git a/benchmarks/src/energy_planning/scripts/tests/expected_ptdfs.csv b/benchmarks/src/energy_planning/scripts/tests/expected_ptdfs.csv new file mode 100644 index 00000000..a6e84ffa --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/tests/expected_ptdfs.csv @@ -0,0 +1,48 @@ +injection,line_id,factor +1,1,1.0 +1,2,0.39130434782608714 +1,3,0.39130434782608714 +1,4,0.21739130434782616 +1,5,0.04347826086956519 +1,6,0.04347826086956519 +1,7,0.3043478260869563 +1,8,0.3478260869565215 +1,9,0.3478260869565215 +1,10,0.9999999999999993 +3,2,0.39130434782608714 +3,3,0.39130434782608714 +3,4,0.21739130434782616 +3,5,0.04347826086956519 +3,6,0.04347826086956519 +3,7,0.3043478260869563 +3,8,0.3478260869565215 +3,9,0.3478260869565215 +3,10,0.9999999999999993 +4,2,-0.188405797101449 +4,3,0.14492753623188426 +4,4,0.0434782608695653 +4,5,0.2753623188405796 +4,6,-0.05797101449275366 +4,7,0.26086956521739113 +4,8,0.5362318840579707 +4,9,0.20289855072463747 +4,10,0.9999999999999993 +5,2,0.04347826086956519 +5,3,0.04347826086956519 +5,4,-0.08695652173913049 +5,5,-0.21739130434782616 
+5,6,-0.21739130434782616 +5,7,0.4782608695652173 +5,8,0.26086956521739113 +5,9,0.26086956521739113 +5,10,0.9999999999999993 +6,2,0.14492753623188426 +6,3,-0.188405797101449 +6,4,0.0434782608695653 +6,5,-0.05797101449275366 +6,6,0.2753623188405796 +6,7,0.26086956521739113 +6,8,0.20289855072463747 +6,9,0.5362318840579707 +6,10,0.9999999999999993 +7,10,0.9999999999999996 diff --git a/benchmarks/src/energy_planning/scripts/tests/test_branch_outage_dist_fact.py b/benchmarks/src/energy_planning/scripts/tests/test_branch_outage_dist_fact.py new file mode 100644 index 00000000..491d449a --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/tests/test_branch_outage_dist_fact.py @@ -0,0 +1,92 @@ +"""Tests for the branch outage distribution factor script. + +I made up an example and manualy checked the results were as expected. +""" + +from pathlib import Path +from tempfile import TemporaryDirectory + +import polars as pl +import pytest +from benchmark_utils import run_notebook +from polars.testing import assert_frame_equal + +ROOT = Path(__file__).parent.parent.parent.parent.parent + + +def test_mini_grid(): + """Testing the following grid. + + Assume reactances are all 1 except for line 3 where reactance is 0.5. 
+ + 4 + ➚↓↘ + 1➚ ↓ ↘ + ➚ ↓4 ↘7 + ➚ ↓ ↘ + 3→→→→5→→→→→7 + ↘ 3 ↑ 6 ↗ + 2↘ ↑5 ↗8 + ↘ ↑ ↗ + ↘↑↗ + 6 + + """ + lines = pl.DataFrame( + schema=["line_id", "from_bus", "to_bus", "reactance"], + data=[ + [1, 3, 4, 1], + [2, 3, 6, 1], + [3, 3, 5, 2], + [4, 4, 5, 1], + [5, 6, 5, 1], + [6, 5, 7, 1], + [7, 4, 7, 1], + [8, 6, 7, 1], + ], + orient="row", + ).with_columns(is_leaf=pl.lit(False)) + + # Save mini grid data to a temporary file + with TemporaryDirectory() as tmpdirname: + tmpdirname = Path(tmpdirname) + lines.write_parquet(tmpdirname / "lines.parquet") + + run_notebook( + notebook_path=ROOT + / "src/energy_planning/scripts/compute_power_transfer_dist_facts.ipynb", + working_directory=Path(tmpdirname), + first_cell=""" +from unittest.mock import Mock +snakemake = Mock() +snakemake.input = Mock() +snakemake.input.lines = "lines.parquet" +snakemake.output = ["ptdf.parquet"] +""", + ) + + run_notebook( + notebook_path=ROOT + / "src/energy_planning/scripts/compute_branch_outage_dist_facts.ipynb", + working_directory=Path(tmpdirname), + first_cell=""" +from unittest.mock import Mock +snakemake = Mock() +snakemake.input = Mock() +snakemake.input.ptdf = "ptdf.parquet" +snakemake.input.lines = "lines.parquet" +snakemake.output = ["bodf.parquet"] +""", + ) + + bodf = pl.read_parquet(tmpdirname / "bodf.parquet") + + assert_frame_equal( + bodf, + pl.read_csv(Path(__file__).parent / "expected_bodfs.csv"), + check_dtypes=False, + ) + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/benchmarks/src/energy_planning/scripts/tests/test_compute_power_distribution_factor.py b/benchmarks/src/energy_planning/scripts/tests/test_compute_power_distribution_factor.py new file mode 100644 index 00000000..a68b2882 --- /dev/null +++ b/benchmarks/src/energy_planning/scripts/tests/test_compute_power_distribution_factor.py @@ -0,0 +1,82 @@ +"""Tests for the compute power distribution factor script. + +I made up an example and manualy checked the results were as expected. 
+""" + +from pathlib import Path +from tempfile import TemporaryDirectory + +import polars as pl +import pytest +from benchmark_utils import run_notebook +from polars.testing import assert_frame_equal + +ROOT = Path(__file__).parent.parent.parent.parent.parent + + +def test_mini_grid(): + """Testing the following grid. + + Assume reactances are all 1 except for line 4 where reactance is 2. + + 4 + ➚↓↘ + 2➚ ↓ ↘8 + ➚ ↓5 ↘ + ➚ ↓ ↘ 10 + 1→→→→3→→→→5→→→→→7→→→→→8 + 1 ↘ 4 ↑ 7 ↗ + 3↘ ↑6 ↗9 + ↘ ↑ ↗ + ↘↑↗ + 6 + + """ + lines = pl.DataFrame( + schema=["line_id", "from_bus", "to_bus", "reactance"], + data=[ + [1, 1, 3, 1], + [2, 3, 4, 1], + [3, 3, 6, 1], + [4, 3, 5, 2], + [5, 4, 5, 1], + [6, 6, 5, 1], + [7, 5, 7, 1], + [8, 4, 7, 1], + [9, 6, 7, 1], + [10, 7, 8, 1], + ], + orient="row", + ) + + # Save mini grid data to a temporary file + with TemporaryDirectory() as tmpdirname: + tmpdirname = Path(tmpdirname) + lines.write_parquet(tmpdirname / "lines.parquet") + + run_notebook( + notebook_path=ROOT + / "src/energy_planning/scripts/compute_power_transfer_dist_facts.ipynb", + working_directory=Path(tmpdirname), + first_cell=""" +from unittest.mock import Mock +snakemake = Mock() +snakemake.input = Mock() +snakemake.input.lines = "lines.parquet" +snakemake.output = ["ptdf.parquet"] +""", + debug=False, + ) + + ptdf = pl.read_parquet(tmpdirname / "ptdf.parquet") + + ptdf.write_csv(Path(__file__).parent / "expected_ptdfs_new.csv") + assert_frame_equal( + ptdf, + pl.read_csv(Path(__file__).parent / "expected_ptdfs.csv"), + check_dtypes=False, + ) + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/benchmarks/src/facility_location/README.md b/benchmarks/src/facility_location/README.md new file mode 100644 index 00000000..a8803e6a --- /dev/null +++ b/benchmarks/src/facility_location/README.md @@ -0,0 +1,42 @@ +# Facility Location Problem + +This is the exact same problem as described in the original [JuMP paper](https://epubs.siam.org/doi/10.1137/15M1020575) 
(see Section 8.2). It is re-explained here for clarity. + +### Problem description + +The purpose of this problem is to choose the location of $F$ facilities such that the maximum distance between any customer and its nearest facility is minimized. The $C^2$ Customers are evenly spread out over a square grid (see image below). Facilities can be placed anywhere in the grid. + + +### Problem formulation + +To simplify the formulation, let us scale our square grid such that its axes span go from $0$ to $1$. + +We define a variable $y_{f,ax}$ which is the location of every facility ($f$) for each axis ($ax$) (either x-axis or y-axis). + +$$ 0 \leq y_{f,ax} \leq 1$$ + +Next, we define the distance between every facility-customer pair, $s_{i,j,f}$. + $$r^x_{i,j,f} = (i / N - y_{f,1}) \qquad \forall i, j, f$$ +$$r^y_{i,j,f} = (j / N - y_{f,2}) \qquad \forall i,j,f$$ + +$$s_{i,j,f} ^2 = (r_{i,j,f}^x) ^ 2 + (r^y_{i,j,f}) ^2 \qquad \forall i,j,f$$ +$$ s_{i,j,f} \geq 0 \qquad \forall i,j,f$$ + +We'd like to minimize the maximum distance between any facility and its nearest neighbor $d$. + +$$\text{min} \quad d$$ + +At first, we might try to simply define $d$ as follows: + +$$ d^{max} \geq s_{i,j,f} \qquad \forall i,j,f$$ + +However this is not quite right. We only wish to ensure the maximum distance is bounded by the distances between every customer and its _nearest_ facility. To do this, suppose we had a binary variable, $z_{i,j,f}$ that equals $1$ for customer-facility pairs that are relevant (when the facility is the nearest to the customer) and $0$ otherwise. We can now rewrite the above constraint as follows to fix our issue. + +$$ d^{max} \geq s_{i,j,f} - \sqrt{2} (1 - z_{i,j,f}) \qquad \forall i,j,f $$ + +Why does this work? When $z_{i,j,f} = 0$ the right hand side is necessarily zero or less (since $\sqrt{2}$ is the largest possible distance in a 1-by-1 square) and the constraint is thus non-binding as desired. 
+ +Finally, how do we define this magical $z_{i,j,f}$ variable? We simply ensure that exactly one customer-facility pair exists for each customer. The optimization will automatically choose the nearest facility if it matters since it wishes to minimize the objective. + +$$\sum_{f} z_{i,j,f} = 1 \qquad \forall i,j$$ + diff --git a/benchmarks/src/facility_location/__init__.py b/benchmarks/src/facility_location/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/src/facility_location/bm_ampl.py b/benchmarks/src/facility_location/bm_ampl.py new file mode 100644 index 00000000..51f8b7fa --- /dev/null +++ b/benchmarks/src/facility_location/bm_ampl.py @@ -0,0 +1,55 @@ +from amplpy import AMPL +from benchmark_utils.ampl import Benchmark + + +class Bench(Benchmark): + def build(self, **kwargs): + model = AMPL() + + model.eval(""" + param G; + param F; + param M := 2*sqrt(2); + + set Grid := 0..G; + set Facs := 1..F; + set Dims := 1..2; + + var facility_loc{f in Facs, d in Dims} >= 0, <= 1; + var max_dist >= 0; + var is_closest{i in Grid, j in Grid, f in Facs} binary; + var dist{i in Grid, j in Grid, f in Facs} >= 0; + var r{i in Grid, j in Grid, f in Facs, d in Dims}; + + minimize obj: max_dist; + + # Each grid point is assigned to exactly one facility + s.t. assignment{i in Grid, j in Grid}: + sum {f in Facs} is_closest[i,j,f] = 1; + + # Define r (coordinate differences) + s.t. r_def_x{i in Grid, j in Grid, f in Facs}: + r[i,j,f,1] = i/G - facility_loc[f,1]; + + s.t. r_def_y{i in Grid, j in Grid, f in Facs}: + r[i,j,f,2] = j/G - facility_loc[f,2]; + + # Second-order cone distance + s.t. soc_dist{i in Grid, j in Grid, f in Facs}: + r[i,j,f,1]^2 + r[i,j,f,2]^2 <= dist[i,j,f]^2; + + # Big-M linking for max distance + s.t. 
bigM_link{i in Grid, j in Grid, f in Facs}: + dist[i,j,f] == max_dist + M*(1 - is_closest[i,j,f]); + """) + + model.param["G"] = self.size + model.param["F"] = self.size + + return model + + +if __name__ == "__main__": + bench = Bench(size=4) + bench.run() + print(bench.get_objective()) diff --git a/benchmarks/src/facility_location/bm_gurobipy.py b/benchmarks/src/facility_location/bm_gurobipy.py new file mode 100644 index 00000000..196dadcf --- /dev/null +++ b/benchmarks/src/facility_location/bm_gurobipy.py @@ -0,0 +1,62 @@ +"""GurobiPy implementation of the facility location benchmark. + +Copyright (c) 2023: Yue Yang + +Use of this source code is governed by an MIT-style license that can be found +in the LICENSE.md file or at https://opensource.org/licenses/MIT. +""" + +from benchmark_utils.gurobipy import Benchmark +from gurobipy import GRB, Model + + +class Bench(Benchmark): + def build(self): + if isinstance(self.size, int): + G = F = self.size + else: + G, F = self.size + + m = Model() + + # Create variables + y = m.addVars(range(1, F + 1), range(1, 3), ub=1.0, vtype=GRB.CONTINUOUS) + s = m.addVars(range(G + 1), range(G + 1), range(1, F + 1)) + z = m.addVars(range(G + 1), range(G + 1), range(1, F + 1), vtype=GRB.BINARY) + r = m.addVars( + range(G + 1), + range(G + 1), + range(1, F + 1), + range(1, 3), + name="r", + lb=-GRB.INFINITY, + ) + d = m.addVar() + + # Set objective + m.setObjective(d, GRB.MINIMIZE) + + # Add constraints + for i in range(G + 1): + for j in range(G + 1): + m.addConstr(z.sum(i, j, "*") == 1) + + M = 2 * 1.414 + for i in range(G + 1): + for j in range(G + 1): + for f in range(1, F + 1): + m.addConstr(s[i, j, f] - (M * (1 - z[i, j, f])) <= d) + m.addConstr(r[i, j, f, 1] == -y[f, 1] + (1.0 * i) / G) + m.addConstr(r[i, j, f, 2] == -y[f, 2] + (1.0 * j) / G) + m.addConstr( + r[i, j, f, 1] * r[i, j, f, 1] + r[i, j, f, 2] * r[i, j, f, 2] + <= s[i, j, f] * s[i, j, f] + ) + + return m + + +if __name__ == "__main__": + bench = Bench("gurobi", 2, 
block_solver=False) + bench.run() + print(bench.get_objective()) diff --git a/benchmarks/src/facility_location/bm_jump.jl b/benchmarks/src/facility_location/bm_jump.jl new file mode 100644 index 00000000..5af5160b --- /dev/null +++ b/benchmarks/src/facility_location/bm_jump.jl @@ -0,0 +1,51 @@ +# Inspired from: +# Copyright (c) 2022: Miles Lubin and contributors +# +# Use of this source code is governed by an MIT-style license that can be found +# in the LICENSE.md file or at https://opensource.org/licenses/MIT. +# See https://github.com/jump-dev/JuMPPaperBenchmarks + + +include("../benchmark_utils/jump.jl") + +using JuMP +using ..Benchmark +using Parquet +using DataFrames + +function main(model::JuMP.Model, size::Int) + F = G = size + + @variables(model, begin + 0 <= y[1:F, 1:2] <= 1 + s[0:G, 0:G, 1:F] >= 0 + z[0:G, 0:G, 1:F], Bin + r[0:G, 0:G, 1:F, 1:2] + d + end) + @objective(model, Min, d) + @constraint(model, [i in 0:G, j in 0:G], sum(z[i,j,f] for f in 1:F) == 1) + # d is the maximum of distances between customers + # and their facilities. The original constraint is # d >= ||x - y|| - M(1-z) + # where M = 1 for our data. 
Because Gurobi/CPLEX can't take SOCPs directly, + # we need to rewite as a set of constraints and auxiliary variables: + # s = d + M(1 - z) >= 0 + # r = x - y + # r'r <= s^2 + M = 2 * sqrt(2) + for i in 0:G, j in 0:G, f in 1:F + @constraints(model, begin + s[i, j, f] == d + M * (1 - z[i, j, f]) + r[i, j, f, 1] == i / G - y[f, 1] + r[i, j, f, 2] == j / G - y[f, 2] + sum(r[i, j, f, k]^2 for k in 1:2) <= s[i, j, f]^2 + end) + end + + + Benchmark.optimize!(model) +end + +Benchmark.run(@__DIR__, main) + + diff --git a/benchmarks/src/facility_location/bm_pyoframe.py b/benchmarks/src/facility_location/bm_pyoframe.py new file mode 100644 index 00000000..a8deeea3 --- /dev/null +++ b/benchmarks/src/facility_location/bm_pyoframe.py @@ -0,0 +1,53 @@ +"""Pyoframe implementation of the facility location benchmark.""" + +from benchmark_utils.pyoframe import Benchmark + +from pyoframe import Model, Param, Set, Variable + + +class Bench(Benchmark): + def build(self, **kwargs): + G = F = self.size + assert G is not None + + grid = range(G + 1) + + customer_position_x = Param({"x": grid, "x_pos": [step / G for step in grid]}) + customer_position_y = Param({"y": grid, "y_pos": [step / G for step in grid]}) + + model = Model() + model.facilities = Set(f=range(1, F + 1)) + model.grid = Set(x=grid, y=grid) + model.axis = Set(d=[1, 2]) + + model.facility_position = Variable(model.facilities, model.axis, lb=0, ub=1) + + model.max_distance = Variable(lb=0) + + model.is_closest = Variable(model.grid, model.facilities, vtype="binary") + model.con_only_one_closest = model.is_closest.sum("f") == 1 + model.dist_xy = Variable(model.grid, model.facilities, model.axis) + model.con_dist_x = model.dist_xy.pick(d=1) == ( + customer_position_x.over("f") - model.facility_position.pick(d=1).over("x") + ).over("y") + model.con_dist_y = model.dist_xy.pick(d=2) == ( + customer_position_y.over("f") - model.facility_position.pick(d=2).over("y") + ).over("x") + model.dist = Variable(model.grid, 
model.facilities, lb=0) + model.con_dist = ( + model.dist**2 == model.dist_xy.pick(d=1) ** 2 + model.dist_xy.pick(d=2) ** 2 + ) + + # Twice the max distance which ensures that when is_closest is 0, the constraint is not binding. + M = 2 * 1.414 + model.con_max_distance = model.max_distance.over( + "x", "y", "f" + ) >= model.dist - M * (1 - model.is_closest) + + model.minimize = model.max_distance + + return model + + +if __name__ == "__main__": + Bench("gurobi", size=2).run() diff --git a/benchmarks/src/facility_location/bm_pyomo.py b/benchmarks/src/facility_location/bm_pyomo.py new file mode 100644 index 00000000..dce3fee7 --- /dev/null +++ b/benchmarks/src/facility_location/bm_pyomo.py @@ -0,0 +1,75 @@ +"""Pyomo implementation of the facility location benchmark. + +Copyright (c) 2022: Miles Lubin and contributors + +Use of this source code is governed by an MIT-style license that can be found +in the LICENSE.md file or at https://opensource.org/licenses/MIT. +See https://github.com/jump-dev/JuMPPaperBenchmarks +""" + +import pyomo.environ as pyo +from benchmark_utils.pyomo import Benchmark + + +class Bench(Benchmark): + def build(self): + G = F = self.size + + model = pyo.ConcreteModel() + model.Grid = pyo.RangeSet(0, G) + model.Facs = pyo.RangeSet(1, F) + model.Dims = pyo.RangeSet(1, 2) + model.facility_position = pyo.Var(model.Facs, model.Dims, bounds=(0.0, 1.0)) + model.dist = pyo.Var(model.Grid, model.Grid, model.Facs, bounds=(0.0, None)) + model.is_closest = pyo.Var( + model.Grid, model.Grid, model.Facs, within=pyo.Binary + ) + model.r = pyo.Var(model.Grid, model.Grid, model.Facs, model.Dims) + model.max_distance = pyo.Var() + model.obj = pyo.Objective(expr=1.0 * model.max_distance) + + model.assmt = pyo.Constraint( + model.Grid, + model.Grid, + rule=lambda mod, i, j: sum([mod.is_closest[i, j, f] for f in mod.Facs]) + == 1, + ) + M = 2 * 1.414 + + model.con_max_distance = pyo.Constraint( + model.Grid, + model.Grid, + model.Facs, + rule=lambda mod, i, j, f: 
mod.dist[i, j, f] + == mod.max_distance + M * (1 - mod.is_closest[i, j, f]), + ) + + model.quaddistk1 = pyo.Constraint( + model.Grid, + model.Grid, + model.Facs, + rule=lambda mod, i, j, f: mod.r[i, j, f, 1] + == (1.0 * i) / G - mod.facility_position[f, 1], + ) + + model.quaddistk2 = pyo.Constraint( + model.Grid, + model.Grid, + model.Facs, + rule=lambda mod, i, j, f: mod.r[i, j, f, 2] + == (1.0 * j) / G - mod.facility_position[f, 2], + ) + + model.quaddist = pyo.Constraint( + model.Grid, + model.Grid, + model.Facs, + rule=lambda mod, i, j, f: mod.r[i, j, f, 1] ** 2 + mod.r[i, j, f, 2] ** 2 + <= mod.dist[i, j, f] ** 2, + ) + + return model + + +if __name__ == "__main__": + Bench("gurobi", 5).run() diff --git a/benchmarks/src/facility_location/bm_pyoptinterface.py b/benchmarks/src/facility_location/bm_pyoptinterface.py new file mode 100644 index 00000000..17e91edb --- /dev/null +++ b/benchmarks/src/facility_location/bm_pyoptinterface.py @@ -0,0 +1,64 @@ +"""PyOptInterface implementation of the facility location benchmark. + +Copyright (c) 2023: Yue Yang +https://github.com/metab0t/PyOptInterface_benchmark + +Use of this source code is governed by an MIT-style license that can be found +in the LICENSE.md file or at https://opensource.org/licenses/MIT. 
+""" + +import numpy as np +import pyoptinterface as poi +from benchmark_utils.pyoptinterface import Benchmark + + +class Bench(Benchmark): + def build(self, **kwargs): + if isinstance(self.size, int): + G = F = self.size + else: + G, F = self.size + + m = super().build(**kwargs) + # Create variables + y = add_ndarray_variable(m, (F, 2), lb=0.0, ub=1.0) + s = add_ndarray_variable(m, (G + 1, G + 1, F), lb=0.0) + z = add_ndarray_variable(m, (G + 1, G + 1, F), domain=poi.VariableDomain.Binary) + r = add_ndarray_variable(m, (G + 1, G + 1, F, 2)) + d = m.add_variable() + + # Set objective + m.set_objective(d * 1.0) + + # Add constraints + for i in range(G + 1): + for j in range(G + 1): + expr = poi.quicksum(z[i, j, :]) + m.add_linear_constraint(expr, poi.Eq, 1.0) + + M = 2 * 1.414 + for i in range(G + 1): + for j in range(G + 1): + for f in range(F): + expr = s[i, j, f] - d - M * (1 - z[i, j, f]) + m.add_linear_constraint(expr, poi.Eq, 0.0) + expr = r[i, j, f, 0] - i / G + y[f, 0] + m.add_linear_constraint(expr, poi.Eq, 0.0) + expr = r[i, j, f, 1] - j / G + y[f, 1] + m.add_linear_constraint(expr, poi.Eq, 0.0) + m.add_second_order_cone_constraint( + [s[i, j, f], r[i, j, f, 0], r[i, j, f, 1]] + ) + return m + + +def add_ndarray_variable(m, shape, **kwargs): + array = np.empty(shape, dtype=object) + array_flat = array.flat + for i in range(array.size): + array_flat[i] = m.add_variable(**kwargs) + return array + + +if __name__ == "__main__": + Bench("gurobi", 5).run() diff --git a/benchmarks/src/simple_problem/.gitignore b/benchmarks/src/simple_problem/.gitignore new file mode 100644 index 00000000..8ac50daf --- /dev/null +++ b/benchmarks/src/simple_problem/.gitignore @@ -0,0 +1,2 @@ +model_data/ +model_results/ \ No newline at end of file diff --git a/benchmarks/src/simple_problem/README.md b/benchmarks/src/simple_problem/README.md new file mode 100644 index 00000000..79481c82 --- /dev/null +++ b/benchmarks/src/simple_problem/README.md @@ -0,0 +1,11 @@ +# Dummy benchmark + 
+For each row, a variable $0 \leq X_{id} \leq 1$ is created. The objective is to minimize the total cost $\sum c_{id}X_{id}$. To make the problem not completely trivial, we add the constraint: $\sum X_{id} \geq 0.5 N$.
obj: + sum {i in IDS} cost[i] * X[i]; + + s.t. con_X: + sum {i in IDS} X[i] >= card(IDS) / 2; + """) + + cost = pd.read_parquet(f"input_{self.size}.parquet").set_index("id") + model.set["IDS"] = cost.index + model.param["cost"] = cost + + return model + + def write_results(self, model, **kwargs): + model.var["X"].get_values().to_pandas().reset_index(names="id").rename( + columns={"X.val": "solution"} + ).to_parquet(f"output_{self.size}.parquet") + + +if __name__ == "__main__": + cwd = Path(__file__).parent + results_dir = cwd / "model_results" + results_dir.mkdir(exist_ok=True) + + bench = Bench( + size=10_000, + input_dir=cwd / "model_data", + results_dir=results_dir, + ) + bench.run() diff --git a/benchmarks/src/simple_problem/bm_cvxpy.py b/benchmarks/src/simple_problem/bm_cvxpy.py new file mode 100644 index 00000000..00621514 --- /dev/null +++ b/benchmarks/src/simple_problem/bm_cvxpy.py @@ -0,0 +1,38 @@ +import cvxpy as cp +import polars as pl +from benchmark_utils.cvxpy import Benchmark + + +class Bench(Benchmark): + def build(self, **kwargs): + df = pl.read_parquet(f"input_{self.size}.parquet") + self.ids = df.select("id") + costs = df["cost"].to_numpy() + + n = len(self.ids) + + self.X = cp.Variable(n) + + objective = cp.Minimize(costs @ self.X) + + constraints = [0 <= self.X, self.X <= 1, cp.sum(self.X) >= n / 2] + + model = cp.Problem(objective, constraints) + + return model + + def write_results(self, model, **kwargs): + solution = self.ids.with_columns(solution=pl.lit(self.X.value)) + + solution.write_parquet(f"output_{self.size}.parquet") + + +if __name__ == "__main__": + from pathlib import Path + + cwd = Path(__file__).parent + results_dir = cwd / "model_results" + results_dir.mkdir(exist_ok=True) + + bench = Bench(size=10_000, input_dir=cwd / "model_data", results_dir=results_dir) + bench.run() diff --git a/benchmarks/src/simple_problem/bm_gurobipy.py b/benchmarks/src/simple_problem/bm_gurobipy.py new file mode 100644 index 00000000..4e6823c7 --- 
/dev/null +++ b/benchmarks/src/simple_problem/bm_gurobipy.py @@ -0,0 +1,34 @@ +import gurobipy as gp +import polars as pl +from benchmark_utils.gurobipy import Benchmark + + +class Bench(Benchmark): + def build(self, **kwargs): + df = pl.read_parquet(f"input_{self.size}.parquet") + + m = gp.Model() + + self.X = m.addVars(df["id"].to_list(), lb=0, ub=1) + cost = gp.tupledict(df.rows()) + m.setObjective(self.X.prod(cost), gp.GRB.MINIMIZE) + m.addConstr(self.X.sum() >= len(df) / 2) + + return m + + def write_results(self, model, **kwargs): + pl.DataFrame( + [(id, var.getAttr(gp.GRB.Attr.X)) for id, var in self.X.items()], + schema={"id": pl.Int64, "solution": pl.Float64}, + orient="row", + ).write_parquet(f"output_{self.size}.parquet") + + +if __name__ == "__main__": + from pathlib import Path + + cwd = Path(__file__).parent + results_dir = cwd / "model_results" + results_dir.mkdir(exist_ok=True) + bench = Bench(size=10_000, input_dir=cwd / "model_data", results_dir=results_dir) + bench.run() diff --git a/benchmarks/src/simple_problem/bm_jump.jl b/benchmarks/src/simple_problem/bm_jump.jl new file mode 100644 index 00000000..11b8d26e --- /dev/null +++ b/benchmarks/src/simple_problem/bm_jump.jl @@ -0,0 +1,30 @@ +include("../benchmark_utils/jump.jl") + +using JuMP +using ..Benchmark +using Parquet2 +using DataFrames +using Parquet2: Dataset + +function main(model::JuMP.Model, size::Int) + # Load input data + data = DataFrame(Dataset("input_$(size).parquet")) + + @variable(model, 0 <= x[data.id] <= 1) + data.x = Array(x) + @objective(model, Min, data.cost' * data.x) + @constraint(model, sum(data.x) >= size / 2) + + Benchmark.optimize!(model) + + table = Containers.rowtable(value, x; header = [:id, :solution]) + solution = DataFrames.DataFrame(table) + + # Write via Polars again + Parquet2.writefile("output_$(size).parquet", solution) +end + +Benchmark.run(@__DIR__, main) + +# To test run from the benchmarks directory: +# julia --project=. 
src/simple_problem/bm_jump.jl gurobi 1000 src/simple_problem/model_results diff --git a/benchmarks/src/simple_problem/bm_linopy.py b/benchmarks/src/simple_problem/bm_linopy.py new file mode 100644 index 00000000..19e46abe --- /dev/null +++ b/benchmarks/src/simple_problem/bm_linopy.py @@ -0,0 +1,31 @@ +import linopy as lp +import pandas as pd +from benchmark_utils.linopy import Benchmark + + +class Bench(Benchmark): + def build(self, **kwargs): + cost = ( + pd.read_parquet(f"input_{self.size}.parquet") + .set_index("id")["cost"] + .to_xarray() + ) + + m = lp.Model() + self.X = m.add_variables(lower=0, upper=1, coords=cost.coords, name="X") + m.add_objective((self.X * cost).sum(), sense="min") + m.add_constraints(self.X.sum() >= cost.sizes["id"] / 2) + return m + + def write_results(self, model, **kwargs): + self.X.solution.to_dataframe().to_parquet(f"output_{self.size}.parquet") + + +if __name__ == "__main__": + from pathlib import Path + + cwd = Path(__file__).parent + results_dir = cwd / "model_results" + results_dir.mkdir(exist_ok=True) + bench = Bench(size=10_000, input_dir=cwd / "model_data", results_dir=results_dir) + bench.run() diff --git a/benchmarks/src/simple_problem/bm_pulp.py b/benchmarks/src/simple_problem/bm_pulp.py new file mode 100644 index 00000000..64d2244c --- /dev/null +++ b/benchmarks/src/simple_problem/bm_pulp.py @@ -0,0 +1,36 @@ +import polars as pl +import pulp +from benchmark_utils.pulp import Benchmark + + +class Bench(Benchmark): + def build(self, **kwargs): + costs = pl.read_parquet(f"input_{self.size}.parquet") + costs = {r["id"]: r["cost"] for r in costs.to_dicts()} + + model = pulp.LpProblem() + + self.X = pulp.LpVariable.dicts("X", costs.keys(), lowBound=0, upBound=1) + + model += pulp.lpSum(cost * self.X[id] for id, cost in costs.items()) + model += pulp.lpSum(self.X[id] for id in costs.keys()) >= len(costs) / 2 + + return model + + def write_results(self, model, **kwargs): + data = [(i, pulp.value(var)) for i, var in 
+        pl.DataFrame(
+            data, schema={"id": pl.Int64, "solution": pl.Float64}, orient="row"
+        ).write_parquet(f"output_{self.size}.parquet")
+            [
+                (id, model.get_variable_attribute(x, poi.VariableAttribute.Value))
+                for id, x in self.X.items()
+            ],
results_dir=results_dir) + bench.run() diff --git a/benchmarks/test.py b/benchmarks/test.py new file mode 100644 index 00000000..b9ba3d0a --- /dev/null +++ b/benchmarks/test.py @@ -0,0 +1,47 @@ +"""Calls run.py using the test configuration file.""" + +import argparse + +from plot import plot_all +from run import read_config, run_all_benchmarks + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--reuse", action="store_true", help="Reuse past results if available." + ) + parser.add_argument( + "-p", + "--problem", + default=None, + help="Only run a specific problem (e.g., 'energy_planning').", + ) + parser.add_argument( + "-l", + "--library", + default=None, + help="Only run a specific library (e.g., 'pyoframe').", + ) + args = parser.parse_args() + + config = read_config(name="config.test.yaml") + + if args.problem is not None: + if args.problem not in config["problems"]: + raise ValueError( + f"Problem '{args.problem}' not found in config. Options are: {list(config['problems'].keys())}" + ) + config["problems"] = { + k: v for k, v in config["problems"].items() if k == args.problem + } + + if args.library is not None: + if args.library not in config["libraries"]: + raise ValueError( + f"Library '{args.library}' not found in config. 
Options are: {config['libraries']}" + ) + config["libraries"] = [args.library] + + run_all_benchmarks(config, ignore_past_results=not args.reuse) + + plot_all(config_name="config.test.yaml") diff --git a/pyproject.toml b/pyproject.toml index e3a4bd9e..7f5f23a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,6 +85,10 @@ ignore = [ [tool.ruff.lint.per-file-ignores] "{docs,tests,scripts,benchmarks}/**" = ["D101", "D102", "D103", "D104"] +"benchmarks/src/**" = [ + "D100", # allow missing docstrings since benchmarks are often one-off scripts + "E741" # allow single letter variable names since they often represent mathematical variables +] "conftest.py" = ["D"] [tool.ruff.lint.pydocstyle] diff --git a/src/pyoframe/_model.py b/src/pyoframe/_model.py index 8fe45bbf..5ac8dd73 100644 --- a/src/pyoframe/_model.py +++ b/src/pyoframe/_model.py @@ -88,6 +88,7 @@ class Model: "attr", "sense", "_solver_uses_variable_names", + "_last_update", "ONE", "solver_name", "minimize", @@ -104,6 +105,7 @@ def __init__( name: str | None = None, solver_uses_variable_names: bool = False, print_uses_variable_names: bool = True, + debug: bool = False, sense: ObjSense | ObjSenseValue | None = None, verbose: bool = False, ): @@ -119,6 +121,9 @@ def __init__( self._params = Container(self._set_param, self._get_param) self._attr = Container(self._set_attr, self._get_attr) self._solver_uses_variable_names = solver_uses_variable_names + self._last_update = None + if debug: + self._last_update = time.time() self._logger = None self._last_log = None