Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions mlperf_logging/compliance_checker/mlp_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ def configured_checks(self, loglines, config_file):
if 'REQ' not in v:
continue

if v['REQ'] == 'OPTIONAL':
if len(reported_values[k]) == 0:
# self.put_warning(f"OPTIONAL key '{k}' not reported",
# key=k)
continue


if v['REQ']=='EXACTLY_ONE':
if len(reported_values[k]) !=1:
if reported_values[k] and all_same(reported_values[k]):
Expand Down
30 changes: 30 additions & 0 deletions mlperf_logging/compliance_checker/training_6.0.0/common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,33 @@
REQ: EXACTLY_ONE
CHECK: " v['value'] != '' "

# Optional parallelism keys
# Every entry below is declared with REQ: OPTIONAL, so a log that never
# reports the key is not flagged for it. Where a CHECK expression is given it
# requires the value to be an integer (is_integer); presumably it is evaluated
# for each reported occurrence of the key - confirm against the checker.

# Tensor-parallel setting; must be an integer when reported.
- KEY:
    NAME: tensor_parallelism
    REQ: OPTIONAL
    CHECK: " is_integer(v['value']) "

# Pipeline-parallel setting; must be an integer when reported.
- KEY:
    NAME: pipeline_parallelism
    REQ: OPTIONAL
    CHECK: " is_integer(v['value']) "

# Context-parallel setting; must be an integer when reported.
- KEY:
    NAME: context_parallelism
    REQ: OPTIONAL
    CHECK: " is_integer(v['value']) "

# Expert-parallel setting; must be an integer when reported.
- KEY:
    NAME: expert_parallelism
    REQ: OPTIONAL
    CHECK: " is_integer(v['value']) "

# Micro batch size; must be an integer when reported.
- KEY:
    NAME: micro_batch_size
    REQ: OPTIONAL
    CHECK: " is_integer(v['value']) "

# Config file name; no CHECK is declared, so this entry places no constraint
# on the reported value.
- KEY:
    NAME: config_filename
    REQ: OPTIONAL

9 changes: 9 additions & 0 deletions mlperf_logging/mllog/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,12 @@
POWER_READING = "power_reading"
# NOTE(review): the identifier is spelled "CONVERTION" while its value is
# "conversion_eff"; presumably the name is kept for existing callers - confirm
# before renaming.
CONVERTION_EFF = "conversion_eff"
INTERCONNECT_POWER_EST = "interconnect_power_est"

# Parallelism constants
# Log keys for the tensor / pipeline / context / expert parallelism settings
# and for the micro batch size of a run.
TENSOR_PARALLELISM = "tensor_parallelism"
PIPELINE_PARALLELISM = "pipeline_parallelism"
CONTEXT_PARALLELISM = "context_parallelism"
EXPERT_PARALLELISM = "expert_parallelism"
MICRO_BATCH_SIZE = "micro_batch_size"

# Log key for the name of the config file used for a run.
CONFIG_FILENAME = "config_filename"
53 changes: 53 additions & 0 deletions mlperf_logging/mllog/examples/parallelism.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2019 MLBenchmark Group. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import os

from mlperf_logging import mllog


def parallelism_example():
    """Write a sample log that exercises the parallelism and config keys.

    The logger is configured to write to ``parallelism_example.log``. Between
    a ``RUN_START`` and a ``RUN_STOP`` entry the example logs the config file
    name, the four parallelism settings, and the global and micro batch sizes.
    """
    keys = mllog.constants
    mllogger = mllog.get_mllogger()

    # Directory two levels above this file, handed to mllog.config as root_dir.
    here = os.path.dirname(os.path.realpath(__file__))
    root = os.path.normpath(os.path.join(here, "..", ".."))

    mllog.config(
        filename="parallelism_example.log",
        default_namespace="worker1",
        default_stack_offset=1,
        default_clear_line=False,
        root_dir=root,
    )

    mllogger.start(key=keys.RUN_START)

    # Name of the model config file used for this run.
    mllogger.event(key=keys.CONFIG_FILENAME, value="llama31_405b_config.yaml")

    # Parallelism strategy. Each setting is logged by its own direct call so
    # every entry is emitted from a distinct call site in this function.
    mllogger.event(key=keys.TENSOR_PARALLELISM, value=8)
    mllogger.event(key=keys.PIPELINE_PARALLELISM, value=4)
    mllogger.event(key=keys.CONTEXT_PARALLELISM, value=2)
    mllogger.event(key=keys.EXPERT_PARALLELISM, value=1)

    # Micro batch size is reported next to the global batch size.
    mllogger.event(key=keys.GLOBAL_BATCH_SIZE, value=2048)
    mllogger.event(key=keys.MICRO_BATCH_SIZE, value=1)

    run_status = {keys.STATUS: keys.SUCCESS}
    mllogger.end(key=keys.RUN_STOP, metadata=run_status)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    parallelism_example()
Loading