| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
- # coding=utf-8
- # Copyright 2020 The HuggingFace Team. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from math import ceil
def assert_device_map(device_map, num_blocks):
    """Validate that ``device_map`` assigns every attention block to exactly one device.

    Args:
        device_map: Mapping whose values are iterables of block indices. The
            keys are not inspected. Block indices must be hashable.
        num_blocks: Number of attention blocks in the model; the valid block
            indices are ``range(num_blocks)``.

    Raises:
        ValueError: If a block index is listed more than once, if an index in
            ``range(num_blocks)`` is not listed at all, or if a listed index
            is outside ``range(num_blocks)``. The checks run in that order and
            only the first failing check is reported.
    """
    valid_blocks = range(num_blocks)
    assigned_blocks = [block for sublist in device_map.values() for block in sublist]

    # Count every block in a single pass instead of calling list.count() per
    # element. Dicts keep insertion order, so duplicates are still reported in
    # the order in which they first appear in the device_map.
    occurrences = {}
    for block in assigned_blocks:
        occurrences[block] = occurrences.get(block, 0) + 1
    duplicate_blocks = [block for block, count in occurrences.items() if count > 1]
    if duplicate_blocks:
        raise ValueError(
            "Duplicate attention blocks specified in device_map. Attention blocks must be specified to one device."
            " These attention blocks were specified more than once: " + str(duplicate_blocks)
        )

    # The keys of `occurrences` are exactly the distinct assigned blocks, so a
    # dict lookup replaces the linear scan of the flattened list.
    missing_blocks = [block for block in valid_blocks if block not in occurrences]
    if missing_blocks:
        raise ValueError(
            "There are attention blocks for this model that are not specified in the device_map. Add these attention "
            "blocks to a device on the device_map: " + str(missing_blocks)
        )

    extra_blocks = [block for block in assigned_blocks if block not in valid_blocks]
    if extra_blocks:
        raise ValueError(
            "The device_map contains more attention blocks than this model has. Remove these from the device_map: "
            + str(extra_blocks)
        )
def get_device_map(n_layers, devices):
    """Return a dictionary mapping devices to contiguous chunks of layer indices.

    Layers ``0 .. n_layers - 1`` are split into consecutive chunks of
    ``ceil(n_layers / len(devices))`` layers, and the chunks are paired with
    ``devices`` in order. The last chunk may be shorter than the others.

    When there are fewer chunks than devices, the trailing devices do not
    appear in the result (e.g. 5 layers over 4 devices yields chunks of 2, 2
    and 1, so only the first three devices are used).

    Args:
        n_layers: Number of layers to distribute.
        devices: Sized, ordered collection of hashable device identifiers.

    Returns:
        A dict mapping each used device to its list of layer indices. Empty
        when ``n_layers`` is not positive.

    Raises:
        ValueError: If ``devices`` is empty.
    """
    n_devices = len(devices)
    if n_devices == 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("`devices` must contain at least one device to build a device map.")
    if n_layers <= 0:
        # No layers to place; also avoids a zero chunk size, which range()
        # rejects as a step.
        return {}

    layers = list(range(n_layers))
    n_blocks = int(ceil(n_layers / n_devices))
    layers_list = [layers[start : start + n_blocks] for start in range(0, n_layers, n_blocks)]
    # zip() stops at the shorter input, so devices without a chunk are omitted.
    return dict(zip(devices, layers_list))
|