| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380 |
- # coding=utf-8
- # Copyright 2020 Hugging Face
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import re
- import time
- from typing import Optional
- import IPython.display as disp
- from ..trainer_callback import TrainerCallback
- from ..trainer_utils import IntervalStrategy, has_length
- def format_time(t):
- "Format `t` (in seconds) to (h):mm:ss"
- t = int(t)
- h, m, s = t // 3600, (t // 60) % 60, t % 60
- return f"{h}:{m:02d}:{s:02d}" if h != 0 else f"{m:02d}:{s:02d}"
- def html_progress_bar(value, total, prefix, label, width=300):
- # docstyle-ignore
- return f"""
- <div>
- {prefix}
- <progress value='{value}' max='{total}' style='width:{width}px; height:20px; vertical-align: middle;'></progress>
- {label}
- </div>
- """
- def text_to_html_table(items):
- "Put the texts in `items` in an HTML table."
- html_code = """<table border="1" class="dataframe">\n"""
- html_code += """ <thead>\n <tr style="text-align: left;">\n"""
- for i in items[0]:
- html_code += f" <th>{i}</th>\n"
- html_code += " </tr>\n </thead>\n <tbody>\n"
- for line in items[1:]:
- html_code += " <tr>\n"
- for elt in line:
- elt = f"{elt:.6f}" if isinstance(elt, float) else str(elt)
- html_code += f" <td>{elt}</td>\n"
- html_code += " </tr>\n"
- html_code += " </tbody>\n</table><p>"
- return html_code
- class NotebookProgressBar:
- """
- A progress par for display in a notebook.
- Class attributes (overridden by derived classes)
- - **warmup** (`int`) -- The number of iterations to do at the beginning while ignoring `update_every`.
- - **update_every** (`float`) -- Since calling the time takes some time, we only do it every presumed
- `update_every` seconds. The progress bar uses the average time passed up until now to guess the next value
- for which it will call the update.
- Args:
- total (`int`):
- The total number of iterations to reach.
- prefix (`str`, *optional*):
- A prefix to add before the progress bar.
- leave (`bool`, *optional*, defaults to `True`):
- Whether or not to leave the progress bar once it's completed. You can always call the
- [`~utils.notebook.NotebookProgressBar.close`] method to make the bar disappear.
- parent ([`~notebook.NotebookTrainingTracker`], *optional*):
- A parent object (like [`~utils.notebook.NotebookTrainingTracker`]) that spawns progress bars and handle
- their display. If set, the object passed must have a `display()` method.
- width (`int`, *optional*, defaults to 300):
- The width (in pixels) that the bar will take.
- Example:
- ```python
- import time
- pbar = NotebookProgressBar(100)
- for val in range(100):
- pbar.update(val)
- time.sleep(0.07)
- pbar.update(100)
- ```"""
- warmup = 5
- update_every = 0.2
- def __init__(
- self,
- total: int,
- prefix: Optional[str] = None,
- leave: bool = True,
- parent: Optional["NotebookTrainingTracker"] = None,
- width: int = 300,
- ):
- self.total = total
- self.prefix = "" if prefix is None else prefix
- self.leave = leave
- self.parent = parent
- self.width = width
- self.last_value = None
- self.comment = None
- self.output = None
- self.value = None
- self.label = None
- def update(self, value: int, force_update: bool = False, comment: str = None):
- """
- The main method to update the progress bar to `value`.
- Args:
- value (`int`):
- The value to use. Must be between 0 and `total`.
- force_update (`bool`, *optional*, defaults to `False`):
- Whether or not to force and update of the internal state and display (by default, the bar will wait for
- `value` to reach the value it predicted corresponds to a time of more than the `update_every` attribute
- since the last update to avoid adding boilerplate).
- comment (`str`, *optional*):
- A comment to add on the left of the progress bar.
- """
- self.value = value
- if comment is not None:
- self.comment = comment
- if self.last_value is None:
- self.start_time = self.last_time = time.time()
- self.start_value = self.last_value = value
- self.elapsed_time = self.predicted_remaining = None
- self.first_calls = self.warmup
- self.wait_for = 1
- self.update_bar(value)
- elif value <= self.last_value and not force_update:
- return
- elif force_update or self.first_calls > 0 or value >= min(self.last_value + self.wait_for, self.total):
- if self.first_calls > 0:
- self.first_calls -= 1
- current_time = time.time()
- self.elapsed_time = current_time - self.start_time
- # We could have value = self.start_value if the update is called twixe with the same start value.
- if value > self.start_value:
- self.average_time_per_item = self.elapsed_time / (value - self.start_value)
- else:
- self.average_time_per_item = None
- if value >= self.total:
- value = self.total
- self.predicted_remaining = None
- if not self.leave:
- self.close()
- elif self.average_time_per_item is not None:
- self.predicted_remaining = self.average_time_per_item * (self.total - value)
- self.update_bar(value)
- self.last_value = value
- self.last_time = current_time
- if (self.average_time_per_item is None) or (self.average_time_per_item == 0):
- self.wait_for = 1
- else:
- self.wait_for = max(int(self.update_every / self.average_time_per_item), 1)
- def update_bar(self, value, comment=None):
- spaced_value = " " * (len(str(self.total)) - len(str(value))) + str(value)
- if self.elapsed_time is None:
- self.label = f"[{spaced_value}/{self.total} : < :"
- elif self.predicted_remaining is None:
- self.label = f"[{spaced_value}/{self.total} {format_time(self.elapsed_time)}"
- else:
- self.label = (
- f"[{spaced_value}/{self.total} {format_time(self.elapsed_time)} <"
- f" {format_time(self.predicted_remaining)}"
- )
- if self.average_time_per_item == 0:
- self.label += ", +inf it/s"
- else:
- self.label += f", {1/self.average_time_per_item:.2f} it/s"
- self.label += "]" if self.comment is None or len(self.comment) == 0 else f", {self.comment}]"
- self.display()
- def display(self):
- self.html_code = html_progress_bar(self.value, self.total, self.prefix, self.label, self.width)
- if self.parent is not None:
- # If this is a child bar, the parent will take care of the display.
- self.parent.display()
- return
- if self.output is None:
- self.output = disp.display(disp.HTML(self.html_code), display_id=True)
- else:
- self.output.update(disp.HTML(self.html_code))
- def close(self):
- "Closes the progress bar."
- if self.parent is None and self.output is not None:
- self.output.update(disp.HTML(""))
- class NotebookTrainingTracker(NotebookProgressBar):
- """
- An object tracking the updates of an ongoing training with progress bars and a nice table reporting metrics.
- Args:
- num_steps (`int`): The number of steps during training. column_names (`List[str]`, *optional*):
- The list of column names for the metrics table (will be inferred from the first call to
- [`~utils.notebook.NotebookTrainingTracker.write_line`] if not set).
- """
- def __init__(self, num_steps, column_names=None):
- super().__init__(num_steps)
- self.inner_table = None if column_names is None else [column_names]
- self.child_bar = None
- def display(self):
- self.html_code = html_progress_bar(self.value, self.total, self.prefix, self.label, self.width)
- if self.inner_table is not None:
- self.html_code += text_to_html_table(self.inner_table)
- if self.child_bar is not None:
- self.html_code += self.child_bar.html_code
- if self.output is None:
- self.output = disp.display(disp.HTML(self.html_code), display_id=True)
- else:
- self.output.update(disp.HTML(self.html_code))
- def write_line(self, values):
- """
- Write the values in the inner table.
- Args:
- values (`Dict[str, float]`): The values to display.
- """
- if self.inner_table is None:
- self.inner_table = [list(values.keys()), list(values.values())]
- else:
- columns = self.inner_table[0]
- for key in values.keys():
- if key not in columns:
- columns.append(key)
- self.inner_table[0] = columns
- if len(self.inner_table) > 1:
- last_values = self.inner_table[-1]
- first_column = self.inner_table[0][0]
- if last_values[0] != values[first_column]:
- # write new line
- self.inner_table.append([values[c] if c in values else "No Log" for c in columns])
- else:
- # update last line
- new_values = values
- for c in columns:
- if c not in new_values.keys():
- new_values[c] = last_values[columns.index(c)]
- self.inner_table[-1] = [new_values[c] for c in columns]
- else:
- self.inner_table.append([values[c] for c in columns])
- def add_child(self, total, prefix=None, width=300):
- """
- Add a child progress bar displayed under the table of metrics. The child progress bar is returned (so it can be
- easily updated).
- Args:
- total (`int`): The number of iterations for the child progress bar.
- prefix (`str`, *optional*): A prefix to write on the left of the progress bar.
- width (`int`, *optional*, defaults to 300): The width (in pixels) of the progress bar.
- """
- self.child_bar = NotebookProgressBar(total, prefix=prefix, parent=self, width=width)
- return self.child_bar
- def remove_child(self):
- """
- Closes the child progress bar.
- """
- self.child_bar = None
- self.display()
- class NotebookProgressCallback(TrainerCallback):
- """
- A [`TrainerCallback`] that displays the progress of training or evaluation, optimized for Jupyter Notebooks or
- Google colab.
- """
- def __init__(self):
- self.training_tracker = None
- self.prediction_bar = None
- self._force_next_update = False
- def on_train_begin(self, args, state, control, **kwargs):
- self.first_column = "Epoch" if args.eval_strategy == IntervalStrategy.EPOCH else "Step"
- self.training_loss = 0
- self.last_log = 0
- column_names = [self.first_column] + ["Training Loss"]
- if args.eval_strategy != IntervalStrategy.NO:
- column_names.append("Validation Loss")
- self.training_tracker = NotebookTrainingTracker(state.max_steps, column_names)
- def on_step_end(self, args, state, control, **kwargs):
- epoch = int(state.epoch) if int(state.epoch) == state.epoch else f"{state.epoch:.2f}"
- self.training_tracker.update(
- state.global_step + 1,
- comment=f"Epoch {epoch}/{state.num_train_epochs}",
- force_update=self._force_next_update,
- )
- self._force_next_update = False
- def on_prediction_step(self, args, state, control, eval_dataloader=None, **kwargs):
- if not has_length(eval_dataloader):
- return
- if self.prediction_bar is None:
- if self.training_tracker is not None:
- self.prediction_bar = self.training_tracker.add_child(len(eval_dataloader))
- else:
- self.prediction_bar = NotebookProgressBar(len(eval_dataloader))
- self.prediction_bar.update(1)
- else:
- self.prediction_bar.update(self.prediction_bar.value + 1)
- def on_predict(self, args, state, control, **kwargs):
- if self.prediction_bar is not None:
- self.prediction_bar.close()
- self.prediction_bar = None
- def on_log(self, args, state, control, logs=None, **kwargs):
- # Only for when there is no evaluation
- if args.eval_strategy == IntervalStrategy.NO and "loss" in logs:
- values = {"Training Loss": logs["loss"]}
- # First column is necessarily Step sine we're not in epoch eval strategy
- values["Step"] = state.global_step
- self.training_tracker.write_line(values)
- def on_evaluate(self, args, state, control, metrics=None, **kwargs):
- if self.training_tracker is not None:
- values = {"Training Loss": "No log", "Validation Loss": "No log"}
- for log in reversed(state.log_history):
- if "loss" in log:
- values["Training Loss"] = log["loss"]
- break
- if self.first_column == "Epoch":
- values["Epoch"] = int(state.epoch)
- else:
- values["Step"] = state.global_step
- metric_key_prefix = "eval"
- for k in metrics:
- if k.endswith("_loss"):
- metric_key_prefix = re.sub(r"\_loss$", "", k)
- _ = metrics.pop("total_flos", None)
- _ = metrics.pop("epoch", None)
- _ = metrics.pop(f"{metric_key_prefix}_runtime", None)
- _ = metrics.pop(f"{metric_key_prefix}_samples_per_second", None)
- _ = metrics.pop(f"{metric_key_prefix}_steps_per_second", None)
- _ = metrics.pop(f"{metric_key_prefix}_jit_compilation_time", None)
- for k, v in metrics.items():
- splits = k.split("_")
- name = " ".join([part.capitalize() for part in splits[1:]])
- if name == "Loss":
- # Single dataset
- name = "Validation Loss"
- values[name] = v
- self.training_tracker.write_line(values)
- self.training_tracker.remove_child()
- self.prediction_bar = None
- # Evaluation takes a long time so we should force the next update.
- self._force_next_update = True
- def on_train_end(self, args, state, control, **kwargs):
- self.training_tracker.update(
- state.global_step,
- comment=f"Epoch {int(state.epoch)}/{state.num_train_epochs}",
- force_update=True,
- )
- self.training_tracker = None
|