| Limit | 1 project | 3 projects | 10 projects | Unlimited projects |
|---|---|---|---|---|
| Runs per project | 1 | 20 | 100 | Unlimited |
| Metric delay | 60 sec | 30 sec | 10 sec | Real-time streaming |
| History | 7 days | 30 days | 90 days | 200 days |
| Metrics tracked | 3 | 6 | 10 | 30 |
| Logs/day | 200 | 1,000 | 5,000 | 10,000 |
| Run comparison | | | Unlimited | Unlimited |
| Email alerts | | | Yes | Yes |
| Dead-run email alerts | | | Yes | Yes |
| Team workspaces | | | Up to 3 | Unlimited |
Copy runlogger.py into your project (recommended). The only required dependency is requests:
pip install requests
Optional, for system stats:
pip install psutil nvidia-ml-py
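These packages enable system-stat collection. Purely as an illustration of what logging host metrics yourself through logger.log could look like (the metric names here are made up, and a logger like the one from the quickstart below is assumed):

import psutil
import pynvml  # the module installed by the nvidia-ml-py package

pynvml.nvmlInit()
gpu = pynvml.nvmlDeviceGetHandleByIndex(0)

# send host metrics alongside training metrics (names are illustrative)
logger.log(
    step=step,
    cpu_percent=psutil.cpu_percent(),
    ram_percent=psutil.virtual_memory().percent,
    gpu_util=pynvml.nvmlDeviceGetUtilizationRates(gpu).gpu,
    gpu_mem_gb=pynvml.nvmlDeviceGetMemoryInfo(gpu).used / 1e9,
)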
from runlogger import RunLogger

logger = RunLogger(
    base_url="http://localhost:8000",   # your Runlog URL
    project_name="my-project",          # created automatically if missing
    api_token="rl-gb-...",              # from Dashboard → Project → API Tokens
    run_name="run-1",
)
for step in range(1000):
    loss = train_one_step()
    # log any metrics; they become charts automatically
    logger.log(step=step, total_steps=1000, loss=loss, lr=scheduler.get_lr())
    if step % 100 == 0:
        val_loss = evaluate()
        logger.log_eval(step=step, val_loss=val_loss, is_best=val_loss < best)

logger.finish()  # marks run as completed
Log any numeric metrics. Throttled by plan interval. Returns True if sent, False if throttled.
logger.log(step=100, loss=0.5, lr=0.001, accuracy=0.92)
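Because the return value tells you whether a point was actually sent, you can track how many points the plan's metric-delay throttle skips; a minimal sketch (the dropped counter and train_one_step are illustrative):

dropped = 0  # points skipped by the throttle
for step in range(total_steps):
    loss = train_one_step()
    if not logger.log(step=step, loss=loss):
        dropped += 1  # this call fell inside the plan's metric-delay window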
Log evaluation metrics. Always sent — bypasses throttle. Use for validation metrics.
logger.log_eval(step=1000, val_loss=0.4, accuracy=0.95,
                is_best=True, checkpoint_path="ckpt/best.pt")
Record a file artifact. Type: model | dataset | image | file.
logger.log_artifact("checkpoints/best.pt", name="best-model", type="model")
Returns True if Pause was clicked in the dashboard. Check it in your training loop.
import sys

if logger.should_pause():
    save_checkpoint(step)
    logger.finish("paused")
    sys.exit(0)
Mark run as done. Status: completed | crashed | paused.
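For example (calling finish() with no argument marks the run completed, as in the quickstart above):
logger.finish()           # defaults to "completed"
logger.finish("crashed")  # or pass an explicit status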
Make the run publicly viewable. Returns a shareable URL.
url = logger.make_public()
print(f"Share: {url}")
with RunLogger(...) as logger:
    for step in range(steps):
        logger.log(step=step, loss=loss)
# exiting the block auto-calls finish("completed"), or finish("crashed") on an exception
logger = RunLogger(base_url=..., project_name=..., api_token=..., run_name=...)

best_loss = float("inf")
try:
    for step in range(total_steps):
        loss = criterion(model(x), y)  # x, y: current training batch
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        logger.log(
            step=step,
            total_steps=total_steps,
            train_loss=loss.item(),
            lr=scheduler.get_last_lr()[0],
            tokens_per_sec=batch_size * seq_len / step_time,
        )

        if step % eval_every == 0:
            val_loss = evaluate(model, val_loader)
            is_best = val_loss < best_loss
            if is_best:
                best_loss = val_loss
                torch.save(model.state_dict(), "best.pt")
            logger.log_eval(step=step, val_loss=val_loss, is_best=is_best,
                            checkpoint_path="best.pt" if is_best else None)

        if logger.should_pause():
            torch.save(model.state_dict(), f"pause_{step}.pt")
            logger.finish("paused")
            break
    else:
        # only reached if the loop finished without a pause
        logger.finish("completed")
except Exception:
    logger.finish("crashed")
    raise
from runlogger import RunLogger
from transformers import Trainer, TrainerCallback

class TTKCallback(TrainerCallback):
    def __init__(self, logger):
        self.logger = logger

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs:
            self.logger.log(step=state.global_step,
                            total_steps=state.max_steps, **logs)

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        if metrics:
            self.logger.log_eval(step=state.global_step, **metrics)

    def on_train_end(self, args, state, control, **kwargs):
        self.logger.finish()

# usage:
logger = RunLogger(...)
trainer = Trainer(..., callbacks=[TTKCallback(logger)])
import tensorflow as tf
from runlogger import RunLogger

class TTKCallback(tf.keras.callbacks.Callback):
    def __init__(self, logger, total_epochs):
        super().__init__()
        self.logger = logger
        self.total_epochs = total_epochs

    def on_epoch_end(self, epoch, logs=None):
        self.logger.log(step=epoch, total_steps=self.total_epochs, **(logs or {}))

    def on_train_end(self, logs=None):
        self.logger.finish()

# usage:
logger = RunLogger(...)
model.fit(X, y, epochs=50, callbacks=[TTKCallback(logger, total_epochs=50)])
import xgboost as xgb
from runlogger import RunLogger

class TTKXGBCallback(xgb.callback.TrainingCallback):
    def __init__(self, logger, total_rounds):
        super().__init__()
        self.logger = logger
        self.total_rounds = total_rounds

    def after_iteration(self, model, epoch, evals_log):
        metrics = {}
        for data, metric_dict in evals_log.items():
            for name, vals in metric_dict.items():
                metrics[f"{data}_{name}"] = vals[-1]
        self.logger.log(step=epoch, total_steps=self.total_rounds, **metrics)
        return False  # returning False keeps training going

# usage:
logger = RunLogger(...)
bst = xgb.train(params, dtrain, num_boost_round=100,
                evals=[(dval, "val")],
                callbacks=[TTKXGBCallback(logger, 100)])
Log any file as an artifact — models, datasets, plots, configs. Artifacts appear in the run's Artifacts panel.
# log model checkpoint
logger.log_artifact("checkpoints/best.pt",
                    name="best-model",
                    type="model",
                    metadata={"val_loss": 0.42, "step": 5000})

# log dataset
logger.log_artifact("data/train.csv",
                    name="training-data",
                    type="dataset",
                    metadata={"rows": 50000})

# log evaluation plot
logger.log_artifact("outputs/confusion_matrix.png",
                    name="confusion-matrix",
                    type="image")
Pro and Elite plans support team workspaces. Create a workspace, invite teammates by email, and share projects.
# sharing a run publicly (no login needed to view)
url = logger.make_public()
print(f"Share this run: {url}")
# → https://your-dashboard.com/share/abc123
For team workspaces, go to Workspace in the sidebar, where you can create workspaces and manage member roles.
Plans and limits are managed from the dashboard's Plans page (limits are database-driven). You can upgrade or downgrade at any time.