diff --git a/README.md b/README.md
index b945387..4122c9b 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,8 @@
 packaged on PyPi. You should clone the project to your local machine and do
 an [editable install](https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs)
 in a [virtual environment](https://docs.python.org/3/library/venv.html).
-Note: You will also need ffmpeg.
+Note: You will also need ffmpeg and ffprobe in order to interrogate audio
+files for their metadata.
 
 ```sh
 $ brew install ffmpeg
diff --git a/ucsinfer/__main__.py b/ucsinfer/__main__.py
index de27a56..2f87283 100644
--- a/ucsinfer/__main__.py
+++ b/ucsinfer/__main__.py
@@ -21,6 +21,7 @@ def ucsinfer():
 
 def recommend():
     """
     Infer a UCS category for a text description
+    """
     pass
 
@@ -32,6 +33,18 @@ def recommend():
 def gather(paths, outfile):
     """
     Scan files to build a training dataset at PATH
+
+    $ ucsinfer gather [OPTIONS] [PATHS] ...
+
+    The `gather` command walks the directory hierarchy for each path in
+    PATHS and looks for .wav and .flac files that are named according to
+    the UCS file naming guidelines, with at least a CatID and an FX Name,
+    divided by an underscore.
+
+    For every file ucsinfer finds that meets these criteria, it creates a
+    record in an output dataset CSV file. The dataset file has two columns:
+    the first is the CatID indicated for the file, and the second is the
+    embedded file description for the file as returned by ffprobe.
     """
     types = ['.wav', '.flac']
     table = csv.writer(outfile)
@@ -78,6 +91,24 @@ def finetune():
 def evaluate(dataset, offset, limit, model, no_foley):
     """
     Use datasets to evaluate model performance
+
+    $ ucsinfer evaluate [OPTIONS] [DATASET]
+
+    The `evaluate` command reads the input DATASET file row by row and
+    performs a classification of each description against the selected
+    model (either the default or one given with --model), then checks
+    whether the model inferred the correct category as given by the dataset.
+
+    The model gives its top 10 possible categories for each description,
+    and the results are tabulated according to (1) whether the top
+    classification was correct, (2) whether the correct classification was
+    in the top 5, or (3) whether it was in the top 10. The worst-performing
+    category, the one with the most misses, is also reported, as is the
+    category coverage: how many categories are present in the dataset.
+
+    NOTE: Experimentation showed that foley items were generally classified
+    according to their subject rather than whether or not they were foley,
+    so these categories can be excluded with the --no-foley option.
     """
     m = SentenceTransformer(model)
     ctx = InferenceContext(m, model)
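
The `gather` docstring above says each file's description comes from its embedded metadata as returned by ffprobe. A minimal sketch of how that lookup could work, using ffprobe's JSON output; this is illustrative only, not ucsinfer's actual implementation, and the tag keys checked are assumptions since containers store the description under different names:

```python
import json
import subprocess

def read_description(path: str) -> str | None:
    """Return the embedded description tag for an audio file, if any."""
    result = subprocess.run(
        ['ffprobe', '-v', 'quiet', '-print_format', 'json',
         '-show_format', path],
        capture_output=True, text=True, check=True)
    # Container-level metadata lives under format.tags in ffprobe's JSON.
    tags = json.loads(result.stdout).get('format', {}).get('tags', {})
    # Tag casing and naming vary by container; try a few likely keys.
    for key in ('description', 'comment', 'DESCRIPTION', 'COMMENT'):
        if key in tags:
            return tags[key]
    return None
```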
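
The same docstring describes the filename test: a UCS-style name needs at least a CatID and an FX Name divided by an underscore. A sketch of that check, with the validation details assumed rather than taken from ucsinfer's source:

```python
from pathlib import Path

def parse_ucs_name(path: Path) -> tuple[str, str] | None:
    """Split a UCS-style stem 'CatID_FXName[_...]' into (CatID, FX Name)."""
    if path.suffix.lower() not in ('.wav', '.flac'):
        return None
    parts = path.stem.split('_')
    # Require at least a CatID and an FX Name on either side of the '_'.
    if len(parts) < 2 or not parts[0] or not parts[1]:
        return None
    return parts[0], parts[1]
```

For example, `parse_ucs_name(Path('AMBForst_Morning Birds_TakeA.wav'))` would yield `('AMBForst', 'Morning Birds')`, and `gather` would then pair the CatID with the ffprobe description to form one CSV row.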
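
The `evaluate` docstring describes tabulating whether the correct category lands at rank 1, in the top 5, or in the top 10, plus the worst-performing category and category coverage. A sketch of that bookkeeping, where `rank_of_correct` is a hypothetical stand-in for the SentenceTransformer classification step:

```python
from collections import Counter

def tabulate(rows, rank_of_correct):
    """rows: (cat_id, description) pairs from the dataset CSV.
    rank_of_correct(description, cat_id) -> 0-based position of the
    correct CatID in the model's top-10 guesses, or None on a miss."""
    hits = Counter()
    misses = Counter()      # per-category miss counts
    categories = set()      # distinct CatIDs seen, for coverage
    for cat_id, description in rows:
        categories.add(cat_id)
        r = rank_of_correct(description, cat_id)
        if r == 0:
            hits['top_1'] += 1
        if r is not None and r < 5:
            hits['top_5'] += 1
        if r is not None and r < 10:
            hits['top_10'] += 1
        else:
            misses[cat_id] += 1
    worst = misses.most_common(1)        # worst-performing category
    return hits, worst, len(categories)  # coverage = categories present
```

The buckets are cumulative, so a rank-0 hit counts toward top-1, top-5, and top-10, matching the tabulation the docstring describes.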