Hardening and deployment¶
Part 3: Create production inference function
(Figure: overview of the workflow; the blue boxes show the steps implemented in this notebook.)
In Part 2, we trained our AI model. Now, it's time to create a General Robust End-to-end Automated Trustworthy deployment.
Create the inference function¶
Alongside the prediction, we also return the top-n most influential words based on their weights in the classifier.
In [1]:
import re
import numpy as np
from sklearn.pipeline import Pipeline

from great_ai.utilities import clean
from great_ai import (
    MultiLabelClassificationOutput,
    ClassificationOutput,
    GreatAI,
    use_model,
    parameter,
)


@GreatAI.create
@use_model("small-domain-prediction", version="latest")
@parameter("target_confidence", validate=lambda c: 0 <= c <= 100)
def predict_domain(
    text: str, model: Pipeline, target_confidence: int = 50
) -> MultiLabelClassificationOutput:
    """
    Predict the scientific domain of the input text.
    Return labels until their summed likelihood reaches `target_confidence`.
    """
    # Normalise the text the same way as during training: ASCII-fold it and
    # keep only letters and whitespace
    preprocessed = re.sub(r"[^a-zA-Z\s]", "", clean(text, convert_to_ascii=True))

    features = model.named_steps["vectorizer"].transform([preprocessed])
    prediction = model.named_steps["classifier"].predict_proba(features)[0]

    # Sort class indices by predicted probability, most likely first
    best_classes = sorted(enumerate(prediction), key=lambda v: v[1], reverse=True)

    results = MultiLabelClassificationOutput()
    for class_index, probability in best_classes:
        results.labels.append(
            get_label(
                model=model,
                features=features,
                class_index=class_index,
                probability=probability,
            )
        )
        # Stop once the accumulated confidence reaches the requested threshold
        if sum(r.confidence for r in results.labels) >= target_confidence:
            break

    return results


def get_label(
    model: Pipeline, features: np.ndarray, class_index: int, probability: float
) -> ClassificationOutput:
    # Explain each label with the five highest-weighted words of that class
    # which actually occur in the input
    return ClassificationOutput(
        label=model.named_steps["classifier"].classes_[class_index],
        confidence=round(probability * 100),
        explanation=[
            word
            for _, word in sorted(
                (
                    (weight, word)
                    for weight, word, count in zip(
                        model.named_steps["classifier"].feature_log_prob_[class_index],
                        model.named_steps["vectorizer"].get_feature_names_out(),
                        features.A[0],
                    )
                    if count > 0
                ),
                reverse=True,
            )
        ][:5],
    )
Environment variable ENVIRONMENT is not set, defaulting to development mode ‼️
Cannot find credentials files, defaulting to using ParallelTinyDbDriver
The selected tracing database (ParallelTinyDbDriver) is not recommended for production
Cannot find credentials files, defaulting to using LargeFileLocal
GreatAI (v0.1.6): configured ✅
🔩 tracing_database: ParallelTinyDbDriver
🔩 large_file_implementation: LargeFileLocal
🔩 is_production: False
🔩 should_log_exception_stack: True
🔩 prediction_cache_size: 512
🔩 dashboard_table_size: 50
You still need to check whether you follow all best practices before trusting your deployment.
> Find out more at https://se-ml.github.io/practices
Fetching cached versions of small-domain-prediction
Latest version of small-domain-prediction is 2 (from versions: 0, 1, 2)
File small-domain-prediction-2 found in cache
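Before evaluating the whole test split, we can sanity-check the deployment on a single input. The snippet below is a minimal sketch: the sample abstract is made up, and it reuses process_batch with do_not_persist_traces (the same call used in the evaluation cell below) so the throwaway prediction is not written to the tracing database.

if __name__ == "__main__":
    # Hypothetical sample abstract, used only as a quick smoke test
    sample = (
        "We present a convolutional neural network for classifying images "
        "of skin lesions and evaluate it on a clinical dataset."
    )

    # process_batch returns one trace per input; do_not_persist_traces keeps
    # this test prediction out of the tracing database
    trace = predict_domain.process_batch([sample], do_not_persist_traces=True)[0]

    for label in trace.output.labels:
        print(label.label, label.confidence, label.explanation)

With the default target_confidence of 50, labels are appended until their summed confidence reaches 50, so the printed list typically contains only the one or two most likely domains.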
Check accuracy on the test split¶
Anything under if __name__ == "__main__": will not be run when the script is executed by the great-ai CLI app. This, combined with query_ground_truth and the /traces/{trace_id}/feedback endpoint, is ideal for creating a continuous-integration job that checks the quality of the model before deployment; a minimal sketch of such a check follows the classification report below.
In [2]:
if __name__ == "__main__":
    from great_ai import query_ground_truth
    from sklearn import metrics

    data = query_ground_truth("test")

    X = [d.input for d in data]
    y_actual = [d.feedback for d in data]
    y_predicted = [
        d.output.labels[0].label
        for d in predict_domain.process_batch(X, do_not_persist_traces=True)
    ]

    # A document can have multiple ground-truth labels: count the prediction as
    # correct if it matches any of them, otherwise fall back to the first label
    y_actual_aligned = [p if p in a else a[0] for p, a in zip(y_predicted, y_actual)]

    import matplotlib.pyplot as plt

    # Configure matplotlib to have nice, high-resolution charts
    %matplotlib inline
    plt.rcParams["figure.figsize"] = (19, 18)
    plt.rcParams["figure.facecolor"] = "white"
    plt.rcParams["font.size"] = 16
    plt.rcParams["axes.xmargin"] = 0

    print(metrics.classification_report(y_actual_aligned, y_predicted))

    metrics.ConfusionMatrixDisplay.from_predictions(
        y_true=y_actual_aligned,
        y_pred=y_predicted,
        xticks_rotation="vertical",
        normalize="pred",
        values_format=".2f",
    )
    plt.tight_layout()
    plt.savefig("ss-confusion.png", dpi=600)
100%|██████████| 12272/12272 [01:10<00:00, 174.77it/s]
                       precision    recall  f1-score   support

                  Art       0.54      0.38      0.45       126
              Biology       0.77      0.84      0.80      1215
             Business       0.47      0.73      0.57       311
            Chemistry       0.82      0.67      0.74      1205
     Computer Science       0.77      0.76      0.76      1277
            Economics       0.69      0.55      0.61       270
          Engineering       0.55      0.52      0.53       754
Environmental Science       0.56      0.55      0.55       227
            Geography       0.54      0.39      0.45       276
              Geology       0.74      0.67      0.70       265
              History       0.35      0.18      0.24       140
    Materials Science       0.72      0.81      0.76      1011
          Mathematics       0.77      0.70      0.74       498
             Medicine       0.96      0.77      0.86      2835
           Philosophy       0.57      0.06      0.10        71
              Physics       0.66      0.75      0.70       611
    Political Science       0.44      0.61      0.51       291
           Psychology       0.52      0.84      0.64       574
            Sociology       0.33      0.59      0.42       315

             accuracy                           0.71     12272
            macro avg       0.62      0.60      0.59     12272
         weighted avg       0.74      0.71      0.71     12272
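As mentioned above, this evaluation can double as a quality gate in a continuous-integration job. The snippet below is a minimal sketch of that idea: the 0.7 accuracy threshold is an arbitrary example, and sys.exit is just one way to make the job fail.

if __name__ == "__main__":
    import sys

    from great_ai import query_ground_truth
    from sklearn import metrics

    data = query_ground_truth("test")
    traces = predict_domain.process_batch(
        [d.input for d in data], do_not_persist_traces=True
    )
    predicted = [t.output.labels[0].label for t in traces]
    actual = [p if p in d.feedback else d.feedback[0] for p, d in zip(predicted, data)]

    # Fail the CI job if accuracy regresses below an (arbitrarily chosen) threshold
    accuracy = metrics.accuracy_score(actual, predicted)
    print(f"Accuracy on the test split: {accuracy:.3f}")
    if accuracy < 0.7:
        sys.exit("Model accuracy regressed below the accepted threshold")

Running this file with the regular Python interpreter in CI executes the check, while the great-ai CLI app skips it because it lives under the if __name__ == "__main__": guard.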