Hardening and deployment¶
Part 3: Create a production inference function
[Figure: overview of the GreatAI workflow; the blue boxes show the steps implemented in this notebook.]
In Part 2, we trained our AI model. Now, it's time to create a General, Robust, End-to-end, Automated, Trustworthy (GreatAI) deployment.
Create the inference function¶
Alongside the prediction, we also return the top-n most influential words based on their weights.
In [1]:
import re

import numpy as np
from sklearn.pipeline import Pipeline

from great_ai import (
    ClassificationOutput,
    GreatAI,
    MultiLabelClassificationOutput,
    parameter,
    use_model,
)
from great_ai.utilities import clean


@GreatAI.create
@use_model("small-domain-prediction", version="latest")
@parameter("target_confidence", validate=lambda c: 0 <= c <= 100)
def predict_domain(
    text: str, model: Pipeline, target_confidence: int = 50
) -> MultiLabelClassificationOutput:
    """
    Predict the scientific domain of the input text.

    Return labels until their cumulative likelihood exceeds `target_confidence`.
    """
    # Apply the same preprocessing that was used during training.
    preprocessed = re.sub(r"[^a-zA-Z\s]", "", clean(text, convert_to_ascii=True))
    features = model.named_steps["vectorizer"].transform([preprocessed])
    prediction = model.named_steps["classifier"].predict_proba(features)[0]
    best_classes = sorted(enumerate(prediction), key=lambda v: v[1], reverse=True)

    results = MultiLabelClassificationOutput()
    for class_index, probability in best_classes:
        results.labels.append(
            get_label(
                model=model,
                features=features,
                class_index=class_index,
                probability=probability,
            )
        )
        # Stop once the returned labels jointly cover the requested confidence.
        if sum(r.confidence for r in results.labels) >= target_confidence:
            break

    return results


def get_label(
    model: Pipeline, features: np.ndarray, class_index: int, probability: float
) -> ClassificationOutput:
    return ClassificationOutput(
        label=model.named_steps["classifier"].classes_[class_index],
        confidence=round(probability * 100),
        # Explain the prediction with the five words present in the document
        # that have the highest per-class feature log-probability.
        explanation=[
            word
            for _, word in sorted(
                (
                    (weight, word)
                    for weight, word, count in zip(
                        model.named_steps["classifier"].feature_log_prob_[class_index],
                        model.named_steps["vectorizer"].get_feature_names_out(),
                        features.A[0],
                    )
                    if count > 0
                ),
                reverse=True,
            )
        ][:5],
    )
Environment variable ENVIRONMENT is not set, defaulting to development mode ‼️
Cannot find credentials files, defaulting to using ParallelTinyDbDriver
The selected tracing database (ParallelTinyDbDriver) is not recommended for production
Cannot find credentials files, defaulting to using LargeFileLocal
GreatAI (v0.1.6): configured ✅
  🔩 tracing_database: ParallelTinyDbDriver
  🔩 large_file_implementation: LargeFileLocal
  🔩 is_production: False
  🔩 should_log_exception_stack: True
  🔩 prediction_cache_size: 512
  🔩 dashboard_table_size: 50
You still need to check whether you follow all best practices before trusting your deployment.
> Find out more at https://se-ml.github.io/practices
Fetching cached versions of small-domain-prediction
Latest version of small-domain-prediction is 2 (from versions: 0, 1, 2)
File small-domain-prediction-2 found in cache
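Before measuring accuracy, it is worth sanity-checking the wrapped function on a single input. The snippet below is a sketch and not part of the original notebook: it assumes that calling a GreatAI-wrapped function returns the same trace-like object that process_batch yields later in this notebook, with the prediction available under .output; the sample sentence and target_confidence value are arbitrary.

# A minimal smoke test (a sketch, not from the original notebook).
# Assumption: a direct call returns a trace whose `.output` holds the
# MultiLabelClassificationOutput, mirroring the traces from `process_batch`.
if __name__ == "__main__":
    trace = predict_domain(
        "We study the thermal conductivity of graphene nanoribbons.",
        target_confidence=75,
    )
    for label in trace.output.labels:
        print(label.label, label.confidence, label.explanation)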
Check accuracy on the test split¶
Anything under if __name__ == "__main__": will not be run when the script is executed by the great-ai CLI app. This, combined with query_ground_truth and the /traces/{trace_id}/feedback endpoint, is ideal for creating a continuous-integration job that checks the quality of the model before deployment.
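The feedback endpoint itself is not exercised in this notebook. Purely as an illustration, a correction for a mislabelled trace might be submitted as in the hypothetical sketch below; the HTTP verb, port, and payload shape are assumptions and should be verified against the deployed service's auto-generated API documentation.

# Hypothetical sketch of submitting feedback for a trace (not from the notebook).
# Assumptions: the service listens on localhost:6060, the endpoint accepts PUT,
# and the body is the corrected label as JSON; check the live API docs instead.
import requests


def submit_correction(trace_id: str, correct_label: str) -> None:
    response = requests.put(
        f"http://localhost:6060/traces/{trace_id}/feedback",
        json=correct_label,
    )
    response.raise_for_status()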
In [2]:
if __name__ == "__main__":
    from great_ai import query_ground_truth
    from sklearn import metrics

    data = query_ground_truth("test")

    X = [d.input for d in data]
    y_actual = [d.feedback for d in data]
    y_predicted = [
        d.output.labels[0].label
        for d in predict_domain.process_batch(X, do_not_persist_traces=True)
    ]

    # The ground truth may list several valid domains per document: accept the
    # prediction if it is among them, otherwise fall back to the first one.
    y_actual_aligned = [p if p in a else a[0] for p, a in zip(y_predicted, y_actual)]

    import matplotlib.pyplot as plt

    # Configure matplotlib to have nice, high-resolution charts
    %matplotlib inline
    plt.rcParams["figure.figsize"] = (19, 18)
    plt.rcParams["figure.facecolor"] = "white"
    plt.rcParams["font.size"] = 16
    plt.rcParams["axes.xmargin"] = 0

    print(metrics.classification_report(y_actual_aligned, y_predicted))

    metrics.ConfusionMatrixDisplay.from_predictions(
        y_true=y_actual_aligned,
        y_pred=y_predicted,
        xticks_rotation="vertical",
        normalize="pred",
        values_format=".2f",
    )
    plt.tight_layout()
    plt.savefig("ss-confusion.png", dpi=600)
100%|██████████| 12272/12272 [01:10<00:00, 174.77it/s]
                       precision    recall  f1-score   support

                  Art       0.54      0.38      0.45       126
              Biology       0.77      0.84      0.80      1215
             Business       0.47      0.73      0.57       311
            Chemistry       0.82      0.67      0.74      1205
     Computer Science       0.77      0.76      0.76      1277
            Economics       0.69      0.55      0.61       270
          Engineering       0.55      0.52      0.53       754
Environmental Science       0.56      0.55      0.55       227
            Geography       0.54      0.39      0.45       276
              Geology       0.74      0.67      0.70       265
              History       0.35      0.18      0.24       140
    Materials Science       0.72      0.81      0.76      1011
          Mathematics       0.77      0.70      0.74       498
             Medicine       0.96      0.77      0.86      2835
           Philosophy       0.57      0.06      0.10        71
              Physics       0.66      0.75      0.70       611
    Political Science       0.44      0.61      0.51       291
           Psychology       0.52      0.84      0.64       574
            Sociology       0.33      0.59      0.42       315

             accuracy                           0.71     12272
            macro avg       0.62      0.60      0.59     12272
         weighted avg       0.74      0.71      0.71     12272
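To turn the evaluation above into an automated gate, the same calls can be wrapped in a single threshold check that fails the CI job when quality regresses. The sketch below reuses only functions already shown in this notebook; the 0.7 accuracy threshold is an arbitrary example value, not something prescribed by great-ai.

# A CI-style quality gate built from the calls above (a sketch; the 0.7
# threshold is an arbitrary example value, not part of the original notebook).
if __name__ == "__main__":
    from great_ai import query_ground_truth
    from sklearn import metrics

    data = query_ground_truth("test")
    X = [d.input for d in data]
    y_actual = [d.feedback for d in data]
    y_predicted = [
        d.output.labels[0].label
        for d in predict_domain.process_batch(X, do_not_persist_traces=True)
    ]
    y_aligned = [p if p in a else a[0] for p, a in zip(y_predicted, y_actual)]

    accuracy = metrics.accuracy_score(y_aligned, y_predicted)
    assert accuracy >= 0.7, f"Model accuracy regressed to {accuracy:.2f}"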