-
-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #43 from serengil/feat-task-split-unit-tests
modular unit tests
Showing
25 changed files
with
550 additions
and
427 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
test: | ||
cd tests && python global-unit-test.py | ||
cd tests && python -m pytest . -s --disable-warnings | ||
|
||
lint: | ||
python -m pylint chefboost/ --fail-under=10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
logger = Logger(module="tests/test_adaboost.py") | ||
|
||
|
||
def test_adaboost():
    """Fit an AdaBoost model with the Regression algorithm and check a prediction.

    The training frame is copied and reused as the validation frame, and
    parallelism is switched off through the config.
    """
    config = {
        "algorithm": "Regression",
        "enableAdaboost": True,
        "num_of_weak_classifier": 10,
        "enableParallelism": False,
    }
    df = pd.read_csv("dataset/adaboost.txt")
    validation_df = df.copy()

    model = cb.fit(df, config, validation_df=validation_df, silent=True)

    instance = [4, 3.5]
    prediction = cb.predict(model, instance)

    assert prediction == -1
    # boosting is expected to have built more than one tree
    assert len(model["trees"]) > 1

    # nothing is saved or restored here, so the message must not say "restoration"
    logger.info("✅ adaboost test done")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
logger = Logger(module="tests/test_c45.py") | ||
|
||
|
||
def test_c45_for_nominal_features_and_nominal_target():
    """Build a C4.5 tree on dataset/golf.txt and check the stored algorithm name."""
    df = pd.read_csv("dataset/golf.txt")
    model = cb.fit(df, config={"algorithm": "C4.5"}, silent=True)
    assert model["config"]["algorithm"] == "C4.5"
    # message matches this test's name (nominal features only), not its sibling's
    logger.info("✅ build c4.5 for nominal features and nominal target test done")
|
||
def test_c45_for_nominal_and_numeric_features_and_nominal_target():
    """Build a C4.5 tree on dataset/golf2.txt and check the stored algorithm name."""
    c45_config = {"algorithm": "C4.5"}
    golf_df = pd.read_csv("dataset/golf2.txt")
    fitted = cb.fit(golf_df, config=c45_config, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "C4.5"
    logger.info("✅ build c4.5 for nominal and numeric features and nominal target test done")
|
||
def test_large_dataset():
    """Build a C4.5 tree on dataset/car.data and check the stored algorithm name."""
    c45_config = {"algorithm": "C4.5"}
    car_df = pd.read_csv("dataset/car.data")
    fitted = cb.fit(car_df, config=c45_config, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "C4.5"
    logger.info("✅ build c4.5 for large dataset test done")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
logger = Logger(module="tests/test_cart.py") | ||
|
||
|
||
def test_cart_for_nominal_features_and_nominal_target():
    """Build a CART tree on dataset/golf.txt and check the stored algorithm name."""
    df = pd.read_csv("dataset/golf.txt")
    model = cb.fit(df, config={"algorithm": "CART"}, silent=True)
    assert model["config"]["algorithm"] == "CART"
    # message matches this test's name (nominal features only), not its sibling's
    logger.info("✅ build cart for nominal features and nominal target test done")
|
||
|
||
def test_cart_for_nominal_and_numeric_features_and_nominal_target():
    """Build a CART tree on dataset/golf2.txt and check the stored algorithm name."""
    cart_config = {"algorithm": "CART"}
    golf_df = pd.read_csv("dataset/golf2.txt")
    fitted = cb.fit(golf_df, config=cart_config, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "CART"
    logger.info("✅ build cart for nominal and numeric features and nominal target test done")
|
||
def test_large_dataset():
    """Build a CART tree on dataset/car.data and check the stored algorithm name."""
    df = pd.read_csv("dataset/car.data")
    model = cb.fit(df, config={"algorithm": "CART"}, silent=True)
    assert model["config"]["algorithm"] == "CART"
    # this module exercises CART; the message previously named c4.5
    logger.info("✅ build cart for large dataset test done")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
# Label log records with this module; every test below fits CHAID, not C4.5.
logger = Logger(module="tests/test_chaid.py")
|
||
|
||
def test_c45_for_nominal_features_and_nominal_target():
    """Build a CHAID tree on dataset/golf.txt and check the stored algorithm name.

    NOTE(review): the function name carries a ``c45`` prefix although the
    test fits CHAID; the name is kept so existing test ids do not change.
    """
    chaid_config = {"algorithm": "CHAID"}
    golf_df = pd.read_csv("dataset/golf.txt")
    fitted = cb.fit(golf_df, config=chaid_config, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "CHAID"
    logger.info("✅ build chaid for nominal features and nominal target test done")
|
||
|
||
def test_c45_for_nominal_and_numeric_features_and_nominal_target():
    """Build a CHAID tree on dataset/golf2.txt and check the stored algorithm name.

    NOTE(review): the function name carries a ``c45`` prefix although the
    test fits CHAID; the name is kept so existing test ids do not change.
    """
    chaid_config = {"algorithm": "CHAID"}
    golf_df = pd.read_csv("dataset/golf2.txt")
    fitted = cb.fit(golf_df, config=chaid_config, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "CHAID"
    logger.info("✅ build chaid for nominal and numeric features and nominal target test done")
|
||
|
||
def test_large_dataset():
    """Build a CHAID tree on dataset/car.data and check the stored algorithm name."""
    df = pd.read_csv("dataset/car.data")
    model = cb.fit(df, config={"algorithm": "CHAID"}, silent=True)
    assert model["config"]["algorithm"] == "CHAID"
    # this module exercises CHAID; the message previously named c4.5
    logger.info("✅ build chaid for large dataset test done")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
logger = Logger(module="tests/test_gbm.py") | ||
|
||
|
||
def test_gbm_regression():
    """Fit gradient boosting with the Regression algorithm and check one prediction.

    dataset/golf4.txt is read twice: once for training and once as the
    validation frame.
    """
    train_df = pd.read_csv("dataset/golf4.txt")
    holdout_df = pd.read_csv("dataset/golf4.txt")
    gbm_config = {
        "algorithm": "Regression",
        "enableGBM": True,
        "epochs": 10,
        "learning_rate": 1,
    }

    fitted = cb.fit(train_df, gbm_config, validation_df=holdout_df, silent=True)
    assert fitted["config"]["algorithm"] == "Regression"
    assert len(fitted["trees"]) > 1

    expected = 25
    predicted = cb.predict(fitted, ["Sunny", 85, 85, "Weak"])
    # the prediction must land within one unit of the expected value
    assert abs(predicted - expected) < 1
|
||
|
||
def test_gbm_classification():
    """Fit gradient boosting with the ID3 algorithm on iris and check one prediction.

    The training frame is copied and reused as the validation frame.
    """
    column_names = ["Sepal length", "Sepal width", "Petal length", "Petal width", "Decision"]
    iris_df = pd.read_csv("dataset/iris.data", names=column_names)
    holdout_df = iris_df.copy()

    gbm_config = {
        "algorithm": "ID3",
        "enableGBM": True,
        "epochs": 10,
        "learning_rate": 1,
    }
    fitted = cb.fit(iris_df, gbm_config, validation_df=holdout_df, silent=True)

    expected_label = "Iris-versicolor"
    predicted_label = cb.predict(fitted, [7.0, 3.2, 4.7, 1.4])
    assert predicted_label == expected_label
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
logger = Logger(module="tests/test_id3.py") | ||
|
||
|
||
def test_build_id3_with_no_config():
    """Fit without passing a config and check that the model reports ID3."""
    golf_df = pd.read_csv("dataset/golf.txt")
    fitted = cb.fit(golf_df, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "ID3"
    logger.info("✅ standard id3 test done")
|
||
|
||
def test_build_id3_with_internal_validation_df():
    """Fit ID3 with a validation frame passed to ``fit`` and check its evaluation.

    Also runs feature importance over the rules file of the first tree.
    """
    train_df = pd.read_csv("dataset/golf.txt")
    validation_df = pd.read_csv("dataset/golf.txt")

    fitted = cb.fit(train_df, validation_df=validation_df, silent=True)
    assert fitted["config"]["algorithm"] == "ID3"

    scores = fitted["evaluation"]["validation"]
    for metric in ("Accuracy", "Precision", "Recall", "F1"):
        assert scores.get(metric, 0) > 99
    assert scores.get("Instances", 0) == validation_df.shape[0]
    assert "Confusion matrix" in scores
    assert "Labels" in scores

    # the tree's __spec__.origin is what feature_importance receives as rules
    rules_path = vars(fitted["trees"][0])["__spec__"].origin
    importance_df = cb.feature_importance(rules_path, silent=True)
    assert importance_df.shape[0] == 4

    logger.info("✅ id3 test with internal validation data frame done")
|
||
|
||
def test_build_id3_with_external_validation_set():
    """Fit ID3, then evaluate it on a separately loaded frame via ``cb.evaluate``."""
    train_df = pd.read_csv("dataset/golf.txt")
    fitted = cb.fit(train_df, silent=True)
    assert fitted["config"]["algorithm"] == "ID3"

    validation_df = pd.read_csv("dataset/golf.txt")
    scores = cb.evaluate(fitted, validation_df, silent=True)

    for metric in ("Accuracy", "Precision", "Recall", "F1"):
        assert scores.get(metric, 0) > 99
    assert scores.get("Instances", 0) == validation_df.shape[0]
    assert "Confusion matrix" in scores
    assert "Labels" in scores

    logger.info("✅ id3 test with external validation data frame done")
|
||
|
||
def test_model_restoration():
    """Save a fitted ID3 model, load it from "model.pkl" and predict with the copy."""
    golf_df = pd.read_csv("dataset/golf.txt")
    fitted = cb.fit(golf_df, silent=True)
    assert fitted["config"]["algorithm"] == "ID3"

    # save_model is called without a file name; the model is read back from model.pkl
    cb.save_model(fitted)
    reloaded = cb.load_model("model.pkl")
    assert reloaded["config"]["algorithm"] == "ID3"

    predicted_label = cb.predict(reloaded, ["Sunny", "Hot", "High", "Weak"])
    assert predicted_label == "No"

    logger.info("✅ id3 model restoration test done")
|
||
|
||
def test_build_id3_for_nominal_and_numeric_features_nominal_target():
    """Fit ID3 on dataset/golf2.txt and check the prediction for one instance."""
    golf_df = pd.read_csv("dataset/golf2.txt")
    fitted = cb.fit(golf_df, silent=True)
    assert fitted["config"]["algorithm"] == "ID3"

    predicted_label = cb.predict(fitted, ["Sunny", 85, 85, "Weak"])
    assert predicted_label == "No"
    logger.info("✅ build id3 for nominal and numeric features and nominal target test done")
|
||
|
||
def test_large_data_set():
    """Fit ID3 on dataset/car.data and check the predictions for two instances."""
    car_df = pd.read_csv("dataset/car.data")
    fitted = cb.fit(car_df, silent=True)
    assert fitted["config"]["algorithm"] == "ID3"

    # (instance, expected label) pairs, checked in the original order
    cases = (
        (["vhigh", "vhigh", 2, "2", "small", "low"], "unacc"),
        (["high", "high", "4", "more", "big", "high"], "acc"),
    )
    for instance, expected_label in cases:
        predicted_label = cb.predict(fitted, instance)
        assert predicted_label == expected_label
|
||
|
||
def test_iris_dataset():
    """Fit on dataset/iris.data with explicit column names and check the algorithm."""
    column_names = ["Sepal length", "Sepal width", "Petal length", "Petal width", "Decision"]
    iris_df = pd.read_csv("dataset/iris.data", names=column_names)
    fitted = cb.fit(iris_df, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "ID3"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
logger = Logger(module="tests/test_randomforest.py") | ||
|
||
|
||
def test_randomforest_for_classification():
    """Fit a three-tree random forest on dataset/car.data.

    Checks the train accuracy, the feature-importance row count and the
    predictions for two instances.
    """
    forest_config = {
        "algorithm": "ID3",
        "enableRandomForest": True,
        "num_of_trees": 3,
    }
    car_df = pd.read_csv("dataset/car.data")
    fitted = cb.fit(car_df, forest_config, silent=True)

    assert fitted["config"]["algorithm"] == "ID3"
    assert fitted["evaluation"]["train"]["Accuracy"] > 90

    # feature importance is computed over the rules file of every tree
    rule_files = [vars(tree)["__spec__"].origin for tree in fitted["trees"]]
    importance_df = cb.feature_importance(rule_files, silent=True)
    assert importance_df.shape[0] == 6

    accepted_labels = ["unacc", "acc"]
    instances = (
        # this is not in train data
        ["high", "high", 4, "more", "big", "high"],
        ["vhigh", "vhigh", 2, "2", "small", "low"],
    )
    for instance in instances:
        predicted_label = cb.predict(fitted, instance)
        assert predicted_label in accepted_labels
|
||
|
||
def test_randomforest_for_regression():
    """Fit a five-tree random forest on dataset/car_reg.data.

    The config requests ID3, but the test asserts that the fitted model
    reports "Regression" as its algorithm.
    """
    forest_config = {
        "algorithm": "ID3",
        "enableRandomForest": True,
        "num_of_trees": 5,
    }
    car_df = pd.read_csv("dataset/car_reg.data")
    fitted = cb.fit(car_df, forest_config, silent=True)

    train_mae = fitted["evaluation"]["train"]["MAE"]
    assert train_mae < 30
    assert fitted["config"]["algorithm"] == "Regression"

    expected = 100
    predicted = cb.predict(fitted, ["high", "high", 4, "more", "big", "high"])
    assert abs(predicted - expected) < 30
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import pandas as pd | ||
from chefboost import Chefboost as cb | ||
from chefboost.commons.logger import Logger | ||
|
||
logger = Logger(module="tests/test_regression.py") | ||
|
||
|
||
def test_c45_for_nominal_features_and_numeric_target():
    """Fit with the Regression algorithm on dataset/golf3.txt; passes if fit completes.

    NOTE(review): the function name carries a ``c45`` prefix although the
    config requests "Regression"; the name is kept so test ids do not change.
    """
    regression_config = {"algorithm": "Regression"}
    golf_df = pd.read_csv("dataset/golf3.txt")
    # the fitted model is not inspected
    cb.fit(golf_df, config=regression_config, silent=True)
    logger.info("✅ build regression for nominal features and numeric target test done")
|
||
|
||
def test_c45_for_nominal_and_numeric_features_and_numeric_target():
    """Fit with the Regression algorithm on dataset/golf4.txt; passes if fit completes.

    NOTE(review): the function name carries a ``c45`` prefix although the
    config requests "Regression"; the name is kept so test ids do not change.
    """
    regression_config = {"algorithm": "Regression"}
    golf_df = pd.read_csv("dataset/golf4.txt")
    # the fitted model is not inspected
    cb.fit(golf_df, config=regression_config, silent=True)
    done_message = (
        "✅ build regression tree for nominal and numeric features and numeric target test done"
    )
    logger.info(done_message)
|
||
|
||
def test_switching_to_regression_tree():
    """Request ID3 on dataset/golf4.txt and assert the model reports Regression."""
    requested_config = {"algorithm": "ID3"}
    golf_df = pd.read_csv("dataset/golf4.txt")
    fitted = cb.fit(golf_df, requested_config, silent=True)
    used_algorithm = fitted["config"]["algorithm"]
    assert used_algorithm == "Regression"
    logger.info("✅ switching to regression tree test done")