Randomized-Search

Sun 29 June 2025

import pyutil as pyu

# Show which interpreter this notebook runs on. Judging by the cell output,
# the helper returns a string naming the conda env and the Python version.
# NOTE(review): `pyutil` is a local helper module not shown here -- confirm.
pyu.get_local_pyinfo()

'conda env: C:\\Users\\Afia Jahan\\anaconda3\\envs\\py312; pyv: 3.12.11 | packaged by Anaconda, Inc. | (main, Jun  5 2025, 12:58:53) [MSC v.1929 64 bit (AMD64)]'

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from scipy.stats import randint

# Load the Titanic dataset
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
data = pd.read_csv(url)

# Preprocessing
# Select features and target.
# Take an explicit copy of the feature columns: `data[features]` alone is a
# slice of `data`, and assigning into it raises SettingWithCopyWarning and
# leaves it ambiguous whether the write lands on a view or a copy.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
X = data[features].copy()
y = data['Survived']

# Handle missing values: median for the numeric Age column, most frequent
# value for the categorical Embarked column.
# NOTE: these statistics are computed on the full dataset, i.e. before the
# train/test split below, so the test rows influence the imputed values.
X['Age'] = X['Age'].fillna(X['Age'].median())
X['Embarked'] = X['Embarked'].fillna(X['Embarked'].mode()[0])

# Convert categorical variables to numerical (one-hot encoding; drop_first
# removes one level per variable to avoid redundant indicator columns)
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)

# Train-test split: hold out 20% of the rows, with a fixed seed for
# reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base estimator; the fixed seed keeps the forest itself reproducible
model = RandomForestClassifier(random_state=42)

# Search space. Each entry is a frozen scipy discrete-uniform distribution;
# randint(low, high) draws integers from [low, high), so the upper bound is
# never sampled.
param_distributions = dict(
    n_estimators=randint(100, 500),      # number of trees: 100..499
    max_depth=randint(5, 20),            # maximum tree depth: 5..19
    min_samples_split=randint(2, 10),    # min samples to split a node: 2..9
)

# Randomized hyperparameter search: 50 sampled candidates, each scored by
# 5-fold cross-validated accuracy, evaluated in parallel on all cores.
random_search = RandomizedSearchCV(
    model,
    param_distributions,
    scoring='accuracy',
    cv=5,
    n_iter=50,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

RandomizedSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
                   n_iter=50, n_jobs=-1,
                   param_distributions={'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x00000173E76B6DE0>,
                                        'min_samples_split': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x00000173E79D85C0>,
                                        'n_estimators': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x00000173E768D610>},
                   random_state=42, scoring='accuracy')

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

RandomizedSearchCV

Documentation for RandomizedSearchCV (status: Fitted)

Parameters

	estimator	RandomForestC...ndom_state=42)
	param_distributions	{'max_depth': <scipy.stats....00173E76B6DE0>, 'min_samples_split': <scipy.stats....00173E79D85C0>, 'n_estimators': <scipy.stats....00173E768D610>}
	n_iter	50
	scoring	'accuracy'
	n_jobs	-1
	refit	True
	cv	5
	verbose	0
	pre_dispatch	'2*n_jobs'
	random_state	42
	error_score	nan
	return_train_score	False

best_estimator_: RandomForestClassifier

RandomForestClassifier(max_depth=7, n_estimators=491, random_state=42)

RandomForestClassifier

Documentation for RandomForestClassifier

Parameters

	n_estimators	491
	criterion	'gini'
	max_depth	7
	min_samples_split	2
	min_samples_leaf	1
	min_weight_fraction_leaf	0.0
	max_features	'sqrt'
	max_leaf_nodes	None
	min_impurity_decrease	0.0
	bootstrap	True
	oob_score	False
	n_jobs	None
	random_state	42
	verbose	0
	warm_start	False
	class_weight	None
	ccp_alpha	0.0
	max_samples	None
	monotonic_cst	None

# Report the winning hyperparameter combination and its mean
# cross-validated accuracy
print(f"Best Parameters: {random_search.best_params_}")
print(f"Best Score: {random_search.best_score_}")

Best Parameters: {'max_depth': 7, 'min_samples_split': 2, 'n_estimators': 491}
Best Score: 0.8342263370432385

# Score the best estimator found by the search on the held-out test split
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.8156424581005587

Score: 10

Category: basics