# Finding Causes for Disease
import pandas as pd
import numpy as np
# Generate a synthetic table of tumour-style measurements with a binary
# ``target`` label, then show it.
#
# Each row gets a uniformly random "mean radius" and "mean texture" in
# [10, 25). The label is 1 only when BOTH exceed 18; the remaining features
# are then drawn from a range that depends on the label, so they correlate
# with the target.

np.random.seed(42)  # Set seed for reproducibility
num_samples = 5000  # Increased number of samples

data = []
for i in range(num_samples):
    # NOTE: the order of the np.random calls below determines the seeded
    # output; reordering them changes every generated value.
    mean_radius = np.random.uniform(10, 25)
    mean_texture = np.random.uniform(10, 25)

    # Make the target variable dependent on the feature values
    if mean_radius > 18 and mean_texture > 18:
        target = 1
    else:
        target = 0

    sample = {
        "index": i,
        "mean radius": mean_radius,
        "mean texture": mean_texture,
        # Label-dependent ranges: target == 1 rows draw from a higher range
        # that overlaps the target == 0 range.
        "mean perimeter": np.random.uniform(70, 150) if target == 0 else np.random.uniform(100, 200),
        "mean area": np.random.uniform(300, 1500) if target == 0 else np.random.uniform(1000, 2500),
        "mean smoothness": np.random.uniform(0.05, 0.15) if target == 0 else np.random.uniform(0.1, 0.2),
        # ... (rest of the features)
        "target": target,
    }
    data.append(sample)

df = pd.DataFrame(data)

# ``display`` is injected by IPython/Jupyter and is undefined in a plain
# Python interpreter; fall back to ``print`` there instead of raising
# NameError.
try:
    _show = display
except NameError:
    _show = print
_show(df)
# Last updated