Finding Causes for Disease

import pandas as pd
import numpy as np

np.random.seed(42)  # Fixed seed so every run yields the same synthetic dataset

num_samples = 5000  # Number of synthetic records to generate

# Records are drawn one at a time, in a fixed order (radius, texture,
# perimeter, area, smoothness). The seeded output depends on that draw order,
# so the loop is intentionally not vectorized.
data = []
for i in range(num_samples):
    mean_radius = np.random.uniform(10, 25)
    mean_texture = np.random.uniform(10, 25)

    # The label is fully determined by the two features above: it is positive
    # only when both exceed 18.
    target = 1 if mean_radius > 18 and mean_texture > 18 else 0

    # Positive records draw the remaining features from ranges shifted upward
    # (overlapping the negative ranges), so these features track the label.
    if target == 0:
        mean_perimeter = np.random.uniform(70, 150)
        mean_area = np.random.uniform(300, 1500)
        mean_smoothness = np.random.uniform(0.05, 0.15)
    else:
        mean_perimeter = np.random.uniform(100, 200)
        mean_area = np.random.uniform(1000, 2500)
        mean_smoothness = np.random.uniform(0.1, 0.2)

    sample = {
        "index": i,
        "mean radius": mean_radius,
        "mean texture": mean_texture,
        "mean perimeter": mean_perimeter,
        "mean area": mean_area,
        "mean smoothness": mean_smoothness,
        # ... (rest of the features)
        "target": target,
    }
    data.append(sample)

df = pd.DataFrame(data)

# `display` only exists when running under IPython/Jupyter; fall back to a
# plain print so the script also works (and can be imported) elsewhere.
try:
    display(df)
except NameError:
    print(df)

Last updated