Patient Analysis

This notebook analyzes the “patient” responses collected by the survey.

Questions that we want to answer

We want to know how likely it is that users will use a pickup box for prescriptions based on their age range
- We want to know if our proposed solution will have a high customer acceptance rate
We want to know what patients believe is the mean acceptable amount of time to wait to pick up prescriptions
- We want to have a benchmark by which we can determine if our solution meets the needs of customers.

Import dependencies

# import dependencies
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as sci
import pandas as pd
from pathlib import Path
import uuid
from IPython.display import display, HTML

# Some formatting options
%config InlineBackend.figure_formats = ['svg']

def disp(df):
    """
    Render a dataframe in the notebook output as an HTML table.

    Args:
        df (dataframe): Dataframe to render
    """
    table_markup = df.to_html()
    display(HTML(table_markup))
    
    

Load patient survey data into dataframe

# Set to True to render the complete raw survey table in the cell output
display_all_data = False

# CSV file holding the patient survey responses
file_path = "./output/patient-survey-data.csv"
df = pd.read_csv(file_path)

if display_all_data:
    disp(df)

We want to know how likely it is that users will use a pickup box for prescriptions based on their age range

ages = ["Under 21", "21 - 35", "35 - 40", "40 - 64", "65 +"]
col_name = "How likely are you to use an automated pick-up locker to pick up your prescriptions?"
age_col_name = "What is your age range?"

# One single-column frame of response counts per age range, in `ages` order
counts_by_age = []

for age in ages:
    # Responses given by the current age range only
    frame = df.loc[df[age_col_name] == age]
    counts = frame[col_name].value_counts().to_frame()

    # DataFrame.plot opens its own figure, so no plt.figure() call is made
    # here; an extra call only leaves an empty figure in the cell output.
    counts.plot.barh()
    plt.suptitle(str("Count of Respondants " + age))
    plt.xlabel("Count of Respondants")
    plt.ylabel("liklihood")

    # Label the single count column positionally: the column name produced
    # by value_counts() is not the same in every pandas version.
    counts.columns = [age]
    counts_by_age.append(counts)

# Align every age range on the response label. Row order follows the first
# age range; a label that an age range never chose gets a count of 0.
df_counts = pd.concat(counts_by_age, axis=1, sort=False).fillna(0)
disp(df_counts)
df_counts.plot.barh(figsize=(10, 6))
plt.suptitle("Total Count of Liklihood Responses by Age")
plt.xlabel("Count of Respondants")
plt.ylabel("liklihood")

	Under 21	21 - 35	35 - 40	40 - 64	65 +
Very likely	65	438	165	57	3
Somewhat likely	48	441	233	70	8
Likely	45	479	270	89	6
Not very likely	35	199	59	45	3
Never	10	33	10	16	7

Text(0, 0.5, 'liklihood')

<Figure size 432x288 with 0 Axes>

svg

<Figure size 432x288 with 0 Axes>

svg

<Figure size 432x288 with 0 Axes>

svg

<Figure size 432x288 with 0 Axes>

svg

<Figure size 432x288 with 0 Axes>

svg

<Figure size 432x288 with 0 Axes>

svg

# Work on a copy so the raw counts in df_counts are not overwritten
df_probs = df_counts.copy()

# Response counts over all respondents, regardless of age range
tot_counts = df[col_name].value_counts()
tot_counts = tot_counts.to_frame()

# Take the single count column by position (its name depends on the pandas
# version); assignment aligns it to df_probs on the response label.
df_probs["All"] = tot_counts.iloc[:, 0]

# Convert each column of counts into shares that sum to 1 within the column
for column in ages + ["All"]:
    df_probs[column] = df_probs[column] / df_probs[column].sum()

disp(df_probs)
# DataFrame.plot creates the figure it draws on, so none is opened beforehand
ax = df_probs.plot.bar(figsize=(10, 6))
plt.suptitle("Likelihood Responses by Age")
plt.ylabel("Probability")
plt.xlabel("Likelihood")

	Under 21	21 - 35	35 - 40	40 - 64	65 +	All
Very likely	0.320197	0.275472	0.223881	0.205776	0.111111	0.257052
Somewhat likely	0.236453	0.277358	0.316147	0.252708	0.296296	0.282087
Likely	0.221675	0.301258	0.366350	0.321300	0.222222	0.313822
Not very likely	0.172414	0.125157	0.080054	0.162455	0.111111	0.120240
Never	0.049261	0.020755	0.013569	0.057762	0.259259	0.026798

Text(0.5, 0, 'Likelihood')

<Figure size 432x288 with 0 Axes>

svg

# Kernel density estimate of the response probabilities, one curve per column.
# DataFrame.plot creates the figure it draws on, so none is opened beforehand
# (doing so only leaves an empty figure in the cell output).
ax = df_probs.plot.density(figsize=(10, 6))
plt.suptitle("Density of Response Probability by Age")
plt.xlabel("Probability")
plt.ylabel("Density")

Text(0, 0.5, 'Density')

<Figure size 432x288 with 0 Axes>

svg

We want to know what patients believe is the mean acceptable amount of time to wait to pick up prescriptions

This gives us a benchmark by which we can determine whether our solution meets the needs of customers.

Correlate time ranges to numerical values

df_times = pd.DataFrame()
target_ideal = "What do you think is an acceptable amount of time to wait for services at your pharmacy?"
target_true = "How long do you usually wait at the pharmacy?"
min_ideal_wait_times_val = "Min ideal reported wait times"
min_actual_wait_times_val = "Min actual reported wait times"
max_ideal_wait_times_val = "Max ideal reported wait times"
max_actual_wait_times_val = "Max actual reported wait times"

# Every answer is a time-bucket label. Keep two copies of each question so one
# copy can be replaced by the bucket's lower bound and the other by its upper.
df_times[min_ideal_wait_times_val] = df[target_ideal]
df_times[min_actual_wait_times_val] = df[target_true]
df_times[max_ideal_wait_times_val] = df[target_ideal]
df_times[max_actual_wait_times_val] = df[target_true]

# [answer label, lower bound (min), upper bound (min)]
# The last three rows are different spellings of the same open-ended bucket
# and all map to the same 16-20 bounds.
time_ranges = [
    ["0-3 minutes", 0, 3],
    ["4-6 minutes", 4, 6],
    ["7-9 minutes", 7, 9],
    ["11-15 minutes", 11, 15],
    ["more than 15 minutes", 16, 20],
    ["More than 15 minutes", 16, 20],
    [">16 minutes", 16, 20],
]

lower_bound_columns = [min_ideal_wait_times_val, min_actual_wait_times_val]
upper_bound_columns = [max_ideal_wait_times_val, max_actual_wait_times_val]

# Replace each label with its numeric bound. The loop variables are named so
# that the builtin `range` is not rebound for the rest of the notebook.
for label, lower_bound, upper_bound in time_ranges:
    for column in lower_bound_columns:
        df_times.loc[df_times[column] == label, column] = lower_bound
    for column in upper_bound_columns:
        df_times.loc[df_times[column] == label, column] = upper_bound

mean_min_ideal = np.round(np.mean(df_times[min_ideal_wait_times_val]), 3)
mean_min_actual = np.round(np.mean(df_times[min_actual_wait_times_val]), 3)
mean_max_ideal = np.round(np.mean(df_times[max_ideal_wait_times_val]), 3)
mean_max_actual = np.round(np.mean(df_times[max_actual_wait_times_val]), 3)
# Not used in the summary table below; kept as a notebook-level variable
stddev_mean_min_ideal = np.round(np.std(df_times[min_ideal_wait_times_val]), 3)
# Gaps between ideal and actual, computed from the already-rounded means
diff_min = abs(mean_min_ideal - mean_min_actual)
diff_max = abs(mean_max_ideal - mean_max_actual)


# Summary table: rows are ideal / actual / difference, columns are the means
# of the lower and upper bucket bounds.
df_mean_times = pd.DataFrame(
    [
        [mean_min_ideal, mean_max_ideal],
        [mean_min_actual, mean_max_actual],
        [diff_min, diff_max],
    ],
    columns=["Mean Minimum (min)", "Mean Max (min)"],
    index=["Ideal Reported Wait Times", "Actual Reported Wait times", "Difference"],
)

Show table of the mean minimum and maximum wait times (ideal vs. actual)

# Render the ideal vs. actual mean wait-time summary as an HTML table
disp(df_mean_times)

	Mean Minimum (min)	Mean Max (min)
Ideal Reported Wait Times	6.561	9.146
Actual Reported Wait times	7.675	10.472
Difference	1.114	1.326

Investigate the density of responses

# Cast every wait-time column to int before plotting
df_times = df_times.astype(int)

# Each DataFrame.plot call below creates the figure it draws on, so no
# plt.figure() calls are made; they only left empty figures in the output.

# Kernel density estimate, one subplot per column
df_times.plot.kde(figsize=(10, 6), subplots=True)

# Overlaid histograms of all four columns
df_times.plot.hist(alpha=0.5, figsize=(10, 6))

# Box plot of all four columns
df_times.plot.box(figsize=(12, 6))

<AxesSubplot: >

svg

<Figure size 432x288 with 0 Axes>

svg

<Figure size 432x288 with 0 Axes>

svg