Patient Analysis
This notebook is responsible for analyzing the "patient" survey responses that have been collected from this survey
Questions that we want to answer
- We want to know how likely it is that users will use a pickup box for prescriptions based on their age range
- We want to know if our proposed solution will have a high customer acceptance rate
- We want to know what patients believe is the mean acceptable amount of time to wait to pick up prescriptions
- We want to have a benchmark by which we can determine if our solution meets the needs of customers.
Import dependencies
# import dependencies
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as sci
import pandas as pd
from pathlib import Path
import uuid
from IPython.display import display, HTML
# Some formatting options
%config InlineBackend.figure_formats = ['svg']
def disp(df):
    """
    Render a dataframe in the notebook as an HTML table.

    Args:
        df (dataframe): Dataframe to render
    """
    html_table = df.to_html()
    display(HTML(html_table))
Load patient survey data into dataframe
# Toggle: set to True to render the entire raw response table below this cell.
display_all_data = False

# CSV holding the collected patient survey responses.
file_path = "./output/patient-survey-data.csv"
df = pd.read_csv(file_path)

if display_all_data:
    disp(df)
We want to know how likely it is that users will use a pickup box for prescriptions based on their age range
# Age brackets to compare, and the survey question whose answers are counted.
ages = ["Under 21", "21 - 35", "35 - 40", "40 - 64", "65 +"]
col_name = "How likely are you to use an automated pick-up locker to pick up your prescriptions?"

age_column = df["What is your age range?"]

# One Series of answer counts per age bracket, keyed by the bracket label.
counts_by_age = {}
for age in ages:
    # Name the Series after the bracket directly instead of renaming a
    # to_frame() column afterwards: the column name produced by
    # value_counts().to_frame() differs between pandas versions.
    counts = df.loc[age_column == age, col_name].value_counts().rename(age)
    counts_by_age[age] = counts

    if counts.empty:
        # Nothing to draw for a bracket with no respondents.
        continue

    # Draw on an explicitly created axes; calling plt.figure() and then letting
    # pandas open its own figure leaves an empty figure behind.
    fig, ax = plt.subplots()
    counts.plot.barh(ax=ax)
    fig.suptitle("Count of Respondants " + age)
    ax.set_xlabel("Count of Respondants")
    ax.set_ylabel("liklihood")

# Combine on the union of answer categories so an answer missing from one
# bracket still gets a row; absent combinations count as 0.
df_counts = pd.concat(counts_by_age, axis=1).fillna(0).astype(int)
disp(df_counts)

ax = df_counts.plot.barh(figsize=(10, 6))
ax.figure.suptitle("Total Count of Liklihood Responses by Age")
ax.set_xlabel("Count of Respondants")
ax.set_ylabel("liklihood")
Under 21 | 21 - 35 | 35 - 40 | 40 - 64 | 65 + | |
---|---|---|---|---|---|
Very likely | 65 | 438 | 165 | 57 | 3 |
Somewhat likely | 48 | 441 | 233 | 70 | 8 |
Likely | 45 | 479 | 270 | 89 | 6 |
Not very likely | 35 | 199 | 59 | 45 | 3 |
Never | 10 | 33 | 10 | 16 | 7 |
Text(0, 0.5, 'liklihood')
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
# Work on a copy so the raw counts in df_counts are not overwritten by the
# normalisation below.
df_probs = df_counts.copy()

# Answer counts over every row of df, aligned to df_probs by answer label.
tot_counts = df[col_name].value_counts()
df_probs["All"] = tot_counts

# Divide each column by its own total so every column sums to 1.
df_probs = df_probs.div(df_probs.sum(axis=0), axis=1)
disp(df_probs)

# pandas opens its own figure for the chart, so no plt.figure() beforehand
# (that only produced an extra empty figure).
ax = df_probs.plot.bar(figsize=(10, 6))
ax.figure.suptitle("Likelihood Responses by Age")
ax.set_ylabel("Probability")
ax.set_xlabel("Likelihood")
Under 21 | 21 - 35 | 35 - 40 | 40 - 64 | 65 + | All | |
---|---|---|---|---|---|---|
Very likely | 0.320197 | 0.275472 | 0.223881 | 0.205776 | 0.111111 | 0.257052 |
Somewhat likely | 0.236453 | 0.277358 | 0.316147 | 0.252708 | 0.296296 | 0.282087 |
Likely | 0.221675 | 0.301258 | 0.366350 | 0.321300 | 0.222222 | 0.313822 |
Not very likely | 0.172414 | 0.125157 | 0.080054 | 0.162455 | 0.111111 | 0.120240 |
Never | 0.049261 | 0.020755 | 0.013569 | 0.057762 | 0.259259 | 0.026798 |
Text(0.5, 0, 'Likelihood')
<Figure size 432x288 with 0 Axes>
# Kernel density estimate of the response probabilities, one curve per column
# of df_probs. pandas opens its own figure, so no plt.figure() beforehand
# (that only produced an extra empty figure).
ax = df_probs.plot.density(figsize=(10, 6))
ax.figure.suptitle("Density of Response Probability by Age")
ax.set_xlabel("Probability")
ax.set_ylabel("Density")
Text(0, 0.5, 'Density')
<Figure size 432x288 with 0 Axes>
We want to know what patients believe is the mean acceptable amount of time to wait to pick up prescriptions
So we can have a benchmark by which we can determine if our solution meets the needs of customers.
Correlate time ranges to numerical values
# Survey questions: the wait respondents consider acceptable, and the wait
# they report usually having.
target_ideal = "What do you think is an acceptable amount of time to wait for services at your pharmacy?"
target_true = "How long do you usually wait at the pharmacy?"

# Column labels for the numeric lower/upper bound of each answer.
min_ideal_wait_times_val = "Min ideal reported wait times"
min_actual_wait_times_val = "Min actual reported wait times"
max_ideal_wait_times_val = "Max ideal reported wait times"
max_actual_wait_times_val = "Max actual reported wait times"

# [answer label, lower bound, upper bound], bounds in minutes.
# The three spellings of the open-ended top answer all map to 16-20.
time_ranges = [
    ["0-3 minutes", 0, 3],
    ["4-6 minutes", 4, 6],
    ["7-9 minutes", 7, 9],
    ["11-15 minutes", 11, 15],
    ["more than 15 minutes", 16, 20],
    ["More than 15 minutes", 16, 20],
    [">16 minutes", 16, 20],
]

# Label -> bound lookups. Mapping through these replaces the previous
# `for range in time_ranges` loop, which rebound the builtin `range` for the
# rest of the notebook and wrote ints into string columns in place.
min_minutes_by_label = {label: low for label, low, _ in time_ranges}
max_minutes_by_label = {label: high for label, _, high in time_ranges}

# Fail loudly on an answer with no numeric mapping instead of letting it
# silently drop out of the means.
unmapped_answers = (
    set(df[target_ideal].dropna()) | set(df[target_true].dropna())
) - set(min_minutes_by_label)
if unmapped_answers:
    raise ValueError(
        f"Wait-time answers without a numeric mapping: {sorted(map(str, unmapped_answers))}"
    )

df_times = pd.DataFrame(
    {
        min_ideal_wait_times_val: df[target_ideal].map(min_minutes_by_label),
        min_actual_wait_times_val: df[target_true].map(min_minutes_by_label),
        max_ideal_wait_times_val: df[target_ideal].map(max_minutes_by_label),
        max_actual_wait_times_val: df[target_true].map(max_minutes_by_label),
    }
)

# Mean of each bound, rounded to 3 decimals.
mean_min_ideal = np.round(np.mean(df_times[min_ideal_wait_times_val]), 3)
mean_min_actual = np.round(np.mean(df_times[min_actual_wait_times_val]), 3)
mean_max_ideal = np.round(np.mean(df_times[max_ideal_wait_times_val]), 3)
mean_max_actual = np.round(np.mean(df_times[max_actual_wait_times_val]), 3)

# Spread of the lower-bound ideal wait; not used in the summary table below.
stddev_mean_min_ideal = np.round(np.std(df_times[min_ideal_wait_times_val]), 3)

# Absolute gap between the ideal and actual means.
diff_min = abs(mean_min_ideal - mean_min_actual)
diff_max = abs(mean_max_ideal - mean_max_actual)

df_mean_times = pd.DataFrame(
    [
        [mean_min_ideal, mean_max_ideal],
        [mean_min_actual, mean_max_actual],
        [diff_min, diff_max],
    ],
    columns=["Mean Minimum (min)", "Mean Max (min)"],
    index=["Ideal Reported Wait Times", "Actual Reported Wait times", "Difference"],
)
Show table with min and max desired times
# Render the mean wait-time summary (rows: ideal, actual, difference).
disp(df_mean_times)
Mean Minimum (min) | Mean Max (min) | |
---|---|---|
Ideal Reported Wait Times | 6.561 | 9.146 |
Actual Reported Wait times | 7.675 | 10.472 |
Difference | 1.114 | 1.326 |
Investigate the density of responses
# Cast the wait-time bounds to int before plotting.
df_times = df_times.astype(int)

# Each pandas plot call below opens its own figure, so no plt.figure() calls
# in between (those only produced extra empty figures).

# Kernel density estimate, one subplot per column.
df_times.plot.kde(figsize=(10, 6), subplots=True)

# Overlaid histograms of all four columns.
df_times.plot.hist(alpha=0.5, figsize=(10, 6))

# Box plot per column.
df_times.plot.box(figsize=(12, 6))
<AxesSubplot: >
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>