Closed LI-Yixuan closed 3 years ago
# Demo script: draws a shared-control plot, a baseline repeated-measures plot
# and a sequential repeated-measures plot from the same simulated data set.
import numpy as np
import pandas as pd
import dabest
import pylab
import matplotlib.pyplot as plt
from scipy.stats import norm

np.random.seed(9999)
Ns = 20  # The number of samples taken from each population

# Simulated control (c*) and test (t*) samples, Ns normal draws each.
c1 = norm.rvs(loc=3, scale=0.4, size=Ns)
c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)
c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)
t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)
t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)
t3 = norm.rvs(loc=3, scale=0.75, size=Ns)
t4 = norm.rvs(loc=3.5, scale=0.75, size=Ns)
t5 = norm.rvs(loc=3.25, scale=0.4, size=Ns)
t6 = norm.rvs(loc=3.25, scale=0.4, size=Ns)

# `Gender` is used below to colour the data points.
# np.repeat expects an integer repeat count, so use floor division
# rather than passing the float `Ns / 2`.
females = np.repeat('Female', Ns // 2).tolist()
males = np.repeat('Male', Ns // 2).tolist()
gender = females + males

# One identifier per row; passed as `id_col` for the repeated-measures plots.
id_col = pd.Series(range(1, Ns + 1))

df = pd.DataFrame({
    'Control 1': c1, 'Test 1': t1,
    'Control 2': c2, 'Test 2': t2,
    'Control 3': c3, 'Test 3': t3,
    'Test 4': t4, 'Test 5': t5, 'Test 6': t6,
    'Gender': gender, 'ID': id_col,
})

# The same two groups of columns are compared in all three plots.
groups = (
    ("Control 1", "Test 1", "Test 2", "Test 3"),
    ("Control 2", "Test 4", "Test 5", "Test 6"),
)

# Shared-control plot: no `id_col` and no `repeated_measures`.
shared_control = dabest.load(df, idx=groups)
shared_control.mean_diff.plot(color_col="Gender")
pylab.show()

# Baseline repeated measures.
baseline = dabest.load(df, id_col="ID", idx=groups,
                       repeated_measures="baseline")
baseline.mean_diff.plot(color_col="Gender")
pylab.show()

# Sequential repeated measures.
sequential = dabest.load(df, id_col="ID", idx=groups,
                         repeated_measures="sequential")
sequential.mean_diff.plot(color_col="Gender")
pylab.show()
I'll take a look soon; just want to know how this is different from #112?
I'll take a look soon; just want to know how this is different from #112?
They are generally the same, except that `paired` and `is_paired` are removed in this version; only `repeated_measures` is used. In addition, the slopeplot is implemented for both baseline and sequential repeated measures.
They are generally the same except that `paired` and `is_paired` are removed in this version, only `repeated_measures` is used.
So `paired` as a keyword is totally gone? Does this mean that current paired plots no longer function?
They are generally the same except that `paired` and `is_paired` are removed in this version, only `repeated_measures` is used.
So `paired` as a keyword is totally gone? This means that current paired plots no longer function?
Yep, in this version I only have `repeated_measures` as a parameter in `load()`, because `paired` is kind of a duplicate of `repeated_measures`; `is_paired` is replaced by `repeated_measures`: instead of checking whether `is_paired` is True, the code currently checks whether `repeated_measures` is not None.
Paired plots still work but need to be specified by `repeated_measures = "sequential"` or `repeated_measures = "baseline"`.
They are generally the same except that `paired` and `is_paired` are removed in this version, only `repeated_measures` is used.
So `paired` as a keyword is totally gone? This means that current paired plots no longer function?
They are generally the same except that `paired` and `is_paired` are removed in this version, only `repeated_measures` is used.
So `paired` as a keyword is totally gone? This means that current paired plots no longer function?
Yep in this version I only have `repeated_measures` as a parameter in `load()`, because `paired` is kind of a duplicate of `repeated_measures`; `is_paired` is replaced by `repeated_measures`: instead of checking if `is_paired` is True, currently the code checks for `repeated_measures` is not None. Paired plots still work but need to be specified by `repeated_measures = "sequential"` or `repeated_measures = "baseline"`.
Hmm, or perhaps I could propose another way of implementing this, keeping both `paired` and `repeated_measures`:
`is_paired`: `is_paired` is not a boolean representation of `paired` in `load()`, but rather a representation of whether the comparisons of data are paired; `is_paired = True` when 1) `paired=True`, 2) `repeated_measures='sequential'`, or 3) `repeated_measures='baseline'`.
Will this be better?
In this version, `repeated_measures` is completely replaced by `paired`/`is_paired`, and `paired = "baseline"` now produces a multi-paired plot. A demo is here:
import numpy as np
import pandas as pd
import dabest
import pylab
import matplotlib.pyplot as plt
from scipy.stats import norm
# Demo script: sequential, baseline and shared-control plots built from the
# same simulated data set, using the `paired` keyword of `dabest.load`.
np.random.seed(9999)  # Fix the seed so the results are replicable.
Ns = 20  # The number of samples taken from each population

# Simulated control (c*) and test (t*) samples, Ns normal draws each.
c1 = norm.rvs(loc=3, scale=0.4, size=Ns)
c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)
c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)
t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)
t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)
t3 = norm.rvs(loc=3, scale=0.75, size=Ns)
t4 = norm.rvs(loc=3.5, scale=0.75, size=Ns)
t5 = norm.rvs(loc=3.25, scale=0.4, size=Ns)
t6 = norm.rvs(loc=3.25, scale=0.4, size=Ns)

# `Gender` is used below to colour the data points.
# np.repeat expects an integer repeat count, so use floor division
# rather than passing the float `Ns / 2`.
females = np.repeat('Female', Ns // 2).tolist()
males = np.repeat('Male', Ns // 2).tolist()
gender = females + males

# One identifier per row; passed to `dabest.load` as `id_col`.
id_col = pd.Series(range(1, Ns + 1))

df = pd.DataFrame({
    'Control 1': c1, 'Test 1': t1,
    'Control 2': c2, 'Test 2': t2,
    'Control 3': c3, 'Test 3': t3,
    'Test 4': t4, 'Test 5': t5, 'Test 6': t6,
    'Gender': gender, 'ID': id_col,
})

# The same two groups of columns are compared in every plot below.
groups = (
    ("Control 1", "Test 1", "Test 2", "Test 3"),
    ("Control 2", "Test 4", "Test 5", "Test 6"),
)

# example of sequential repeated measures
sequential = dabest.load(df, id_col="ID", idx=groups, paired="sequential")
sequential.mean_diff.plot(color_col="Gender")
pylab.show()

# example of baseline repeated measures
baseline = dabest.load(df, id_col="ID", idx=groups, paired="baseline")
baseline.mean_diff.plot(color_col="Gender")
pylab.show()

# Same baseline comparison, plotted with `show_pairs` turned off.
baseline.mean_diff.plot(color_col="Gender", show_pairs=False)
pylab.show()

# example of shared-control plot
shared = dabest.load(df, id_col="ID", idx=groups)
shared.mean_diff.plot(color_col="Gender")
pylab.show()
Here is an example using 0to2_beforeduringafter.csv data:
import numpy as np
import pandas as pd
import dabest
import pylab
import matplotlib.pyplot as plt
# Load the CSV, drop rows containing missing values, and rename the three
# measurement columns to the short labels used in the plots.
data = (
    pd.read_csv("0to2_beforeduringafter.csv")
    .dropna()
    .rename(columns={
        "120beforeFeedSpeed_mm/s_Mean": "before",
        "duringFeedSpeed_mm/s_Mean": "during",
        "120afterFeedSpeed_mm/s_Mean": "after",
    })
)

# Arguments shared by every `dabest.load` call below.
CONDITIONS = ("before", "during", "after")
LOAD_KWARGS = dict(id_col="ChamberID", idx=CONDITIONS)

# example of sequential repeated measures
sequential = dabest.load(data, paired="sequential", **LOAD_KWARGS)
sequential.mean_diff.plot(color_col="Sex")
pylab.show()

# example of baseline repeated measures
baseline = dabest.load(data, paired="baseline", **LOAD_KWARGS)
baseline.mean_diff.plot(color_col="Sex")
pylab.show()

# Same baseline comparison with `show_pairs` off and a smaller raw marker.
baseline.mean_diff.plot(
    color_col="Sex",
    show_pairs=False,
    raw_marker_size=1.7,
)
pylab.show()

# example of shared-control plot
shared = dabest.load(data, **LOAD_KWARGS)
shared.mean_diff.plot(color_col="Sex", raw_marker_size=1.7)
pylab.show()
I'm sorry, this doesn't fit with our needs. I would prefer to revert to the previous functionality.
I'm sorry, this doesn't fit with our needs. I would prefer to revert to the previous functionality.
Actually Adam, I think this might be a more functional/ergonomic design of the API. Let me take a closer look!
Example of the latest repeated-measures baseline plot:
import numpy as np
import pandas as pd
import dabest
import pylab
import matplotlib.pyplot as plt
from scipy.stats import norm
# Demo script: baseline repeated-measures plot on simulated data, drawn once
# with the default settings and once with `show_pairs` turned off.
np.random.seed(9999)  # Fix the seed so the results are replicable.
Ns = 20  # The number of samples taken from each population

# Create samples
c1 = norm.rvs(loc=3, scale=0.4, size=Ns)
c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)
c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)
t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)
t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)
t3 = norm.rvs(loc=3, scale=0.75, size=Ns)
t4 = norm.rvs(loc=3.5, scale=0.75, size=Ns)
t5 = norm.rvs(loc=3.25, scale=0.4, size=Ns)
t6 = norm.rvs(loc=3.25, scale=0.4, size=Ns)

# Add a `gender` column for coloring the data.
# np.repeat expects an integer repeat count, so use floor division
# rather than passing the float `Ns / 2`.
females = np.repeat('Female', Ns // 2).tolist()
males = np.repeat('Male', Ns // 2).tolist()
gender = females + males

# Add an `id` column for paired data plotting.
id_col = pd.Series(range(1, Ns + 1))

# Combine samples and gender into a DataFrame.
df = pd.DataFrame({
    'Control 1': c1, 'Test 1': t1,
    'Control 2': c2, 'Test 2': t2,
    'Control 3': c3, 'Test 3': t3,
    'Test 4': t4, 'Test 5': t5, 'Test 6': t6,
    'Gender': gender, 'ID': id_col,
})

# example of baseline repeated measures
baseline = dabest.load(
    df,
    id_col="ID",
    idx=(
        ("Control 1", "Test 1", "Test 2", "Test 3"),
        ("Control 2", "Test 4", "Test 5", "Test 6"),
    ),
    paired="baseline",
)
baseline.mean_diff.plot(color_col="Gender")
pylab.show()

# Same comparison, plotted with `show_pairs` turned off.
baseline.mean_diff.plot(color_col="Gender", show_pairs=False)
pylab.show()