traitecoevo / austraits.build

Source for AusTraits
Other
16 stars 2 forks source link

Condense min, max, mean traits into single trait name #62

Closed dfalster closed 8 years ago

dfalster commented 8 years ago

Currently, traits like leaf_width, leaf_width_min, and leaf_width_max are treated as different traits. These should really be a single trait (leaf_width), with value_type set to mean, min, or max respectively.

dfalster commented 8 years ago

here's the current list of traits:

> x <- remake::make("austraits")
> sort(unique(x$data$trait_name))
  [1] "aquatic"                                    
  [2] "bark_mass_area"                             
  [3] "bark_thickness"                             
  [4] "branch_mass_fraction"                       
  [5] "diaspore_mass"                              
  [6] "dispersal_appendage"                        
  [7] "dispersal_syndrome"                         
  [8] "fire_response"                              
  [9] "flowering_month"                            
 [10] "flowering_month_end"                        
 [11] "flowering_month_start"                      
 [12] "flowering_time"                             
 [13] "fruit_diameter"                             
 [14] "fruit_length"                               
 [15] "fruit_type"                                 
 [16] "fruit_type_botany"                          
 [17] "fruit_type_function"                        
 [18] "fruiting_time"                              
 [19] "genome_size"                                
 [20] "glaucous"                                   
 [21] "growth_habit"                               
 [22] "leaf_area"                                  
 [23] "leaf_area_per_sapwood_area"                 
 [24] "leaf_area_ratio"                            
 [25] "leaf_C_per_dry_mass"                        
 [26] "leaf_cell_wall_fraction"                    
 [27] "leaf_cell_wall_N"                           
 [28] "leaf_cell_wall_N_fraction"                  
 [29] "leaf_chlorophyll_per_dry_mass"              
 [30] "leaf_CN_ratio"                              
 [31] "leaf_compoundness"                          
 [32] "leaf_dark_respiration_per_area"             
 [33] "leaf_dark_respiration_per_dry_mass"         
 [34] "leaf_delta13C"                              
 [35] "leaf_delta15N"                              
 [36] "leaf_dry_mass"                              
 [37] "leaf_dry_matter_content"                    
 [38] "leaf_elastic_modulus"                       
 [39] "leaf_fracture_force"                        
 [40] "leaf_K_per_area"                            
 [41] "leaf_K_per_dry_mass"                        
 [42] "leaf_length"                                
 [43] "leaf_length_max"                            
 [44] "leaf_length_max_extreme"                    
 [45] "leaf_length_min"                            
 [46] "leaf_length_min_extreme"                    
 [47] "leaf_length_range"                          
 [48] "leaf_lifespan"                              
 [49] "leaf_lobed"                                 
 [50] "leaf_mass_fraction"                         
 [51] "leaf_mass_per_area_SE"                      
 [52] "leaf_N_per_area"                            
 [53] "leaf_N_per_dry_mass"                        
 [54] "leaf_P_per_area"                            
 [55] "leaf_P_per_dry_mass"                        
 [56] "leaf_phenology"                             
 [57] "leaf_photosynthetic_nitrogen_use_efficiency"
 [58] "leaf_photosynthetic_water_use_efficiency"   
 [59] "leaf_saturated_water_content_per_mass"      
 [60] "leaf_shape"                                 
 [61] "leaf_specific_conductivity"                 
 [62] "leaf_thickness"                             
 [63] "leaf_toughness"                             
 [64] "leaf_type"                                  
 [65] "leaf_water_content_per_area"                
 [66] "leaf_water_content_per_mass"                
 [67] "leaf_width"                                 
 [68] "leaf_width_max"                             
 [69] "leaf_width_max_extreme"                     
 [70] "leaf_width_min"                             
 [71] "leaf_width_min_extreme"                     
 [72] "leaf_width_range"                           
 [73] "life_history"                               
 [74] "lifespan"                                   
 [75] "lignotuber"                                 
 [76] "lineage"                                    
 [77] "nitrogen_fixing"                            
 [78] "photosynthetic_pathway"                     
 [79] "photosynthetic_rate_per_area"               
 [80] "photosynthetic_rate_per_dry_mass"           
 [81] "plant_growth_form"                          
 [82] "plant_height"                               
 [83] "plant_height_max"                           
 [84] "plant_height_max_extreme"                   
 [85] "plant_height_min"                           
 [86] "plant_height_min_extreme"                   
 [87] "regen_strategy"                             
 [88] "root_structure"                             
 [89] "root_wood_density"                          
 [90] "sapwood_specific_conductivity"              
 [91] "seed_breadth"                               
 [92] "seed_breadth_max"                           
 [93] "seed_breadth_min"                           
 [94] "seed_breadth_range"                         
 [95] "seed_depth_range"                           
 [96] "seed_diameter"                              
 [97] "seed_length"                                
 [98] "seed_length_max"                            
 [99] "seed_length_min"                            
[100] "seed_length_range"                          
[101] "seed_mass"                                  
[102] "seed_mass_reserve"                          
[103] "seed_P_concentration"                       
[104] "seed_shape"                                 
[105] "seed_size_less_than_10mm"                   
[106] "seed_size_over_10mm"                        
[107] "seed_volume"                                
[108] "seed_width"                                 
[109] "seed_width_max"                             
[110] "seed_width_min"                             
[111] "serotiny"                                   
[112] "specific_leaf_area"                         
[113] "stem_cross_section_area"                    
[114] "stem_cross_section_area_no_bark"            
[115] "stem_tissue_density"                        
[116] "stomatal_conductance_per_area"              
[117] "storage_organ_present"                      
[118] "succulent"                                  
[119] "water_use_efficiency"                       
[120] "wood_density"                               
[121] "wood_density_family"                        
[122] "wood_density_genus"                         
[123] "woodiness"  
dfalster commented 8 years ago

And here are the current ones with min, max:

> vars <- sort(unique(x$data$trait_name))
> vars[grep("max", vars)]
[1] "leaf_length_max"          "leaf_length_max_extreme" 
[3] "leaf_width_max"           "leaf_width_max_extreme"  
[5] "plant_height_max"         "plant_height_max_extreme"
[7] "seed_breadth_max"         "seed_length_max"         
[9] "seed_width_max"          
> vars[grep("min", vars)]
[1] "leaf_length_min"          "leaf_length_min_extreme" 
[3] "leaf_width_min"           "leaf_width_min_extreme"  
[5] "plant_height_min"         "plant_height_min_extreme"
[7] "seed_breadth_min"         "seed_length_min"         
[9] "seed_width_min" 
dfalster commented 8 years ago

Ok, the above commit:

- Adds columns from metadata.yml into the output: "value_type", "replicates", "precision", "methodology_ids".
- Implements changes to combine any traits with extensions _min, _max, _max_extreme, _min_extreme, so that they now have a suitable value_type.

The following conversions were implemented:

- _min -> min
- _max -> max

except in studies containing _max_extreme or _min_extreme, where:

- _min -> lower_quantile
- _min_extreme -> min
- _max -> upper_quantile
- _max_extreme -> max

dfalster commented 8 years ago

Here is the distribution of value types (that are not "unknown") in the new dataset:

> library(dplyr)
> x <- remake::make("austraits")$data
> xx <- filter(x, value_type!="unknown") %>%
+   group_by(study, trait_name) %>%
+   count(value_type) 
> as.data.frame(xx)
            study   trait_name     value_type    n
1     dataset_013 plant_height            max   76
2     dataset_024 plant_height            max  568
3     dataset_029 plant_height            max  494
4     dataset_049  leaf_length            max 2609
5     dataset_049  leaf_length            min 2609
6     dataset_049   leaf_width            max 2609
7     dataset_049   leaf_width            min 2609
8     dataset_049  seed_length            max 2595
9     dataset_049  seed_length            min 2593
10    dataset_049   seed_width            max 2592
11    dataset_049   seed_width            min 2590
12  dataset_054_A  leaf_length            max  615
13  dataset_054_A  leaf_length            min  593
14  dataset_054_A   leaf_width            max  680
15  dataset_054_A   leaf_width            min  666
16  dataset_054_A plant_height            max  679
17  dataset_054_A plant_height            min  651
18  dataset_054_A  seed_length            max  412
19  dataset_054_A  seed_length            min  323
20  dataset_054_B  leaf_length            max  111
21  dataset_054_B  leaf_length            min  111
22  dataset_054_B   leaf_width            max  111
23  dataset_054_B   leaf_width            min  111
24  dataset_054_B plant_height            max   66
25  dataset_054_B plant_height            min    7
26  dataset_054_B  seed_length            max   42
27  dataset_054_B  seed_length            min   54
28  dataset_054_C  leaf_length            max  249
29  dataset_054_C  leaf_length            min  249
30  dataset_054_C   leaf_width            max  243
31  dataset_054_C   leaf_width            min  243
32  dataset_054_C plant_height            max  231
33  dataset_054_C plant_height            min  231
34    dataset_055  leaf_length            max 1439
35    dataset_055  leaf_length            min 1417
36    dataset_055   leaf_width            max 1390
37    dataset_055   leaf_width            min 1389
38    dataset_055 plant_height            max 1462
39    dataset_055 plant_height            min   10
40    dataset_056  leaf_length            max  335
41    dataset_056  leaf_length            min  336
42    dataset_056   leaf_width            max  332
43    dataset_056   leaf_width            min  332
44    dataset_056 plant_height            min  200
45    dataset_058  leaf_length lower_quantile  239
46    dataset_058  leaf_length            max  175
47    dataset_058  leaf_length            min  159
48    dataset_058  leaf_length upper_quantile  239
49    dataset_058   leaf_width lower_quantile  238
50    dataset_058   leaf_width            max  120
51    dataset_058   leaf_width            min   76
52    dataset_058   leaf_width upper_quantile  239
53    dataset_058 plant_height lower_quantile  188
54    dataset_058 plant_height            max   60
55    dataset_058 plant_height upper_quantile  236
56    dataset_058 seed_breadth            max  126
57    dataset_058 seed_breadth            min  126
58    dataset_058  seed_length            max  129
59    dataset_058  seed_length            min  129
60    dataset_058   seed_width            max  126
61    dataset_058   seed_width            min  126
62    dataset_061 plant_height            max  128
63    dataset_073 plant_height            max  139
64    dataset_074  leaf_length            max   22
65    dataset_074  leaf_length            min   22
66    dataset_074   leaf_width            max   22
67    dataset_074   leaf_width            min   22
68    dataset_074 plant_height            max   22
69    dataset_074 plant_height            min   22
70    dataset_074  seed_length            max   22
71    dataset_074  seed_length            min   22
72    dataset_074   seed_width            max   22
73    dataset_074   seed_width            min   22
74    dataset_075  leaf_length            max    4
75    dataset_075  leaf_length            min    4
76    dataset_075   leaf_width            max    4
77    dataset_075   leaf_width            min    4
78    dataset_075 plant_height            max    3
79    dataset_075 plant_height            min    3
80    dataset_075  seed_length            max    4
81    dataset_075  seed_length            min    4
82    dataset_075   seed_width            max    4
83    dataset_075   seed_width            min    4
84    dataset_076  leaf_length            max   12
85    dataset_076  leaf_length            min   12
86    dataset_076   leaf_width            max   12
87    dataset_076   leaf_width            min   12
88    dataset_076 plant_height            max   12
89    dataset_076 plant_height            min    1
90    dataset_076  seed_length            max    6
91    dataset_076  seed_length            min    6
92    dataset_077  leaf_length            max   72
93    dataset_077  leaf_length            min   72
94    dataset_077   leaf_width            max   72
95    dataset_077   leaf_width            min   72
96    dataset_077 plant_height            max   70
97    dataset_077  seed_length            max   30
98    dataset_077  seed_length            min   30
99    dataset_077   seed_width            max   30
100   dataset_077   seed_width            min   30
101   dataset_078  leaf_length lower_quantile   21
102   dataset_078  leaf_length            max   19
103   dataset_078  leaf_length            min   20
104   dataset_078  leaf_length upper_quantile   21
105   dataset_078   leaf_width lower_quantile   21
106   dataset_078   leaf_width            max   13
107   dataset_078   leaf_width            min   11
108   dataset_078   leaf_width upper_quantile   21
109   dataset_078 plant_height lower_quantile   16
110   dataset_078 plant_height            max   13
111   dataset_078 plant_height            min    5
112   dataset_078 plant_height upper_quantile   20
113   dataset_079  leaf_length            max  123
114   dataset_079  leaf_length            min  120
115   dataset_079   leaf_width            max  118
116   dataset_079   leaf_width            min  118
117   dataset_079 plant_height            max  110
118   dataset_079 plant_height            min   47
119   dataset_079  seed_length            max   50
120   dataset_079  seed_length            min   48
121   dataset_080  leaf_length            max    2
122   dataset_080  leaf_length            min    2
123   dataset_080   leaf_width            max    2
124   dataset_080   leaf_width            min    2
125   dataset_080 plant_height            max    2
126   dataset_081  leaf_length            max    8
127   dataset_081  leaf_length            min    8
128   dataset_081   leaf_width            max    8
129   dataset_081   leaf_width            min    8
130   dataset_081 plant_height            max    8
131   dataset_081 plant_height            min    6
132   dataset_081  seed_length            max    5
133   dataset_081  seed_length            min    5
134   dataset_081   seed_width            max    5
135   dataset_081   seed_width            min    5
136   dataset_082  leaf_length            max   11
137   dataset_082  leaf_length            min   11
138   dataset_082   leaf_width            max   11
139   dataset_082   leaf_width            min   11
140   dataset_082 plant_height            max    9
141   dataset_082 plant_height            min    9
142   dataset_082  seed_length            max    6
143   dataset_082  seed_length            min    6
144   dataset_082   seed_width            max    5
145   dataset_082   seed_width            min    5
146   dataset_083  leaf_length            max    8
147   dataset_083  leaf_length            min    8
148   dataset_083   leaf_width            max    8
149   dataset_083   leaf_width            min    8
150   dataset_083 plant_height            max    8
151   dataset_083 plant_height            min    7
152   dataset_083  seed_length            max    8
153   dataset_083  seed_length            min    8
154   dataset_083   seed_width            max    8
155   dataset_083   seed_width            min    8
156   dataset_084  leaf_length            max    4
157   dataset_084  leaf_length            min    4
158   dataset_084   leaf_width            max    4
159   dataset_084   leaf_width            min    4
160   dataset_084 plant_height            max    4
161   dataset_084 plant_height            min    4
162   dataset_084 seed_breadth            max    4
163   dataset_084 seed_breadth            min    4
164   dataset_084  seed_length            max    4
165   dataset_084  seed_length            min    4
166   dataset_084   seed_width            max    4
167   dataset_084   seed_width            min    4
168   dataset_085  leaf_length            max    5
169   dataset_085  leaf_length            min    5
170   dataset_085   leaf_width            max    5
171   dataset_085   leaf_width            min    5
172   dataset_085 plant_height            max    5
173   dataset_085 plant_height            min    4
174   dataset_085  seed_length            max    3
175   dataset_085  seed_length            min    3
176   dataset_085   seed_width            max    3
177   dataset_085   seed_width            min    3
178   dataset_086  leaf_length            max    7
179   dataset_086  leaf_length            min    7
180   dataset_086   leaf_width            max    7
181   dataset_086   leaf_width            min    7
182   dataset_086 plant_height            max    7
183   dataset_086 plant_height            min    5
184   dataset_086 seed_breadth            max    7
185   dataset_086 seed_breadth            min    7
186   dataset_086  seed_length            max    7
187   dataset_086  seed_length            min    7
188   dataset_086   seed_width            max    7
189   dataset_086   seed_width            min    7
190   dataset_087  leaf_length            max   13
191   dataset_087  leaf_length            min   13
192   dataset_087   leaf_width            max   13
193   dataset_087   leaf_width            min   13
194   dataset_087 plant_height            max   13
195   dataset_087 plant_height            min    9
196   dataset_087  seed_length            max    6
197   dataset_087  seed_length            min    6
198   dataset_087   seed_width            max    6
199   dataset_087   seed_width            min    6
200   dataset_088  leaf_length lower_quantile    2
201   dataset_088  leaf_length            max    1
202   dataset_088  leaf_length            min    1
203   dataset_088  leaf_length upper_quantile    2
204   dataset_088   leaf_width lower_quantile    2
205   dataset_088   leaf_width            max    1
206   dataset_088   leaf_width            min    1
207   dataset_088   leaf_width upper_quantile    2
208   dataset_088 plant_height lower_quantile    2
209   dataset_088 plant_height            max    1
210   dataset_088 plant_height upper_quantile    2
211   dataset_089  leaf_length lower_quantile    5
212   dataset_089  leaf_length upper_quantile    5
213   dataset_089   leaf_width lower_quantile    5
214   dataset_089   leaf_width upper_quantile    5
215   dataset_089 plant_height lower_quantile    4
216   dataset_089 plant_height upper_quantile    5
217   dataset_090  leaf_length            max    3
218   dataset_090  leaf_length            min    3
219   dataset_090   leaf_width            max    3
220   dataset_090   leaf_width            min    3
221   dataset_090 plant_height            max    3
222   dataset_090 seed_breadth            max    3
223   dataset_090 seed_breadth            min    3
224   dataset_090  seed_length            max    3
225   dataset_090  seed_length            min    3
226   dataset_090   seed_width            max    3
227   dataset_090   seed_width            min    3
228   dataset_091  leaf_length            max   46
229   dataset_091  leaf_length            min   40
230   dataset_091   leaf_width            max   45
231   dataset_091   leaf_width            min   38
232   dataset_091 plant_height            max   31
233   dataset_091 plant_height            min    3
234   dataset_091 seed_breadth            max    2
235   dataset_091 seed_breadth            min    2
236   dataset_091  seed_length            max   30
237   dataset_091  seed_length            min   30
238   dataset_091   seed_width            max   30
239   dataset_091   seed_width            min   30
240   dataset_092  leaf_length            max  999
241   dataset_092  leaf_length            min  997
242   dataset_092   leaf_width            max  946
243   dataset_092   leaf_width            min  920
244   dataset_092 plant_height            max  982
245   dataset_092 plant_height            min  585
246   dataset_092 seed_breadth            max   10
247   dataset_092 seed_breadth            min    9
248   dataset_092  seed_length            max  823
249   dataset_092  seed_length            min  701
250   dataset_092   seed_width            max   70
251   dataset_092   seed_width            min   61
252   dataset_093  leaf_length            max 3410
253   dataset_093  leaf_length            min 3307
254   dataset_093   leaf_width            max 3030
255   dataset_093   leaf_width            min 2975
256   dataset_093 plant_height            max 3219
257   dataset_093 plant_height            min 1062
258   dataset_093 seed_breadth            max   77
259   dataset_093 seed_breadth            min   77
260   dataset_093  seed_length            max  751
261   dataset_093  seed_length            min  766
262   dataset_093   seed_width            max  426
263   dataset_093   seed_width            min  429
264   dataset_112 plant_height lower_quantile 4818
265   dataset_112 plant_height            max  721
266   dataset_112 plant_height            min  253
267   dataset_112 plant_height upper_quantile 6193
268   dataset_114  leaf_length            max 4257
269   dataset_114  leaf_length            min 4257
270   dataset_114   leaf_width            max 4391
271   dataset_114   leaf_width            min 4391
272   dataset_114 plant_height            max 4371
273   dataset_114 plant_height            min 4371