Open kimgerdes opened 6 years ago
Right... I am not sure what's going on here, but if I trim it down to the basics it works like expected, although the output might not be very nice.
from matplotlib import gridspec
d1={'Afrikaans': 1.35, 'Amharic': 9.51, 'AncientGreek': 11.62, 'Arabic': 9.22, 'Armenian': 2.92, 'Bambara': 0.1, 'Basque': 8.46, 'Belarusian': 2.48, 'Breton': 24.0, 'Bulgarian': 5.12, 'Buryat': 0.0, 'Cantonese': 4.5, 'Catalan': 1.97, 'Chinese': 0.05, 'Coptic': 4.41, 'Croatian': 4.17, 'Czech': 8.74, 'Danish': 14.58, 'Dutch': 15.3, 'English': 0.79, 'Erzya': 21.74, 'Estonian': 17.1, 'Faroese': 8.92, 'Finnish': 5.82, 'French': 2.44, 'Galician': 7.0, 'German': 20.6, 'Gothic': 11.11, 'Greek': 4.88, 'Hebrew': 1.48, 'Hindi': 0.16, 'Hungarian': 7.46, 'Indonesian': 1.12, 'Irish': 98.16, 'Italian': 6.8, 'Japanese': 0.0, 'Kazakh': 0.46, 'Komi': 17.24, 'Korean': 0.04, 'Kurmanji': 0.37, 'Latin': 6.14, 'Latvian': 3.34, 'Lithuanian': 0.98, 'Maltese': 0.0, 'Marathi': 1.95, 'Naija': 0.11, 'NorthSami': 4.48, 'Norwegian': 12.59, 'OldChurchSlavonic': 13.42, 'OldFrench': 10.6, 'Persian': 2.45, 'Polish': 15.49, 'Portuguese': 3.1, 'Romanian': 12.75, 'Russian': 5.9, 'Sanskrit': 9.46, 'Serbian': 9.7, 'Slovak': 11.67, 'Slovenian': 12.08, 'Spanish': 3.41, 'Swedish': 13.36, 'SwedishSign': 18.89, 'Tagalog': 100.0, 'Tamil': 5.61, 'Telugu': 0.0, 'Thai': 0.0, 'Turkish': 9.95, 'Ukrainian': 5.39, 'UpperSorbian': 5.66, 'Urdu': 0.21, 'Uyghur': 1.96, 'Vietnamese': 0}
d2={'Afrikaans': 2.63, 'Amharic': 0.59, 'AncientGreek': 41.61, 'Arabic': 73.29, 'Armenian': 20.6, 'Bambara': 0.0, 'Basque': 18.53, 'Belarusian': 33.54, 'Breton': 53.99, 'Bulgarian': 30.08, 'Buryat': 0.38, 'Cantonese': 5.31, 'Catalan': 23.57, 'Chinese': 0.24, 'Coptic': 28.02, 'Croatian': 28.64, 'Czech': 37.94, 'Danish': 14.95, 'Dutch': 21.98, 'English': 9.93, 'Erzya': 42.54, 'Estonian': 38.92, 'Faroese': 16.07, 'Finnish': 23.02, 'French': 5.85, 'Galician': 19.7, 'German': 19.77, 'Gothic': 49.52, 'Greek': 35.74, 'Hebrew': 35.52, 'Hindi': 0.39, 'Hungarian': 28.8, 'Indonesian': 4.5, 'Irish': 98.64, 'Italian': 25.96, 'Japanese': 0.0, 'Kazakh': 0.44, 'Komi': 20.17, 'Korean': 0.04, 'Kurmanji': 0.46, 'Latin': 32.51, 'Latvian': 37.48, 'Lithuanian': 39.38, 'Maltese': 10.34, 'Marathi': 2.78, 'Naija': 4.44, 'NorthSami': 32.38, 'Norwegian': 19.04, 'OldChurchSlavonic': 53.81, 'OldFrench': 35.13, 'Persian': 0.73, 'Polish': 36.67, 'Portuguese': 13.93, 'Romanian': 30.23, 'Russian': 33.52, 'Sanskrit': 31.1, 'Serbian': 25.7, 'Slovak': 39.69, 'Slovenian': 31.77, 'Spanish': 22.06, 'Swedish': 19.8, 'SwedishSign': 18.69, 'Tagalog': 97.92, 'Tamil': 0.55, 'Telugu': 0.95, 'Thai': 0.15, 'Turkish': 4.67, 'Ukrainian': 32.81, 'UpperSorbian': 23.85, 'Urdu': 0.18, 'Uyghur': 4.06, 'Vietnamese': 1.62}
langnameGroup={"AncientGreek":"Indo-European", "Arabic":"Semitic", "Basque":"isolate", "Belarusian":"Indo-European-Baltoslavic", "Bulgarian":"Indo-European-Baltoslavic", "Cantonese":"Sino-Austronesian", "Catalan":"Indo-European-Romance", "Chinese":"Sino-Austronesian", "Coptic":"Afroasiatic", "Croatian":"Indo-European-Baltoslavic", "Czech":"Indo-European-Baltoslavic", "Danish":"Indo-European-Germanic", "Dutch":"Indo-European-Germanic", "English":"Indo-European-Germanic", "Estonian":"Agglutinating", "Finnish":"Agglutinating", "French":"Indo-European-Romance", "Galician":"Indo-European-Romance", "German":"Indo-European-Germanic", "Gothic":"Indo-European-Germanic", "Greek":"Indo-European", "Hebrew":"Semitic", "Hindi":"Indo-European", "Hungarian":"Agglutinating", "Indonesian":"Sino-Austronesian", "Irish":"Indo-European", "Italian":"Indo-European-Romance", "Japanese":"Agglutinating", "Kazakh":"Agglutinating", "Korean":"Agglutinating", "Latin":"Indo-European-Romance", "Latvian":"Indo-European-Baltoslavic", "Lithuanian":"Indo-European-Baltoslavic", "Norwegian":"Indo-European-Germanic", "OldChurchSlavonic":"Indo-European-Baltoslavic", "Persian":"Indo-European", "Polish":"Indo-European-Baltoslavic", "Portuguese":"Indo-European-Romance", "Romanian":"Indo-European-Romance", "Russian":"Indo-European-Baltoslavic", "Sanskrit":"Indo-European", "Slovak":"Indo-European-Baltoslavic", "Slovenian":"Indo-European-Baltoslavic", "Spanish":"Indo-European-Romance", "Swedish":"Indo-European-Germanic", "Tamil":"Dravidian", "Turkish":"Agglutinating", "Ukrainian":"Indo-European-Baltoslavic", "Urdu":"Indo-European", "Uyghur":"Agglutinating", "Vietnamese":"Sino-Austronesian",'Afrikaans':'Indo-European-Germanic', 'SwedishSign':'Indo-European-Germanic', 'Kurmanji':'Indo-European', 'NorthSami':'Agglutinating', 'UpperSorbian':"Indo-European-Baltoslavic", 'Buryat':'Agglutinating', 'Telugu':'Dravidian', 'Serbian':"Indo-European-Baltoslavic", 'Marathi':'Indo-European','Naija':"Indo-European-Germanic", 
"OldFrench":"Indo-European-Romance", "Maltese":"Semitic", "Thai":"Sino-Austronesian","Amharic":"Afroasiatic", 'Erzya': 'Agglutinating', 'Faroese':"Indo-European-Germanic", 'Tagalog':"Sino-Austronesian", 'Bambara':'Niger-Congo', 'Breton':"Indo-European", 'Armenian':"Indo-European", 'Komi': 'Agglutinating'}
# Matplotlib colour name for each language group. Groups that appear in
# langnameGroup but not here (e.g. "Semitic", "Dravidian") have no dedicated
# colour; the plotting code below looks entries up with .get(..., 'k').
groupColors={"Indo-European-Romance":'brown',"Indo-European-Baltoslavic":'purple',"Indo-European-Germanic":'olive',"Indo-European":'royalBlue',"Sino-Austronesian":'limeGreen', "Agglutinating":'red'}
# Matplotlib marker code for each language group (same set of groups as
# groupColors); the plotting code below looks entries up with .get(..., 'o').
groupMarkers={"Indo-European-Romance":'<',"Indo-European-Baltoslavic":'^',"Indo-European-Germanic":'v',"Indo-European":'>',"Sino-Austronesian":'s', "Agglutinating":'+'}
# Scatter plot of d1 (x axis) against d2 (y axis), one labelled point per
# language, with the labels de-overlapped by adjust_text.
# The points are paired by position, so this assumes d1 and d2 list the same
# languages in the same order.
col1 = pd.Series(d1)
col2 = pd.Series(d2)

# Per-language colour and marker, resolved through the language's group.
# Groups without an entry fall back to black ('k') and a circle ('o').
colors = [groupColors.get(langnameGroup[label], 'k') for label in col1.index]
markers = [groupMarkers.get(langnameGroup[label], 'o') for label in col1.index]

fig, ax = plt.subplots(figsize=(10, 10))
# Side plots, disabled in this trimmed-down single-axes version:
# gs = gridspec.GridSpec(2, 2, width_ratios=[1, 25],height_ratios=[25, 1])
# aa = plt.subplot(gs[0])
# ax = plt.subplot(gs[1])
# bb = plt.subplot(gs[3])

# Both axes show the same fixed range.
lo, hi = 0, 100
# plt.xlim(lo, hi)
# plt.ylim(lo, hi)
ax.set_xlim([lo, hi])
ax.set_ylim([lo, hi])
# aa.set_xlim([0, 1])
# aa.set_ylim([lo, hi])
# bb.set_ylim([0, 1])

# Optional decoration (ticks, grid, diagonal), currently disabled:
# ax.set_xticks([50,100], minor=False) # only the 50% is major
# ax.set_xticks([0,25,50,75,100], minor=True) # all 10th are minor
# ax.set_yticks([50,100], minor=False) # only the 50% is major
# ax.set_yticks([0,25,50,75,100], minor=True) # all 10th are minor
# ax.grid(which='both', axis='both',alpha=.5) # draw grid
# ax.plot([0, 1], [0, 1], transform=ax.transAxes, alpha=.5, color="gray") # diagonal
# aa.set_xticks([], minor=False)
# aa.set_yticks([], minor=False)
# bb.set_xticks([], minor=False)
# bb.set_yticks([], minor=False)

# One scatter call per point, because each point carries its own marker.
for xx, yy, cc, mm in zip(col1, col2, colors, markers):
    ax.scatter(xx, yy, marker=mm, c=cc)
# aa.scatter([0.5 for _ in col1], col2, c=colors, alpha=0.5)
# bb.scatter(col1, [0.5 for _ in col2], c=colors, alpha=0.5)

# Labels start exactly on their points for adjustText (the original script
# offset them by +1 in x and y). The colour is reused from `colors` instead
# of being looked up a second time.
texts = [
    ax.text(x, y, label, color=cc, fontsize=8)
    for label, x, y, cc in zip(col1.index, col1, col2, colors)
]

adjust_text(
    texts, col1, col2,
    # expand_text=(1, 1), ha='center', va='top',force_text=.6,lim=277,force_points=0.1,
    # Name the target axes explicitly instead of letting adjust_text choose
    # one itself: with several subplots in the figure, omitting ax= was
    # reported to shift the labels, and passing ax=ax fixed it.
    ax=ax,
    arrowprops=dict(arrowstyle='-', color='gray', alpha=.5),
)
(setting lower force helps a lot, as always, but that's not the point here...)
Maybe it has something to do with gridspec?..
Right, yeah, somehow gridspec messes with adjust_text if there is more than one subplot created that way. See below.
from matplotlib import gridspec
d1={'Afrikaans': 1.35, 'Amharic': 9.51, 'AncientGreek': 11.62, 'Arabic': 9.22, 'Armenian': 2.92, 'Bambara': 0.1, 'Basque': 8.46, 'Belarusian': 2.48, 'Breton': 24.0, 'Bulgarian': 5.12, 'Buryat': 0.0, 'Cantonese': 4.5, 'Catalan': 1.97, 'Chinese': 0.05, 'Coptic': 4.41, 'Croatian': 4.17, 'Czech': 8.74, 'Danish': 14.58, 'Dutch': 15.3, 'English': 0.79, 'Erzya': 21.74, 'Estonian': 17.1, 'Faroese': 8.92, 'Finnish': 5.82, 'French': 2.44, 'Galician': 7.0, 'German': 20.6, 'Gothic': 11.11, 'Greek': 4.88, 'Hebrew': 1.48, 'Hindi': 0.16, 'Hungarian': 7.46, 'Indonesian': 1.12, 'Irish': 98.16, 'Italian': 6.8, 'Japanese': 0.0, 'Kazakh': 0.46, 'Komi': 17.24, 'Korean': 0.04, 'Kurmanji': 0.37, 'Latin': 6.14, 'Latvian': 3.34, 'Lithuanian': 0.98, 'Maltese': 0.0, 'Marathi': 1.95, 'Naija': 0.11, 'NorthSami': 4.48, 'Norwegian': 12.59, 'OldChurchSlavonic': 13.42, 'OldFrench': 10.6, 'Persian': 2.45, 'Polish': 15.49, 'Portuguese': 3.1, 'Romanian': 12.75, 'Russian': 5.9, 'Sanskrit': 9.46, 'Serbian': 9.7, 'Slovak': 11.67, 'Slovenian': 12.08, 'Spanish': 3.41, 'Swedish': 13.36, 'SwedishSign': 18.89, 'Tagalog': 100.0, 'Tamil': 5.61, 'Telugu': 0.0, 'Thai': 0.0, 'Turkish': 9.95, 'Ukrainian': 5.39, 'UpperSorbian': 5.66, 'Urdu': 0.21, 'Uyghur': 1.96, 'Vietnamese': 0}
d2={'Afrikaans': 2.63, 'Amharic': 0.59, 'AncientGreek': 41.61, 'Arabic': 73.29, 'Armenian': 20.6, 'Bambara': 0.0, 'Basque': 18.53, 'Belarusian': 33.54, 'Breton': 53.99, 'Bulgarian': 30.08, 'Buryat': 0.38, 'Cantonese': 5.31, 'Catalan': 23.57, 'Chinese': 0.24, 'Coptic': 28.02, 'Croatian': 28.64, 'Czech': 37.94, 'Danish': 14.95, 'Dutch': 21.98, 'English': 9.93, 'Erzya': 42.54, 'Estonian': 38.92, 'Faroese': 16.07, 'Finnish': 23.02, 'French': 5.85, 'Galician': 19.7, 'German': 19.77, 'Gothic': 49.52, 'Greek': 35.74, 'Hebrew': 35.52, 'Hindi': 0.39, 'Hungarian': 28.8, 'Indonesian': 4.5, 'Irish': 98.64, 'Italian': 25.96, 'Japanese': 0.0, 'Kazakh': 0.44, 'Komi': 20.17, 'Korean': 0.04, 'Kurmanji': 0.46, 'Latin': 32.51, 'Latvian': 37.48, 'Lithuanian': 39.38, 'Maltese': 10.34, 'Marathi': 2.78, 'Naija': 4.44, 'NorthSami': 32.38, 'Norwegian': 19.04, 'OldChurchSlavonic': 53.81, 'OldFrench': 35.13, 'Persian': 0.73, 'Polish': 36.67, 'Portuguese': 13.93, 'Romanian': 30.23, 'Russian': 33.52, 'Sanskrit': 31.1, 'Serbian': 25.7, 'Slovak': 39.69, 'Slovenian': 31.77, 'Spanish': 22.06, 'Swedish': 19.8, 'SwedishSign': 18.69, 'Tagalog': 97.92, 'Tamil': 0.55, 'Telugu': 0.95, 'Thai': 0.15, 'Turkish': 4.67, 'Ukrainian': 32.81, 'UpperSorbian': 23.85, 'Urdu': 0.18, 'Uyghur': 4.06, 'Vietnamese': 1.62}
langnameGroup={"AncientGreek":"Indo-European", "Arabic":"Semitic", "Basque":"isolate", "Belarusian":"Indo-European-Baltoslavic", "Bulgarian":"Indo-European-Baltoslavic", "Cantonese":"Sino-Austronesian", "Catalan":"Indo-European-Romance", "Chinese":"Sino-Austronesian", "Coptic":"Afroasiatic", "Croatian":"Indo-European-Baltoslavic", "Czech":"Indo-European-Baltoslavic", "Danish":"Indo-European-Germanic", "Dutch":"Indo-European-Germanic", "English":"Indo-European-Germanic", "Estonian":"Agglutinating", "Finnish":"Agglutinating", "French":"Indo-European-Romance", "Galician":"Indo-European-Romance", "German":"Indo-European-Germanic", "Gothic":"Indo-European-Germanic", "Greek":"Indo-European", "Hebrew":"Semitic", "Hindi":"Indo-European", "Hungarian":"Agglutinating", "Indonesian":"Sino-Austronesian", "Irish":"Indo-European", "Italian":"Indo-European-Romance", "Japanese":"Agglutinating", "Kazakh":"Agglutinating", "Korean":"Agglutinating", "Latin":"Indo-European-Romance", "Latvian":"Indo-European-Baltoslavic", "Lithuanian":"Indo-European-Baltoslavic", "Norwegian":"Indo-European-Germanic", "OldChurchSlavonic":"Indo-European-Baltoslavic", "Persian":"Indo-European", "Polish":"Indo-European-Baltoslavic", "Portuguese":"Indo-European-Romance", "Romanian":"Indo-European-Romance", "Russian":"Indo-European-Baltoslavic", "Sanskrit":"Indo-European", "Slovak":"Indo-European-Baltoslavic", "Slovenian":"Indo-European-Baltoslavic", "Spanish":"Indo-European-Romance", "Swedish":"Indo-European-Germanic", "Tamil":"Dravidian", "Turkish":"Agglutinating", "Ukrainian":"Indo-European-Baltoslavic", "Urdu":"Indo-European", "Uyghur":"Agglutinating", "Vietnamese":"Sino-Austronesian",'Afrikaans':'Indo-European-Germanic', 'SwedishSign':'Indo-European-Germanic', 'Kurmanji':'Indo-European', 'NorthSami':'Agglutinating', 'UpperSorbian':"Indo-European-Baltoslavic", 'Buryat':'Agglutinating', 'Telugu':'Dravidian', 'Serbian':"Indo-European-Baltoslavic", 'Marathi':'Indo-European','Naija':"Indo-European-Germanic", 
"OldFrench":"Indo-European-Romance", "Maltese":"Semitic", "Thai":"Sino-Austronesian","Amharic":"Afroasiatic", 'Erzya': 'Agglutinating', 'Faroese':"Indo-European-Germanic", 'Tagalog':"Sino-Austronesian", 'Bambara':'Niger-Congo', 'Breton':"Indo-European", 'Armenian':"Indo-European", 'Komi': 'Agglutinating'}
# Matplotlib colour name for each language group. Groups that appear in
# langnameGroup but not here (e.g. "Semitic", "Dravidian") have no dedicated
# colour; the plotting code below looks entries up with .get(..., 'k').
groupColors={"Indo-European-Romance":'brown',"Indo-European-Baltoslavic":'purple',"Indo-European-Germanic":'olive',"Indo-European":'royalBlue',"Sino-Austronesian":'limeGreen', "Agglutinating":'red'}
# Matplotlib marker code for each language group (same set of groups as
# groupColors); the plotting code below looks entries up with .get(..., 'o').
groupMarkers={"Indo-European-Romance":'<',"Indo-European-Baltoslavic":'^',"Indo-European-Germanic":'v',"Indo-European":'>',"Sino-Austronesian":'s', "Agglutinating":'+'}
# Gridspec variant of the scatter plot: the main axes live in cell 1 of a
# 2x2 grid; the two side axes (aa, bb) are left commented out.
# Points are paired by position, so d1 and d2 are assumed to list the same
# languages in the same order.
xs = pd.Series(d1)
ys = pd.Series(d2)

# Resolve each language's group once, then map it to a colour and a marker
# (black 'k' / circle 'o' when the group has no entry).
groups = [langnameGroup[name] for name in xs.index]
colors = [groupColors.get(group, 'k') for group in groups]
markers = [groupMarkers.get(group, 'o') for group in groups]

# fig, ax = plt.subplots(figsize=(10,10))
fig = plt.figure(figsize=(10, 10))
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 25], height_ratios=[25, 1])
# aa = plt.subplot(gs[0])   # left side plot (disabled)
ax = plt.subplot(gs[1])
# bb = plt.subplot(gs[3])   # bottom side plot (disabled)

# Same fixed range on both axes of the main plot.
lo, hi = 0, 100
# plt.xlim(lo, hi)
# plt.ylim(lo, hi)
ax.set_xlim([lo, hi])
ax.set_ylim([lo, hi])
# aa.set_xlim([0, 1])
# aa.set_ylim([lo, hi])
# bb.set_ylim([0, 1])

# Optional decoration (ticks, grid, diagonal), currently disabled:
# ax.set_xticks([50,100], minor=False) # only the 50% is major
# ax.set_xticks([0,25,50,75,100], minor=True) # all 10th are minor
# ax.set_yticks([50,100], minor=False) # only the 50% is major
# ax.set_yticks([0,25,50,75,100], minor=True) # all 10th are minor
# ax.grid(which='both', axis='both',alpha=.5) # draw grid
# ax.plot([0, 1], [0, 1], transform=ax.transAxes, alpha=.5, color="gray") # diagonal
# aa.set_xticks([], minor=False)
# aa.set_yticks([], minor=False)
# bb.set_xticks([], minor=False)
# bb.set_yticks([], minor=False)

# Draw the points one at a time, since every point has its own marker.
for px, py, colour, marker in zip(xs, ys, colors, markers):
    ax.scatter(px, py, marker=marker, c=colour)
# aa.scatter([0.5 for _ in xs], ys, c=colors, alpha=0.5)
# bb.scatter(xs, [0.5 for _ in ys], c=colors, alpha=0.5)

# Place each label exactly on its point for adjustText (the original script
# used an offset of +1 in x and y instead).
texts = []
for name, px, py, colour in zip(xs.index, xs, ys, colors):
    texts.append(ax.text(px, py, name, color=colour, fontsize=8))

# NOTE(review): no ax= is passed here, so adjust_text chooses the axes on its
# own (presumably the current axes; confirm against the adjustText docs).
adjust_text(
    texts, xs, ys,
    force_text=(0.1, 0.5),
    # expand_text=(1, 1), ha='center', va='top',force_text=.6,lim=277,force_points=0.1,
    arrowprops=dict(arrowstyle='-', color='gray', alpha=.5),
)
The same code with uncommented lines to create additional axes:
Clearly this somehow messes with the coordinate system...
The same happens if I just use plt.subplots() to create the subplots (uses gridspec internally) even with equal subplot sizes. Clearly the axes bounds are not interpreted correctly at some stage, and the coordinates are just shifted - by the looks of it, to the edge of the other subplot (at least on the left - perhaps, the end of the current subplots on the bottom, including tick labels?). This is clearly a bug either in adjustText, or in matplotlib (and we know what is more likely, if I am honest! but I still might submit an issue there for some help...)
Sooo in the meantime my advice is to stick to one subplot :) Sorry. You can still add the points to indicate positions in the same axes, but repel the texts from them too. You can also have short vertical/horizontal lines there instead of points, like a rug plot.
Or perhaps I can get @tacaswell to share some helpful comment here? :) I am a bit at a loss what is going on.
Thanks for figuring that out! I can do without the side plots for the moment or recompose them separately. However, even in the single plot the labels get moved, sometimes quite far, and on top of points: for example Catalan, Buryat, Cantonese... Three questions: Is there a way of preventing labels from ending up on top of points? Is there a way of restricting the distance that a label travels? Is it difficult to write a function that tells me whether two labels are on top of each other? If I had an algorithm that moves the labels as well as possible, but not beyond a given distance and not on top of points, I could use a list of languages ordered from more to less important (because they are mentioned in the text, for example) and remove the intersecting label of the language lower in the list... That would make a perfect graph.
Unfortunately, such dense plots are very difficult to optimize... A very small force and many iterations is your best bet, and probably reducing the expand_text too.
Since you are supplying x and y coordinates of the points, texts are repelled from them - but clearly it's just not working well enough for such a dense plot. I am not sure what else I can suggest to help with this.
Currently, no, there is no way to do that, but it shouldn't be very difficult to add as an option... I'll try to look into it. What I was also planning to do is to add a step where texts are pulled back a little towards their original coordinates every iteration; this should help with this kind of issue too, I suppose.
Mmm if I change the function to return texts instead of just creating annotations in their place, you could easily find all overlaps between them. I don't know how to do it with annotations since the annotation's bounding box includes the arrow... If you find a way, let me know, it would allow starting with annotations directly instead of texts, and would make everything more flexible in terms of different arrow styles for different annotations etc.
Basically, the problem is that in the bottom left corner the annotations are simply too close together and there is no way to keep them close to the points and avoid overlaps. An option is to have separate plots with different scales: if I zoom into the bottom left corner I get this. If I zoom even more, I get this. I am not sure whether this approach can work for you, but you could quite easily generate plots with three different axes limits, labelling only the points that are not labelled in the plot with smaller limits, for example.
Thanks again for the immediate reply! I agree that there is no optimal solution for such dense graphs. That's why it would be great to do the best possible: Move labels a little to avoid overlaps whenever possible; then orderly remove labels that couldn't find a good position. Since our article is about global distribution of language features, we need the complete graph. So zooming in could only be useful very rarely for what I'm trying to achieve. The pulling-back step seems like a great idea. At least pulling back along a straight line until something is hit. That would already fix quite a few of the weird positions in this graph. Maybe that could even be done for the nodes in parallel, one point of retreat at a time, going around through all annotations, so that in your graph for example, Turkish and Vietnamese (at the bottom right) do not hinder each other from retracting. For finding the bounding box of annotations, yes it would be great if the function would return a dictionary/list of annotation texts with their bounding box and some reference to the actual annotation object, so that it's possible to change or remove it... Hope that is not too difficult. Thanks so much!
Yeah, someone has already suggested this "importance" feature with removing less important annotations before. I need to think how to best implement this kind of behaviour.
Yeah, for crossing arrows I saw that in ggrepel
they are now simply swapping the positions of corresponding labels, this should be relatively easy to implement, I will try it at some point. But pulling the annotations back should help too...
BTW, another option for you is this:
Yes, that already looks much better. What are the settings? You just made the space go to -10? I'll try that... Looking forward to trying out your retraction algorithm ;-)
Yeah, just -10. This looks even better I think, although takes a long time:
d1={'Afrikaans': 1.35, 'Amharic': 9.51, 'AncientGreek': 11.62, 'Arabic': 9.22, 'Armenian': 2.92, 'Bambara': 0.1, 'Basque': 8.46, 'Belarusian': 2.48, 'Breton': 24.0, 'Bulgarian': 5.12, 'Buryat': 0.0, 'Cantonese': 4.5, 'Catalan': 1.97, 'Chinese': 0.05, 'Coptic': 4.41, 'Croatian': 4.17, 'Czech': 8.74, 'Danish': 14.58, 'Dutch': 15.3, 'English': 0.79, 'Erzya': 21.74, 'Estonian': 17.1, 'Faroese': 8.92, 'Finnish': 5.82, 'French': 2.44, 'Galician': 7.0, 'German': 20.6, 'Gothic': 11.11, 'Greek': 4.88, 'Hebrew': 1.48, 'Hindi': 0.16, 'Hungarian': 7.46, 'Indonesian': 1.12, 'Irish': 98.16, 'Italian': 6.8, 'Japanese': 0.0, 'Kazakh': 0.46, 'Komi': 17.24, 'Korean': 0.04, 'Kurmanji': 0.37, 'Latin': 6.14, 'Latvian': 3.34, 'Lithuanian': 0.98, 'Maltese': 0.0, 'Marathi': 1.95, 'Naija': 0.11, 'NorthSami': 4.48, 'Norwegian': 12.59, 'OldChurchSlavonic': 13.42, 'OldFrench': 10.6, 'Persian': 2.45, 'Polish': 15.49, 'Portuguese': 3.1, 'Romanian': 12.75, 'Russian': 5.9, 'Sanskrit': 9.46, 'Serbian': 9.7, 'Slovak': 11.67, 'Slovenian': 12.08, 'Spanish': 3.41, 'Swedish': 13.36, 'SwedishSign': 18.89, 'Tagalog': 100.0, 'Tamil': 5.61, 'Telugu': 0.0, 'Thai': 0.0, 'Turkish': 9.95, 'Ukrainian': 5.39, 'UpperSorbian': 5.66, 'Urdu': 0.21, 'Uyghur': 1.96, 'Vietnamese': 0}
d2={'Afrikaans': 2.63, 'Amharic': 0.59, 'AncientGreek': 41.61, 'Arabic': 73.29, 'Armenian': 20.6, 'Bambara': 0.0, 'Basque': 18.53, 'Belarusian': 33.54, 'Breton': 53.99, 'Bulgarian': 30.08, 'Buryat': 0.38, 'Cantonese': 5.31, 'Catalan': 23.57, 'Chinese': 0.24, 'Coptic': 28.02, 'Croatian': 28.64, 'Czech': 37.94, 'Danish': 14.95, 'Dutch': 21.98, 'English': 9.93, 'Erzya': 42.54, 'Estonian': 38.92, 'Faroese': 16.07, 'Finnish': 23.02, 'French': 5.85, 'Galician': 19.7, 'German': 19.77, 'Gothic': 49.52, 'Greek': 35.74, 'Hebrew': 35.52, 'Hindi': 0.39, 'Hungarian': 28.8, 'Indonesian': 4.5, 'Irish': 98.64, 'Italian': 25.96, 'Japanese': 0.0, 'Kazakh': 0.44, 'Komi': 20.17, 'Korean': 0.04, 'Kurmanji': 0.46, 'Latin': 32.51, 'Latvian': 37.48, 'Lithuanian': 39.38, 'Maltese': 10.34, 'Marathi': 2.78, 'Naija': 4.44, 'NorthSami': 32.38, 'Norwegian': 19.04, 'OldChurchSlavonic': 53.81, 'OldFrench': 35.13, 'Persian': 0.73, 'Polish': 36.67, 'Portuguese': 13.93, 'Romanian': 30.23, 'Russian': 33.52, 'Sanskrit': 31.1, 'Serbian': 25.7, 'Slovak': 39.69, 'Slovenian': 31.77, 'Spanish': 22.06, 'Swedish': 19.8, 'SwedishSign': 18.69, 'Tagalog': 97.92, 'Tamil': 0.55, 'Telugu': 0.95, 'Thai': 0.15, 'Turkish': 4.67, 'Ukrainian': 32.81, 'UpperSorbian': 23.85, 'Urdu': 0.18, 'Uyghur': 4.06, 'Vietnamese': 1.62}
langnameGroup={"AncientGreek":"Indo-European", "Arabic":"Semitic", "Basque":"isolate", "Belarusian":"Indo-European-Baltoslavic", "Bulgarian":"Indo-European-Baltoslavic", "Cantonese":"Sino-Austronesian", "Catalan":"Indo-European-Romance", "Chinese":"Sino-Austronesian", "Coptic":"Afroasiatic", "Croatian":"Indo-European-Baltoslavic", "Czech":"Indo-European-Baltoslavic", "Danish":"Indo-European-Germanic", "Dutch":"Indo-European-Germanic", "English":"Indo-European-Germanic", "Estonian":"Agglutinating", "Finnish":"Agglutinating", "French":"Indo-European-Romance", "Galician":"Indo-European-Romance", "German":"Indo-European-Germanic", "Gothic":"Indo-European-Germanic", "Greek":"Indo-European", "Hebrew":"Semitic", "Hindi":"Indo-European", "Hungarian":"Agglutinating", "Indonesian":"Sino-Austronesian", "Irish":"Indo-European", "Italian":"Indo-European-Romance", "Japanese":"Agglutinating", "Kazakh":"Agglutinating", "Korean":"Agglutinating", "Latin":"Indo-European-Romance", "Latvian":"Indo-European-Baltoslavic", "Lithuanian":"Indo-European-Baltoslavic", "Norwegian":"Indo-European-Germanic", "OldChurchSlavonic":"Indo-European-Baltoslavic", "Persian":"Indo-European", "Polish":"Indo-European-Baltoslavic", "Portuguese":"Indo-European-Romance", "Romanian":"Indo-European-Romance", "Russian":"Indo-European-Baltoslavic", "Sanskrit":"Indo-European", "Slovak":"Indo-European-Baltoslavic", "Slovenian":"Indo-European-Baltoslavic", "Spanish":"Indo-European-Romance", "Swedish":"Indo-European-Germanic", "Tamil":"Dravidian", "Turkish":"Agglutinating", "Ukrainian":"Indo-European-Baltoslavic", "Urdu":"Indo-European", "Uyghur":"Agglutinating", "Vietnamese":"Sino-Austronesian",'Afrikaans':'Indo-European-Germanic', 'SwedishSign':'Indo-European-Germanic', 'Kurmanji':'Indo-European', 'NorthSami':'Agglutinating', 'UpperSorbian':"Indo-European-Baltoslavic", 'Buryat':'Agglutinating', 'Telugu':'Dravidian', 'Serbian':"Indo-European-Baltoslavic", 'Marathi':'Indo-European','Naija':"Indo-European-Germanic", 
"OldFrench":"Indo-European-Romance", "Maltese":"Semitic", "Thai":"Sino-Austronesian","Amharic":"Afroasiatic", 'Erzya': 'Agglutinating', 'Faroese':"Indo-European-Germanic", 'Tagalog':"Sino-Austronesian", 'Bambara':'Niger-Congo', 'Breton':"Indo-European", 'Armenian':"Indo-European", 'Komi': 'Agglutinating'}
# Matplotlib colour name for each language group. Groups that appear in
# langnameGroup but not here (e.g. "Semitic", "Dravidian") have no dedicated
# colour; the plotting code below looks entries up with .get(..., 'k').
groupColors={"Indo-European-Romance":'brown',"Indo-European-Baltoslavic":'purple',"Indo-European-Germanic":'olive',"Indo-European":'royalBlue',"Sino-Austronesian":'limeGreen', "Agglutinating":'red'}
# Matplotlib marker code for each language group (same set of groups as
# groupColors); the plotting code below looks entries up with .get(..., 'o').
groupMarkers={"Indo-European-Romance":'<',"Indo-European-Baltoslavic":'^',"Indo-European-Germanic":'v',"Indo-European":'>',"Sino-Austronesian":'s', "Agglutinating":'+'}
# Single-axes scatter plot of d1 (x axis) against d2 (y axis) with tuned
# adjust_text settings: small forces, no box expansion, many iterations.
# The points are paired by position, so this assumes d1 and d2 list the same
# languages in the same order.
col1 = pd.Series(d1)
col2 = pd.Series(d2)

# Per-language colour and marker, resolved through the language's group.
# Groups without an entry fall back to black ('k') and a circle ('o').
colors = [groupColors.get(langnameGroup[label], 'k') for label in col1.index]
markers = [groupMarkers.get(langnameGroup[label], 'o') for label in col1.index]

fig, ax = plt.subplots(figsize=(10, 10))

# The lower limit is below zero, leaving empty space to the left of and
# below the data for labels of the crowded points near the origin.
lo, hi = -10, 100
ax.set_xlim([lo, hi])
ax.set_ylim([lo, hi])

# One scatter call per point, because each point carries its own marker.
for xx, yy, cc, mm in zip(col1, col2, colors, markers):
    ax.scatter(xx, yy, marker=mm, c=cc)

# Labels start exactly on their points for adjustText. The colour is reused
# from `colors` instead of being looked up a second time.
texts = [
    ax.text(x, y, label, color=cc, fontsize=8)
    for label, x, y, cc in zip(col1.index, col1, col2, colors)
]

adjust_text(
    texts, col1, col2, lim=1000,
    force_text=(0.1, 0.5), force_points=(0.1, 0.5),
    expand_text=(1, 1), expand_points=(1, 1),
    # Name the target axes explicitly instead of letting adjust_text choose
    # one itself: with several subplots in the figure, omitting ax= was
    # reported to shift the labels, and passing ax=ax fixed it.
    ax=ax,
    arrowprops=dict(arrowstyle='-', color='gray', alpha=.5),
)
Even better with -15
Wonderful! Thanks a lot! Let me know if you need any data for testing the retraction or for your notebook tutorial.
Try forcing a draw before you call `adjust_text`. My knee-jerk reaction is that there is a race condition between you using the transforms and the transforms getting fully updated.
@tacaswell Thanks! Just adding `plt.draw()` before `adjust_text` doesn't help :( It would have been an easy fix...
Hi @kimgerdes I found an easy solution. I still don't know why this doesn't work by default, but adding `ax=ax` to the `adjust_text` call in the very first code you posted makes it work!
Thanks for having looked into that! It's nice that the side graphs can be preserved this way. Having discussed about the appearance in total, this current solution has two drawbacks:
So our favorite mode would be one where
This would give something like
A
。
。
B
If you have any idea how that could be achieved, that would be great.
I see.
So, I did make a minor fix to autoalignment; not that it makes a huge difference, but this is the kind of plot that I get now with this code.
from matplotlib import gridspec
# d1: language name -> numeric value; plotted on the x axis (wrapped as col1 below).
d1={'Afrikaans': 1.35, 'Amharic': 9.51, 'AncientGreek': 11.62, 'Arabic': 9.22, 'Armenian': 2.92, 'Bambara': 0.1, 'Basque': 8.46, 'Belarusian': 2.48, 'Breton': 24.0, 'Bulgarian': 5.12, 'Buryat': 0.0, 'Cantonese': 4.5, 'Catalan': 1.97, 'Chinese': 0.05, 'Coptic': 4.41, 'Croatian': 4.17, 'Czech': 8.74, 'Danish': 14.58, 'Dutch': 15.3, 'English': 0.79, 'Erzya': 21.74, 'Estonian': 17.1, 'Faroese': 8.92, 'Finnish': 5.82, 'French': 2.44, 'Galician': 7.0, 'German': 20.6, 'Gothic': 11.11, 'Greek': 4.88, 'Hebrew': 1.48, 'Hindi': 0.16, 'Hungarian': 7.46, 'Indonesian': 1.12, 'Irish': 98.16, 'Italian': 6.8, 'Japanese': 0.0, 'Kazakh': 0.46, 'Komi': 17.24, 'Korean': 0.04, 'Kurmanji': 0.37, 'Latin': 6.14, 'Latvian': 3.34, 'Lithuanian': 0.98, 'Maltese': 0.0, 'Marathi': 1.95, 'Naija': 0.11, 'NorthSami': 4.48, 'Norwegian': 12.59, 'OldChurchSlavonic': 13.42, 'OldFrench': 10.6, 'Persian': 2.45, 'Polish': 15.49, 'Portuguese': 3.1, 'Romanian': 12.75, 'Russian': 5.9, 'Sanskrit': 9.46, 'Serbian': 9.7, 'Slovak': 11.67, 'Slovenian': 12.08, 'Spanish': 3.41, 'Swedish': 13.36, 'SwedishSign': 18.89, 'Tagalog': 100.0, 'Tamil': 5.61, 'Telugu': 0.0, 'Thai': 0.0, 'Turkish': 9.95, 'Ukrainian': 5.39, 'UpperSorbian': 5.66, 'Urdu': 0.21, 'Uyghur': 1.96, 'Vietnamese': 0}
# d2: language name -> numeric value; plotted on the y axis (wrapped as col2 below).
d2={'Afrikaans': 2.63, 'Amharic': 0.59, 'AncientGreek': 41.61, 'Arabic': 73.29, 'Armenian': 20.6, 'Bambara': 0.0, 'Basque': 18.53, 'Belarusian': 33.54, 'Breton': 53.99, 'Bulgarian': 30.08, 'Buryat': 0.38, 'Cantonese': 5.31, 'Catalan': 23.57, 'Chinese': 0.24, 'Coptic': 28.02, 'Croatian': 28.64, 'Czech': 37.94, 'Danish': 14.95, 'Dutch': 21.98, 'English': 9.93, 'Erzya': 42.54, 'Estonian': 38.92, 'Faroese': 16.07, 'Finnish': 23.02, 'French': 5.85, 'Galician': 19.7, 'German': 19.77, 'Gothic': 49.52, 'Greek': 35.74, 'Hebrew': 35.52, 'Hindi': 0.39, 'Hungarian': 28.8, 'Indonesian': 4.5, 'Irish': 98.64, 'Italian': 25.96, 'Japanese': 0.0, 'Kazakh': 0.44, 'Komi': 20.17, 'Korean': 0.04, 'Kurmanji': 0.46, 'Latin': 32.51, 'Latvian': 37.48, 'Lithuanian': 39.38, 'Maltese': 10.34, 'Marathi': 2.78, 'Naija': 4.44, 'NorthSami': 32.38, 'Norwegian': 19.04, 'OldChurchSlavonic': 53.81, 'OldFrench': 35.13, 'Persian': 0.73, 'Polish': 36.67, 'Portuguese': 13.93, 'Romanian': 30.23, 'Russian': 33.52, 'Sanskrit': 31.1, 'Serbian': 25.7, 'Slovak': 39.69, 'Slovenian': 31.77, 'Spanish': 22.06, 'Swedish': 19.8, 'SwedishSign': 18.69, 'Tagalog': 97.92, 'Tamil': 0.55, 'Telugu': 0.95, 'Thai': 0.15, 'Turkish': 4.67, 'Ukrainian': 32.81, 'UpperSorbian': 23.85, 'Urdu': 0.18, 'Uyghur': 4.06, 'Vietnamese': 1.62}
# langnameGroup: language name -> group label; the group selects the colour
# and marker of each point through groupColors / groupMarkers.
langnameGroup={"AncientGreek":"Indo-European", "Arabic":"Semitic", "Basque":"isolate", "Belarusian":"Indo-European-Baltoslavic", "Bulgarian":"Indo-European-Baltoslavic", "Cantonese":"Sino-Austronesian", "Catalan":"Indo-European-Romance", "Chinese":"Sino-Austronesian", "Coptic":"Afroasiatic", "Croatian":"Indo-European-Baltoslavic", "Czech":"Indo-European-Baltoslavic", "Danish":"Indo-European-Germanic", "Dutch":"Indo-European-Germanic", "English":"Indo-European-Germanic", "Estonian":"Agglutinating", "Finnish":"Agglutinating", "French":"Indo-European-Romance", "Galician":"Indo-European-Romance", "German":"Indo-European-Germanic", "Gothic":"Indo-European-Germanic", "Greek":"Indo-European", "Hebrew":"Semitic", "Hindi":"Indo-European", "Hungarian":"Agglutinating", "Indonesian":"Sino-Austronesian", "Irish":"Indo-European", "Italian":"Indo-European-Romance", "Japanese":"Agglutinating", "Kazakh":"Agglutinating", "Korean":"Agglutinating", "Latin":"Indo-European-Romance", "Latvian":"Indo-European-Baltoslavic", "Lithuanian":"Indo-European-Baltoslavic", "Norwegian":"Indo-European-Germanic", "OldChurchSlavonic":"Indo-European-Baltoslavic", "Persian":"Indo-European", "Polish":"Indo-European-Baltoslavic", "Portuguese":"Indo-European-Romance", "Romanian":"Indo-European-Romance", "Russian":"Indo-European-Baltoslavic", "Sanskrit":"Indo-European", "Slovak":"Indo-European-Baltoslavic", "Slovenian":"Indo-European-Baltoslavic", "Spanish":"Indo-European-Romance", "Swedish":"Indo-European-Germanic", "Tamil":"Dravidian", "Turkish":"Agglutinating", "Ukrainian":"Indo-European-Baltoslavic", "Urdu":"Indo-European", "Uyghur":"Agglutinating", "Vietnamese":"Sino-Austronesian",'Afrikaans':'Indo-European-Germanic', 'SwedishSign':'Indo-European-Germanic', 'Kurmanji':'Indo-European', 'NorthSami':'Agglutinating', 'UpperSorbian':"Indo-European-Baltoslavic", 'Buryat':'Agglutinating', 'Telugu':'Dravidian', 'Serbian':"Indo-European-Baltoslavic", 'Marathi':'Indo-European','Naija':"Indo-European-Germanic", 
"OldFrench":"Indo-European-Romance", "Maltese":"Semitic", "Thai":"Sino-Austronesian","Amharic":"Afroasiatic", 'Erzya': 'Agglutinating', 'Faroese':"Indo-European-Germanic", 'Tagalog':"Sino-Austronesian", 'Bambara':'Niger-Congo', 'Breton':"Indo-European", 'Armenian':"Indo-European", 'Komi': 'Agglutinating'}
# Colour name per language group. Groups without an entry (e.g. "Semitic")
# fall back to 'k' where this table is queried with .get(..., 'k').
groupColors={"Indo-European-Romance":'brown',"Indo-European-Baltoslavic":'purple',"Indo-European-Germanic":'olive',"Indo-European":'royalBlue',"Sino-Austronesian":'limeGreen', "Agglutinating":'red'}
# Scatter marker per language group. Groups without an entry fall back to 'o'
# where this table is queried with .get(..., 'o').
groupMarkers={"Indo-European-Romance":'<',"Indo-European-Baltoslavic":'^',"Indo-European-Germanic":'v',"Indo-European":'>',"Sino-Austronesian":'s', "Agglutinating":'+'}
# Labelled scatter plot (d1 on x, d2 on y) with two thin marginal strips that
# project the points onto each axis; adjust_text repositions the labels.
# NOTE(review): assumes `pd` (pandas), `plt` (matplotlib.pyplot) and
# `adjust_text` (adjustText) are already imported -- only the gridspec import
# is shown in this snippet.
col1 = pd.Series(d1)
# Align y values to the x labels so points are paired by language name rather
# than by the accidental ordering of the two dicts.
col2 = pd.Series(d2).reindex(col1.index)

# Per-point colour and marker, looked up through the language's group; groups
# without an entry fall back to black ('k') and a circle ('o').
c = [groupColors.get(langnameGroup[label], 'k') for label in col1.index]
m = [groupMarkers.get(langnameGroup[label], 'o') for label in col1.index]

fig = plt.figure(figsize=(10, 10))
# 2x2 grid: narrow left column and short bottom row hold the marginal strips.
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 25], height_ratios=[25, 1])
aa = plt.subplot(gs[0])  # top-left: strip showing the y values
ax = plt.subplot(gs[1])  # top-right: main scatter plot
bb = plt.subplot(gs[3])  # bottom-right: strip showing the x values

li, la = (-15, 100)
ax.set_xlim([li, la])
ax.set_ylim([li, la])
# Each strip shares the data range of the main axes along its long side and
# uses a dummy [0, 1] range across it. (The original set bb's x range through
# plt.xlim, which acts on the current axes -- bb, as the last one created.)
aa.set_xlim([0, 1])
aa.set_ylim([li, la])
bb.set_xlim([li, la])
bb.set_ylim([0, 1])

# Major ticks at 50 and 100, minor ticks every 25; grid drawn for both.
ax.set_xticks([50, 100], minor=False)
ax.set_xticks([0, 25, 50, 75, 100], minor=True)
ax.set_yticks([50, 100], minor=False)
ax.set_yticks([0, 25, 50, 75, 100], minor=True)
ax.grid(which='both', axis='both', alpha=.5)
# Diagonal from corner to corner of the axes (axes coordinates, not data).
ax.plot([0, 1], [0, 1], transform=ax.transAxes, alpha=.5, color="gray")

# The strips carry no ticks of their own.
aa.set_xticks([], minor=False)
aa.set_yticks([], minor=False)
bb.set_xticks([], minor=False)
bb.set_yticks([], minor=False)

# One scatter call per point because each point may need a different marker.
for xx, yy, cc, mm in zip(col1, col2, c, m):
    ax.scatter(xx, yy, marker=mm, c=cc)
# Marginal projections: all points at the strip's midline, half transparent.
aa.scatter([0.5 for _ in col1], col2, c=c, alpha=0.5)
bb.scatter(col1, [0.5 for _ in col2], c=c, alpha=0.5)

# Labels start exactly on their points; adjust_text moves them from there.
texts = [
    ax.text(x, y, label, color=color, fontsize=8)
    for label, x, y, color in zip(col1.index, col1, col2, c)
]
# ax=ax matters here: the current pyplot axes is bb, not the main plot.
adjust_text(
    texts, col1, col2, ax=ax, lim=1000,
    text_from_points=True, text_from_text=True, autoalign=True,
    expand_text=(1, 1), expand_points=(1, 1),
    force_text=(0.01, 0.25), force_points=(0.01, 0.25),
    arrowprops=dict(arrowstyle='-', color='gray', alpha=.5),
)
Not much can be done to improve the really dense area on the left bottom, but I think it looks very reasonable with most texts very close to the dots...
Thanks for this quick fix! That's already very close to our ideal graph. Just three questions:
That would be great. As an optional "pared down" feature it would be perfect if the "importance" information could be implemented to remove too distant labels or labels that are still touching other labels or points. But we are already happy with this graph.
in fact, rereading our conversation, i have seen that the labels could leap out of the graph here: something like that would be perfect for our graphs...
Make `expand_points` bigger - but carefully: since it's given as a fraction of the text bbox dimensions, I would make it only a little bigger for x, and more for y. Alternatively, add invisible squares of the size you want around the points, and add them through `add_objects`. I suppose the example you show is something related to my suggestion in 3, but I don't understand why this happens, and so I wouldn't rely on this behaviour.
Great, thanks! do you think that there is a chance that the "importance" feature will be implemented any time soon? if not, we'll do with this version.
So, I quickly tried adding a bigger axes to contain the texts... This kind of works, but somehow the whole algorithm doesn't really do its job very well.
from matplotlib import gridspec
# d1: language name -> numeric value; plotted on the x axis (wrapped as col1 below).
d1={'Afrikaans': 1.35, 'Amharic': 9.51, 'AncientGreek': 11.62, 'Arabic': 9.22, 'Armenian': 2.92, 'Bambara': 0.1, 'Basque': 8.46, 'Belarusian': 2.48, 'Breton': 24.0, 'Bulgarian': 5.12, 'Buryat': 0.0, 'Cantonese': 4.5, 'Catalan': 1.97, 'Chinese': 0.05, 'Coptic': 4.41, 'Croatian': 4.17, 'Czech': 8.74, 'Danish': 14.58, 'Dutch': 15.3, 'English': 0.79, 'Erzya': 21.74, 'Estonian': 17.1, 'Faroese': 8.92, 'Finnish': 5.82, 'French': 2.44, 'Galician': 7.0, 'German': 20.6, 'Gothic': 11.11, 'Greek': 4.88, 'Hebrew': 1.48, 'Hindi': 0.16, 'Hungarian': 7.46, 'Indonesian': 1.12, 'Irish': 98.16, 'Italian': 6.8, 'Japanese': 0.0, 'Kazakh': 0.46, 'Komi': 17.24, 'Korean': 0.04, 'Kurmanji': 0.37, 'Latin': 6.14, 'Latvian': 3.34, 'Lithuanian': 0.98, 'Maltese': 0.0, 'Marathi': 1.95, 'Naija': 0.11, 'NorthSami': 4.48, 'Norwegian': 12.59, 'OldChurchSlavonic': 13.42, 'OldFrench': 10.6, 'Persian': 2.45, 'Polish': 15.49, 'Portuguese': 3.1, 'Romanian': 12.75, 'Russian': 5.9, 'Sanskrit': 9.46, 'Serbian': 9.7, 'Slovak': 11.67, 'Slovenian': 12.08, 'Spanish': 3.41, 'Swedish': 13.36, 'SwedishSign': 18.89, 'Tagalog': 100.0, 'Tamil': 5.61, 'Telugu': 0.0, 'Thai': 0.0, 'Turkish': 9.95, 'Ukrainian': 5.39, 'UpperSorbian': 5.66, 'Urdu': 0.21, 'Uyghur': 1.96, 'Vietnamese': 0}
# d2: language name -> numeric value; plotted on the y axis (wrapped as col2 below).
d2={'Afrikaans': 2.63, 'Amharic': 0.59, 'AncientGreek': 41.61, 'Arabic': 73.29, 'Armenian': 20.6, 'Bambara': 0.0, 'Basque': 18.53, 'Belarusian': 33.54, 'Breton': 53.99, 'Bulgarian': 30.08, 'Buryat': 0.38, 'Cantonese': 5.31, 'Catalan': 23.57, 'Chinese': 0.24, 'Coptic': 28.02, 'Croatian': 28.64, 'Czech': 37.94, 'Danish': 14.95, 'Dutch': 21.98, 'English': 9.93, 'Erzya': 42.54, 'Estonian': 38.92, 'Faroese': 16.07, 'Finnish': 23.02, 'French': 5.85, 'Galician': 19.7, 'German': 19.77, 'Gothic': 49.52, 'Greek': 35.74, 'Hebrew': 35.52, 'Hindi': 0.39, 'Hungarian': 28.8, 'Indonesian': 4.5, 'Irish': 98.64, 'Italian': 25.96, 'Japanese': 0.0, 'Kazakh': 0.44, 'Komi': 20.17, 'Korean': 0.04, 'Kurmanji': 0.46, 'Latin': 32.51, 'Latvian': 37.48, 'Lithuanian': 39.38, 'Maltese': 10.34, 'Marathi': 2.78, 'Naija': 4.44, 'NorthSami': 32.38, 'Norwegian': 19.04, 'OldChurchSlavonic': 53.81, 'OldFrench': 35.13, 'Persian': 0.73, 'Polish': 36.67, 'Portuguese': 13.93, 'Romanian': 30.23, 'Russian': 33.52, 'Sanskrit': 31.1, 'Serbian': 25.7, 'Slovak': 39.69, 'Slovenian': 31.77, 'Spanish': 22.06, 'Swedish': 19.8, 'SwedishSign': 18.69, 'Tagalog': 97.92, 'Tamil': 0.55, 'Telugu': 0.95, 'Thai': 0.15, 'Turkish': 4.67, 'Ukrainian': 32.81, 'UpperSorbian': 23.85, 'Urdu': 0.18, 'Uyghur': 4.06, 'Vietnamese': 1.62}
# langnameGroup: language name -> group label; the group selects the colour
# and marker of each point through groupColors / groupMarkers.
langnameGroup={"AncientGreek":"Indo-European", "Arabic":"Semitic", "Basque":"isolate", "Belarusian":"Indo-European-Baltoslavic", "Bulgarian":"Indo-European-Baltoslavic", "Cantonese":"Sino-Austronesian", "Catalan":"Indo-European-Romance", "Chinese":"Sino-Austronesian", "Coptic":"Afroasiatic", "Croatian":"Indo-European-Baltoslavic", "Czech":"Indo-European-Baltoslavic", "Danish":"Indo-European-Germanic", "Dutch":"Indo-European-Germanic", "English":"Indo-European-Germanic", "Estonian":"Agglutinating", "Finnish":"Agglutinating", "French":"Indo-European-Romance", "Galician":"Indo-European-Romance", "German":"Indo-European-Germanic", "Gothic":"Indo-European-Germanic", "Greek":"Indo-European", "Hebrew":"Semitic", "Hindi":"Indo-European", "Hungarian":"Agglutinating", "Indonesian":"Sino-Austronesian", "Irish":"Indo-European", "Italian":"Indo-European-Romance", "Japanese":"Agglutinating", "Kazakh":"Agglutinating", "Korean":"Agglutinating", "Latin":"Indo-European-Romance", "Latvian":"Indo-European-Baltoslavic", "Lithuanian":"Indo-European-Baltoslavic", "Norwegian":"Indo-European-Germanic", "OldChurchSlavonic":"Indo-European-Baltoslavic", "Persian":"Indo-European", "Polish":"Indo-European-Baltoslavic", "Portuguese":"Indo-European-Romance", "Romanian":"Indo-European-Romance", "Russian":"Indo-European-Baltoslavic", "Sanskrit":"Indo-European", "Slovak":"Indo-European-Baltoslavic", "Slovenian":"Indo-European-Baltoslavic", "Spanish":"Indo-European-Romance", "Swedish":"Indo-European-Germanic", "Tamil":"Dravidian", "Turkish":"Agglutinating", "Ukrainian":"Indo-European-Baltoslavic", "Urdu":"Indo-European", "Uyghur":"Agglutinating", "Vietnamese":"Sino-Austronesian",'Afrikaans':'Indo-European-Germanic', 'SwedishSign':'Indo-European-Germanic', 'Kurmanji':'Indo-European', 'NorthSami':'Agglutinating', 'UpperSorbian':"Indo-European-Baltoslavic", 'Buryat':'Agglutinating', 'Telugu':'Dravidian', 'Serbian':"Indo-European-Baltoslavic", 'Marathi':'Indo-European','Naija':"Indo-European-Germanic", 
"OldFrench":"Indo-European-Romance", "Maltese":"Semitic", "Thai":"Sino-Austronesian","Amharic":"Afroasiatic", 'Erzya': 'Agglutinating', 'Faroese':"Indo-European-Germanic", 'Tagalog':"Sino-Austronesian", 'Bambara':'Niger-Congo', 'Breton':"Indo-European", 'Armenian':"Indo-European", 'Komi': 'Agglutinating'}
# Colour name per language group. Groups without an entry (e.g. "Semitic")
# fall back to 'k' where this table is queried with .get(..., 'k').
groupColors={"Indo-European-Romance":'brown',"Indo-European-Baltoslavic":'purple',"Indo-European-Germanic":'olive',"Indo-European":'royalBlue',"Sino-Austronesian":'limeGreen', "Agglutinating":'red'}
# Scatter marker per language group. Groups without an entry fall back to 'o'
# where this table is queried with .get(..., 'o').
groupMarkers={"Indo-European-Romance":'<',"Indo-European-Baltoslavic":'^',"Indo-European-Germanic":'v',"Indo-European":'>',"Sino-Austronesian":'s', "Agglutinating":'+'}
# Experiment: same labelled scatter plot with marginal strips, but adjust_text
# is pointed at a figure-wide invisible axes so labels may leave the main plot.
# NOTE(review): assumes `pd` (pandas), `plt` (matplotlib.pyplot) and
# `adjust_text` (adjustText) are already imported -- only the gridspec import
# is shown in this snippet.
col1 = pd.Series(d1)
# Align y values to the x labels so points are paired by language name rather
# than by the accidental ordering of the two dicts.
col2 = pd.Series(d2).reindex(col1.index)

# Per-point colour and marker, looked up through the language's group; groups
# without an entry fall back to black ('k') and a circle ('o').
c = [groupColors.get(langnameGroup[label], 'k') for label in col1.index]
m = [groupMarkers.get(langnameGroup[label], 'o') for label in col1.index]

fig = plt.figure(figsize=(10, 10))
# 2x2 grid: narrow left column and short bottom row hold the marginal strips.
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 25], height_ratios=[25, 1])
aa = plt.subplot(gs[0])  # top-left: strip showing the y values
ax = plt.subplot(gs[1])  # top-right: main scatter plot
bb = plt.subplot(gs[3])  # bottom-right: strip showing the x values

li, la = (0, 100)
ax.set_xlim([li, la])
ax.set_ylim([li, la])
# Each strip shares the data range of the main axes along its long side and
# uses a dummy [0, 1] range across it. (The original set bb's x range through
# plt.xlim, which acts on the current axes -- bb, as the last one created.)
aa.set_xlim([0, 1])
aa.set_ylim([li, la])
bb.set_xlim([li, la])
bb.set_ylim([0, 1])

# Major ticks at 50 and 100, minor ticks every 25; grid drawn for both.
ax.set_xticks([50, 100], minor=False)
ax.set_xticks([0, 25, 50, 75, 100], minor=True)
ax.set_yticks([50, 100], minor=False)
ax.set_yticks([0, 25, 50, 75, 100], minor=True)
ax.grid(which='both', axis='both', alpha=.5)
# Diagonal from corner to corner of the axes (axes coordinates, not data).
ax.plot([0, 1], [0, 1], transform=ax.transAxes, alpha=.5, color="gray")

# The strips carry no ticks of their own.
aa.set_xticks([], minor=False)
aa.set_yticks([], minor=False)
bb.set_xticks([], minor=False)
bb.set_yticks([], minor=False)

# Invisible axes spanning the whole figure, handed to adjust_text below.
allax = fig.add_subplot(111)
allax.axis('off')

# One scatter call per point because each point may need a different marker.
for xx, yy, cc, mm in zip(col1, col2, c, m):
    ax.scatter(xx, yy, marker=mm, c=cc)
# Marginal projections: all points at the strip's midline, half transparent.
aa.scatter([0.5 for _ in col1], col2, c=c, alpha=0.5)
bb.scatter(col1, [0.5 for _ in col2], c=c, alpha=0.5)

# Labels start exactly on their points; adjust_text moves them from there.
texts = [
    ax.text(x, y, label, color=color, fontsize=8)
    for label, x, y, color in zip(col1.index, col1, col2, c)
]
# NOTE(review): the texts live on `ax` and col1/col2 are in ax data units,
# while adjust_text is told to work in `allax`. The points presumably need
# transforming into allax coordinates for the repulsion to be correct -- confirm.
adjust_text(
    texts, col1, col2, ax=allax, lim=1000,
    text_from_points=True, text_from_text=True, autoalign=True,
    expand_text=(1, 1), expand_points=(1.1, 1.2),
    force_text=(0.01, 0.25), force_points=(0.01, 0.25),
    arrowprops=dict(arrowstyle='-', color='gray', alpha=.5),
)
There is a chance it'll be implemented soon, but no guarantee! :) Sorry.
yes, now some labels are right on the dots. And some have gray lines although they are spot on (Estonian, Armenian,...). strange.
and on the max side, some space could be added as well.
Yeah, I need to dig into the algorithm and think about transforms and so on... I guess I have to transform the coordinates of the dots into the new axes.
This is an extension of issue https://github.com/Phlya/adjustText/issues/50. The main graphs that we'll be using in our paper are 2-dimensional. Maybe there is something that can be done with adjustText here, too. So the unadjusted scatter plot is: (plot not preserved here). adjustText gives me: (plot not preserved here). Again, labels are moving that shouldn't. Moreover, some labels are moved right on top of other points. Below is the complete code. You can see a few commented-out variables in the adjust_text call that I have experimented with, but they don't seem to make any difference. Do you have any idea of parameter settings that would give a better uncluttering? Ideally, one should be able to specify a maximum distance from the original position, and then an option would allow removing the remaining overlapping labels until only non-overlapping ones remain :smile: