Closed Qubitium closed 8 years ago
Updated Sep 14: Added Samsung Grand Prime and Chuwi Vi8 to working device list.
Please try the below on the node and report back the result here, thanks.
package andro
import (
"fmt"
"syscall"
)
// TapProtocolSwitch creates one socket for every combination of address
// family (IPv4, IPv6), socket type (stream, datagram) and protocol (0 or the
// explicit TCP/UDP protocol number), binds it to the wildcard address with
// port 0, and prints exactly one line per combination to standard output:
//
//	socket: {...}, <err>   when the socket could not be created
//	bind: {...}, <err>     when the bind failed
//	<fd>: {...}            when both steps succeeded
//
// The sockets are closed by deferred calls, so all of them stay open until
// the function returns.
func TapProtocolSwitch() {
	type combo struct {
		family   int
		sotype   int
		protocol int
	}

	// Build the table family by family: for each socket type try the
	// default protocol (0) first, then the explicit one.
	var table []combo
	for _, family := range []int{syscall.AF_INET, syscall.AF_INET6} {
		table = append(table,
			combo{family, syscall.SOCK_STREAM, 0},
			combo{family, syscall.SOCK_STREAM, syscall.IPPROTO_TCP},
			combo{family, syscall.SOCK_DGRAM, 0},
			combo{family, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP},
		)
	}

	for _, c := range table {
		fd, err := syscall.Socket(c.family, c.sotype, c.protocol)
		if err != nil {
			fmt.Printf("socket: %+v, %v\n", c, err)
			continue
		}
		// Runs at function return, not at the end of this iteration.
		defer syscall.Close(fd)

		// Wildcard local address of the matching family, port 0.
		var local syscall.Sockaddr
		if c.family == syscall.AF_INET {
			local = &syscall.SockaddrInet4{}
		} else if c.family == syscall.AF_INET6 {
			local = &syscall.SockaddrInet6{}
		}

		err = syscall.Bind(fd, local)
		if err != nil {
			fmt.Printf("bind: %+v, %v\n", c, err)
			continue
		}
		fmt.Printf("%d: %+v\n", fd, c)
	}
}
@mikioh
Samsung SM-T320 (Broken device)
09-14 13:44:43.908 15083-15113/? I/GoLog: 38: {family:2 sotype:1 protocol:0}
09-14 13:44:43.908 15083-15113/? I/GoLog: 39: {family:2 sotype:1 protocol:6}
09-14 13:44:43.908 15083-15113/? I/GoLog: 40: {family:2 sotype:2 protocol:0}
09-14 13:44:43.908 15083-15113/? I/GoLog: 42: {family:2 sotype:2 protocol:17}
09-14 13:44:43.908 15083-15113/? I/GoLog: 43: {family:10 sotype:1 protocol:0}
09-14 13:44:43.908 15083-15113/? I/GoLog: 41: {family:10 sotype:1 protocol:6}
09-14 13:44:43.908 15083-15113/? I/GoLog: 44: {family:10 sotype:2 protocol:0}
09-14 13:44:43.908 15083-15113/? I/GoLog: 45: {family:10 sotype:2 protocol:17}
Samsung GT-N7102 (Broken device)
09-14 13:43:24.903 6088-6112/? I/GoLog: 64: {family:2 sotype:1 protocol:0}
09-14 13:43:24.903 6088-6112/? I/GoLog: 65: {family:2 sotype:1 protocol:6}
09-14 13:43:24.903 6088-6112/? I/GoLog: 67: {family:2 sotype:2 protocol:0}
09-14 13:43:24.903 6088-6112/? I/GoLog: 68: {family:2 sotype:2 protocol:17}
09-14 13:43:24.903 6088-6112/? I/GoLog: 69: {family:10 sotype:1 protocol:0}
09-14 13:43:24.903 6088-6112/? I/GoLog: 70: {family:10 sotype:1 protocol:6}
09-14 13:43:24.903 6088-6112/? I/GoLog: 71: {family:10 sotype:2 protocol:0}
09-14 13:43:24.903 6088-6112/? I/GoLog: 72: {family:10 sotype:2 protocol:17}
Samsung Grand Prime (Working device)
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 58: {family:2 sotype:1 protocol:0}
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 59: {family:2 sotype:1 protocol:6}
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 60: {family:2 sotype:2 protocol:0}
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 61: {family:2 sotype:2 protocol:17}
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 62: {family:10 sotype:1 protocol:0}
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 63: {family:10 sotype:1 protocol:6}
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 64: {family:10 sotype:2 protocol:0}
09-14 13:46:32.023 15326-15343/com.test.gosockettest I/GoLog: 65: {family:10 sotype:2 protocol:17}
Asus Zenfone2 (Working device)
09-14 13:47:30.913 1824-1869/com.test.gosockettest I/GoLog: 47: {family:2 sotype:1 protocol:0}
09-14 13:47:30.915 1824-1869/com.test.gosockettest I/GoLog: 49: {family:2 sotype:1 protocol:6}
09-14 13:47:30.917 1824-1869/com.test.gosockettest I/GoLog: 50: {family:2 sotype:2 protocol:0}
09-14 13:47:30.918 1824-1869/com.test.gosockettest I/GoLog: 51: {family:2 sotype:2 protocol:17}
09-14 13:47:30.920 1824-1869/com.test.gosockettest I/GoLog: 52: {family:10 sotype:1 protocol:0}
09-14 13:47:30.923 1824-1869/com.test.gosockettest I/GoLog: 53: {family:10 sotype:1 protocol:6}
09-14 13:47:30.925 1824-1869/com.test.gosockettest I/GoLog: 54: {family:10 sotype:2 protocol:0}
09-14 13:47:30.926 1824-1869/com.test.gosockettest I/GoLog: 55: {family:10 sotype:2 protocol:17}
No failures, nice. Please try the below. When you see some error on connect, that would be a key to the root cause.
package andro
import (
"fmt"
"net"
"syscall"
)
// InitiateProtocolConnect creates a TCP socket for each family/protocol
// combination whose address family matches daddr, binds it to the wildcard
// address with port 0, and connects it to daddr:dport using raw syscalls.
//
// daddr must be a literal IPv4 or IPv6 address (no name resolution is
// performed), e.g. daddr="173.205.184.7", dport=80. If daddr cannot be
// parsed, a "parse:" line is printed and no socket is created.
//
// One line is printed to standard output per attempted combination:
//
//	socket: {...}, <err>    when the socket could not be created
//	bind: {...}, <err>      when the bind failed
//	connect: {...}, <err>   when the connect failed
//	<fd>: {...}             when all steps succeeded
//
// The sockets are closed by deferred calls, so all of them stay open until
// the function returns.
func InitiateProtocolConnect(daddr string, dport int) {
	ip := net.ParseIP(daddr)
	if ip == nil {
		// Without this check the family filter below would skip nothing and
		// every socket would be connected to an all-zero address.
		fmt.Printf("parse: invalid IP address %q\n", daddr)
		return
	}

	// An address with a 4-byte form (including IPv4-mapped IPv6) is dialed
	// over AF_INET only; everything else over AF_INET6 only.
	ip4 := ip.To4()
	wantFamily := syscall.AF_INET6
	if ip4 != nil {
		wantFamily = syscall.AF_INET
	}

	for _, sw := range []struct {
		family   int
		sotype   int
		protocol int
	}{
		{syscall.AF_INET, syscall.SOCK_STREAM, 0},
		{syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP},
		{syscall.AF_INET6, syscall.SOCK_STREAM, 0},
		{syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_TCP},
	} {
		if sw.family != wantFamily {
			continue
		}
		s, err := syscall.Socket(sw.family, sw.sotype, sw.protocol)
		if err != nil {
			fmt.Printf("socket: %+v, %v\n", sw, err)
			continue
		}
		// Runs at function return, not at the end of this iteration.
		defer syscall.Close(s)

		// lsa is the wildcard local address; rsa is the remote endpoint.
		var lsa, rsa syscall.Sockaddr
		switch sw.family {
		case syscall.AF_INET:
			lsa = &syscall.SockaddrInet4{}
			sa := &syscall.SockaddrInet4{Port: dport}
			copy(sa.Addr[:], ip4)
			rsa = sa
		case syscall.AF_INET6:
			lsa = &syscall.SockaddrInet6{}
			sa := &syscall.SockaddrInet6{Port: dport}
			copy(sa.Addr[:], ip.To16())
			rsa = sa
		}
		if err := syscall.Bind(s, lsa); err != nil {
			fmt.Printf("bind: %+v, %v\n", sw, err)
			continue
		}
		if err := syscall.Connect(s, rsa); err != nil {
			fmt.Printf("connect: %+v, %v\n", sw, err)
			continue
		}
		fmt.Printf("%d: %+v\n", s, sw)
	}
}
@mikioh Result of new test with syscall.bind + syscall.connect
Samsung SM-T320 (Broken device)
09-14 15:49:31.568 17808-17838/? I/GoLog: 38: {family:2 sotype:1 protocol:0}
09-14 15:49:31.765 17808-17838/? I/GoLog: 39: {family:2 sotype:1 protocol:6}
Samsung GT-N7102 (Broken device)
09-14 15:51:15.091 21315-21330/com.test.gosockettest I/GoLog: 59: {family:2 sotype:1 protocol:0}
09-14 15:51:15.286 21315-21330/com.test.gosockettest I/GoLog: 64: {family:2 sotype:1 protocol:6}
No errors on broken devices. Strange.
But this gives us some extra ammo to work with. We are going to try using the syscall.connect method to create a TCP fd and then convert that fd -> file -> fileConn(). We have already successfully passed a Java TCP socket's fd to Go and are going this route to bypass the problem. If the syscall fd works, it would be a pure-Go bypass of the problem.
The above two snippets are basic waltzes. The real net.Dial does more dancing: a) DNS resolution (I guess you always pass a literal address, so it won't be a problem), b) non-blocking IO with the runtime-integrated network poller. If (b) is related to the issue, for example, somehow the poller takes a wrong descriptor and the kernel says "hey, protocol not available", subsequent read/write ops might fail too.
@mikioh.
We just confirmed that taking your syscall test code to generate a tcp ipv4 con fd and then converting it to a File{} then to a FileConn{} worked perfectly. We are now able to correctly use the socket and do stuff like TLS handshake on it.
So the problem is somewhere in net.Dial.
I assigned this to @mikioh for now, because @mikioh is already helping. But, anyone who has time and wants to work on it, feel free to take it.
FWIW, I can't reproduce this on my OnePlus One (4.4.4). I used golang.org/x/mobile/example/bind as a starting point, added android.permission.INTERNET to AndroidManifest.xml, and added GetTestResult and a call to it to hello.go. I got:
I/GoLog ( 5004): Starting go test... 173.205.184.7:80 I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp go routine.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp go routine.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp go routine.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp go routine.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp go routine.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp serial.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp serial.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp serial.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp serial.. I/GoLog ( 5004): GOT TCP! I/GoLog ( 5004): completed tcp serial..
My uneducated guess is weird interaction with IPv6 where your phones are connected. Does your test program fail on WiFi as well as on mobile data (2/3/4G)?
@eliasnaur
I have just retested on my OnePlus 4.4.4 CM 11.0-XNPH44S on both WIFI and LTE and failed the tests. I have access to another OnePlus, CM 12 this time and will test that later.
So these are the network environments in which I have tested the listed devices. Note that I did not test each device at every location; this is just the sum of all locations where these tests have been performed.
1) Los Angeles Tmobile LTE 2) Los Angeles Charter Cable (Wifi) 3) Shanghai Unicom LTE 4) Shanghai China Telecom Fiber (Wifi)
So the issue from my pov:
1) Not Android version specific: from 4.3 to 5.1.1 2) Not CPU specific: x86, Qualcomm, MediaTek 3) Not manufacturer specific: all over the place. 4) Not location specific: US, China 5) Not connectivity specific: Wifi, LTE 6) Not dev machine specific: compiled and tested apk on separate dev machines.
I am as perplexed as you are, especially the fact you weren't able to reproduce the result on your OnePlus.
Awesome testing coverage! :) I'm not sure you mentioned it, but is there any indication of a timing issue? That is, have you ever had a configuration fail and then suddenly succeed after some number of retries?
@eliasnaur
The thing is, it is also failing on the official Android Studio/Google supplied "Lollipop API 22 armv7 Emulator" image. Can you launch an v22 armv7 AVD and run the apk? I am using Nexus 4 768x1280 template.
If the code is working on your emulator and failing on mine, then...I need a drink.
And I have never had a broken device succeed in my tests, or vice versa with working devices, over the past few days. So it is not likely to be timing related.
@diegomontoya I created an AVD from the "Google APIs ARM (armeabi-v7a)" ABI and "Google APIs (API level 22)" Target on a Nexus 4 device 768x1280 with no skin. Same result, "GOT TCP!" every time.
@eliasnaur I am going to assume at this point, based on your test results, that this may be isolated to my localized build env and the end product my env is producing.
Let's swap apks. This way, we can clarify some fog here.
1) I will build a test apk for you to run that has confirmed failure and success on different devices. 2) Send me the apk to test which works on your OnePlus.
Please email me at xing@fictionpress.com and we can start exchanging some test files.
Good idea. I've sent you a mail with apk and source diff.
Looks like the root cause of this issue comes from somewhere between Android SDK and bindings generated by gomobile, and not related to the package net directly. Have a safe trip, mobile guys.
Thanks a lot @mikioh for helping with this!
@diegomontoya, is #12725 the root cause of this problem?
Timeout. Closing.
Updated Sep 14: Added Samsung Grand Prime and Chuwi Vi8 to working device list.
Go ver: devel +0b5bcf5 Sat Sep 12 08:34:52 2015 +0000 linux/amd64 Go Mobile ver: +65551d8 Fri Sep 11 20:14:45 2015 +0000 (android); androidSDK=/opt/Android/Sdk/platforms/android-23 Android Studio ver: 1.4 Beta3 Go Mobile Bind Plugin: 0.2.2 Build Env: Linux x86_64
Problem:
TCP networking code generated for Android, packaged as a .so in an APK with Android Studio and the go-bind plugin, will sometimes work but, most of the time, results in a "protocol not available" error in net.Dial().
Setup:
1) Manifest has
xml <uses-permission android:name="android.permission.INTERNET" />
2) Java TCP does work, verified in the same apk. 3) Tested calling Go from Java via the go-bind bridge on both the Java main thread and a Java background thread. Result is the same. Expectation:
Test Code:
At the bottom I have included the Go code and relevant Java code you can insert into any go mobile android project.
Working Devices:
Failed Devices:
I will try to update the device list with working/non-working as I find more devices to test on.