square / okio

A modern I/O library for Android, Java, and Kotlin Multiplatform.
https://square.github.io/okio/
Apache License 2.0
8.81k stars 1.18k forks source link

Watchdog thread is eating CPU #185

Closed swankjesse closed 8 years ago

swankjesse commented 8 years ago

http://stackoverflow.com/questions/34997596/abnormal-cpu-usage-okio-watchdog

DavidEdwards commented 8 years ago

I am able to reproduce this consistently in my current project. I haven't used OkHttp (okio) before this project. I cannot guarantee that it isn't a problem introduced by me.

I'll post an example of how I use OkHttp (okio) shortly.

DavidEdwards commented 8 years ago

How I am starting the trace, onPause:

Debug.startMethodTracing("debug", 80 * 1024 * 1024); // 80 MiB trace file
new Handler(Looper.getMainLooper()).postDelayed(new Runnable() {
    @Override
    public void run() {
        Debug.stopMethodTracing();
    }
}, 30000);

A barebones example of how I use the library:

public class Api {

    private static Api mApi = null;
    private OkHttpClient mClient = null;
    private WeakReference<Context> mContext = null;

    private final boolean mTestDelay = false;

    private static final String API_GET_USER = "http://example.com/api/userinfo.php";

    private static String VERSION_CODE = "0";
    private static String VERSION_VERSION = "1.0";

    @NonNull
    public static Api getInstance() {
        return getInstance(OTRApplication.sContext.get());
    }

    @NonNull
    public static Api getInstance(@NonNull Context context) {
        if (mApi == null) {
            mApi = new Api(context);
        }
        if(mApi.mClient == null) {
            OkHttpClient.Builder builder = new OkHttpClient.Builder();

            mApi.mClient = new OkHttpClient();

            File cacheDir = new File(context.getCacheDir(), "okhttp");
            if(!cacheDir.mkdirs()) {
                cacheDir = context.getCacheDir();
            }
            int cacheSize = 1 * 1024 * 1024; // 1 MiB
            Cache cache = new Cache(cacheDir, cacheSize);

            builder.connectTimeout(15, TimeUnit.SECONDS);
            builder.cookieJar(new NotPersistentCookieJar());
            builder.cache(cache);

            mApi.mClient = builder.build();
        }
        mApi.mContext = new WeakReference<Context>(context);

        return mApi;
    }

    public Api(@NonNull Context context) {
        try {
            PackageInfo pInfo = context.getPackageManager().getPackageInfo(context.getPackageName(), 0);
            VERSION_CODE = String.valueOf(pInfo.versionCode);
            VERSION_VERSION = pInfo.versionName;
        } catch (PackageManager.NameNotFoundException e) {
            e.printStackTrace();
        }
    }

    @NonNull
    public OkHttpClient getClient() {
        return mClient;
    }

    private void introduceDelay() {
        long ms = (new Random()).nextInt(5000)+2000;
        Utils.log("HTTP", "Delay of "+ms+"ms");
        Utils.sleep(ms);
    }

    private String logUrl(@NonNull String url) {
        Utils.log("API", "I::URL: "+url);
        return url;
    }

    @WorkerThread
    @NonNull
    public String getAccountXmlSync(@NonNull String email) {
        try {
            Request request = new Request.Builder()
                    .url(logUrl(API_GET_USER)
                    .build();

            Response response = mClient.newCall(request).execute();

            if(mTestDelay) {
                introduceDelay();
            }

            return response.body().string();
        } catch (IOException e) {
            Utils.log("API", "E::"+e.toString());
        }

        return "";
    }

    public void getUserAccount(@NonNull final String email, @NonNull final GetAccountTask.IAccountListener callback) {
        GetAccountTask task = new GetAccountTask(mContext.get(), callback);
        AsyncTaskCompat.executeParallel(task, email);
    }
}

Here is the related AsyncTask:

public class GetAccountTask extends AsyncTask<String, Void, UserAccount> {
    private WeakReference<Context> mContext = null;
    private GetAccountTask.IAccountListener mCallback = null;

    public GetAccountTask(final Context context, final GetAccountTask.IAccountListener callback) {
        this.mContext = new WeakReference<Context>(context);
        this.mCallback = callback;
    }

    @Override
    protected void onPreExecute() {
    }

    @Override
    protected UserAccount doInBackground(String... emails) {Process.setThreadPriority(Process.THREAD_PRIORITY_BACKGROUND+Process.THREAD_PRIORITY_MORE_FAVORABLE);

        final String email = emails[0];
        if(TextUtils.isEmpty(email)) return UserAccount.empty();
        String xml = Api.getInstance(context.get()).getAccountXmlSync(email);

        return UserAccount.fromXML(xml);
    }

    @Override
    protected void onPostExecute(UserAccount result) {
        // Do stuff with result
    }
}

The whole sequence would start by: Api.getInstance(context.get()).getUserAccount(email, callback);

I also tested commenting out the OkHttp specific lines in getAccountXmlSync so that it only returns "". The result is as follows in my trace view.

Additionally, adb shell top -m 5 -n 1 reports a constant CPU use originating from my App of 15%.

DavidEdwards commented 8 years ago

Has any clue been found to this issue?

swankjesse commented 8 years ago

Haven’t investigated, but will take a look soon.

molexx commented 8 years ago

For me it seems to be caused by proguard optimization - I get this every time (on startup the app does a lot of retrofit and picasso calls) with optimization on but it's fine with it off.

We would much prefer our production app be built with optimization enabled.

If I use Android's proguard-android-optimize.txt config but then override it to disable every optimization option listed here: http://proguard.sourceforge.net/manual/optimizations.html there is no problem. If someone can suggest which of those might be causing it I'll test.

DavidEdwards commented 8 years ago

@molexx Are you using -dontoptimize or are you literally disabling each option?

molexx commented 8 years ago

@DavidEdwards either of those works. I would like to try and narrow down which option is causing the problem - initially so our app can be as optimized as possible but it should give a hint towards a proper fix.

Are you using optimization too?

DavidEdwards commented 8 years ago

Yes, I am running tests at the moment. So far my findings correlate with yours.

DavidEdwards commented 8 years ago

So far I have reduced it from disable-all to disabling a subset:

-optimizations !class/*,!field/*,!method/marking/*,!method/removal/*,!code/removal/*

I cannot reduce it much further. Hopefully this gives an indication of where to look.

Removing any of these results in a switch between bad .trace and good .trace. !method/marking/* !code/removal/*

It looks like ProGuard is removing some OKIO code at some point, pertaining to the Watchdog.

molexx commented 8 years ago

Great, thanks!

I'm down to:

-optimizations !method/marking/static,!method/removal/parameter,!code/removal/advanced

The standard proguard-android-optimize.txt already contains !field/*.

DavidEdwards commented 8 years ago

That ProGuard config works here too. That narrowed it down further.

DavidEdwards commented 8 years ago

@swankjesse The optimizations above from ProGuard are causing the issue. Tested by me and Molexx. After uploading a version to my users with this change, they are also reporting their battery issues are gone.

ProGuard is definitely messing with okio somehow.

swankjesse commented 8 years ago

I haven’t been able to isolate the problem with ProGuard. I’ve gotten as far as building Okio with different ProGuard configurations, and visually comparing the generated bytecode, but nothing is jumping out at me as the culprit.

I’d like some help reproducing if you’re able. First configure ProGuard to disable obfuscation with -dontobfuscate. Next build two .jar files, one with the good ProGuard config and one with the bad ProGuard config. Finally send these two me, either by attaching them here or by emailing ’em confidentially. I’m jesse@swank.ca.

Thanks.

swankjesse commented 8 years ago

Yes, please send both working and non-working .jar files. You can disable obfuscation for both without impacting optimization. It’s the optimizations that are causing the behavior change.

DavidEdwards commented 8 years ago

Is it still remotely helpful to you without --dontobfuscate?

Unfortunately, I cannot bring my code to compile with that set.

@molexx perhaps you would have better luck?

molexx commented 8 years ago

Maybe I'm in the same place - if I add -dontobfuscate (one hyphen) then dex fails with an unhelpful error message.

That said our app has many dependencies and does a lot of retrofit and picasso calls on startup so it's difficult to narrow down what is triggering this.

To recreate this publicly I've tried proguarding Jake Wharton's u2020 demo app as it uses many of the same libraries but without success so far - something to do with Dagger I think.

swankjesse commented 8 years ago

@DavidEdwards @molexx I’ve spent a couple days comparing the .class files optimized by ProGuard and the class files not optimized by ProGuard and I haven’t yet found anything obviously wrong. I’m starting to think this bug only arises when the ProGuard-optimized code interacts with an ART or Dalvik VM. Could you tell me which versions of Android you’ve been able to reproduce this on? I wonder specifically if it’s ART-only, dalvikvm-only, and which versions of each.

DavidEdwards commented 8 years ago

I got an incredible number of crash reports relating to finalizing when this issue was a problem for me.

The error was:

Fatal Exception: java.util.concurrent.TimeoutException
com.android.internal.os.BinderInternal$GcWatcher.finalize() timed out after 10 seconds

The reason for it was due to long running background processes.

The reason I am mentioning this, is that this error is gone now. I assume that the watchdog was the main reason for this crash issue. I can use this to determine reasonably well which Android versions had this issue.

Crash report 1

samsung - 95%
asus - 3%
Enspert - 1%
Other… - 1%

4.4.2 - 48%
4.3 - 40%
4.2.2 - 6%
Other… - 6%

Crash report 2

LGE - 47%
motorola - 33%
Huawei - 13%
Other… - 7%

6.0.1 - 100%

Crash report 3

samsung - 81%
motorola - 8%
LGE - 7%
Other… - 4%

4 - 79%
6 - 20%
5 - 1%

Crash report 4

samsung - 63%
Sony - 31%
ODYS - 3%
Other… - 3%

5.1.1 - 97%
5.1 - 3%

So, I think that it affects all versions of Android. ART / Dalvik.

I have personally reproduced it on Marshmallow on my Nexus 5X.

molexx commented 8 years ago

It recreates 100% on my test 4.2 and 5.1.1 ARM devices, and on the 6.0 x86_64 emulator. I can try more if that would be useful?

I've proguarded the Picasso sample app and a Retrofit2 sample app and neither showed the problem.

I've been unable to get a -dontobfuscate proguarded build to get through dexing.

I'll work on cutting down our app to the minimum needed to recreate.

Here's the app's dependencies: http://pastebin.com/ch4mqwtU @DavidEdwards perhaps it's worth comparing your dependency list to mine?

DavidEdwards commented 8 years ago

Here is my dependencies list also: http://pastebin.com/akzdG2BH

molexx commented 8 years ago

Thanks. I've taken out crashlytics and gms, hasn't helped. I am wondering if it could be multidex? That would explain why small demo apps are not having issues.

It's tricky to orchestrate as attempting to trigger multidex by adding unused libs will be undone by proguard's optimizer. I'll continue to investigate.

molexx commented 8 years ago

It seems proguard's -assumenosideeffects is the trigger. If I add it for an empty class that isn't referenced then the watchdog loop starts happening as soon as a retrofit call is made. e.g.: -assumenosideeffects class com.example.DummyClass { *; }

Without that it works fine regardless of the optimization exclusions.

I can post a small demo app next week.

@DavidEdwards do you have any -assumenosideeffects statements in your config? Can you try commenting them out please?

(multidex appears to be irrelevant)

swankjesse commented 8 years ago

@molexx awesome!

molexx commented 8 years ago

Very small demo project uploaded. I've stripped out retrofit as the problem can be caused by issuing an okhttp request. Proguard is enabled in debug builds. Comment out the -assumenosideeffects line and it's fine.

As with other projects I've been unable to get it to dex with -dontobfuscate specified.

https://github.com/molexx/okiowatchdogtest

swankjesse commented 8 years ago

@molexx great!

DavidEdwards commented 8 years ago

Yes, I use it to remove various unwanted code out of production, for example:

-assumenosideeffects class android.util.Log {
    public static *** d(...);
    public static *** v(...);
    public static *** i(...);
    public static *** w(...);
    public static *** e(...);
}

What on earth could that be doing to OKIO? A bug in Proguard?

lber19535 commented 8 years ago

I find a NOTE in this manual

Note: the configuration specifies that none of the methods of class '...' have any side effects

Your configuration contains an option -assumenosideeffects to indicate that the specified methods don't have any side effects. However, the configuration tries to match all methods, by using a wildcard like "*;". This includes methods from java.lang.Object, such as wait() and notify(). Removing invocations of those methods will most likely break your application. You should list the methods without side effects more conservatively. You can switch off these notes by specifying the -dontnote option.

In AsyncTimeout.awaitTimeout(), there have a wait code:

AsyncTimeout.class.wait();

when using -assumenosideeffects with * for some class, the proguard will remove wait method from superclass. When I modify the proguard file and use method name, all works fine.

So I think this is not a bug of okio.

DavidEdwards commented 8 years ago

I decided to science @molexx test project.

I decompiled them and I compared them. There is clearly a change being made by ProGuard due to this added ProGuard statement.

Here is an HTML diff

Here is a git friendly diff.

As you can see, there is a massive change in awaitTimeout() and a smaller change in scheduleTimeout(AsyncTimeout, long, boolean).

molexx commented 8 years ago

Very interesting @DavidEdwards and @lber19535 Removing the wildcard method spec does make the problem go away.

Assuming that okio isn't trying to call .wait() on DummyClass which is not referenced or instantiated, I think this seems like a bug in proguard: -assumenosideeffects applying to classes not specified by the pattern.

molexx commented 8 years ago

Apparently:

https://sourceforge.net/p/proguard/discussion/182456/thread/d447d007/

this is how proguard is supposed to work: assumenosideeffects operates up the chain on superclasses and still applies to those superclasses when they are extended by other classes not covered by the spec.

So this is not an okio bug. I am happy for it to be closed.

Unlike a lot of proguard-caused Android problems though this one would be quite difficult to detect and diagnose. Would there be an efficient way for okio to detect that wait() has been removed and throw or log an error?

DavidEdwards commented 8 years ago

@molexx Upon which chain of superclasses? Your example project is a standalone POJO inheriting nothing. There is no chain to follow.

molexx commented 8 years ago

All classes implicitly inherit from Object even if not specified. Object has the method wait() which is removed by the wildcard method pattern.

DavidEdwards commented 8 years ago

Ach, of course. Object has wait().

Well, it seems to be that general advice for using OKIO / OkHttp should be to not use -assumenosideeffects if that is expected behaviour. That should probably be listed prominently on the relevant readme files.

swankjesse commented 8 years ago

So ProGuard’s -assumenosideeffects is dangerous and should not be used.

molexx commented 8 years ago

Definitely used with caution. Which to be fair is what the docs say. I hadn't realised that the removals affected parent classes and applied to other uses of those classes.

IMHO though it's essential in some cases such as to remove logging from production apps.

If okio wanted to be super-nice it could detect the missing wait() and log about it, but I'm not sure how that could be done performantly i.e. without reflection at runtime. Maybe once on startup?

EricLafortune commented 8 years ago

Molexx and DavidEdwards are correct. The option -assumenosideeffects is the only one that intentionally changes the functionality of your code, so you should use it with care. Typically:

-assumenosideeffects class android.util.Log {
    public static boolean isLoggable(java.lang.String, int);
    public static int v(...);
    public static int d(...);
    public static int i(...);
    public static int w(...);
    public static int e(...);
    public static java.lang.String getStackTraceString(java.lang.Throwable);
}

Do not use -assumenosideeffects class android.util.Log { *; }, because it includes methods like wait(), which ripples up to Object#wait() and all other implementations, and thus quietly breaks your code.

Recent versions of ProGuard explicitly print out a warning at build time if you have this wildcard in your configuration. Admittedly, the warning may be easy to miss.

Eric (developer of ProGuard)

swankjesse commented 8 years ago

@EricLafortune glad to hear there’s a warning. Is stripping Object.wait() ever safe?

swankjesse commented 8 years ago

No action for us to take on this.

tomsang commented 7 years ago

I'm using OkHttp3.4.2 based on OkIo 1.9.0 in Android 6.0 and 4.4 in Debug mode without assumenosideeffects.

The issue still happens occasionally. So it seems that the issue hasn't been fixed yet.

The stack is the almost same as that in https://github.com/square/okio/issues/214

Maybe assumenosideeffects is implicitly used in my Android apk. Could you help me know whether assumenosideeffects is used actually or not .

swankjesse commented 7 years ago

@tomsang are you using ProGuard?

tomsang commented 7 years ago

@swankjesse I'm using the debug section of following

    buildTypes {
        release {
            minifyEnabled true
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
            signingConfig signingConfigs.Platform
            debuggable false
        }
        debug {
            signingConfig signingConfigs.Platform
            debuggable true
        }
    }

I believe my Android studio 2.1.2 will use the android version Proguard which has no optimization . It's ~/Android/Sdk/tools/proguard/proguard-android.txt with the following content,

### 
### # This is a configuration file for ProGuard.
# http://proguard.sourceforge.net/index.html#manual/usage.html
#
# Starting with version 2.2 of the Android plugin for Gradle, these files are no longer used. Newer
# versions are distributed with the plugin and unpacked at build time. Files in this directory are
# no longer maintained.

-dontusemixedcaseclassnames
-dontskipnonpubliclibraryclasses
-verbose

# Optimization is turned off by default. Dex does not like code run
# through the ProGuard optimize and preverify steps (and performs some
# of these optimizations on its own).
-dontoptimize
-dontpreverify
# Note that if you want to enable optimization, you cannot just
# include optimization flags in your own project configuration file;
# instead you will need to point to the
# "proguard-android-optimize.txt" file instead of this one from your
# project.properties file.

-keepattributes *Annotation*
-keep public class com.google.vending.licensing.ILicensingService
-keep public class com.android.vending.licensing.ILicensingService

# For native methods, see http://proguard.sourceforge.net/manual/examples.html#native
-keepclasseswithmembernames class * {
    native <methods>;
}

# keep setters in Views so that animations can still work.
# see http://proguard.sourceforge.net/manual/examples.html#beans
-keepclassmembers public class * extends android.view.View {
   void set*(***);
   *** get*();
}

# We want to keep methods in Activity that could be used in the XML attribute onClick
-keepclassmembers class * extends android.app.Activity {
   public void *(android.view.View);
}

# For enumeration classes, see http://proguard.sourceforge.net/manual/examples.html#enumerations
-keepclassmembers enum * {
    public static **[] values();
    public static ** valueOf(java.lang.String);
}

-keepclassmembers class * implements android.os.Parcelable {
  public static final android.os.Parcelable$Creator CREATOR;
}

-keepclassmembers class **.R$* {
    public static <fields>;
}

# The support library contains references to newer platform versions.
# Don't warn about those in case this app is linking against an older
# platform version.  We know about them, and they are safe.
-dontwarn android.support.**

# Understand the @Keep support annotation.
-keep class android.support.annotation.Keep

-keep @android.support.annotation.Keep class * {*;}

-keepclasseswithmembers class * {
    @android.support.annotation.Keep <methods>;
}

-keepclasseswithmembers class * {
    @android.support.annotation.Keep <fields>;
}

-keepclasseswithmembers class * {
    @android.support.annotation.Keep <init>(...);
}