openwpm / OpenWPM

A web privacy measurement framework
https://openwpm.readthedocs.io
Other
1.34k stars 314 forks source link

javascript execution without profile tar #570

Closed MaazBinMusa closed 4 years ago

MaazBinMusa commented 4 years ago

I was trying to run the following function via `run_custom_function`:

def callGet(path,mode,**kwargs):
    """Dump the page's header-bidding bid responses to a text file.

    Runs a JavaScript snippet in the browser that walks
    ``pbjs.getBidResponses()`` (``pbjs`` is presumably the Prebid.js global)
    and collects every bid as ``{ad: bid}``. The stringified results are
    written, space-separated and preceded by a blank line, to
    ``<path>/Header_Bidding<mode>``; an existing file is overwritten.

    Args:
        path: Directory in which the output file is created.
        mode: String suffix appended to the ``Header_Bidding`` file name.
        **kwargs: Must contain ``driver``, an object exposing
            ``execute_script`` (a Selenium WebDriver in practice).

    Returns:
        None.

    NOTE: the script assumes the page has already defined ``pbjs``; call
    this only after the page has had enough time to load its resources.
    """
    driver = kwargs['driver']
    js = (
        "var output = [];"
        "function getCPM()"
        "{"
        " var responses = pbjs.getBidResponses();"
        " Object.keys(responses).forEach(function(adUnitCode){"
        " var response = responses[adUnitCode];"
        "     response.bids.forEach(function(bid)"
        "     {"
        "         output.push({"
        "         ad: bid"
        "         "
        "         "
        "         "
        "         "
        "         });"
        "     });"
        " });"
        "}"
        "getCPM();"
        "return output;"
    )
    status = driver.execute_script(js)
    # A script that yields null/undefined comes back as None; treat that as
    # "no bids" instead of failing while iterating.
    bids = [] if status is None else status
    # Save the result in a file. Mode 'w' already creates the file when it is
    # missing, and the context manager closes it even if a write fails.
    path = os.path.join(path,"Header_Bidding"+mode)
    with open(path,'w') as f:
        f.write('\r\n')
        f.write(' '.join(str(i) for i in bids))
    return

image

When I load a profile tar before doing this, it works smoothly. The main issue is that a Selenium script I wrote by hand, which loads no profile and does the same thing, works fine, so this looks like an OpenWPM issue.

vringar commented 4 years ago

Can you share the selenium script that you wrote? Because from first impression this is something that's happening in Selenium code (as driver is a Selenium class)

MaazBinMusa commented 4 years ago

Can you share the selenium script that you wrote? Because from first impression this is something that's happening in Selenium code (as driver is a Selenium class)

I use my own config files rather than the default config, but they have the same key-value pairs plus a few of my own. I am changing directories because I run this inside Docker. I believe the only important code to look at is the last `if` statement, where I visit 'accuweather'.

# Switch into opt/OpenWPM (resolved relative to the current working directory).
os.chdir('opt/OpenWPM')
# Positional command-line arguments: the config file path, then the run mode.
config_path, mode = sys.argv[1], sys.argv[2]

def callGet(path,mode,**kwargs):
    """Dump the page's header-bidding bid responses to a text file.

    Runs a JavaScript snippet in the browser that walks
    ``pbjs.getBidResponses()`` (``pbjs`` is presumably the Prebid.js global)
    and collects every bid as ``{ad: bid}``. The stringified results are
    written, space-separated and preceded by a blank line, to
    ``<path>/Header_Bidding<mode>``; an existing file is overwritten.

    Args:
        path: Directory in which the output file is created.
        mode: String suffix appended to the ``Header_Bidding`` file name.
        **kwargs: Must contain ``driver``, an object exposing
            ``execute_script`` (a Selenium WebDriver in practice).

    Returns:
        None.

    NOTE: the script assumes the page has already defined ``pbjs``; call
    this only after the page has had enough time to load its resources.
    """
    driver = kwargs['driver']
    js = (
        "var output = [];"
        "function getCPM()"
        "{"
        " var responses = pbjs.getBidResponses();"
        " Object.keys(responses).forEach(function(adUnitCode){"
        " var response = responses[adUnitCode];"
        "     response.bids.forEach(function(bid)"
        "     {"
        "         output.push({"
        "         ad: bid"
        "         "
        "         "
        "         "
        "         "
        "         });"
        "     });"
        " });"
        "}"
        "getCPM();"
        "return output;"
    )
    status = driver.execute_script(js)
    # A script that yields null/undefined comes back as None; treat that as
    # "no bids" instead of failing while iterating.
    bids = [] if status is None else status
    # Save the result in a file. Mode 'w' already creates the file when it is
    # missing, and the context manager closes it even if a write fails.
    path = os.path.join(path,"Header_Bidding"+mode)
    with open(path,'w') as f:
        f.write('\r\n')
        f.write(' '.join(str(i) for i in bids))
    return

# ---------------------------------------------------------------------------
# Crawl driver: load the JSON config, validate the persona folders, configure
# the browsers, then run either the training crawl (mode != '1') or the
# header-bidding collection crawl (mode == '1').
# ---------------------------------------------------------------------------

# Bail out early when the config file is missing.
if(not os.path.exists(config_path)):
    print(config_path)
    print("Config File Does Not Exist")
    sys.exit()

# Read the config through a context manager so the file handle is closed.
with open(config_path,'r') as config_file:
    data = json.load(config_file)

# Every key is read up front, so a missing key fails here with a KeyError.
persona_path = data['Persona_Path']
persona_name = data['Persona_Name']
persona_numb = data['Persona_Numb']
browser_params_path = data['Browser_Config']
manager_params_path = data['Manager_Config']
d_browser_params_path = data['Browser_Config_Def']
d_manager_params_path = data['Manager_Config_Def']
NUM_BROWSERS        = data['Number_of_Browsers']
storage_file        = data['Storage_File']
sites               = data['Sites']
Ad_Sites            = data['Ad_Sites']
Intent_Sites        = data['Intent_Sites']

# Both the persona root folder and the persona-specific subfolder must exist.
if(not os.path.exists(persona_path)):
    print("[Persona Folder Not Available] ----- CRITICAL, EXITING")
    sys.exit()
if(not os.path.exists(os.path.join(persona_path,persona_name))):
    print("[Persona Type Does Not Exist] ----- CRITICAL, EXITING")
    sys.exit()
print("[Starting Persona Training]-----")

manager_params, browser_params = TaskManager.load_default_params(browser_params_path,manager_params_path,NUM_BROWSERS)

# Remember the configured profile directory before the loop below may reset
# 'profile_tar' to None; it is reused for the data/log directories and as the
# output directory handed to callGet.
path = browser_params[0]['profile_tar']

# The archive check does not depend on the browser index, so do it once.
has_profile_archive = os.path.exists(os.path.join(path,"profile.tar.gz"))

for i in range(NUM_BROWSERS):
    browser_params[i]['http_instrument'] = True
    browser_params[i]['disable_flash'] = False
    browser_params[i]['headless'] = True
    if(not has_profile_archive):
        # No archive to load: start this browser without a profile.
        browser_params[i]["profile_tar"] = None
        print("set to None")
    browser_params[i]['storage_file']    = storage_file

# abspath() resolves a relative __file__ against the current directory, hence
# the temporary chdir.
# NOTE(review): presumably '../..' is the directory the script was launched
# from -- confirm.
os.chdir('../..')
script_dir = os.path.dirname(os.path.abspath(__file__))
manager_params['data_directory'] = os.path.join(script_dir,path)
manager_params['log_directory']  = os.path.join(script_dir,path)
os.chdir('opt/OpenWPM')

manager = TaskManager.TaskManager(manager_params, browser_params)
try:
    if(mode != '1'):
        # Training crawl: visit every configured site and dump its cookies.
        for site in sites:
            command_sequence = CommandSequence.CommandSequence(site)
            command_sequence.get(sleep=60, timeout=600)
            command_sequence.dump_profile_cookies(120)
            manager.execute_command_sequence(command_sequence, index='**')

        # NOTE(review): the configured Intent_Sites list is overridden with an
        # empty list here, so the branch below never runs. This looks like a
        # deliberate switch-off; remove the override to re-enable it.
        Intent_Sites = []
        if Intent_Sites:
            site = random.choice(Intent_Sites)
            command_sequence = CommandSequence.CommandSequence(site)
            command_sequence.get(sleep=60, timeout=600)
            command_sequence.dump_profile_cookies(120)
            manager.execute_command_sequence(command_sequence, index='**')
    else:
        # NOTE(review): the configured Ad_Sites list is overridden with a
        # single hard-coded site -- confirm this is intended.
        Ad_Sites = ['https://www.accuweather.com/']
        for site in Ad_Sites:
            command_sequence = CommandSequence.CommandSequence(site)
            command_sequence.get(sleep=3, timeout=100)
            # Pass the profile directory captured above. Re-reading
            # browser_params[0]['profile_tar'] here would yield None whenever
            # no archive exists, and callGet cannot join a path onto None.
            command_sequence.run_custom_function(callGet,(path,mode,))
            command_sequence.dump_profile_cookies(120)
            manager.execute_command_sequence(command_sequence, index='**')
finally:
    # Always shut the manager down, even if a command sequence raised.
    manager.close()
MaazBinMusa commented 4 years ago

The issue was a resource loading issue. Increasing the timeout generously to 90 seconds solved this issue. No profile was needed.