Closed alex4200 closed 4 years ago
Yeah I get some errors too after seeing some plots..
----------------------------------------------
Score: 3.3379923050048985
Output files:
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/figs/somaticfeat_UCL_data/traces.pdf
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/figs/somaticfeat_UCL_data/absolute_features.pdf
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/figs/somaticfeat_UCL_data/Feature_errors.pdf
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/figs/somaticfeat_UCL_data/traces_subplots.pdf
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/results/somaticfeat_UCL_data/somatic_model_features.json
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/results/somaticfeat_UCL_data/somatic_model_errors.json
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/results/somaticfeat_UCL_data/final_score.json
/home/jovyan/CA1_int_cAC_970627BHP1_20190328160727/validation_results/results/somaticfeat_UCL_data/test_log.txt
----------------------------------------------
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
<ipython-input-12-18d32056e973> in <module>()
22 with open(stim_file, 'r') as f:
23 config = json.load(f, object_pairs_hook=collections.OrderedDict)
---> 24 result_id, score = utils.run_test(username=HBP_USERNAME, model=cell_model, test_alias="hippo_somafeat_"+ttype, test_version="1.0", storage_collab_id=collab_id, register_result=True, config=config, specify_data_set=specify_data_set)
25
26 elif test == "Somatic Features Test - JMakara data set":
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/utils.py in run_test(username, password, environment, model, test_instance_id, test_id, test_alias, test_version, storage_collab_id, register_result, client_obj, **params)
438 test_config_file = prepare_run_test_offline(username=username, password=password, environment=environment, test_instance_id=test_instance_id, test_id=test_id, test_alias=test_alias, test_version=test_version, client_obj=client_obj, **params)
439 test_result_file = run_test_offline(model=model, test_config_file=test_config_file)
--> 440 result_id, score = upload_test_result(username=username, password=password, environment=environment, test_result_file=test_result_file, storage_collab_id=storage_collab_id, register_result=register_result, client_obj=client_obj)
441 return result_id, score
442
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/utils.py in upload_test_result(username, password, environment, test_result_file, storage_collab_id, register_result, client_obj)
374 auth=test_library.auth)
375
--> 376 response = test_library.register_result(test_result=score, data_store=collab_storage)
377 return response, score
378
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/__init__.py in register_result(self, test_result, data_store, project)
1361 "score": int(test_result.score) if isinstance(test_result.score, bool) else test_result.score,
1362 "passed": None if "passed" not in test_result.related_data else test_result.related_data["passed"],
-> 1363 "platform": str(self._get_platform()), # database accepts a string
1364 "project": project,
1365 "normalized_score": int(test_result.score) if isinstance(test_result.score, bool) else test_result.score,
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/__init__.py in _get_platform(self)
1414 network_name = platform.node()
1415 bits, linkage = platform.architecture()
-> 1416 if _have_internet_connection():
1417 try:
1418 ip_addr = socket.gethostbyname(network_name)
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/__init__.py in _have_internet_connection()
2663 test_address = 'http://74.125.113.99' # google.com
2664 try:
-> 2665 urlopen(test_address, timeout=1)
2666 return True
2667 except (URLError, socket.timeout):
/opt/conda/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
/opt/conda/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
524 req = meth(req)
525
--> 526 response = self._open(req, data)
527
528 # post-process response
/opt/conda/lib/python3.6/urllib/request.py in _open(self, req, data)
542 protocol = req.type
543 result = self._call_chain(self.handle_open, protocol, protocol +
--> 544 '_open', req)
545 if result:
546 return result
/opt/conda/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
/opt/conda/lib/python3.6/urllib/request.py in http_open(self, req)
1344
1345 def http_open(self, req):
-> 1346 return self.do_open(http.client.HTTPConnection, req)
1347
1348 http_request = AbstractHTTPHandler.do_request_
/opt/conda/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1319 except OSError as err: # timeout error
1320 raise URLError(err)
-> 1321 r = h.getresponse()
1322 except:
1323 h.close()
/opt/conda/lib/python3.6/http/client.py in getresponse(self)
1329 try:
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
1333 self.close()
/opt/conda/lib/python3.6/http/client.py in begin(self)
295 # read until we get a non-100 response
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
299 break
/opt/conda/lib/python3.6/http/client.py in _read_status(self)
256
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
260 raise LineTooLong("status line")
/opt/conda/lib/python3.6/socket.py in readinto(self, b)
584 while True:
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
588 self._timeout_occurred = True
ConnectionResetError: [Errno 104] Connection reset by peer
@apdavison : The problem is arising from checking of internet connection here: https://github.com/HumanBrainProject/hbp-validation-client/blob/master/hbp_validation_framework/__init__.py#L2671
Made a minimal example here to reproduce the error: https://collab.humanbrainproject.eu/#/collab/5165/nav/61177
The problem only arises on the Collab, and running the same example locally runs fine (prints "B" via the except block)
It looks as though google.com now resolves to some other IP address.
I suggest replacing the urlopen
with a ping, something like this (from https://stackoverflow.com/a/32684938):
import platform # For getting the operating system name
import subprocess # For executing a shell command
def ping(host):
"""
Returns True if host (str) responds to a ping request.
Remember that a host may not respond to a ping (ICMP) request even if the host name is valid.
"""
# Option for the number of packets as a function of
param = '-n' if platform.system().lower()=='windows' else '-c'
# Building the command. Ex: "ping -c 1 google.com"
command = ['ping', param, '1', host]
return subprocess.call(command) == 0
then
def _have_internet_connection():
"""
Not foolproof, but allows checking for an external connection with a short
timeout, before trying socket.gethostbyname(), which has a very long
timeout.
"""
host = "8.8.8.8" # Google DNS server
return ping(host)
I can make that change, but I am curious as to why we see different behaviors locally and on the Collab. _have_internet_connection()
is basically being used by us to get the ip address of the system (ip_addr
). If it fails, it is supposed to resort to setting the value to "127.0.0.1".
I see that the latter occurs as expected locally. For running this locally, I get:
<urlopen error [Errno 111] Connection refused>
whereas on the Collab, I get:
[Errno 104] Connection reset by peer
Why this difference? If the error handling had worked as it does locally, this error would not have broken the usecases.
I don't know why this difference occurs, but it doesn't matter, a failure is a failure. All we are trying to discover is whether the machine is connected to the internet.
Ok, will make those changes and deploy.
It would be good to know why the Collab operates differently (would help setup better error handling if we understand). @alex4200 : let me know if you have any idea on this.
I think the difference is coming from the remote server: it is treating the two cases differently, presumably based on differences in IP address or request headers.
I am testing the new implementation here. It seems to work fine on the Collab (ip_address = "172.17.0.5").
Locally, on running the same code, I get my IP address as "127.0.1.1".
But if we change it to (https://stackoverflow.com/a/30990617/7383605):
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
print(s.getsockname()[0])
I get: On Collab: "172.17.0.5" Locally: "192.168.XX.XX"
Shall I go ahead with this?
Both methods work for me, both at home and on the collab.
But it seems the new approach is more reliable, so go for it.
Made a new release of the Python client (0.5.28). I have updated all the usecases to use this new version. Also tested the Hippocampus usecase again, and it works fine.
@alex4200 : Can you test all the validation usecases again, and let me know if there are any more problems.
Hi,
I tested the DEV versions of the notebook and all use cases seem to work except the last one ('Basal Ganglia Population Morphology Validation'). I get the following error:
[cid:64ce2b9a-4dd9-43a1-966e-3d1edff765ce] Can you check that please?
Thanks
Alex
From: appukuttan-shailesh notifications@github.com Sent: Tuesday, April 28, 2020 10:26:46 AM To: cnr-ibf-pa/hbp-bsp-issues Cc: Dietz Alexander; Mention Subject: Re: [cnr-ibf-pa/hbp-bsp-issues] Validation use cases failing (#541)
Made a new release of the Python client (0.5.28). I have updated all the usecases to use this new version. Also tested the Hippocampus usecase again, and it works fine.
@alex4200https://github.com/alex4200 : Can you test all the validation usecases again, and let me know if there are any more problems.
— You are receiving this because you were mentioned. Reply to this email directly, view it on GitHubhttps://github.com/cnr-ibf-pa/hbp-bsp-issues/issues/541#issuecomment-620459129, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ADBDXSY64OQHQVM7ZFA7WI3RO2HMNANCNFSM4MR6BFUQ.
I was able to run the "Basal Ganglia Population Morphology Validation" usecase without any problems. I used the following input:
Enter a list of, minimum, two model instance(s) to be validated: 1 - 47 Example inputs: [1,10], [1,4,5,8], all [1,10]
@alex4200 : Can you test again?
@pedroernesto : Can you follow up on this particular use case with Alex (if required). Thanks.
Sure, no problem
Hi,
when I run the "Basal Ganglia Population Morphology Validation" usecase with my normal user it works.
However, when I use a different user (for the automated tests) I still get the same 403 error in the fourth code cell in section B1. Maybe a permissions issue is the cause of the problem?
Cheers
Alex
From: pedroernesto notifications@github.com Sent: Tuesday, April 28, 2020 4:23:30 PM To: cnr-ibf-pa/hbp-bsp-issues Cc: Dietz Alexander; Mention Subject: Re: [cnr-ibf-pa/hbp-bsp-issues] Validation use cases failing (#541)
Sure, no problem
— You are receiving this because you were mentioned. Reply to this email directly, view it on GitHubhttps://github.com/cnr-ibf-pa/hbp-bsp-issues/issues/541#issuecomment-620639651, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ADBDXS4DC23DVK3JZ5V4CULRO3RGDANCNFSM4MR6BFUQ.
Curious.... do the other validation use cases work with this user account that you use for the automated tests?
yes
Ok, in that case it seems unlikely that it is a permissions issue (as all the validation use cases require similar privileges).
It might be useful for @pedroernesto if you could share a snapshot of the error. Not sure he can reproduce it easily.
Selected model: 1 Model instances: [1,10]
Full error:
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-18-46937de77471> in <module>()
18 model_catalog.add_model_instance(model_id=morph_model.model_uuid, alias='NeuroM-MorphStats-Pop',
19 version=morph_model.model_version,
---> 20 description=', '.join(passed_morph_inst_uuid))
21 from time import sleep
22 sleep(10)
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/__init__.py in add_model_instance(self, model_id, alias, source, version, description, parameters, code_format, hash, morphology, license)
2280 return response.json()["uuid"][0]
2281 else:
-> 2282 raise Exception("Error in adding model instance. Response = " + str(response))
2283
2284 def find_model_instance_else_add(self, model_obj):
Exception: Error in adding model instance. Response = <Response [403]>
@apdavison : anything to do with permissions for adding new models/ instances on the KG? In all the other use cases, we handle existing models and instances. Only the results are created as new entries in KG. In this particular use case (@pedroernesto can confirm), I think he creates a new model and/or instance for each input combination. So it could be a problem with that?
Also, @alex4200 : were you able to run this particular use case previously with this same user account? Also, do you run all the validation use cases to completion? Not skip any cells?
@appukuttan-shailesh
Is the test user a member of the collab with which the model is associated?
Ideally the use case should create the model with the running Collab as its host Collab. This ensures that the user is a member of the collab with which the model is associated.
@pedroernesto can you confirm if this is true for this use case? To reproduce the situation, you can try to create a second HBP account for testing purposes.
@apdavison I do not know what Collab you refer to.
In the test, I simply navigate to the BSP (https://collab.humanbrainproject.eu/#/collab/8444/nav/65633), log in as alextestuser003, select the use case from the menu, create a new Collab and run the notebook in that Collab (for example: Collab TT_bdc14104-e55e-4b7f-a9ce-c6fac728fad8).
@alex4200 This usecase has run successfully before, with me, Shailesh, Andrew, and even you as the right test-user. A problem happened only last time when you were using the wrong user. Are you sure this is not the case again? Do you remember what was missing for that kind of user and how you corrected it?
@alex4200 I see on slack that last time you tried this:
@pedroernesto I reloaded the tab, reloaded the kernel, used a different user: I still get the same 403 error.
I also see the use case "Validation Framework Demo" fail for the normal user. In the last cell before the section "7. Browse the result database", while running this notebook cell-for-cell without any change, I get the following error (done on 1.5.2020 10:50)
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
<ipython-input-36-e72b2d8071a0> in <module>()
----> 1 response = test_library.register_result(score, data_store=collab_storage, project=collab_id)
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/__init__.py in register_result(self, test_result, data_store, project)
1361 "score": int(test_result.score) if isinstance(test_result.score, bool) else test_result.score,
1362 "passed": None if "passed" not in test_result.related_data else test_result.related_data["passed"],
-> 1363 "platform": str(self._get_platform()), # database accepts a string
1364 "project": project,
1365 "normalized_score": int(test_result.score) if isinstance(test_result.score, bool) else test_result.score,
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/__init__.py in _get_platform(self)
1414 network_name = platform.node()
1415 bits, linkage = platform.architecture()
-> 1416 if _have_internet_connection():
1417 try:
1418 ip_addr = socket.gethostbyname(network_name)
/opt/conda/lib/python3.6/site-packages/hbp_validation_framework/__init__.py in _have_internet_connection()
2663 test_address = 'http://74.125.113.99' # google.com
2664 try:
-> 2665 urlopen(test_address, timeout=1)
2666 return True
2667 except (URLError, socket.timeout):
/opt/conda/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
/opt/conda/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
524 req = meth(req)
525
--> 526 response = self._open(req, data)
527
528 # post-process response
/opt/conda/lib/python3.6/urllib/request.py in _open(self, req, data)
542 protocol = req.type
543 result = self._call_chain(self.handle_open, protocol, protocol +
--> 544 '_open', req)
545 if result:
546 return result
/opt/conda/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
/opt/conda/lib/python3.6/urllib/request.py in http_open(self, req)
1344
1345 def http_open(self, req):
-> 1346 return self.do_open(http.client.HTTPConnection, req)
1347
1348 http_request = AbstractHTTPHandler.do_request_
/opt/conda/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1319 except OSError as err: # timeout error
1320 raise URLError(err)
-> 1321 r = h.getresponse()
1322 except:
1323 h.close()
/opt/conda/lib/python3.6/http/client.py in getresponse(self)
1329 try:
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
1333 self.close()
/opt/conda/lib/python3.6/http/client.py in begin(self)
295 # read until we get a non-100 response
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
299 break
/opt/conda/lib/python3.6/http/client.py in _read_status(self)
256
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
260 raise LineTooLong("status line")
/opt/conda/lib/python3.6/socket.py in readinto(self, b)
584 while True:
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
588 self._timeout_occurred = True
ConnectionResetError: [Errno 104] Connection reset by peer
The above error is same as the original error reported in this ticket, and was fixed with release of v0.5.28 of the validation Python client.
I have the feeling that the Jupyter notebook isn't being run entirely. Especially the initial cells dealing with package installations. The usecase was updated to use v0.5.28 and your above error log doesn't seem to reflect that. For example, _have_internet_connection()
no longer exists, and has been replaced with _get_ip_address()
, but the error log still shows the former.
Can you run the entire notebook manually with you regular user (and then if it works with the test user) and see if it runs to completion?
Ah I see - you have updated your DEV notebook, but I am testing the PROD notebook.
You have to let me know asap when you make a vital update to the DEV notebook, so I can test and move them to PROD!
Testing DEV and then move them to prod.
I actually did mention:
I have updated all the usecases to use this new version.
And since you mentioned afterwards that you had tested the DEV versions of the notebook, I presumed we were on the same page.
Ah yes sorry for that. I got confused
But the "Basal Ganglia Population Morphology Validation" use case still does not work, even on DEV.
No worries.
Yeah I am not sure about that particular use case. @pedroernesto might be better placed to comment on it as he owns that use case. My thoughts on it are here.
I think I was right about the source of the error: The model used in that UseCase (model_id = '078d19ae-8107-476e-8efe-bf7b0e0898bd') is associated with Collab #1771.
Only members of that Collab will be able to add model instances to this model, as required by cell #4 under "B.1 Instantiating the model; Running the validation tests".
I believe this is the source of the problem. Alex's original user account (@adietz) is a member of this Collab, but I presume his "test_user" account is not. One solution could be to require users wishing to run this UseCase to request membership to this Collab, but I believe that makes it a bit restrictive.
The other solutions might involve more changes wrt registering a new model itself (not simply a model instance) for each run. I recollect @apdavison suggesting previously that the validation UseCases should make use of existing models and not create new ones. I suppose the same holds true for model instances. But since this usecase is different (population study) and potentially involves a different subset of models each time, I am not sure how to approach it.
@pedroernesto maybe discuss this with @apdavison .
Please can someone provide a link to an installed version of the use case in question? (in a collab I have access to), so I can quickly take a look?
@apdavison I just shared a collab with you where I ran the use case again (and saved all the outputs): B1022
Thanks @alex4200
@pedroernesto I think the simplest solution would be to remove the part where the user creates a new model instance, and instead use a pre-existing model instance.
For v2 of the VF, we can look into changing the permissions required in this scenario (see https://github.com/HumanBrainProject/hbp-validation-framework/issues/262)
Ok, I will do it. @alex4200 I will let you know as soon as it is ready and tested
Hi, @pedroernesto, any update on this issue?
I can connect to the Collab today, but it seems that problems with CSCS machines continue after the recent cyber-attack. I can not access the notebooks. I suspect that KG connection will be similarly running down. As I said @alex4200, I will let you know when I solve this.
@pedroernesto The notebooks seem to be back running.
With no changes and same choices, today the DEV notebook for this usecase (on 'Model validation suites' Collab) is running till the end (with my HBP account). Trying now the one on the BSP Collab. Anyway, I will introduce some small modifications to avoid this kind of issues in the future.
Same for the PROD notebook on the BSP collab (with my HBP account). Last time I even could not pass section 'A' about hard constraints, as a connection problem arose.
@pedroernesto: I still get the 403 error when I use a non HBP test user. (DEV and PROD)
When I use my normal user this use case works on PROD.
But as far as I know this use case should also work for a test user. Is that correct?
The model used in that UseCase (model_id = '078d19ae-8107-476e-8efe-bf7b0e0898bd') is associated with Collab #1771.
Only members of that Collab will be able to add model instances to this model, as required by cell #4 under "B.1 Instantiating the model; Running the validation tests".
Alex's original user account (@adietz) is a member of this Collab, but I presume his "test_user" account is not.
So to reproduce/test the problem, we need to use an account that is not a member of Collab #1771 (see Team list here). So the problem is more specific than the account being a non HBP test user.
I hope that helps.
But as far as I know this use case should also work for a test user. Is that correct?
@alex4200 I will add some modifications to the usecase, to make it work for other users, yes. As @apdavison suggested, I will eliminate the need to create a new model instance.
@alex4200 it should work now. Can you try the latest DEV version, with other test users? I have reduced user intervention to just reporting cells of the notebook and introduced the following changes:
@pedroernesto Unable to test as the DEV version of this notebook seems to have vanished.
Did you delete the notebook or the file and create a new one? You should not do that, as the reference ID in the usecases.json becomes invalid.
What is the link to the current DEV notebook now? I will test the notebook and can fix the wrong ID in the usecases.json file...
Oh, sorry for that. I did not know any other way to update it. Usually the old file saved on Storage would conflict with the new one if the names are the same. So, I deleted it, then imported the new one and added a Jupyter notebook as a child under label UseCases Source Files.
The latest DEV version is now available at https://collab.humanbrainproject.eu/#/collab/8123/nav/567961
@pedroernesto Thanks for the info.
If you want to update something, only use THIS notebook: https://collab.humanbrainproject.eu/#/collab/8123/nav/567961
No need to recreate something, delete something etc. Just edit and save This notebook: https://collab.humanbrainproject.eu/#/collab/8123/nav/567961
Then I can easily select it in the BSP for testing!
@appukuttan-shailesh does not recommend working directly on the DEV versions. So, maybe the best way for me in the future is to work in another Collab and, when everything works, start editing the existing DEV version by copying and pasting the new changes.
None of the validation use cases seem to run.
Maybe there is a problem with a backend/a service