First, please note the following:
Run the installation script in “unprivileged” mode
./install.sh -u
Start the standalone server
python standalone.py
You should then be able to access the SciAnnotate server at the address printed by standalone.py.
LABELING_FUNCTION_SET = {"alias": your_new_function}
import sys
import re
def contactLabeler(text="", entity_index=None):
    """Label e-mail addresses and phone numbers found in ``text``.

    Args:
        text: The text to scan.
        entity_index: An iterator that yields the numeric part of each
            entity id (advanced with ``next`` once per match). When
            omitted, a fresh counter starting at 1 is used so the function
            can be called on its own.

    Returns:
        A dict with a single key ``"entities"`` whose value is a list of
        ``[entity_id, label, [(start, end)], matched_text]`` entries.
        ``entity_id`` is ``"F"`` followed by the next index, and ``label``
        is this function's name joined to ``Contact-Email`` or
        ``Contact-Phone`` with an underscore. All e-mail matches come
        first, followed by all phone matches.
    """
    if entity_index is None:
        # Fallback so that a match does not crash on next(None).
        from itertools import count

        entity_index = count(1)

    # The label prefix is the name of this function; resolve it once
    # instead of once per entity.
    label_prefix = sys._getframe().f_code.co_name

    # Raw strings keep the regex escapes (\. \d \s) intact. The e-mail
    # pattern is deliberately unanchored: with ^...$ it could only match
    # when the whole text was a single address.
    patterns = (
        (
            "Contact-Email",
            r"[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+",
        ),
        (
            "Contact-Phone",
            r"(\d{3}[-\.\s]??\d{3}[-\.\s]??\d{4}"
            r"|\(\d{3}\)\s*\d{3}[-\.\s]??\d{4}"
            r"|\d{3}[-\.\s]??\d{4})",
        ),
    )

    entities = []
    for label, pattern in patterns:
        for match in re.finditer(pattern, text):
            entities.append(
                [
                    "F" + str(next(entity_index)),
                    "{}_{}".format(label_prefix, label),
                    [(match.start(), match.end())],
                    match.group(),
                ]
            )

    return {"entities": entities}
If you would like to reference SciAnnotate in a publication, please use:
@software{SciAnnotate2021github,
author = {Mengyang Liu and Haozheng Luo and Leonard Thong and Yinghao Li and Chao Zhang and Le Song},
title = {SciAnnotate-A Tool for Integrating Weak Labeling Sources for Sequence Labeling},
url = {http://github.com/robinzixuan/SciAnnotate},
version = {0.1.4},
year = {2021},
}