One simple solution is to use a wrapper around simplifier(source_filepath, pred_filepath)
that takes a string as input and outputs a string, as you mentioned in #8.
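For instance, a rough sketch of such a wrapper (assuming simplifier reads one sentence per line from source_filepath and writes one prediction per line to pred_filepath, both UTF-8):

import tempfile
from pathlib import Path

def simplify_string(text):
    # Write the input string to a temp source file, run the file-based
    # simplifier, then read the prediction back as a string.
    with tempfile.TemporaryDirectory() as tmp_dir:
        source_filepath = Path(tmp_dir) / "source.txt"
        pred_filepath = Path(tmp_dir) / "pred.txt"
        source_filepath.write_text(text + "\n", encoding="utf8")
        simplifier(source_filepath, pred_filepath)
        return pred_filepath.read_text(encoding="utf8").strip()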
Another solution is to use some fairseq code directly, but I am not sure it will be better, so you might not want to go the extra mile for it.
Here is some dirty code for that, which might not work, just for inspiration:
# Imports needed by the snippet below (the preprocessor import path is a guess, adjust it to this repo's layout).
import os
from collections import namedtuple
from functools import lru_cache
from subprocess import PIPE, Popen

import numpy as np
import torch

import fairseq.data
from fairseq import checkpoint_utils, options, tasks, utils
from fairseq.sequence_generator import SequenceGenerator

from preprocessors import (  # assumed location of the repo's preprocessor classes
    BPEPreprocessor,
    ComposedPreprocessor,
    DependencyTreeDepthRatioPreprocessor,
    FKGLRatioPreprocessor,
    LengthRatioPreprocessor,
    LevenshteinPreprocessor,
    WordRankRatioPreprocessor,
)


class StreamTextSimplification:
Batch = namedtuple("Batch", "srcs sample")
Translation = namedtuple("Translation", "src_str hypos pos_scores alignments")
Result = namedtuple("Result", "translation score")
def __init__(self):
self.model_path = "text_simplification/model"
self.checkpoint_path = os.path.join(self.model_path, "checkpoint_last.pt")
input_args = [
self.model_path,
"--path",
self.checkpoint_path,
"--source-lang",
"complex",
"--target-lang",
"simple",
"--beam",
"8",
"--nbest",
"1",
"--lenpen",
"1.0",
"--diverse-beam-groups",
"-1",
"--diverse-beam-strength",
"0.5",
"--print-alignment",
"--gen-subset",
"tmp",
"--model-overrides",
'{"encoder_embed_path": None, "decoder_embed_path": None}',
"--cpu",
]
parser = options.get_generation_parser(interactive=True)
args = options.parse_args_and_arch(parser, input_args)
self.args = args
if args.buffer_size < 1:
args.buffer_size = 1
if args.max_tokens is None and args.max_sentences is None:
args.max_sentences = 1
assert (
not args.max_sentences or args.max_sentences <= args.buffer_size
), "--max-sentences/--batch-size cannot be larger than --buffer-size"
print(args)
self.use_cuda = torch.cuda.is_available() and not args.cpu
# Setup task, e.g., translation
self.task = tasks.setup_task(args)
# Load ensemble
print("| loading model(s) from {}".format(args.path))
model_paths = args.path.split(":")
self.models, model_args = checkpoint_utils.load_model_ensemble(
model_paths, arg_overrides=eval(args.model_overrides), task=self.task
)
# Set dictionaries
self.src_dict = self.task.source_dictionary
self.tgt_dict = self.task.target_dictionary
# Optimize ensemble for generation
for model in self.models:
model.make_generation_fast_(
beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
need_attn=args.print_alignment,
)
if args.fp16:
model.half()
if self.use_cuda:
model.cuda()
# Initialize generator
self.translator = SequenceGenerator(
tgt_dict=self.tgt_dict,
beam_size=args.beam,
normalize_scores=(not args.unnormalized),
len_penalty=args.lenpen,
unk_penalty=args.unkpen,
temperature=args.temperature,
max_len_a=args.max_len_a,
max_len_b=args.max_len_b,
min_len=args.min_len,
)
print(self.translator)
# Load alignment dictionary for unknown word replacement
# (None if no unknown word replacement, empty if no path to align dictionary)
self.align_dict = utils.load_align_dict(args.replace_unk)
self.bpe_codes_path = "bpe_codes_70000"
self.mosesdecoder_path = "mosesdecoder-master/scripts/tokenizer"
self.normalize_punctuation_path = os.path.join(
self.mosesdecoder_path, "normalize-punctuation.perl"
)
self.tokenizer_path = os.path.join(self.mosesdecoder_path, "tokenizer.perl")
self.detokenizer_path = os.path.join(self.mosesdecoder_path, "detokenizer.perl")
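    # The three helpers below pipe text through the Moses perl scripts
    # (punctuation normalization, tokenization, detokenization).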
def normalize_punctuation(self, text):
process = Popen(
["perl", self.normalize_punctuation_path, "-l", "en"],
stdout=PIPE,
stdin=PIPE,
stderr=PIPE,
encoding="utf8",
)
return process.communicate(text)[0].strip()
def tokenize(self, text):
process = Popen(
["perl", self.tokenizer_path, "-a", "-l", "en", "-q", "-no-escape"],
stdout=PIPE,
stdin=PIPE,
stderr=PIPE,
encoding="utf8",
)
return process.communicate(text)[0].strip()
def detokenize(self, text):
process = Popen(
["perl", self.detokenizer_path, "-q"],
stdout=PIPE,
stdin=PIPE,
stderr=PIPE,
encoding="utf8",
)
return process.communicate(text)[0].strip()
@lru_cache(maxsize=1000)
def translate(
self,
text,
dep_tree_depth_ratio=0.95,
wordrank_ratio=0.8,
length_ratio=0.9,
levenshtein_ratio=0.85,
fkgl_ratio=0.4,
):
preprocessors = [
DependencyTreeDepthRatioPreprocessor(
bucket_size=0.05, target_ratio=dep_tree_depth_ratio
),
WordRankRatioPreprocessor(bucket_size=0.05, target_ratio=wordrank_ratio),
LengthRatioPreprocessor(bucket_size=0.05, target_ratio=length_ratio),
LevenshteinPreprocessor(bucket_size=0.05, target_ratio=levenshtein_ratio),
FKGLRatioPreprocessor(bucket_size=0.05, target_ratio=fkgl_ratio),
BPEPreprocessor(bpe_codes_path=self.bpe_codes_path, n_bpe_codes=70000),
]
composed_preprocessor = ComposedPreprocessor(preprocessors)
text = self.tokenize(self.normalize_punctuation(text))
inputs = [composed_preprocessor.encode_sentence(text)]
args = self.args
def make_result(src_str, hypos):
result = self.Translation(
src_str="O\t{}".format(src_str), hypos=[], pos_scores=[], alignments=[]
)
# Process top predictions
for hypo in hypos[: min(len(hypos), args.nbest)]:
hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
hypo_tokens=hypo["tokens"].int().cpu(),
src_str=src_str,
alignment=hypo["alignment"].int().cpu()
if hypo["alignment"] is not None
else None,
align_dict=self.align_dict,
tgt_dict=self.tgt_dict,
remove_bpe=args.remove_bpe,
)
result.hypos.append(hypo_str)
result.pos_scores.append(hypo["score"])
result.alignments.append(
"A\t{}".format(
" ".join(map(lambda x: str(utils.item(x)), alignment))
)
if alignment is not None
else None
)
return result
def process_batch(batch):
if self.use_cuda:
batch.sample["net_input"]["src_tokens"] = batch.sample["net_input"][
"src_tokens"
].cuda()
batch.sample["net_input"]["src_lengths"] = batch.sample["net_input"][
"src_lengths"
].cuda()
translations = self.translator.generate(
models=self.models, sample=batch.sample
)
return [make_result(batch.srcs[i], t) for i, t in enumerate(translations)]
indices = []
results = []
for batch, batch_indices in self.make_batches(
inputs, args, self.src_dict, self.models[0].max_positions()
):
indices.extend(batch_indices)
results += process_batch(batch)
for i in np.argsort(indices):
result = results[i]
print(result.src_str)
for hypo, pos_score, align in zip(
result.hypos, result.pos_scores, result.alignments
):
print(hypo)
print(pos_score)
print(align)
translation = self.detokenize(
composed_preprocessor.decode_sentence(hypo)
)
return self.Result(translation=translation, score=pos_score)
def make_batches(self, lines, args, src_dict, max_positions):
tokens = [
src_dict.encode_line(src_str, add_if_not_exist=False).long()
for src_str in lines
]
lengths = np.array([t.numel() for t in tokens])
itr = self.task.get_batch_iterator(
dataset=fairseq.data.LanguagePairDataset(tokens, lengths, src_dict),
max_tokens=args.max_tokens,
max_sentences=args.max_sentences,
max_positions=max_positions,
).next_epoch_itr(shuffle=False)
for batch in itr:
yield self.Batch(srcs=[lines[i] for i in batch["id"]], sample=batch), batch[
"id"
]
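With something like the above, usage would roughly be (untested, names as in the sketch):

simplifier_service = StreamTextSimplification()
result = simplifier_service.translate("Some complex news title to simplify.")
print(result.translation, result.score)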
I implemented your model for sentence simplification, and I would like to test it on streams of news titles and see the output. However, I have problems when using it as an API, because it generates input, output, and some temp files for each title. So I was wondering whether it is possible to use it as a service, i.e. the input would be a string (UTF-8 encoded) and the model would return a simplified string as output. Any help on this?