Open psybers opened 7 years ago
Note that we don't need to replace every occurrence of HashSet
with a LinkedHashSet
but rather only those that might appear in the output of a Boa query.
Here is the list of every HashSet
we create in the compiler:
$ grep -R 'HashSet' *
java/boa/aggregators/GraphAggregator.java: this.neighbors = new HashSet<String>();
java/boa/aggregators/SetAggregator.java:import java.util.HashSet;
java/boa/aggregators/SetAggregator.java: private HashSet<String> set;
java/boa/aggregators/SetAggregator.java: this.set = new HashSet<String>();
java/boa/compiler/transforms/InheritedAttributeTransformer.java:import java.util.HashSet;
java/boa/compiler/transforms/InheritedAttributeTransformer.java: protected final Set<BoaTuple> currents = new HashSet<BoaTuple>();
java/boa/compiler/transforms/StopStatementTransformer.java:import java.util.HashSet;
java/boa/compiler/transforms/StopStatementTransformer.java: protected final Set<VisitStatement> stops = new HashSet<VisitStatement>();
java/boa/compiler/transforms/StopStatementTransformer.java: final Set<BoaProtoTuple> keys = new HashSet<BoaProtoTuple>(beforeStopTypes.keySet());
java/boa/compiler/transforms/StopStatementTransformer.java: final Set<BoaProtoTuple> remainingAfters = new HashSet<BoaProtoTuple>(keys);
java/boa/compiler/transforms/StopStatementTransformer.java: final Set<BoaProtoTuple> types = new HashSet<BoaProtoTuple>();
java/boa/compiler/transforms/StopStatementTransformer.java: final Set<Integer> intersection = new HashSet<Integer>();
java/boa/compiler/transforms/StopStatementTransformer.java: final Set<Class<? extends BoaProtoTuple>> types = new HashSet<Class<? extends BoaProtoTuple>>();
java/boa/compiler/transforms/VisitorOptimizingTransformer.java:import java.util.HashSet;
java/boa/compiler/transforms/VisitorOptimizingTransformer.java: protected final static Set<Class<? extends BoaType>> astTypes = new HashSet<Class<? extends BoaType>>();
java/boa/compiler/transforms/VisitorOptimizingTransformer.java: types = new HashSet<Class<? extends BoaType>>();
java/boa/compiler/transforms/VisitorOptimizingTransformer.java: types = new HashSet<Class<? extends BoaType>>();
java/boa/compiler/visitors/CodeGeneratingVisitor.java: protected final Set<String> funcs = new HashSet<String>();
java/boa/compiler/visitors/CodeGeneratingVisitor.java: protected final Set<String> tuples = new HashSet<String>();
java/boa/compiler/visitors/CodeGeneratingVisitor.java: protected final Set<String> names = new HashSet<String>();
java/boa/compiler/visitors/CodeGeneratingVisitor.java: protected final Set<Node> indexees = new HashSet<Node>();
java/boa/compiler/visitors/TaskClassifyingVisitor.java:import java.util.HashSet;
java/boa/compiler/visitors/TaskClassifyingVisitor.java: protected final static Set<Class<? extends BoaType>> astTypes = new HashSet<Class<? extends BoaType>>();
java/boa/compiler/visitors/TaskClassifyingVisitor.java: protected final Set<Class<? extends BoaType>> types = new HashSet<Class<? extends BoaType>>();
java/boa/compiler/visitors/TypeCheckingVisitor.java: protected Set<String> befores = new HashSet<String>();
java/boa/compiler/visitors/TypeCheckingVisitor.java: protected Set<String> afters = new HashSet<String>();
java/boa/compiler/visitors/TypeCheckingVisitor.java: protected Set<String> befores = new HashSet<String>();
java/boa/compiler/visitors/TypeCheckingVisitor.java: protected Set<String> befores = new HashSet<String>();
java/boa/datagen/forges/github/DataDownloadWorker.java:import gnu.trove.set.hash.THashSet;
java/boa/datagen/forges/github/DataDownloadWorker.java: THashSet<String> names = GitHubRepoMetaDataDownloader.names;
java/boa/datagen/forges/github/GithubLanguageDownloadMaster.java:import gnu.trove.set.hash.THashSet;
java/boa/datagen/forges/github/GithubLanguageDownloadMaster.java: public static THashSet<String> names = new THashSet<String>();
java/boa/datagen/forges/github/GitHubRepoMetaDataDownloader.java:import java.util.HashSet;
java/boa/datagen/forges/github/GitHubRepoMetaDataDownloader.java:import gnu.trove.set.hash.THashSet;
java/boa/datagen/forges/github/GitHubRepoMetaDataDownloader.java: public static THashSet<String> names = new THashSet<String>();
java/boa/datagen/forges/github/LanguageDownloadWorker.java:import gnu.trove.set.hash.THashSet;
java/boa/datagen/forges/github/LanguageDownloadWorker.java: THashSet<String> names = GithubLanguageDownloadMaster.names;
java/boa/datagen/forges/github/RepoMetadata.java:import java.util.HashSet;
java/boa/datagen/forges/github/RepositoryClonerWorker.java:import java.util.HashSet;
java/boa/datagen/forges/github/RepositoryClonerWorker.java: private HashSet<String> names = new HashSet<String>();
java/boa/datagen/SeqProjectCombiner.java:import java.util.HashSet;
java/boa/datagen/SeqProjectCombiner.java: HashSet<String> marks = new HashSet<String>();
java/boa/datagen/SeqRepoImporter.java:import java.util.HashSet;
java/boa/datagen/SeqRepoImporter.java: private final static HashSet<String> processedProjectIds = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getNodesWithDefinition(final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> vardef = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getVariableKilled(final boa.types.Control.CFG cfg, final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> varkilled = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getVariableDef(final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> vardef = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getVariableUsed(final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> varused = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static void traverseExpr(final HashSet<String> varused, final Expression expr) {
java/boa/functions/BoaGraphIntrinsics.java: public static void traverseVarDecls(final HashSet<String> varused, final Variable vardecls) {
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_union(final java.util.Set<T> s1, final java.util.Set<T> s2) {
java/boa/functions/BoaIntrinsics.java: final java.util.HashSet<T> s = new java.util.HashSet<T>(s1);
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_intersect(final java.util.Set<T> s1, final java.util.Set<T> s2) {
java/boa/functions/BoaIntrinsics.java: final java.util.HashSet<T> s = new java.util.HashSet<T>(s1);
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_difference(final java.util.Set<T> s1, final java.util.Set<T> s2) {
java/boa/functions/BoaIntrinsics.java: final java.util.HashSet<T> s = new java.util.HashSet<T>(s1);
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_symdiff(final java.util.Set<T> s1, final java.util.Set<T> s2) {
java/boa/graphs/cfg/CFG.java:import java.util.HashSet;
java/boa/graphs/cfg/CFG.java: protected HashSet<CFGNode> nodes = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> outs = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> ins = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> breaks = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> returns = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: public HashSet<CFGNode> getNodes() {
java/boa/graphs/cfg/CFG.java: public HashSet<CFGNode> getOuts() {
java/boa/graphs/cfg/CFG.java: public HashSet<CFGNode> getIns() {
java/boa/graphs/cfg/CFG.java: public void mergeBranches(CFG target, HashSet<CFGNode> saveOuts) {
java/boa/graphs/cfg/CFG.java: for (CFGNode node : new HashSet<CFGNode>(this.breaks)) {
java/boa/graphs/cfg/CFGNode.java:import java.util.HashSet;
java/boa/graphs/cfg/CFGNode.java: private HashSet<Integer> parameters;
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> inEdges = new HashSet<CFGEdge>();
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> outEdges = new HashSet<CFGEdge>();
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> useVariables = new HashSet<String>();
java/boa/graphs/cfg/CFGNode.java: String objectName, int numOfParameters, HashSet<Integer> datas) {
java/boa/graphs/cfg/CFGNode.java: this.parameters = new HashSet<Integer>(datas);
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> getDefUse() {
java/boa/graphs/cfg/CFGNode.java: HashSet<String> defUse = new HashSet<String>(useVariables);
java/boa/graphs/cfg/CFGNode.java: public void setParameters(HashSet<Integer> parameters) {
java/boa/graphs/cfg/CFGNode.java: public HashSet<Integer> getParameters() {
java/boa/graphs/cfg/CFGNode.java: public void setUseVariables(HashSet<String> useVariables) {
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> getUseVariables() {
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> getInEdges() {
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> getOutEdges() {
java/boa/graphs/cfg/CFGNode.java: HashSet<CFGNode> nodes = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFGNode.java: HashSet<CFGNode> nodes = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> processUse() {
java/boa/graphs/cfg/CFGNode.java: HashSet<String> useVar= new HashSet<String>();
java/boa/graphs/cfg/CFGNode.java: public static void traverseExpr(HashSet<String> useVar, final boa.types.Ast.Expression expr) {
java/boa/graphs/cfg/CFGNode.java: public static void traverseVarDecls(HashSet<String> useVar, final boa.types.Ast.Variable vardecls) {
java/boa/runtime/BoaAbstractTraversal.java: java.util.HashSet<CFGNode> nl=cfg.getNodes();
java/boa/types/BoaProtoList.java:import java.util.HashSet;
java/boa/types/BoaProtoList.java: return new HashSet<Class<? extends BoaProtoTuple>>();
java/boa/types/BoaProtoTuple.java:import java.util.HashSet;
java/boa/types/BoaProtoTuple.java: reachableTypesCache = new HashSet<Class<? extends BoaProtoTuple>>();
java/boa/types/BoaSet.java: return "java.util.HashSet<" + this.type.toBoxedJavaType() + ">";
test/boa/test/datagen/JSONStyleASTPrinter.java:import java.util.HashSet;
test/boa/test/datagen/JSONStyleASTPrinter.java: private static final Set<Class<? extends Object>> JSON_ALLOWED_WRAPPER_TYPES = new HashSet<Class<? extends Object>>(
and here is what I think we need to replace.
Sets themselves:
java/boa/types/BoaSet.java: return "java.util.HashSet<" + this.type.toBoxedJavaType() + ">";
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_union(final java.util.Set<T> s1, final java.util.Set<T> s2) {
java/boa/functions/BoaIntrinsics.java: final java.util.HashSet<T> s = new java.util.HashSet<T>(s1);
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_intersect(final java.util.Set<T> s1, final java.util.Set<T> s2) {
java/boa/functions/BoaIntrinsics.java: final java.util.HashSet<T> s = new java.util.HashSet<T>(s1);
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_difference(final java.util.Set<T> s1, final java.util.Set<T> s2) {
java/boa/functions/BoaIntrinsics.java: final java.util.HashSet<T> s = new java.util.HashSet<T>(s1);
java/boa/functions/BoaIntrinsics.java: public static <T> java.util.HashSet<T> set_symdiff(final java.util.Set<T> s1, final java.util.Set<T> s2) {
Output aggregators:
java/boa/aggregators/GraphAggregator.java: this.neighbors = new HashSet<String>();
java/boa/aggregators/SetAggregator.java:import java.util.HashSet;
java/boa/aggregators/SetAggregator.java: private HashSet<String> set;
java/boa/aggregators/SetAggregator.java: this.set = new HashSet<String>();
Graphs (maybe?) and graph traversals (so the traversals occur in a deterministic order):
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getNodesWithDefinition(final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> vardef = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getVariableKilled(final boa.types.Control.CFG cfg, final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> varkilled = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getVariableDef(final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> vardef = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static HashSet<String> getVariableUsed(final CFGNode node) {
java/boa/functions/BoaGraphIntrinsics.java: final HashSet<String> varused = new HashSet<String>();
java/boa/functions/BoaGraphIntrinsics.java: public static void traverseExpr(final HashSet<String> varused, final Expression expr) {
java/boa/functions/BoaGraphIntrinsics.java: public static void traverseVarDecls(final HashSet<String> varused, final Variable vardecls) {
java/boa/graphs/cfg/CFG.java:import java.util.HashSet;
java/boa/graphs/cfg/CFG.java: protected HashSet<CFGNode> nodes = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> outs = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> ins = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> breaks = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: private HashSet<CFGNode> returns = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFG.java: public HashSet<CFGNode> getNodes() {
java/boa/graphs/cfg/CFG.java: public HashSet<CFGNode> getOuts() {
java/boa/graphs/cfg/CFG.java: public HashSet<CFGNode> getIns() {
java/boa/graphs/cfg/CFG.java: public void mergeBranches(CFG target, HashSet<CFGNode> saveOuts) {
java/boa/graphs/cfg/CFG.java: for (CFGNode node : new HashSet<CFGNode>(this.breaks)) {
java/boa/graphs/cfg/CFGNode.java:import java.util.HashSet;
java/boa/graphs/cfg/CFGNode.java: private HashSet<Integer> parameters;
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> inEdges = new HashSet<CFGEdge>();
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> outEdges = new HashSet<CFGEdge>();
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> useVariables = new HashSet<String>();
java/boa/graphs/cfg/CFGNode.java: String objectName, int numOfParameters, HashSet<Integer> datas) {
java/boa/graphs/cfg/CFGNode.java: this.parameters = new HashSet<Integer>(datas);
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> getDefUse() {
java/boa/graphs/cfg/CFGNode.java: HashSet<String> defUse = new HashSet<String>(useVariables);
java/boa/graphs/cfg/CFGNode.java: public void setParameters(HashSet<Integer> parameters) {
java/boa/graphs/cfg/CFGNode.java: public HashSet<Integer> getParameters() {
java/boa/graphs/cfg/CFGNode.java: public void setUseVariables(HashSet<String> useVariables) {
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> getUseVariables() {
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> getInEdges() {
java/boa/graphs/cfg/CFGNode.java: public HashSet<CFGEdge> getOutEdges() {
java/boa/graphs/cfg/CFGNode.java: HashSet<CFGNode> nodes = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFGNode.java: HashSet<CFGNode> nodes = new HashSet<CFGNode>();
java/boa/graphs/cfg/CFGNode.java: public HashSet<String> processUse() {
java/boa/graphs/cfg/CFGNode.java: HashSet<String> useVar= new HashSet<String>();
java/boa/graphs/cfg/CFGNode.java: public static void traverseExpr(HashSet<String> useVar, final boa.types.Ast.Expression expr) {
java/boa/graphs/cfg/CFGNode.java: public static void traverseVarDecls(HashSet<String> useVar, final boa.types.Ast.Variable vardecls) {
java/boa/runtime/BoaAbstractTraversal.java: java.util.HashSet<CFGNode> nl=cfg.getNodes();
I am not entirely sure using a LinkedHashSet will fix the issue for set
aggregators. While it would remove some non-determinism, it still relies on the order of items being added to that set being fixed and I do not believe that is the case.
We should instead utilize a sort call on the set before outputting the values.
A Java LinkedHashSet is preferred for storing
set
in Boa, as the output order (if printed/shown in the output) will be predictable leading to identical job outputs.Currently we use HashSet, where the iterated order of the items is not deterministic and thus some jobs will have output that is bytewise different (but semantically equivalent) on subsequent runs.