/////////////////////////////////////////////////////////////////////// // VerbLearn System // // A crosslinguistic bodily-grounded hand-action // verb semantics acquisition system. // // David Bailey 1/97 // Converted from Sather 1.1 // // File: Context.java // // Description: Contains bookkeeping functionality. // // Classes: Context, Scenario, ScenarioSet, Params, Dataset, // VerbComplex, CandidateSense, CandidateMerge import java.io.*; import java.util.*; /////////////////////////////////////////////////////////////////////// // Holds the current "setup" of the application. class Context extends Observable { private String[] langslotList; // each of the form "English2" private String[] paramList; private String[] datasetList; private String[] lexiconList; // for the current language private ScenarioSet scenarios; private String lang; // e.g., "English" private int numSlots; // e.g., 2 private Params params; private Dataset dataset; private Lexicon lexicon; // Upon creation, load the scenario set from disk. 
Context() { // Set up scenarios: setParams(VerbLearn.INITIAL_PARAM_FILE); // probably "default" scenarios = new ScenarioSet(); rereadLists(); } public void rereadLists() { try { langslotList = DataSource.getDirectoryList("lang"); paramList = DataSource.getDirectoryList("param"); datasetList = DataSource.getDirectoryList("dataset"); if (lang != null) { lexiconList = DataSource.getDirectoryList("lang/" + langslot() + "/lexicon"); } } catch (Exception e) { System.err.println("Context couldn't read directories:" + e.getMessage()); } setChanged(); notifyObservers(); if (VerbLearn.frame() != null) { VerbLearn.frame().validate(); } } public String[] langslotList() { return langslotList; } public String[] paramList() { return paramList; } public String[] datasetList() { return datasetList; } public String[] lexiconList() { return lexiconList; } public ScenarioSet scenarios() { return scenarios; } public String langslot() { if (lang != null) return lang + numSlots; else return null; } public String lang() { return lang; } public int numSlots() { return numSlots; } public Dataset dataset() { return dataset; } public Params params() { return params; } public Lexicon lexicon() { return lexicon; } // If argument is of the form "English2", set lang and numSlots // accordingly. If argument is null, unset lang and numSlots. In // either case, unset dataset and lexicon. 
public void setLang(String langslot) { VerbLearn.labelPanel().saveLabels(); // use GUI to confirm if (langslot == null) { lang = null; numSlots = 0; lexiconList = null; } else { int i; for (i = 0; !Character.isDigit(langslot.charAt(i)); i++) {} lang = langslot.substring(0, i); numSlots = Integer.parseInt(langslot.substring(i)); scenarios.loadLabels(); try { lexiconList = DataSource.getDirectoryList("lang/" + langslot + "/lexicon"); } catch (Exception e) { System.err.println("Context couldn't read language directory:" + e.getMessage()); } } privateSetLexicon(null); setChanged(); notifyObservers(); VerbLearn.frame().validate(); } // Set params from the file p, or to null if p is null. // Set params to the already existing Param object 'p'. public void setParams(Params p) { params = p; setChanged(); notifyObservers(); VerbLearn.frame().validate(); } public void setParams(String p) { // EVENTUALLY, WILL NEED TO OPTIONALLY SAVE CURRENT PARAMS if (p == null) { params = null; } else { try { params = new Params(DataSource. getNestedStreamTokenizer("param/" + p), p); } catch (IOException e) {} } setChanged(); notifyObservers(); if (VerbLearn.frame() != null) { VerbLearn.frame().validate(); } } // Set params to the already existing Dataset object 'd'. public void setDataset(Dataset d) { dataset = d; setChanged(); notifyObservers(); VerbLearn.frame().validate(); } // Set dataset from the file d, or to null if d is null. public void setDataset(String d) { privateSetDataset(d); setChanged(); notifyObservers(); VerbLearn.frame().validate(); } private void privateSetDataset(String d) { // EVENTUALLY, WILL NEED TO OPTIONALLY SAVE CURRENT DATASET if (d == null) { dataset = null; } else { try { dataset = new Dataset(DataSource. getNestedStreamTokenizer("dataset/" + d), d); } catch (IOException e) {} } } // Set lexicon from the file l, or to an empty lexicon if l is "New/Unnamed", // or to null if l is null. 
public void setLexicon(String l) { privateSetLexicon(l); setChanged(); notifyObservers(); VerbLearn.frame().validate(); } private void privateSetLexicon(String l) { if (l == null) { lexicon = null; } else if (l.equals("New/Unnamed")) { lexicon = new Lexicon(); } else { try { ObjectInputStream is = new ObjectInputStream(DataSource. getInputStream("lang/" + langslot() + "/lexicon/" + l)); lexicon = (Lexicon)is.readObject(); } catch (Exception e) { System.err.println("Exception loading lexicon: " + e.getMessage()); } } } } /////////////////////////////////////////////////////////////////////// // Holds the various pieces of information involved in an action, // including its initial state (as both a Jack environment and an // Fstruct), initial goal Fstruct, final linking Fstruct, a movie of // the action, and a label. These last three may be unbound. class Scenario { private String id; private String env; // a filename private Fstruct goal; // a partial linking fstruct (goal feats) private Fstruct initialLink; // a partial linking fstruct (world feats) private Fstruct finalLink; // the final linking fstruct private VerbComplex label; // a multi-slot label for the action in // the current language // Constructor for an unlabelled, grounded scenario Scenario(String id, String env, Fstruct goal) { this.id = id; this.env = env; this.goal = goal; } // Constructor for an unlabelled, ungrounded scenario. 
Scenario(String id, Fstruct initialLink, Fstruct finalLink) { this.id = id; this.initialLink = initialLink; this.finalLink = finalLink; } // SHOULD ONLY BE CALLED FROM SCENARIO SET public void setInitialLink(Fstruct initialLink) { this.initialLink = initialLink; } // SHOULD ONLY BE CALLED FROM SCENARIO SET public void setFinalLink(Fstruct finalLink) { if (!isGrounded()) { System.err.println("Warning: resetting finalLink on ungrounded " + "scenario!"); } this.finalLink = finalLink; } // SHOULD ONLY BE CALLED FROM SCENARIO SET public void setLabel(VerbComplex label) { this.label = label; } public String id() { return id; } public String env() { return env; } public Fstruct goal() { return goal; } public Fstruct initialLink() { return initialLink; } public Fstruct finalLink() { return finalLink; } public VerbComplex label() { return label; } public boolean isLabelled() { return (label != null); } // Returns true if either it's a "real" scenario that's been executed, // or it's a fake scenario. public boolean hasLinks() { return (finalLink != null); } // If false, this is a "fake" exemplar which was not generated by a // real Jack action. public boolean isGrounded() { return (env != null); } } /////////////////////////////////////////////////////////////////////// // A set of Scenarios. // Note: Loading and saving routines do their own file handling, // unlike other classes in the VerbLearn system. 
class ScenarioSet extends Observable {
  private Vector scenarioNames;   // Vector of String, preserves file order
  private Hashtable scenarios;    // maps String to Scenario
  private boolean isModified;     // link data changed since last save?
  private boolean labelsModified; // labels changed since last saveLabels?

  // Load the scenario set from the "scenario/goals" and "scenario/specs"
  // files, then attach initial/final linking fstructs from
  // "scenario/initial" and "scenario/final" when those files exist.
  // On any parse error the set is reset to empty.
  ScenarioSet() {
    scenarioNames = new Vector();
    scenarios = new Hashtable();
    Hashtable goals = new Hashtable(); // maps String to Fstruct
    NestedStreamTokenizer input;
    int token;
    try {
      // Create goal f-structs:
      input = DataSource.getNestedStreamTokenizer("scenario/goals");
      token = input.nextToken();
      while (token != NestedStreamTokenizer.TT_EOF) {
        if (token != NestedStreamTokenizer.TT_WORD) {
          throw(new IOException("expected goal name in goals"));
        }
        String g = input.sval;
        token = input.nextToken();
        if (token != NestedStreamTokenizer.TT_NESTED) {
          throw(new IOException("expected bracketed fstruct in goals"));
        }
        goals.put(g, new Fstruct(input.sval));
        token = input.nextToken();
      }
      // Create scenarios from specs:
      input = DataSource.getNestedStreamTokenizer("scenario/specs");
      token = input.nextToken();
      while (token != NestedStreamTokenizer.TT_EOF) {
        if (token != NestedStreamTokenizer.TT_WORD) {
          throw(new IOException("expected scenario name in specs"));
        }
        String s = input.sval;
        scenarioNames.addElement(s);
        token = input.nextToken();
        if (token == NestedStreamTokenizer.TT_WORD) {
          // A word here is an env filename: a grounded scenario, whose
          // goal must name an entry parsed from the goals file.
          String e = input.sval;
          token = input.nextToken();
          if (token != NestedStreamTokenizer.TT_WORD) {
            throw(new IOException("expected goal name in specs"));
          }
          if (!goals.containsKey(input.sval)) {
            throw(new IOException("invalid goal name in specs"));
          }
          scenarios.put(s, new Scenario(s, e,
                                        (Fstruct)goals.get(input.sval)));
        } else if (token == NestedStreamTokenizer.TT_NESTED) {
          // A nested block gives explicit initial/final fstructs: a
          // "fake" (ungrounded) scenario.
          String initial = input.sval;
          token = input.nextToken();
          if (token != NestedStreamTokenizer.TT_NESTED) {
            throw(new IOException("expected final fstruct in fake scenario"));
          }
          scenarios.put(s, new Scenario(s, new Fstruct(initial),
                                        new Fstruct(input.sval)));
        } else {
          throw(new IOException("expected env name or nested fstruct in specs"));
        }
        token = input.nextToken();
      }
      // Add initial linking fstructs where available:
      String fname = "scenario/initial";
      if (DataSource.exists(fname)) {
        input = DataSource.getNestedStreamTokenizer(fname);
        token = input.nextToken();
        while (token != NestedStreamTokenizer.TT_EOF) {
          if (token != NestedStreamTokenizer.TT_WORD) {
            throw(new IOException("expected scenario name in initial links"));
          }
          String s = input.sval;
          token = input.nextToken();
          if (token == NestedStreamTokenizer.TT_NESTED) {
            // Links for scenarios not in the spec file are silently skipped.
            if (containsKey(s)) {
              get(s).setInitialLink(new Fstruct(input.sval));
            }
          } else {
            throw(new IOException("expected nested fstruct in initial links"));
          }
          token = input.nextToken();
        }
      }
      // Add final linking fstructs where available:
      fname = "scenario/final";
      if (DataSource.exists(fname)) {
        input = DataSource.getNestedStreamTokenizer(fname);
        token = input.nextToken();
        while (token != NestedStreamTokenizer.TT_EOF) {
          if (token != NestedStreamTokenizer.TT_WORD) {
            throw(new IOException("expected scenario name in final links"));
          }
          String s = input.sval;
          token = input.nextToken();
          if (token == NestedStreamTokenizer.TT_NESTED) {
            if (containsKey(s)) {
              get(s).setFinalLink(new Fstruct(input.sval));
            }
          } else {
            throw(new IOException("expected nested fstruct in final links"));
          }
          token = input.nextToken();
        }
      }
    } catch (IOException e) {
      // Any malformed input discards everything loaded so far.
      System.err.println("ScenarioSet failed to load: " + e.getMessage());
      scenarioNames = new Vector();
      scenarios = new Hashtable();
    }
  }

  // Enumerate scenario names in file order.
  public Enumeration names() { return scenarioNames.elements(); }

  public boolean containsKey(String s) { return scenarios.containsKey(s); }

  public Scenario get(String s) { return (Scenario)scenarios.get(s); }

  public boolean isModified() { return isModified; }

  public boolean labelsModified() { return labelsModified; }

  // Assumes sc is in the set.
  public void setLabel(Scenario sc, VerbComplex vc) {
    sc.setLabel(vc);
    setLabelsModified();
  }

  // Assumes sc is in the set.
public void setInitialAndFinalLinks(Scenario sc, Fstruct i, Fstruct f) { sc.setInitialLink(i); sc.setFinalLink(f); setModified(); } private void setModified() { isModified = true; setChanged(); notifyObservers(); VerbLearn.frame().validate(); } private void setLabelsModified() { labelsModified = true; setChanged(); notifyObservers(); VerbLearn.frame().validate(); } // Return true if successful. public boolean save() { if (isModified) { try { String pathname = VerbLearn.BASE_DIR + "scenario/initial"; DataOutputStream os = new DataOutputStream(new FileOutputStream(pathname)); os.writeBytes("// Initial links:\n\n"); Enumeration scnEnum = names(); while (scnEnum.hasMoreElements()) { Scenario sc = get((String)scnEnum.nextElement()); if (sc.isGrounded() && sc.hasLinks()) { os.writeBytes(sc.id() + " {\n" + sc.initialLink() + "}\n"); } } pathname = VerbLearn.BASE_DIR + "scenario/final"; os = new DataOutputStream(new FileOutputStream(pathname)); os.writeBytes("// Final links:\n\n"); scnEnum = names(); while (scnEnum.hasMoreElements()) { Scenario sc = get((String)scnEnum.nextElement()); if (sc.isGrounded() && sc.hasLinks()) { os.writeBytes(sc.id() + " {\n" + sc.finalLink() + "}\n"); } } isModified = false; return true; } catch (IOException e) { return false; } } else { return true; } } public void loadLabels() { clearLabels(); try { String fname = "lang/" + VerbLearn.context().langslot() +"/labels"; if (DataSource.exists(fname)) { NestedStreamTokenizer input = DataSource.getNestedStreamTokenizer(fname); int token = input.nextToken(); while (token != NestedStreamTokenizer.TT_EOF) { if (token != NestedStreamTokenizer.TT_WORD) { throw(new IOException("expected scenario name in labels")); } String s = input.sval; token = input.nextToken(); if (token == NestedStreamTokenizer.TT_WORD) { if (containsKey(s)) { get(s).setLabel(new VerbComplex(input.sval)); } } else { throw(new IOException("expected verb-complex in labels")); } token = input.nextToken(); } } } catch (IOException e) { 
System.err.println("ScenarioSet loadLabels exception: " + e.getMessage()); clearLabels(); } setChanged(); notifyObservers(); VerbLearn.frame().validate(); } // Return true if successful. public boolean saveLabels() { if (labelsModified) { try { String pathname = VerbLearn.BASE_DIR + "lang/" + VerbLearn.context().langslot() + "/labels"; DataOutputStream os = new DataOutputStream(new FileOutputStream(pathname)); os.writeBytes("// Labels for " + VerbLearn.context().langslot() + ":\n\n"); Enumeration scnEnum = names(); while (scnEnum.hasMoreElements()) { Scenario sc = get((String)scnEnum.nextElement()); if (sc.isLabelled()) { os.writeBytes(sc.id() + "\t" + sc.label() + "\n"); } } labelsModified = false; return true; } catch (IOException e) { return false; } } else { return true; } } private void clearLabels() { Enumeration scEnum = scenarios.elements(); while (scEnum.hasMoreElements()) { ((Scenario)scEnum.nextElement()).setLabel(null); } } public String toString() { StringBuffer res = new StringBuffer().append("Scenario Set Summary:\n"); Enumeration scnEnum = names(); while (scnEnum.hasMoreElements()) { Scenario sc = get((String)scnEnum.nextElement()); res.append("\nScenario ").append(sc.id()).append(":\n"); if (sc.isGrounded()) { res.append("env = ").append(sc.env()).append("\n"); res.append("goal = ").append(sc.goal()); } if (sc.hasLinks()) { res.append("initial = ").append(sc.initialLink()); res.append("final = ").append(sc.finalLink()); } if (sc.isLabelled()) { res.append("label = ").append(sc.label()).append("\n"); } } return res.toString(); } } /////////////////////////////////////////////////////////////////////// // Holds numerical parameters of the various algorithms, and the list // of linking features. 
class Params {
  private String id;                     // parameter-set name (filename)
  private double minLabel;
  private double minExplain;
  private double minObey;
  private double minSetFeature;
  private boolean useWordFrequencies;
  private double minMerge;
  private int maxTries;
  private double modelPriorWeight;
  private boolean adaptVirtuals;
  private double virtualInertia;
  private double maxVirtual;
  private int batchSize;
  private int trainingPasses;
  private boolean verboseSenses;
  private boolean testRecognition;
  private Vector motorNames, worldNames; // Vector of String: feature names
  private Hashtable motorVirtuals, worldVirtuals; // map String to Double
  private Hashtable motorValues, worldValues; // map String to Vector

  // Parse a parameter file: a sequence of "Name value" pairs, where the
  // two feature lists are nested blocks handled by readFeatures().
  // Unrecognized or ill-typed entries abort the parse.
  Params(NestedStreamTokenizer input, String newId) {
    id = newId;
    motorNames = new Vector();
    worldNames = new Vector();
    motorVirtuals = new Hashtable();
    worldVirtuals = new Hashtable();
    motorValues = new Hashtable();
    worldValues = new Hashtable();
    try {
      int token = input.nextToken();
      while (token != NestedStreamTokenizer.TT_EOF) {
        if (token != NestedStreamTokenizer.TT_WORD) {
          throw(new IOException("expected parameter name"));
        }
        String p = input.sval;
        token = input.nextToken();
        // Dispatch on the parameter name; each branch also checks the
        // value token has the expected type.
        if (p.equals("MinLabel") &&
            token == NestedStreamTokenizer.TT_NUMBER) {
          minLabel = input.nval;
        } else if (p.equals("MinExplain") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          minExplain = input.nval;
        } else if (p.equals("MinObey") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          minObey = input.nval;
        } else if (p.equals("MinSetFeature") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          minSetFeature = input.nval;
        } else if (p.equals("UseWordFrequencies") &&
                   token == NestedStreamTokenizer.TT_WORD &&
                   (input.sval.equals("true") ||
                    input.sval.equals("false"))) {
          useWordFrequencies = Boolean.valueOf(input.sval).booleanValue();
        } else if (p.equals("MinMerge") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          minMerge = input.nval;
        } else if (p.equals("MaxTries") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          maxTries = (int)input.nval;
        } else if (p.equals("ModelPriorWeight") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          modelPriorWeight = input.nval;
        } else if (p.equals("BatchSize") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          batchSize = (int)input.nval;
        } else if (p.equals("TrainingPasses") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          trainingPasses = (int)input.nval;
        } else if (p.equals("AdaptVirtuals") &&
                   token == NestedStreamTokenizer.TT_WORD &&
                   (input.sval.equals("true") ||
                    input.sval.equals("false"))) {
          adaptVirtuals = Boolean.valueOf(input.sval).booleanValue();
        } else if (p.equals("VirtualInertia") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          virtualInertia = input.nval;
        } else if (p.equals("MaxVirtual") &&
                   token == NestedStreamTokenizer.TT_NUMBER) {
          maxVirtual = input.nval;
        } else if (p.equals("VerboseSenses") &&
                   token == NestedStreamTokenizer.TT_WORD &&
                   (input.sval.equals("true") ||
                    input.sval.equals("false"))) {
          verboseSenses = Boolean.valueOf(input.sval).booleanValue();
        } else if (p.equals("TestRecognition") &&
                   token == NestedStreamTokenizer.TT_WORD &&
                   (input.sval.equals("true") ||
                    input.sval.equals("false"))) {
          testRecognition = Boolean.valueOf(input.sval).booleanValue();
        } else if (p.equals("MotorFeatures") &&
                   token == NestedStreamTokenizer.TT_NESTED) {
          readFeatures(motorNames, motorVirtuals, motorValues, input.sval);
        } else if (p.equals("WorldFeatures") &&
                   token == NestedStreamTokenizer.TT_NESTED) {
          readFeatures(worldNames, worldVirtuals, worldValues, input.sval);
        } else {
          throw(new IOException("unknown parameter specified: " + p));
        }
        token = input.nextToken();
      }
    } catch (IOException e) {
      System.err.println("Params load-from-file exception: " +
                         e.getMessage());
    }
    // MAKE SURE WE'VE SET EVERYTHING THAT IS OBLIGATORY?:
  }

  // Parse one feature block: repeated entries of the form
  //   name virtualCount { value value ... }
  // filling the given name list, virtual-count map, and value-list map.
  // 's' is the raw text of the nested block.
  private void readFeatures(Vector nameVec, Hashtable virtMap,
                            Hashtable valueMap, String s)
      throws IOException {
    try {
      NestedStreamTokenizer input =
        new NestedStreamTokenizer(new StringReader(s));
      int token = input.nextToken();
      while (token != NestedStreamTokenizer.TT_EOF) {
        if (token != NestedStreamTokenizer.TT_WORD) {
          throw(new IOException("expected feature name (tok=" + token + ")"));
        }
        String fname = input.sval;
        nameVec.addElement(fname);
        token = input.nextToken();
        if (token != NestedStreamTokenizer.TT_NUMBER) {
          throw(new IOException("expected virtual-sample count (tok=" +
                                token +")"));
        }
        virtMap.put(fname, new Double(input.nval));
        token = input.nextToken();
        if (token != NestedStreamTokenizer.TT_NESTED) {
          throw(new IOException("expected nested list of feature values " +
                                "(tok=" + token + ", sval=" + input.sval +
                                ")"));
        }
        // Collect the allowed values; numbers are stored as their
        // String form.
        Vector values = new Vector(); // Vector of String
        NestedStreamTokenizer list =
          new NestedStreamTokenizer(new StringReader(input.sval));
        token = list.nextToken();
        while (token != NestedStreamTokenizer.TT_EOF) {
          if (token == NestedStreamTokenizer.TT_WORD) {
            values.addElement(list.sval);
          } else if (token == NestedStreamTokenizer.TT_NUMBER) {
            values.addElement(Double.toString(list.nval));
          } else {
            throw(new IOException("expected word or numerical feature value"));
          }
          token = list.nextToken();
        }
        valueMap.put(fname, values);
        token = input.nextToken();
      }
    } catch (IOException e) {
      System.err.println("Params exception loading features: " +
                         e.getMessage());
    }
  }

  // Simple accessors for the scalar parameters.
  public String id() { return id; }
  public void setId(String newId) { id = newId; }
  public double minLabel() { return minLabel; }
  public double minExplain() { return minExplain; }
  public double minObey() { return minObey; }
  public double minSetFeature() { return minSetFeature; }
  public boolean useWordFrequencies() { return useWordFrequencies; }
  public double minMerge() { return minMerge; }
  public int maxTries() { return maxTries; }
  public double modelPriorWeight() { return modelPriorWeight; }
  public double virtualInertia() { return virtualInertia; }
  public double maxVirtual() { return maxVirtual; }
  public boolean adaptVirtuals() { return adaptVirtuals; }
  public int batchSize() { return batchSize; }
  public int trainingPasses() { return trainingPasses; }
  public boolean verboseSenses() { return verboseSenses; }
  public void setVerboseSenses(boolean vs) { verboseSenses = vs; }
  public boolean testRecognition() { return testRecognition; }

  public Enumeration motorFeatureNames() { return motorNames.elements(); }
  public Enumeration worldFeatureNames() { return worldNames.elements(); }

  // World features first, then motor features.
  public Enumeration featureNames() {
    return new DoubleEnumeration(worldNames.elements(),
                                 motorNames.elements());
  }

  // Virtual-sample count for a feature of either kind.
  // Returns -1 if fname is invalid.
  public double featureVirtuals(String fname) {
    if (motorVirtuals.containsKey(fname)) {
      return ((Double)motorVirtuals.get(fname)).doubleValue();
    } else if (worldVirtuals.containsKey(fname)) {
      return ((Double)worldVirtuals.get(fname)).doubleValue();
    } else {
      return -1 ;
    }
  }

  // Returns a Vector of Strings, or null if fname is invalid.
  public Vector featureVals(String fname) {
    if (motorValues.containsKey(fname)) {
      return (Vector)motorValues.get(fname);
    } else if (worldValues.containsKey(fname)) {
      return (Vector)worldValues.get(fname);
    } else {
      return null;
    }
  }

  public int numFeatures() { return worldNames.size() + motorNames.size(); }

  // Return a copy of the union of motorVirtuals and worldVirtuals
  public Hashtable copyOfVirtuals() {
    Hashtable res = new Hashtable();
    Enumeration fEnum = featureNames();
    while (fEnum.hasMoreElements()) {
      String f = (String)fEnum.nextElement();
      res.put(f, new Double(featureVirtuals(f)));
    }
    return res;
  }

  // Render in the same format the constructor parses.
  public String toString() {
    StringBuffer res = new StringBuffer();
    res.append("// Parameter set: ").append(id).append("\n")
      .append("\nMinLabel\t\t").append(minLabel)
      .append("\nMinExplain\t\t").append(minExplain)
      .append("\nMinObey\t\t\t").append(minObey)
      .append("\nMinSetFeature\t\t").append(minSetFeature)
      .append("\nUseWordFrequencies\t").append(useWordFrequencies)
      .append("\nMinMerge\t\t").append(minMerge)
      .append("\nMaxTries\t\t").append(maxTries)
      .append("\nModelPriorWeight\t").append(modelPriorWeight)
      .append("\nBatchSize\t\t").append(batchSize)
      .append("\nTrainingPasses\t\t").append(trainingPasses)
      .append("\nAdaptVirtuals\t\t").append(adaptVirtuals)
      .append("\nVirtualInertia\t\t").append(virtualInertia)
      .append("\nMaxVirtual\t\t").append(maxVirtual)
      .append("\nVerboseSenses\t\t").append(verboseSenses)
      .append("\nTestRecognition\t\t").append(testRecognition)
      .append("\n\nMotorFeatures {");
    Enumeration fEnum = motorNames.elements();
    while (fEnum.hasMoreElements()) {
      String f = (String)fEnum.nextElement();
      res.append("\n\t").append(f).append("\t").append(motorVirtuals.get(f));
      res.append("\t{");
      for (int i = 0; i < ((Vector)motorValues.get(f)).size(); i++) {
        if (i > 0) res.append(" ");
        res.append(((Vector)motorValues.get(f)).elementAt(i));
      }
      res.append("}");
    }
    res.append("\n}\nWorldFeatures {");
    fEnum = worldNames.elements();
    while (fEnum.hasMoreElements()) {
      String f = (String)fEnum.nextElement();
      res.append("\n\t").append(f).append("\t").append(worldVirtuals.get(f));
      res.append("\t{");
      for (int i = 0; i < ((Vector)worldValues.get(f)).size(); i++) {
        if (i > 0) res.append(" ");
        res.append(((Vector)worldValues.get(f)).elementAt(i));
      }
      res.append("}");
    }
    res.append("\n}\n");
    return res.toString();
  }
}

///////////////////////////////////////////////////////////////////////
// A set of pointers to Scenarios for training, recognition testing,
// and obeying.
class Dataset {
  private String id;                     // dataset name (filename)
  private Vector train, recognize, obey; // Vector of String scenario names

  // Parse a dataset file: a sequence of train/recognize/obey sections,
  // each followed by a nested list of scenario names.  Every name must
  // exist in the global ScenarioSet.  On error, all three lists are
  // reset to empty.
  Dataset(NestedStreamTokenizer input, String newId) {
    id = newId;
    train = new Vector();
    recognize = new Vector();
    obey = new Vector();
    try {
      int token = input.nextToken();
      while (token != NestedStreamTokenizer.TT_EOF) {
        if (token != NestedStreamTokenizer.TT_WORD ||
            (!input.sval.equals("train") &&
             !input.sval.equals("recognize") &&
             !input.sval.equals("obey"))) {
          throw(new IOException("expected train/recognize/obey"));
        }
        // Select which list this section fills.
        Vector v = null;
        if (input.sval.equals("train")) v = train;
        else if (input.sval.equals("recognize")) v = recognize;
        else if (input.sval.equals("obey")) v = obey;
        token = input.nextToken();
        if (token != NestedStreamTokenizer.TT_NESTED) {
          throw(new IOException("expected nested list of scenarios"));
        }
        NestedStreamTokenizer list =
          new NestedStreamTokenizer(new StringReader(input.sval));
        token = list.nextToken();
        while (token != NestedStreamTokenizer.TT_EOF) {
          if (token != NestedStreamTokenizer.TT_WORD ||
              !VerbLearn.context().scenarios().containsKey(list.sval)) {
            throw(new IOException("invalid scenario name"));
          }
          v.addElement(list.sval);
          token = list.nextToken();
        }
        token = input.nextToken();
      }
    } catch (IOException e) {
      System.err.println("Dataset load-from-file exception: " +
                         e.getMessage());
      train = new Vector();
      recognize = new Vector();
      obey = new Vector();
    }
  }

  public String id() { return id; }
  public void setId(String newId) { id = newId; }

  // Enumerations over the three scenario-name lists.
  public Enumeration trainEnum() { return train.elements(); }
  public Enumeration recognizeEnum() { return recognize.elements(); }
  public Enumeration obeyEnum() { return obey.elements(); }

  // Render in the same format the constructor parses.
  public String toString() {
    StringBuffer res = new StringBuffer();
    res.append("// Dataset: ").append(id).append("\n\ntrain {");
    for (int i = 0; i < train.size(); i++) {
      res.append("\n\t").append(train.elementAt(i));
    }
    res.append("\n}\nrecognize {");
    for (int i = 0; i < recognize.size(); i++) {
      res.append("\n\t").append(recognize.elementAt(i));
    }
    res.append("\n}\nobey {");
    for (int i = 0; i < obey.size(); i++) {
      res.append("\n\t").append(obey.elementAt(i));
    }
    res.append("\n}\n");
    return res.toString();
  }
}

///////////////////////////////////////////////////////////////////////
// Holds a verb complex, i.e. a label for an action. It's basically a
// String[] extended to have a decent toString() routine. Note: when
// converting to/from a String, we use dashes to separate slots, even
// empty ones. E.g., "pick-up" and "push-".
class VerbComplex {
  private String[] words; // one entry per slot; null means slot unfilled

  VerbComplex(int size) {
    words = new String[size];
  }

  // Create a VerbComplex from a String.
  // Dashes delimit slots; consecutive (or leading) dashes yield
  // null (unfilled) slots.
  VerbComplex(String str) {
    Vector v = new Vector();
    String prevToken = "-";
    // Tokenize returning the dashes themselves, so empty slots between
    // two dashes can be detected.
    StringTokenizer st = new StringTokenizer(str, "-", true);
    while (st.hasMoreTokens()) {
      String token = st.nextToken();
      if (!token.equals("-")) {
        v.addElement(token);
      } else {
        // Two dashes in a row mean an empty slot.
        if (prevToken.equals("-")) {
          v.addElement(null);
        }
      }
      prevToken = token;
    }
    words = new String[v.size()];
    for (int i = 0; i < v.size(); i++) {
      words[i] = (String)v.elementAt(i);
    }
  }

  public int size() { return words.length; }

  void setWord(int i, String word) { words[i] = word; }

  public String getWord(int i) { return words[i]; }

  public boolean slotFilled(int i) { return (words[i] != null); }

  // Slots joined by dashes; null slots render as empty strings.
  public String toString() {
    String str = "";
    for (int i = 0; i < words.length; i++) {
      if (i > 0) str += "-";
      if (words[i] != null) str += words[i];
    }
    return str;
  }
}

///////////////////////////////////////////////////////////////////////
// A tuple of label, sense, and prior, likelihood and posterior
// probabilities. (This allows Slot.bestLabelFor() to essentially
// return multiple values.) NOTE: SOME USERS OF THIS CLASS DO NOT
// SET THE PRIOR OR LIKELIHOOD FIELDS.
class CandidateSense {
  public String label;
  public Sense sense;
  public double prior;
  public double likelihood;
  public double posterior;

  CandidateSense() {}

  // This constructor leaves the prior and likelihood slots unused.
CandidateSense(String label, Sense sense, double posterior) { this.label = label; this.sense = sense; this.posterior = posterior; } CandidateSense(String label, Sense sense, double prior, double likelihood, double posterior) { this.label = label; this.sense = sense; this.prior = prior; this.likelihood = likelihood; this.posterior = posterior; } public String toString() { return "sense = " + sense.id() + ", prior = " + Fmt.dbl(prior) + ", likelihood = " + Fmt.dbl(likelihood) + ", posterior = " + Fmt.dbl(posterior) + "\n"; } } /////////////////////////////////////////////////////////////////////// // Two senses, and the similarity between them. class CandidateMerge implements Serializable { public Sense s1, s2; public double similarity; CandidateMerge() {} CandidateMerge(Sense s1, Sense s2, double similarity) { this.s1 = s1; this.s2 = s2; this.similarity = similarity; } public String toString() { return "senses " + s1.id() + " and " + s2.id() + " (similarity = " + similarity + ")"; } }