|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
com.sun.labs.minion.indexer.partition.Partition
com.sun.labs.minion.indexer.partition.DiskPartition
com.sun.labs.minion.classification.ClassifierDiskPartition
public class ClassifierDiskPartition
A disk partition that will hold classifier data.
| Field Summary | |
|---|---|
protected ClassifierModel[] |
allModels
|
protected long |
dataStart
The place where the model specific data starts in the file. |
protected java.util.Map<java.lang.String,ClassificationFeature> |
features
Things to fix after the open house: the main dictionary in the classifiers doesn't store the feature scores for the documents (i.e., the classifiers), so we can't do bulk evaluation without inverting the document vectors. |
protected static java.lang.String |
logTag
|
protected ClassifierModel |
modelInstance
|
protected java.util.Map<java.lang.String,ClassifierModel> |
modelMap
|
protected java.io.RandomAccessFile |
msd
The file containing the model specific data for this partition. |
protected ReadableBuffer |
msdOff
A buffer containing the offsets for the model specific data for each of our classifiers. |
protected int |
nModels
The number of models that we're storing. |
| Fields inherited from class com.sun.labs.minion.indexer.partition.DiskPartition |
|---|
BUFF_SIZE, deletions, delFile, delFileLock, docDict, docDictFile, docPostFile, documentDictFactory, dvl, ignored, mainDict, mainFiles, MATCH_CUT_OFF, MIN_LEN, removedFile, termCache |
| Fields inherited from class com.sun.labs.minion.indexer.partition.Partition |
|---|
DICT_OFFSETS_SIZE, docDictFactory, entryClass, entryName, indexConfig, mainDictFactory, mainDictFile, mainPostFiles, manager, maxID, nEntries, partNumber, PROP_DOC_DICT_FACTORY, PROP_INDEX_CONFIG, PROP_MAIN_DICT_FACTORY, PROP_PARTITION_MANAGER, stats |
| Constructor Summary | |
|---|---|
ClassifierDiskPartition(java.lang.Integer partNum,
ClassifierManager manager,
DictionaryFactory mainDictFactory,
DictionaryFactory documentDictFactory)
Constructs a disk partition for a specific partition number. |
|
| Method Summary | |
|---|---|
int |
assembleResults(float[] scores,
java.lang.String modelName,
java.lang.String resultField,
java.util.Map<java.lang.String,ClassificationResult> results)
|
void |
classify(DiskPartition sdp,
ExtraClassification ec,
java.util.Map<java.lang.String,ClassificationResult> results)
Classifies all the documents in a disk partition. |
boolean |
close()
Close the files associated with this partition. |
void |
findSimilar(ClassifierModel cm,
java.util.Map<java.lang.String,java.lang.Float> scores)
|
protected ClassifierModel[] |
getAllModels()
|
protected ClassifierModel |
getClassifier(FeatureEntry fe)
Gets a classifier model from an entry in our document dictionary. |
protected ClassifierModel |
getClassifier(java.lang.String cname)
|
float |
getDocumentVectorLength(int docID)
Gets the length of a document vector for a given document. |
java.util.Set |
getFeatures(java.lang.String cname)
|
protected java.util.Map<java.lang.String,ClassificationFeature> |
invert()
|
protected java.util.Set |
makeFeatures(FeatureEntry entry)
|
protected void |
mergeCustom(int newPartNumber,
DiskPartition[] sortedParts,
int[][] idMaps,
int newMaxDocID,
int[] docIDStart,
int[] nUndel,
int[][] docIDMaps)
Merges the model specific data for these classifiers. |
protected static void |
reap(PartitionManager m,
int n)
Reaps the given classifier partition. |
| Methods inherited from class com.sun.labs.minion.indexer.partition.DiskPartition |
|---|
close, createRemoveFile, delete, deleteDocument, deleteDocument, docsAreMerged, getAverageDocumentLength, getCloseTime, getDeletedDocumentsMap, getDelMap, getDocIDMap, getDocumentIterator, getDocumentIterator, getDocumentLength, getDocumentTerm, getDocumentTerm, getDocumentVectorLength, getDocumentVectorLength, getDVL, getInputBuffers, getMainDictionary, getMainDictionaryIterator, getMainDictionaryIterator, getMainIterator, getMaxDocumentID, getMaxTermID, getNDocs, getNEntries, getNTokens, getTerm, getTerm, getTerm, getTerm, getTermCache, initAll, initDocDict, initDVL, initMainDict, initMainFiles, isDeleted, isIndexed, merge, merge, normalize, setCloseTime, syncDeletedMap, toString, updatePartition |
| Methods inherited from class com.sun.labs.minion.indexer.partition.Partition |
|---|
compareTo, getAllFiles, getAllFiles, getDocFiles, getDocFiles, getIndexConfig, getMainFiles, getMainFiles, getManager, getName, getNumPostingsChannels, getPartitionNumber, getQueryConfig, getStats, newProperties |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
| Field Detail |
|---|
protected java.io.RandomAccessFile msd
protected ReadableBuffer msdOff
protected int nModels
protected long dataStart
protected static java.lang.String logTag
protected ClassifierModel modelInstance
protected java.util.Map<java.lang.String,ClassificationFeature> features
protected ClassifierModel[] allModels
protected java.util.Map<java.lang.String,ClassifierModel> modelMap
| Constructor Detail |
|---|
public ClassifierDiskPartition(java.lang.Integer partNum,
ClassifierManager manager,
DictionaryFactory mainDictFactory,
DictionaryFactory documentDictFactory)
throws java.io.IOException
Parameters:
partNum - the number of this partition
manager - the classifier manager for this partition
Throws:
java.io.IOException
| Method Detail |
|---|
protected ClassifierModel getClassifier(java.lang.String cname)
public void findSimilar(ClassifierModel cm,
java.util.Map<java.lang.String,java.lang.Float> scores)
protected ClassifierModel[] getAllModels()
protected java.util.Map<java.lang.String,ClassificationFeature> invert()
protected ClassifierModel getClassifier(FeatureEntry fe)
public void classify(DiskPartition sdp,
ExtraClassification ec,
java.util.Map<java.lang.String,ClassificationResult> results)
Parameters:
sdp - a disk partition
ec - a (possibly null) pair of field names. One is the
name of the field from which classifiers were built. If this pair is
non-null, then only classifiers
that were built from the contents of the "classifier from" field in the pair will be considered.
Also, if this pair is non-null then whatever classifiers are
applied will be applied against the contents of the "document from" field in the
pair. If this pair is null, then classification proceeds as
usual.
results - a map to fill up with classification results
public int assembleResults(float[] scores,
java.lang.String modelName,
java.lang.String resultField,
java.util.Map<java.lang.String,ClassificationResult> results)
public java.util.Set getFeatures(java.lang.String cname)
protected java.util.Set makeFeatures(FeatureEntry entry)
public float getDocumentVectorLength(int docID)
Overrides:
getDocumentVectorLength in class DiskPartition
Parameters:
docID - the ID of the document for whose vector we want the length
protected void mergeCustom(int newPartNumber,
DiskPartition[] sortedParts,
int[][] idMaps,
int newMaxDocID,
int[] docIDStart,
int[] nUndel,
int[][] docIDMaps)
throws java.lang.Exception
Overrides:
mergeCustom in class DiskPartition
Parameters:
newPartNumber - the number of the new partition
sortedParts - the sorted list of partitions
idMaps - a set of maps from old entry ids in the main dictionary
to new entry ids in the merged dictionary
newMaxDocID - the new maximum document id
docIDStart - the starting doc ids
nUndel - the number of undeleted documents in each partition
docIDMaps - doc id maps (see merge)
Throws:
java.lang.Exception
public boolean close()
Overrides:
close in class DiskPartition
Returns:
true if the files were successfully closed.
protected static void reap(PartitionManager m,
int n)
Parameters:
m - The manager associated with the partition.
n - The partition number to reap.
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||