|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object com.sun.labs.minion.indexer.partition.Partition com.sun.labs.minion.indexer.partition.DiskPartition com.sun.labs.minion.classification.ClassifierDiskPartition
public class ClassifierDiskPartition
A disk partition that will hold classifier data.
Field Summary | |
---|---|
protected ClassifierModel[] |
allModels
|
protected long |
dataStart
The place where the model specific data starts in the file. |
protected java.util.Map<java.lang.String,ClassificationFeature> |
features
Things to fix after the open house: the main dictionary in the classifiers doesn't store the feature scores for the documents (i.e., the classifiers.) So we can't do bulk evaluation without inverting the document vectors. |
protected static java.lang.String |
logTag
|
protected ClassifierModel |
modelInstance
|
protected java.util.Map<java.lang.String,ClassifierModel> |
modelMap
|
protected java.io.RandomAccessFile |
msd
The file containing the model specific data for this partition. |
protected ReadableBuffer |
msdOff
A buffer containing the offsets for the model specific data for each of our classifiers. |
protected int |
nModels
The number of models that we're storing. |
Fields inherited from class com.sun.labs.minion.indexer.partition.DiskPartition |
---|
BUFF_SIZE, deletions, delFile, delFileLock, docDict, docDictFile, docPostFile, documentDictFactory, dvl, ignored, mainDict, mainFiles, MATCH_CUT_OFF, MIN_LEN, removedFile, termCache |
Fields inherited from class com.sun.labs.minion.indexer.partition.Partition |
---|
DICT_OFFSETS_SIZE, docDictFactory, entryClass, entryName, indexConfig, mainDictFactory, mainDictFile, mainPostFiles, manager, maxID, nEntries, partNumber, PROP_DOC_DICT_FACTORY, PROP_INDEX_CONFIG, PROP_MAIN_DICT_FACTORY, PROP_PARTITION_MANAGER, stats |
Constructor Summary | |
---|---|
ClassifierDiskPartition(java.lang.Integer partNum,
ClassifierManager manager,
DictionaryFactory mainDictFactory,
DictionaryFactory documentDictFactory)
Constructs a disk partition for a specific partition number. |
Method Summary | |
---|---|
int |
assembleResults(float[] scores,
java.lang.String modelName,
java.lang.String resultField,
java.util.Map<java.lang.String,ClassificationResult> results)
|
void |
classify(DiskPartition sdp,
ExtraClassification ec,
java.util.Map<java.lang.String,ClassificationResult> results)
Classifies all the documents in a disk partition. |
boolean |
close()
Close the files associated with this partition. |
void |
findSimilar(ClassifierModel cm,
java.util.Map<java.lang.String,java.lang.Float> scores)
|
protected ClassifierModel[] |
getAllModels()
|
protected ClassifierModel |
getClassifier(FeatureEntry fe)
Gets a classifier model from an entry in our document dictionary. |
protected ClassifierModel |
getClassifier(java.lang.String cname)
|
float |
getDocumentVectorLength(int docID)
Gets the length of a document vector for a given document. |
java.util.Set |
getFeatures(java.lang.String cname)
|
protected java.util.Map<java.lang.String,ClassificationFeature> |
invert()
|
protected java.util.Set |
makeFeatures(FeatureEntry entry)
|
protected void |
mergeCustom(int newPartNumber,
DiskPartition[] sortedParts,
int[][] idMaps,
int newMaxDocID,
int[] docIDStart,
int[] nUndel,
int[][] docIDMaps)
Merges the model specific data for these classifiers. |
protected static void |
reap(PartitionManager m,
int n)
Reaps the given classifier partition. |
Methods inherited from class com.sun.labs.minion.indexer.partition.DiskPartition |
---|
close, createRemoveFile, delete, deleteDocument, deleteDocument, docsAreMerged, getAverageDocumentLength, getCloseTime, getDeletedDocumentsMap, getDelMap, getDocIDMap, getDocumentIterator, getDocumentIterator, getDocumentLength, getDocumentTerm, getDocumentTerm, getDocumentVectorLength, getDocumentVectorLength, getDVL, getInputBuffers, getMainDictionary, getMainDictionaryIterator, getMainDictionaryIterator, getMainIterator, getMaxDocumentID, getMaxTermID, getNDocs, getNEntries, getNTokens, getTerm, getTerm, getTerm, getTerm, getTermCache, initAll, initDocDict, initDVL, initMainDict, initMainFiles, isDeleted, isIndexed, merge, merge, normalize, setCloseTime, syncDeletedMap, toString, updatePartition |
Methods inherited from class com.sun.labs.minion.indexer.partition.Partition |
---|
compareTo, getAllFiles, getAllFiles, getDocFiles, getDocFiles, getIndexConfig, getMainFiles, getMainFiles, getManager, getName, getNumPostingsChannels, getPartitionNumber, getQueryConfig, getStats, newProperties |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
protected java.io.RandomAccessFile msd
protected ReadableBuffer msdOff
protected int nModels
protected long dataStart
protected static java.lang.String logTag
protected ClassifierModel modelInstance
protected java.util.Map<java.lang.String,ClassificationFeature> features
protected ClassifierModel[] allModels
protected java.util.Map<java.lang.String,ClassifierModel> modelMap
Constructor Detail |
---|
public ClassifierDiskPartition(java.lang.Integer partNum, ClassifierManager manager, DictionaryFactory mainDictFactory, DictionaryFactory documentDictFactory) throws java.io.IOException
partNum
- the number of this partition
manager
- the classifier manager for this partition
java.io.IOException
Method Detail |
---|
protected ClassifierModel getClassifier(java.lang.String cname)
public void findSimilar(ClassifierModel cm, java.util.Map<java.lang.String,java.lang.Float> scores)
protected ClassifierModel[] getAllModels()
protected java.util.Map<java.lang.String,ClassificationFeature> invert()
protected ClassifierModel getClassifier(FeatureEntry fe)
public void classify(DiskPartition sdp, ExtraClassification ec, java.util.Map<java.lang.String,ClassificationResult> results)
sdp
- a disk partition
ec
- a (possibly null) pair of field names. One is the name of the field from which classifiers were built. If this pair is non-null, then only classifiers that were built from the contents of the classifier from field in the pair will be considered. Also, if this pair is non-null, then whatever classifiers are applied will be applied against the contents of the document from field in the pair. If this pair is null, then classification proceeds as usual.
results
- a map to fill up with classification results
public int assembleResults(float[] scores, java.lang.String modelName, java.lang.String resultField, java.util.Map<java.lang.String,ClassificationResult> results)
public java.util.Set getFeatures(java.lang.String cname)
protected java.util.Set makeFeatures(FeatureEntry entry)
public float getDocumentVectorLength(int docID)
getDocumentVectorLength
in class DiskPartition
docID
- the ID of the document for whose vector we want the length
protected void mergeCustom(int newPartNumber, DiskPartition[] sortedParts, int[][] idMaps, int newMaxDocID, int[] docIDStart, int[] nUndel, int[][] docIDMaps) throws java.lang.Exception
mergeCustom
in class DiskPartition
newPartNumber
- the number of the new partition
sortedParts
- the sorted list of partitions
idMaps
- a set of maps from old entry ids in the main dictionary to new entry ids in the merged dictionary
newMaxDocID
- the new maximum document id
docIDStart
- the starting doc ids
nUndel
- the number of undeleted documents in each partition
docIDMaps
- doc id maps (see merge)
java.lang.Exception
public boolean close()
close
in class DiskPartition
true if the files were successfully closed.
protected static void reap(PartitionManager m, int n)
m
- The manager associated with the partition.
n
- The partition number to reap.
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |