|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object com.sun.labs.minion.pipeline.Token
public class Token
A class encapsulating all of our knowledge about a given token. Instances of this class are passed down an indexing pipeline as they are parsed from the file.
Field Summary | |
---|---|
static int |
BIGRAM
|
protected boolean |
containsDigits
An indicator of whether this token contains digits (the taxonomy classifier ignores such tokens). |
protected int |
count
The occurrence count for this token. |
protected int |
end
The ending character offset for the token. |
protected int[] |
fields
A set of fields active for this token. |
protected int |
id
An ID assigned to this token. |
static int |
NORMAL
|
static int |
PUNCT
|
protected int |
start
The starting character offset for the token. |
protected java.lang.String |
token
The string for a token. |
protected int |
type
The type of this token, whether standard, bigram, or punctuation. |
protected int |
wordNum
The ordinal number of this word in the document. |
Constructor Summary | |
---|---|
Token()
|
|
Token(java.lang.String token,
int count)
Creates a token. |
|
Token(java.lang.String token,
int wordNum,
int type)
Creates a token. |
|
Token(java.lang.String token,
int wordNum,
int start,
int end)
Creates a token that can be passed down the pipeline. |
|
Token(java.lang.String token,
int wordNum,
int type,
int start,
int end)
Creates a token that can be passed down the pipeline. |
|
Token(java.lang.String token,
int wordNum,
int type,
int start,
int end,
int count)
Creates a token that can be passed down the pipeline. |
Method Summary | |
---|---|
boolean |
containsDigits()
|
int |
getCount()
Gets the count of occurrences for this token. |
int |
getEnd()
|
int[] |
getFields()
Gets the fields that are active at the time of the occurrence. |
int |
getID()
Gets the ID of the term in this occurrence. |
int |
getPos()
Gets the position at which the occurrence was found. |
int |
getStart()
|
java.lang.String |
getToken()
|
int |
getType()
|
int |
getWordNum()
|
void |
incrWordNum()
|
int |
length()
|
Token |
reset(java.lang.String token,
int wordNum,
int start,
int end)
|
Token |
reset(java.lang.String token,
int wordNum,
int type,
int start,
int end)
|
Token |
reset(java.lang.String token,
int wordNum,
int type,
int start,
int end,
int count)
|
void |
setCount(int count)
Sets the count of occurrences that this occurrence represents. |
void |
setFields(int[] fields)
|
void |
setID(int id)
Sets the ID for this token. |
void |
setPos(int pos)
Sets the position for this token. |
void |
setToken(java.lang.String token)
This method is intentionally package-private. |
void |
setType(int type)
|
void |
setWordNum(int wordNum)
Sets the word number for this token. |
java.lang.String |
toString()
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
protected java.lang.String token
protected int wordNum
protected int type
protected int start
protected int end
protected int count
protected int id
protected int[] fields
protected boolean containsDigits
public static final int NORMAL
public static final int BIGRAM
public static final int PUNCT
Constructor Detail |
---|
public Token()
public Token(java.lang.String token, int count)
public Token(java.lang.String token, int wordNum, int type)
public Token(java.lang.String token, int wordNum, int start, int end)
token
- The string tokenized from the input data
wordNum
- The ordinal word number of this token in the indexed material.
start
- The starting character offset of this token
end
- The ending character offset of this token
public Token(java.lang.String token, int wordNum, int type, int start, int end)
token
- The string tokenized from the input data
wordNum
- The ordinal word number of this token in the indexed material.
type
- The type of this token, from our constant types
start
- The beginning character offset of this token
end
- The ending character offset of this token
public Token(java.lang.String token, int wordNum, int type, int start, int end, int count)
token
- The string tokenized from the input data
wordNum
- The ordinal word number of this token in the indexed material.
type
- The type of this token, from our constant types
start
- The beginning character offset of this token
end
- The ending character offset of this token
Method Detail |
---|
public Token reset(java.lang.String token, int wordNum, int type, int start, int end)
public Token reset(java.lang.String token, int wordNum, int start, int end)
public Token reset(java.lang.String token, int wordNum, int type, int start, int end, int count)
public int length()
public java.lang.String getToken()
public void setToken(java.lang.String token)
public int getType()
public void setType(int type)
public int getWordNum()
public void incrWordNum()
public int getStart()
public int getEnd()
public java.lang.String toString()
toString
in class java.lang.Object
public int getID()
getID
in interface Occurrence
public void setID(int id)
setID
in interface Occurrence
id
- the ID.
public int getCount()
getCount
in interface Occurrence
public void setWordNum(int wordNum)
public void setCount(int count)
setCount
in interface Occurrence
count
- the number of occurrences.
public int getPos()
getPos
in interface FieldOccurrence
public void setPos(int pos)
public int[] getFields()
getFields
in interface FieldOccurrence
public void setFields(int[] fields)
public boolean containsDigits()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |