Tokenizer

class Tokenizer {

constructor(options: {

xmlMode?: boolean;

decodeEntities?: boolean;

} | null, cbs: Callbacks);

private baseState;

private buffer;

private bufferOffset;

private readonly cbs;

private cleanup;

private readonly decodeEntities;

private decodeNumericEntity;

private emitPartial;

private emitToken;

private ended;

private finish;

private getSection;

private handleInAttributeValue;

private handleTrailingData;

private isTagStartChar;

private parse;

private parseFixedEntity;

private parseLegacyEntity;

private running;

private special;

private stateAfterAttributeName;

private stateAfterCdata1;

private stateAfterCdata2;

private stateAfterClosingTagName;

private stateAfterComment1;

private stateAfterComment2;

private stateAfterSpecialLast;

private stateBeforeAttributeName;

private stateBeforeAttributeValue;

private stateBeforeCdata6;

private stateBeforeClosingTagName;

private stateBeforeComment;

private stateBeforeDeclaration;

private stateBeforeSpecialLast;

private stateBeforeSpecialS;

private stateBeforeSpecialSEnd;

private stateBeforeTagName;

private stateInAttributeName;

private stateInAttributeValueDoubleQuotes;

private stateInAttributeValueNoQuotes;

private stateInAttributeValueSingleQuotes;

private stateInCdata;

private stateInClosingTagName;

private stateInComment;

private stateInDeclaration;

private stateInHexEntity;

private stateInNamedEntity;

private stateInNumericEntity;

private stateInProcessingInstruction;

private stateInSelfClosingTag;

private stateInSpecialComment;

private stateInTagName;

private stateText;

private readonly xmlMode;

_index: number;

_state: State;

sectionStart: number;

end(chunk?: string): void;

getAbsoluteIndex(): number;

pause(): void;

reset(): void;

resume(): void;

write(chunk: string): void;

}

§Constructors

new Tokenizer(options: {

xmlMode?: boolean;

decodeEntities?: boolean;

} | null, cbs: Callbacks)

[src]

§Properties

baseState

[src]

Some behavior, e.g. decoding entities, is performed while we are in another state. This keeps track of that other state.

buffer

[src]

The read buffer.

bufferOffset

[src]

Data that has already been processed is occasionally removed from the buffer. bufferOffset keeps track of how many characters have been removed, to make sure position information stays accurate.

cbs

cleanup

decodeEntities

decodeNumericEntity

emitPartial

emitToken

ended

Indicates whether the tokenizer has finished running, i.e. whether end() has been called.

finish

[src]

getSection

[src]

handleInAttributeValue

[src]

handleTrailingData

[src]

isTagStartChar

[src]

HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.

XML allows many more characters here (see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). We allow anything that wouldn't end the tag.

parse

[src]

Iterates through the buffer, calling the function corresponding to the current state.

States that are more likely to be hit are higher up, as a performance improvement.

parseFixedEntity

[src]

parseLegacyEntity

[src]

running

[src]

Indicates whether the tokenizer is currently running, i.e. has not been paused.

special

[src]

For special parsing behavior inside of script and style tags.

stateAfterAttributeName

[src]

stateAfterCdata1

[src]

stateAfterCdata2

[src]

stateAfterClosingTagName

[src]

stateAfterComment1

[src]

stateAfterComment2

[src]

stateAfterSpecialLast

[src]

stateBeforeAttributeName

[src]

stateBeforeAttributeValue

[src]

stateBeforeCdata6

[src]

stateBeforeClosingTagName

[src]

stateBeforeComment

[src]

stateBeforeDeclaration

[src]

stateBeforeSpecialLast

[src]

stateBeforeSpecialS

[src]

stateBeforeSpecialSEnd

[src]

stateBeforeTagName

[src]

stateInAttributeName

[src]

stateInAttributeValueDoubleQuotes

[src]

stateInAttributeValueNoQuotes

[src]

stateInAttributeValueSingleQuotes

[src]

stateInCdata

[src]

stateInClosingTagName

stateInComment

stateInDeclaration

stateInHexEntity

stateInNamedEntity

stateInNumericEntity

stateInProcessingInstruction

[src]

stateInSelfClosingTag

[src]

stateInSpecialComment

stateInTagName

stateText

xmlMode

_index: number

The index within the buffer that we are currently looking at.

_state: State

[src]

The current state the tokenizer is in.

sectionStart: number

[src]

The beginning of the section that is currently being read.

§Methods

end(chunk?: string): void

[src]

getAbsoluteIndex(): number

[src]

The current index within all of the written data.

pause(): void

[src]

reset(): void

[src]

resume(): void

[src]

write(chunk: string): void

[src]