00001 {*********************************************************}
00002 { }
00003 { Zeos Database Objects }
00004 { String tokenizing classes and interfaces }
00005 { }
00006 { Originally written by Sergey Seroukhov }
00007 { }
00008 {*********************************************************}
00009
00010 {@********************************************************}
00011 { Copyright (c) 1999-2006 Zeos Development Group }
00012 { }
00013 { License Agreement: }
00014 { }
00015 { This library is distributed in the hope that it will be }
00016 { useful, but WITHOUT ANY WARRANTY; without even the }
00017 { implied warranty of MERCHANTABILITY or FITNESS FOR }
00018 { A PARTICULAR PURPOSE. See the GNU Lesser General }
00019 { Public License for more details. }
00020 { }
00021 { The source code of the ZEOS Libraries and packages are }
00022 { distributed under the Library GNU General Public }
00023 { License (see the file COPYING / COPYING.ZEOS) }
00024 { with the following modification: }
00025 { As a special exception, the copyright holders of this }
00026 { library give you permission to link this library with }
00027 { independent modules to produce an executable, }
00028 { regardless of the license terms of these independent }
00029 { modules, and to copy and distribute the resulting }
00030 { executable under terms of your choice, provided that }
00031 { you also meet, for each linked independent module, }
00032 { the terms and conditions of the license of that module. }
00033 { An independent module is a module which is not derived }
00034 { from or based on this library. If you modify this }
00035 { library, you may extend this exception to your version }
00036 { of the library, but you are not obligated to do so. }
00037 { If you do not wish to do so, delete this exception }
00038 { statement from your version. }
00039 { }
00040 { }
00041 { The project web site is located on: }
00042 { http:
00043 { http:
00044 { svn:
00045 { }
00046 { http:
00047 { http:
00048 { }
00049 { }
00050 { }
00051 { Zeos Development Group. }
00052 {********************************************************@}
00053
00054 unit ZTokenizer;
00055
00056 interface
00057
00058 {$I ZCore.inc}
00059
00060 uses
00061 Classes, SysUtils, ZClasses;
00062
00063 type
00064
{**
  Enumerates the kinds of token that can be stored in
  <code>TZToken.TokenType</code>, such as "number", "symbol" or "word".
  The tokenizer states whose code is visible in this unit assign
  ttUnknown, ttInteger, ttFloat, ttQuoted and ttComment.
}
TZTokenType = (ttUnknown, ttEOF, ttFloat, ttInteger, ttHexDecimal,
  ttNumber, ttSymbol, ttQuoted, ttQuotedIdentifier, ttWord, ttKeyword, ttWhitespace,
  ttComment, ttSpecial);
00072
{**
  Defines options for tokenizing strings.
  NOTE(review): the routines that interpret these flags are not visible
  in this part of the unit. Judging by the names, they select which token
  kinds are dropped (unknown, whitespace, comment, EOF), whether
  whitespace and number tokens are unified, and whether quoted strings
  are decoded - confirm against the Tokenize* implementations.
}
TZTokenOption = (toSkipUnknown, toSkipWhitespaces, toSkipComments,
  toSkipEOF, toUnifyWhitespaces, toUnifyNumbers, toDecodeStrings);
{** A set of tokenizing options, as passed to the Tokenize* methods. }
TZTokenOptions = set of TZTokenOption;
00079
{**
  A token represents a logical chunk of a string. For
  example, a typical tokenizer would break the string
  <code>"1.23 <= 12.3"</code> into three tokens: the number
  1.23, a less-than-or-equal symbol, and the number 12.3. A
  token is a plain receptacle; it relies on a tokenizer to decide
  precisely how to divide a string into tokens.
}
TZToken = packed record
  { The text of the token. }
  Value: string;
  { The kind of token, as assigned by the tokenizer state that read it. }
  TokenType: TZTokenType;
end;

{** Defines a dynamic array of tokens. }
TZTokenDynArray = array of TZToken;


{ Forward declaration: the tokenizer states declared below take a
  TZTokenizer argument before the class itself is declared. }
TZTokenizer = class;
00098
{**
  Abstract base class of all tokenizer states. A state returns one token,
  given a stream, the initial character that was already read from that
  stream, and the tokenizer that is conducting the overall tokenization.
  The tokenizer argument exists so that a state can delegate to another
  state; for example <code>TZNumberState</code> hands a lone '-' or '.'
  over to the tokenizer's symbol state.
}
TZTokenizerState = class (TObject)
public
  {**
    Reads the next token.
    @param Stream the stream that supplies the characters after FirstChar.
    @param FirstChar the character that was already read from the stream.
    @param Tokenizer the tokenizer that conducts the tokenization.
    @return the token that was read.
  }
  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; virtual; abstract;
end;
00115
{**
  A number state returns a number from a stream. This state's idea of a
  number allows an optional, initial minus sign, followed by digits. A
  decimal point and another run of digits may follow these digits.
  The token is ttFloat when a decimal point was absorbed and ttInteger
  otherwise. When no digit at all is found, a leading '-' or '.' is
  passed on to the tokenizer's symbol state.
}
TZNumberState = class (TZTokenizerState)
public
  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;
end;
00127
{**
  A quote state returns a quoted string token from a stream.
  This state collects characters until it sees a match
  to the character that the tokenizer used to switch to
  this state. For example, if a tokenizer uses a double-
  quote character to enter this state, then <code>
  NextToken</code> will search for another double-quote
  until it finds one or reaches the end of the stream.
}
TZQuoteState = class (TZTokenizerState)
public
  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;

  { Wraps Value in a pair of QuoteChar characters. Virtual, so that
    descendants can supply their own encoding. }
  function EncodeString(const Value: string; QuoteChar: Char): string; virtual;
  { Strips a surrounding pair of QuoteChar characters from Value and
    returns Value unchanged when it is not quoted that way. Virtual, so
    that descendants can supply their own decoding. }
  function DecodeString(const Value: string; QuoteChar: Char): string; virtual;
end;
00145
{**
  A comment state returns a comment from a stream. This base
  implementation collects the initial character and everything up to,
  but not including, the next line break and returns it as a
  ttComment token.
}
TZCommentState = class (TZTokenizerState)
public
  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;
end;
00154
{**
  This state will either delegate to a comment-handling
  routine, or return a token with just a slash in it.
  NOTE(review): the body of NextToken is only partially visible in this
  part of the unit; the visible part starts a ttComment token when the
  initial character is followed by '*'.
}
TZCppCommentState = class (TZCommentState)
protected
  { Reads from the stream up to and including the closing star and slash,
    or to the end of the stream, and returns the text that was read. }
  function GetMultiLineComment(Stream: TStream): string;
  { Reads from the stream through the end of the current line, including
    the line break characters, and returns the text that was read. }
  function GetSingleLineComment(Stream: TStream): string;
public
  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;
end;
00167
{**
  This state will either delegate to a comment-handling
  routine, or return a token with just a slash in it.
  NOTE(review): the body of NextToken is only partially visible in this
  part of the unit; the visible part inspects the following character
  only when the initial character is a slash, and starts a ttComment
  token when that following character is '*'.
}
TZCCommentState = class (TZCppCommentState)
public
  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;
end;
00177
{*Fix for C++ Builder hpp generation bug - #817612 *}
(*$HPPEMIT 'namespace Ztokenizer {class DELPHICLASS TZSymbolNode;}' *)
{ Forward declaration: TZSymbolNodeArray and TZSymbolNode refer to each other. }
TZSymbolNode = class;
{ Dynamic array of symbol nodes; the type of a node's list of children. }
TZSymbolNodeArray = array of TZSymbolNode;
00183
00184 {**
00185 A <code>SymbolNode</code> object is a member of a tree that
00186 contains all possible prefixes of allowable symbols. Multi-
00187 character symbols appear in a <code>SymbolNode</code> tree
00188 with one node for each character.
00189
00190 For example, the symbol <code>=:~</code> will appear in a
00191 tree as three nodes. The first node contains an equals sign,
00192 and has a child; that child contains a colon and has a
00193 child; this third child contains a tilde, and has no
00194 children of its own. If the colon node had another child
00195 for a dollar sign character, then the tree would contain
00196 the symbol <code>=:$</code>.
00197
00198 A tree of <code>SymbolNode</code> objects collaborate to
00199 read a (potentially multi-character) symbol from an input
00200 stream. A root node with no character of its own finds an
00201 initial node that represents the first character in the
00202 input. This node looks to see if the next character in the
00203 stream matches one of its children. If so, the node
00204 delegates its reading task to its child. This approach
00205 walks down the tree, pulling symbols from the input that
00206 match the path down the tree.
00207
00208 When a node does not have a child that matches the next
00209 character, we will have read the longest possible symbol
00210 prefix. This prefix may or may not be a valid symbol.
00211 Consider a tree that has had <code>=:~</code> added and has
00212 not had <code>=:</code> added. In this tree, of the three
00213 nodes that contain <code>=:~</code>, only the first and
00214 third contain complete symbols. If, say, the input contains
00215 <code>=:a</code>, the colon node will not have a child that
00216 matches the 'a' and so it will stop reading. The colon node
00217 has to "unread": it must push back its character, and ask
00218 its parent to unread. Unreading continues until it reaches
00219 an ancestor that represents a valid symbol.
00220 }
TZSymbolNode = class (TObject)
private
  { Backing fields of the properties declared below. }
  FCharacter: Char;
  FChildren: TZSymbolNodeArray;
  FValid: Boolean;
  FParent: TZSymbolNode;
protected
  { NOTE(review): the implementations of the methods below are not visible
    in this part of the unit. The remarks are derived from the names and
    from the class description and need to be confirmed. }

  { Presumably adds a chain of descendant nodes, one per character of Value. }
  procedure AddDescendantLine(const Value: string);
  { Presumably walks down the tree while the next stream character
    matches a child, and returns the deepest node that was reached. }
  function DeepestRead(Stream: TStream): TZSymbolNode;
  { Presumably returns the child for Value, creating it when missing. }
  function EnsureChildWithChar(Value: Char): TZSymbolNode;
  { Presumably returns the child for Value, or nil when there is none. }
  function FindChildWithChar(Value: Char): TZSymbolNode; virtual;
  { Presumably returns the descendant reached by following Value. }
  function FindDescendant(const Value: string): TZSymbolNode;
  { Presumably pushes characters back to the stream until a node that
    represents a valid symbol is reached, and returns that node. }
  function UnreadToValid(Stream: TStream): TZSymbolNode;

  { The child nodes of this node. }
  property Children: TZSymbolNodeArray read FChildren write FChildren;
  { The character of this node. }
  property Character: Char read FCharacter write FCharacter;
  { Presumably True when the path down to this node is a complete symbol. }
  property Valid: Boolean read FValid write FValid;
  { The parent of this node. }
  property Parent: TZSymbolNode read FParent write FParent;
public
  constructor Create(Parent: TZSymbolNode; Character: Char);
  destructor Destroy; override;

  { Presumably returns the text formed by the characters on the path
    from the root to this node - confirm. }
  function Ancestry: string; virtual;
end;
00245
{**
  This class is a special case of a <code>TZSymbolNode</code>. A
  <code>TZSymbolRootNode</code> object has no symbol of its
  own, but has children that represent all possible symbols.
  NOTE(review): the method bodies are not visible in this part of the
  unit; the member remarks below are derived from names and signatures.
}
TZSymbolRootNode = class (TZSymbolNode)
protected
  function FindChildWithChar(Value: Char): TZSymbolNode; override;
public
  constructor Create;

  { Presumably registers Value as a symbol in the tree. }
  procedure Add(const Value: string);
  function Ancestry: string; override;
  { Presumably returns the longest registered symbol that starts with
    FirstChar and continues with the characters read from Stream. }
  function NextSymbol(Stream: TStream; FirstChar: Char): string;
end;
00261
00262 {**
00263 The idea of a symbol is a character that stands on its
00264 own, such as an ampersand or a parenthesis. For example,
00265 when tokenizing the expression <code>(isReady)&
00266 (isWilling) </code>, a typical tokenizer would return 7
00267 tokens, including one for each parenthesis and one for
00268 the ampersand. Thus a series of symbols such as
00269 <code>)&( </code> becomes three tokens, while a series
00270 of letters such as <code>isReady</code> becomes a single
00271 word token.
00272 <p>
00273 Multi-character symbols are an exception to the rule
00274 that a symbol is a standalone character. For example, a
00275 tokenizer may want less-than-or-equals to tokenize as a
00276 single token. This class provides a method for
00277 establishing which multi-character symbols an object of
00278 this class should treat as single symbols. This allows,
00279 for example, <code>"cat <= dog"</code> to tokenize as
00280 three tokens, rather than splitting the less-than and
00281 equals symbols into separate tokens.
00282 <p>
00283 By default, this state recognizes the following multi-
00284 character symbols: <code>!=, :-, <=, >=</code>
00285 }
TZSymbolState = class (TZTokenizerState)
private
  { Backing field of the Symbols property. }
  FSymbols: TZSymbolRootNode;
protected
  { The root node of the tree of known symbols. }
  property Symbols: TZSymbolRootNode read FSymbols write FSymbols;
public
  constructor Create;
  destructor Destroy; override;

  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;
  { Presumably registers Value as a multi-character symbol; the
    implementation is not visible in this part of the unit - confirm. }
  procedure Add(const Value: string); virtual;
end;
00299
{**
  A whitespace state ignores whitespace (such as blanks
  and tabs), and returns the tokenizer's next token. By
  default, all characters from 0 to 32 are whitespace.
  NOTE(review): the constructor and NextToken are not visible in this
  part of the unit, so the statements above are carried over from the
  original description and need to be confirmed.
}
TZWhitespaceState = class (TZTokenizerState)
private
  { One flag per character code 0..255. }
  FWhitespaceChars: array[0..255] of Boolean;
public
  constructor Create;

  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;
  { Presumably sets the flags of the characters FromChar..ToChar to
    Enable - verify against the implementation. }
  procedure SetWhitespaceChars(FromChar: Char; ToChar: Char; Enable: Boolean);
end;
00315
00316 {**
00317 A wordState returns a word from a reader. Like other
00318 states, a tokenizer transfers the job of reading to this
00319 state, depending on an initial character. Thus, the
00320 tokenizer decides which characters may begin a word, and
00321 this state determines which characters may appear as a
00322 second or later character in a word. These are typically
00323 different sets of characters; in particular, it is typical
00324 for digits to appear as parts of a word, but not as the
00325 initial character of a word.
00326 <p>
00327 By default, the following characters may appear in a word.
00328 The method <code>setWordChars()</code> allows customizing
00329 this.
00330 <blockquote><pre>
00331 From To
00332 'a', 'z'
00333 'A', 'Z'
00334 '0', '9'
00335
00336 as well as: minus sign, underscore, and apostrophe.
00337 </pre></blockquote>
00338 }
TZWordState = class (TZTokenizerState)
private
  { One flag per character code 0..255. }
  FWordChars: array[0..255] of Boolean;
public
  constructor Create;

  function NextToken(Stream: TStream; FirstChar: Char;
    Tokenizer: TZTokenizer): TZToken; override;
  { Presumably sets the flags of the characters FromChar..ToChar to
    Enable; the implementation is not visible in this part of the
    unit - verify. }
  procedure SetWordChars(FromChar: Char; ToChar: Char; Enable: Boolean);
end;
00349
00350 {**
00351 A tokenizer divides a string into tokens. This class is
00352 highly customizable with regard to exactly how this division
00353 occurs, but it also has defaults that are suitable for many
languages. This class assumes that the character values read
from the string lie in the range 0-255. For example, the
character code of a capital A is 65, so
<code> WriteLn(Chr(65)); </code> prints out a
capital A.
00359 <p>
00360 The behavior of a tokenizer depends on its character state
00361 table. This table is an array of 256 <code>TokenizerState
00362 </code> states. The state table decides which state to
00363 enter upon reading a character from the input string.
00364 <p>
00365 For example, by default, upon reading an 'A', a tokenizer
00366 will enter a "word" state. This means the tokenizer will
00367 ask a <code>WordState</code> object to consume the 'A',
00368 along with the characters after the 'A' that form a word.
00369 The state's responsibility is to consume characters and
00370 return a complete token.
00371 <p>
00372 The default table sets a SymbolState for every character
00373 from 0 to 255, and then overrides this with:
00374 <blockquote><pre>
00375 From To State
00376 0 ' ' whitespaceState
00377 'a' 'z' wordState
00378 'A' 'Z' wordState
00379 160 255 wordState
00380 '0' '9' numberState
00381 '-' '-' numberState
00382 '.' '.' numberState
00383 '"' '"' quoteState
00384 '\'' '\'' quoteState
00385 '/' '/' slashState
00386 </pre></blockquote>
00387 In addition to allowing modification of the state table,
00388 this class makes each of the states above available. Some
00389 of these states are customizable. For example, wordState
00390 allows customization of what characters can be part of a
00391 word, after the first character.
00392 }
IZTokenizer = interface (IZInterface)
  ['{C7CF190B-C45B-4AB4-A406-5999643DF6A0}']

  { Tokenize a string buffer or a stream and return the result as a
    string list.
    NOTE(review): the implementations are not visible in this part of the
    unit; who owns the returned list and how token types are represented
    in it must be checked there. }
  function TokenizeBufferToList(const Buffer: string; Options: TZTokenOptions):
    TStrings;
  function TokenizeStreamToList(Stream: TStream; Options: TZTokenOptions):
    TStrings;

  { Tokenize a string buffer or a stream and return the tokens as a
    dynamic array. }
  function TokenizeBuffer(const Buffer: string; Options: TZTokenOptions):
    TZTokenDynArray;
  function TokenizeStream(Stream: TStream; Options: TZTokenOptions):
    TZTokenDynArray;

  { Accessors for the individual tokenizer states. }
  function GetCommentState: TZCommentState;
  function GetNumberState: TZNumberState;
  function GetQuoteState: TZQuoteState;
  function GetSymbolState: TZSymbolState;
  function GetWhitespaceState: TZWhitespaceState;
  function GetWordState: TZWordState;
end;
00413
{**
  Implements a default tokenizer object.
  The class keeps a table with one state slot per character code 0..255
  and one reference to each of the standard states. The standard states
  are reachable through the getter functions required by IZTokenizer and
  through read/write properties that access the fields directly.
  NOTE(review): constructor, destructor and method bodies are not visible
  in this part of the unit; which object frees the states, in particular
  after a state property has been reassigned, must be checked there.
}
TZTokenizer = class (TZAbstractObject, IZTokenizer)
private
  { State slot for each character code 0..255. }
  FCharacterStates: array[0..255] of TZTokenizerState;
  { Backing fields of the state properties declared below. }
  FCommentState: TZCommentState;
  FNumberState: TZNumberState;
  FQuoteState: TZQuoteState;
  FSymbolState: TZSymbolState;
  FWhitespaceState: TZWhitespaceState;
  FWordState: TZWordState;
public
  constructor Create;
  destructor Destroy; override;

  { Tokenize a string buffer or a stream into a string list. }
  function TokenizeBufferToList(const Buffer: string; Options: TZTokenOptions):
    TStrings;
  function TokenizeStreamToList(Stream: TStream; Options: TZTokenOptions):
    TStrings;

  { Tokenize a string buffer or a stream into a dynamic array of tokens. }
  function TokenizeBuffer(const Buffer: string; Options: TZTokenOptions):
    TZTokenDynArray;
  function TokenizeStream(Stream: TStream; Options: TZTokenOptions):
    TZTokenDynArray;

  { Read and modify the character state table. Presumably
    SetCharacterState assigns State to every character in
    FromChar..ToChar - verify against the implementation. }
  function GetCharacterState(StartChar: Char): TZTokenizerState;
  procedure SetCharacterState(FromChar, ToChar: Char; State: TZTokenizerState);

  { Accessors required by IZTokenizer. }
  function GetCommentState: TZCommentState;
  function GetNumberState: TZNumberState;
  function GetQuoteState: TZQuoteState;
  function GetSymbolState: TZSymbolState;
  function GetWhitespaceState: TZWhitespaceState;
  function GetWordState: TZWordState;

  { Direct access to the standard states. Writing a property only replaces
    the stored reference. }
  property CommentState: TZCommentState read FCommentState write FCommentState;
  property NumberState: TZNumberState read FNumberState write FNumberState;
  property QuoteState: TZQuoteState read FQuoteState write FQuoteState;
  property SymbolState: TZSymbolState read FSymbolState write FSymbolState;
  property WhitespaceState: TZWhitespaceState read FWhitespaceState
    write FWhitespaceState;
  property WordState: TZWordState read FWordState write FWordState;
end;
00456
00457 implementation
00458
00459 uses
00460 Math, ZCompatibility;
00461
00462 { TZNumberState }
00463
{**
  Reads a numeric token from a stream.
  A number is an optional leading minus sign, a run of digits and an
  optional decimal point followed by a further run of digits.
  @param Stream the stream that supplies the characters after FirstChar.
  @param FirstChar the character that was already read from the stream.
  @param Tokenizer the tokenizer in charge; its SymbolState receives a
    lone '-' or '.' when no digit was found.
  @return a ttInteger or ttFloat token; the token of the symbol state
    when no digit was read; a ttUnknown token when that state is not
    assigned.
}
function TZNumberState.NextToken(Stream: TStream; FirstChar: Char;
  Tokenizer: TZTokenizer): TZToken;
var
  BytesRead: Integer;
  SawMinus, SawDot, SawDigit: Boolean;
  Lexeme: string;

  { Appends FirstChar and the digits that follow it to Lexeme. On return
    FirstChar holds the first non-digit, or BytesRead is 0 because the
    stream ended. }
  procedure CollectDigits;
  begin
    while FirstChar in ['0'..'9'] do
    begin
      SawDigit := True;
      Lexeme := Lexeme + FirstChar;
      BytesRead := Stream.Read(FirstChar, 1);
      if BytesRead = 0 then
        Exit;
    end;
  end;

begin
  BytesRead := 0;
  SawDigit := False;
  Lexeme := '';

  { Integer part with its optional sign. }
  SawMinus := FirstChar = '-';
  if SawMinus then
  begin
    Lexeme := '-';
    BytesRead := Stream.Read(FirstChar, 1);
  end;
  CollectDigits;

  { Fractional part. }
  SawDot := FirstChar = '.';
  if SawDot then
  begin
    Lexeme := Lexeme + '.';
    BytesRead := Stream.Read(FirstChar, 1);
    if BytesRead > 0 then
      CollectDigits;
  end;

  { Give back the look-ahead character, if one was read. }
  Stream.Seek(-BytesRead, soFromCurrent);

  Result.Value := Lexeme;
  Result.TokenType := ttUnknown;

  if SawDigit then
  begin
    if SawDot then
      Result.TokenType := ttFloat
    else
      Result.TokenType := ttInteger;
    Exit;
  end;

  { No digit at all: the text is a symbol, not a number. }
  if SawMinus and SawDot then
    Stream.Seek(-1, soFromCurrent); { un-read the dot as well }

  if Tokenizer.SymbolState = nil then
    Exit;

  if SawMinus then
    Result := Tokenizer.SymbolState.NextToken(Stream, '-', Tokenizer)
  else if SawDot then
    Result := Tokenizer.SymbolState.NextToken(Stream, '.', Tokenizer);
end;
00548
00549 { TZQuoteState }
00550
{**
  Reads a quoted string token from a stream. Characters are collected
  until the character that opened the quote is read again. When the
  stream ends first, the opening character is appended as closing quote.

  @param Stream the stream that supplies the characters after FirstChar.
  @param FirstChar the quote character that was already read.
  @param Tokenizer the tokenizer in charge (not used here).
  @return a ttQuoted token whose value includes both quote characters.
}
function TZQuoteState.NextToken(Stream: TStream; FirstChar: Char;
  Tokenizer: TZTokenizer): TZToken;
var
  Current: Char;
begin
  Result.TokenType := ttQuoted;
  Result.Value := FirstChar;
  while True do
  begin
    { At the end of the stream pretend that the closing quote was read. }
    if Stream.Read(Current, 1) = 0 then
      Current := FirstChar;
    Result.Value := Result.Value + Current;
    if Current = FirstChar then
      Break;
  end;
end;
00574
{**
  Encodes a string value by enclosing it in quote characters.
  The value itself is copied unchanged.
  @param Value a string value to be encoded.
  @param QuoteChar the quote character to put before and after the value.
  @returns the encoded string.
}
function TZQuoteState.EncodeString(const Value: string; QuoteChar: Char): string;
begin
  Result := QuoteChar;
  Result := Result + Value;
  Result := Result + QuoteChar;
end;
00585
{**
  Decodes a string value by removing the enclosing quote characters.
  @param Value a string value to be decoded.
  @param QuoteChar the quote character expected at both ends of the value.
  @returns the text between the quotes, or Value unchanged when it is
    shorter than two characters or is not enclosed in QuoteChar.
}
function TZQuoteState.DecodeString(const Value: string; QuoteChar: Char): string;
var
  Len: Integer;
begin
  Result := Value;
  Len := Length(Value);
  if Len < 2 then
    Exit;
  if Value[1] <> QuoteChar then
    Exit;
  if Value[Len] <> QuoteChar then
    Exit;
  Result := Copy(Value, 2, Len - 2);
end;
00599
{ TZCommentState }
00601
{**
  Reads a comment that extends to the end of the current line.
  The initial character and all following characters up to, but not
  including, the next line break (#10 or #13) are collected. The line
  break itself is pushed back to the stream.

  @param Stream the stream that supplies the characters after FirstChar.
  @param FirstChar the character that was already read from the stream.
  @param Tokenizer the tokenizer in charge (not used here).
  @return a ttComment token with the collected text.
}
function TZCommentState.NextToken(Stream: TStream; FirstChar: Char;
  Tokenizer: TZTokenizer): TZToken;
var
  ReadChar: Char;
  ReadStr: string;
  AtLineBreak: Boolean;
begin
  ReadStr := FirstChar;

  { Remember explicitly whether a line break stopped the loop. Testing
    ReadChar after the loop would read an uninitialized variable when the
    very first read already hits the end of the stream. }
  AtLineBreak := False;
  while Stream.Read(ReadChar, 1) > 0 do
  begin
    if ReadChar in [#10, #13] then
    begin
      AtLineBreak := True;
      Break;
    end;
    ReadStr := ReadStr + ReadChar;
  end;

  { The line break is not part of the comment: give it back. }
  if AtLineBreak then
    Stream.Seek(-1, soFromCurrent);

  Result.TokenType := ttComment;
  Result.Value := ReadStr;
end;
00625
00626 { TZCppCommentState }
00627
{**
  Reads the remainder of a multi-line comment: everything up to and
  including the closing star and slash, or up to the end of the stream
  when the comment is not closed.
  @param Stream the stream to read from.
  @return the characters that were read.
}
function TZCppCommentState.GetMultiLineComment(Stream: TStream): string;
var
  Current, Previous: Char;
  Closed: Boolean;
begin
  Result := '';
  Previous := #0;
  repeat
    if Stream.Read(Current, 1) = 0 then
      Exit;
    Result := Result + Current;
    Closed := (Previous = '*') and (Current = '/');
    Previous := Current;
  until Closed;
end;
00647
{**
  Reads the remainder of a single-line comment: everything through the
  end of the current line. The line break character (#10 or #13) is part
  of the result; when it is directly followed by a second line break
  character, that one is included as well, otherwise the following
  character is pushed back to the stream.
  @param Stream the stream to read from.
  @return the characters that were read.
}
function TZCppCommentState.GetSingleLineComment(Stream: TStream): string;
var
  ReadChar: Char;
  SawLineBreak: Boolean;
begin
  Result := '';

  { Remember explicitly whether a line break stopped the loop. Testing
    ReadChar after the loop would read an uninitialized variable when the
    stream is already at its end. }
  SawLineBreak := False;
  while Stream.Read(ReadChar, 1) > 0 do
  begin
    Result := Result + ReadChar;
    if ReadChar in [#10, #13] then
    begin
      SawLineBreak := True;
      Break;
    end;
  end;

  if SawLineBreak then
  begin
    { Take the second half of a two-character line break, if present. }
    if Stream.Read(ReadChar, 1) > 0 then
    begin
      if ReadChar in [#10, #13] then
        Result := Result + ReadChar
      else
        Stream.Seek(-1, soFromCurrent);
    end;
  end;
end;
00672
00673 {**
00674 Either delegate to a comment-handling state, or return a
00675 token with just a slash in it.
00676
00677 @return either just a slash token, or the results of
00678 delegating to a comment-handling state
00679 }
00680 function TZCppCommentState.NextToken(Stream: TStream; FirstChar: Char;
00681 Tokenizer: TZTokenizer): TZToken;
00682 var
00683 ReadChar: Char;
00684 ReadNum: Integer;
00685 begin
00686 Result.TokenType := ttUnknown;
00687 Result.Value := FirstChar;
00688
00689 ReadNum := Stream.Read(ReadChar, 1);
00690 if (ReadNum > 0) and (ReadChar = '*') then
00691 begin
00692 Result.TokenType := ttComment;
00693 Result.Value := '
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712 .
00713 @return either just a slash token, or the results of
00714 delegating to a comment-handling state
00715 }
00716 function TZCCommentState.NextToken(Stream: TStream; FirstChar: Char;
00717 Tokenizer: TZTokenizer): TZToken;
00718 var
00719 ReadChar: Char;
00720 ReadNum: Integer;
00721 begin
00722 Result.TokenType := ttUnknown;
00723 Result.Value := FirstChar;
00724
00725 if FirstChar = '/' then
00726 begin
00727 ReadNum := Stream.Read(ReadChar, 1);
00728 if (ReadNum > 0) and (ReadChar = '*') then
00729 begin
00730 Result.TokenType := ttComment;
00731 Result.Value := '
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393