diff --git a/.svnignore b/.svnignore new file mode 100644 index 0000000..378eac2 --- /dev/null +++ b/.svnignore @@ -0,0 +1 @@ +build diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1035510 --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +THE SHANNON PROJECT LICENSE AGREEMENT AND DISCLAIMER + +The Shannon Project +Copyright (c) 2009-2010 Hovik Melikyan + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. diff --git a/SYNTAX b/SYNTAX new file mode 100644 index 0000000..27c1cbb --- /dev/null +++ b/SYNTAX @@ -0,0 +1,158 @@ + +# +# BNF for Shannon: +# repeat 0 or more times: {...} +# optional: [...] +# group: (...) +# OR: | +# literal: "..." or '...' 
+# any word: reference to another syntactic element +# glue: ## (otherwise elements are separated by whitespace or can be +# distinguished by the parser) +# is one or more new lines and/or semicolons +# Comments are like in C++ /* */ and // +# + + +# ---------- Statements + +program ::= { statement } + +statement ::= definition | var-def | sub-block | builtin | assignment | fifo-push | + if-block | case-block | while-block | for-block | 'break' | 'continue' | + return | delete | insert + +block ::= single-block | multi-block + +single-block ::= ':' [ ] statement + +multi-block ::= [ ] '{' [ ] { statement } '}' + +definition ::= 'def' [ type-expr ] ident { type-derivator } '=' const-expr + +var-def ::= 'var' [ type-expr ] ident { type-derivator } '=' expr + +sub-block ::= 'begin' block + +builtin ::= assertion | dump | exit + +assertion ::= 'assert' expr + +dump ::= 'dump' expr { ',' expr } + +exit ::= 'exit' expr + +assignment ::= designator [ assignment-opr expr ] + +assignment-opr ::= '=' | '+=' | '-=' | '*=' | '/=' | '%=' | '|=' + +fifo-push ::= expr '<<' expr { '<<' expr } + +if-block ::= 'if' expr block { 'elif' expr block } [ 'else' block ] + +case-block ::= 'case' expr '{' case-label { case-label } [ 'default' block ] '}' + +case-label ::= case-range { ',' case-range } block + +case-range ::= expr [ '..' expr ] + +while-block ::= 'while' expr block + +for-block ::= 'for' ident [ ',' ident ] '=' expr [ '..' expr ] block + +return ::= 'return' [ expr ] + +delete ::= 'del' designator + +insert ::= 'ins' designator '=' expr + + +# ---------- Const Expression + +const-expr ::= subrange-type | enum-type | expr + +subrange-type ::= expr '..' 
expr + +enum-type ::= '(' ident { ',' ident } ')' -- this is not correct + +type-expr ::= const-expr + + +# ---------- Expression + +expr ::= and-level { ( 'or' | 'xor' ) and-level } + +and-level ::= not-level { ( 'and' | 'shl' | 'shr' ) not-level } + +not-level ::= [ 'not' ] relation + +relation ::= arithm-expr [ 'in' in-expr | + ( '==' | '!=' | '<' | '<=' | '>' | '>=' ) arithm-expr ] + +in-expr ::= arithm-expr [ '..' arithm-expr ] + +arithm-expr ::= term { ( '+' | '-' ) term } + +term ::= cat-expr { ( '*' | '/' | '%' ) cat-expr } + +cat-expr ::= factor { '|' factor } + +factor ::= [ '-' ] designator [ '?' ] [ ( 'as' | 'is' ) type-expr ] + +designator ::= [ '@' ] atom { '.' ident | '[' index-expr ']' | + '(' [ actual-args ] ')' } + +atom ::= ident | number | string-literal | vec-ctor | dict-ctor | + fifo-ctor | range-ctor | if-func | typeof | type-spec | 'this' | '(' expr ')' + +vec-ctor ::= '[' [ expr { ',' expr } ] ']' + +dict-ctor ::= '{' [ dict-elem { ',' dict-elem } ] '}' + +dict-elem ::= expr [ '=' expr ] + +fifo-ctor ::= '<' [ expr { ',' expr } ] '>' + +range-ctor ::= '[' expr '..' expr ']' + +if-func ::= 'if' '(' expr ',' expr ',' expr ')' + +typeof ::= 'typeof' designator + +type-spec ::= ident { '*' type-derivator { type-derivator } } + +type-derivator ::= container-derivator | prototype-derivator | + state-derivator | '[..]' | '<>' + +container-derivator ::= '[' [ const-expr ] ']' + +prototype-derivator ::= formal-args + +state-derivator ::= formal-args multi-block + +formal-args ::= '(' formal-arg { ',' formal-arg } ')' + +formal-arg ::= type-expr [ ident ] [ '=' const-expr ] + +index-expr ::= expr [ '..' 
[ expr ] ] + +actual-args ::= expr { ',' [ expr ] } + + +# ---------- Basic elements + +ident ::= ( letter | "_" ) ## { letter | digit | "_" } + +number ::= decimal | hexadecimal + +decimal ::= digit ## { digit } + +hexadecimal ::= "0x" ## hexdigit ## { hexdigit } + +string-literal ::= "'" ## { string-element } ## "'" + +string-element ::= printable-char | string-escape + +string-escape ::= "\\" | "\t" | "\r" | "\n" | "\'" + | ( "\x" ## hexdigit ## hexdigit ) + diff --git a/Shannon.xcodeproj/.svnignore b/Shannon.xcodeproj/.svnignore new file mode 100644 index 0000000..0c1632b --- /dev/null +++ b/Shannon.xcodeproj/.svnignore @@ -0,0 +1 @@ +hovik.* diff --git a/Shannon.xcodeproj/MyXCode32.pbxkeys b/Shannon.xcodeproj/MyXCode32.pbxkeys new file mode 100644 index 0000000..7f7c5e4 --- /dev/null +++ b/Shannon.xcodeproj/MyXCode32.pbxkeys @@ -0,0 +1,530 @@ + + + + + menu + + $@A + buildAndAnalyze: + $@B + showBuildResults: + $@C + showClassBrowser: + $@D + openQuicklyAction: + $@E + toggleProjectWindowEditor: + $@F + showFindTab: + $@G + findPrevious: + $@H + toggleBuildBubblesShown: + $@J + refactor: + $@K + cleanBuild: + $@M + showBookmarksSmartGroup: + $@N + newProject: + $@R + showConsole: + $@S + saveDocumentAs: + $@W + closeCurrentFile: + $@Y + showDebugger: + $@Z + redo: + $ + stopGoSession: + @ + buildAndGo: + @+ + previousBuildResult: + @, + showPreferences: + @0 + showProject: + @: + showGuessPanel: + @; + checkSpelling: + @= + nextBuildResult: + @? 
+ showXcodeHelp: + @[ + PBX_nestLeft: + @\\ + toggleAddToBreakpoints: + @\^ + PBX_toggleShowsControlCharacters: + @] + PBX_nestRight: + @a + selectAll: + @c + copy: + @d + addToBookmarks: + @e + enterSelection: + @f + showIncrementalFindBar: + @g + findNext: + @h + hide: + @i + getInfo: + @j + centerSelectionInVisibleArea: + @k + compile: + @l + PBX_showGotoPanel: + @m + performMiniaturize: + @n + newFile: + @o + openDocument: + @p + printDocument: + @q + terminate: + @s + saveDocument: + @t + orderFrontFontPanel: + @u + revertDocumentToSaved: + @v + paste: + @w + performClose: + @x + cut: + @z + undo: + @{ + alignLeft: + @| + alignCenter: + @} + alignRight: + @ + jumpToSelectionDefinition: + ^$@B + sendToBack: + ^$@F + bringToFront: + ^$@L + unlock: + ^$@ + foldAllComments: + ^$@ + unfoldAllComments: + ^. + nextCompletion: + ^/ + completionPlaceholderSelect: + ^@+ + zoomIn: + ^@- + zoomOut: + ^@= + zoomToFit: + ^@? + showResearchAssistant: + ^@\\ + debugTaskToggleEnableBreakpoints: + ^@a + addAttribute: + ^@b + toggleModelBrowser: + ^@c + copyRuler: + ^@e + enterSelectionForReplace: + ^@f + showIncrementalFindAndReplaceBar: + ^@g + _layoutGraphicsUsingForceDirectedGraphLayouter: + ^@h + _layoutGraphicsUsingHierarchicalGraphLayouter: + ^@l + lock: + ^@n + newFileDocument: + ^@o + showBinderWindow: + ^@r + addRelationship: + ^@s + makeSnapshotFromMainMenu: + ^@t + toggleTokenizedEditing: + ^@v + pasteRuler: + ^@w + closeProject: + ^@ + foldAllMethods: + ^@ + unfoldAllMethods: + ^@ + fold: + ^@ + unfold: + ^@ + debugTaskShowHUD: + ^u + unfoldAll: + ^~@f + toggleCodeFocus: + ^~@r + debugTaskClearLogs: + ^~@ + debugTaskNextThread: + ^~@ + debugTaskPreviousThread: + ^ + debugTaskPause: + ^ + debugTaskStepOut: + ~ + complete: + ~$@D + openThisQuicklyAction: + ~$@E + toggleProjectWindowEditorFully: + ~$@G + ungroup: + ~$@I + debugTaskStepInstruction: + ~$@O + debugTaskStepOverInstruction: + ~$@S + saveDocumentTo: + ~$@T + revealCurrentFileInGroupTree: + ~$@V + 
pasteAsPlainText: + ~$@ + previousFile: + ~$@ + nextFile: + ~@? + showDocumentationWindow: + ~@\\ + toggleEnableBreakpoint: + ~@a + addFiles: + ~@b + showBreakpoints: + ~@c + copyFont: + ~@e + editActiveTarget: + ~@f + showDetail: + ~@g + group: + ~@h + hideOtherApplications: + ~@i + showInspector: + ~@m + miniaturizeAll: + ~@n + newGroup: + ~@o + openInSeparateNavigator: + ~@r + toggleGoBreakpointsOff: + ~@s + saveAllDocuments: + ~@t + orderFrontCharacterPalette: + ~@v + pasteFont: + ~@w + closeAll: + ~@x + editActiveExecutable: + ~@y + toggleGoBreakpointsOn: + ~@ + switchToCounterpartOfCurrentFile: + ~@ + previousBookmark: + ~@ + nextBookmark: + ~ + buildAndGoBreakpointsOff: + + buildAndGoBreakpointsOn: + + debugTaskStepInto: + + debugTaskStepOver: + + toggleBuild: + + text + +  + insertNewline: +  + deleteBackward: + + insertTab: + + + insertNewline: + + insertNewline: +  + insertBacktab: + $@ + moveToLeftEndOfLineAndModifySelection: + $@ + moveToRightEndOfLineAndModifySelection: + $ + moveUpAndModifySelection: + $ + moveDownAndModifySelection: + $ + moveLeftAndModifySelection: + $ + moveRightAndModifySelection: + $ + moveToBeginningOfLineAndModifySelection: + $ + moveToEndOfLineAndModifySelection: + $ + pageUpAndModifySelection: + $ + pageDownAndModifySelection: + @" + splitCurrentNavigator: + @' + closeCurrentNavigator: + @ + deleteToBeginningOfLine: + @ + moveToLeftEndOfLine: + @ + moveToRightEndOfLine: + ^ + insertLineBreak: + ^ + selectNextKeyView: + ^ + + insertLineBreak: + ^ + insertLineBreak: + ^ + selectPreviousKeyView: + ^" + insertDoubleQuoteIgnoringSubstitution: + ^$A + moveToBeginningOfParagraphAndModifySelection: + ^$B + moveBackwardAndModifySelection: + ^$E + moveToEndOfParagraphAndModifySelection: + ^$F + moveForwardAndModifySelection: + ^$N + moveDownAndModifySelection: + ^$P + moveUpAndModifySelection: + ^$V + pageDownAndModifySelection: + ^$ + moveSubWordBackwardAndModifySelection: + ^$ + 
moveSubWordForwardAndModifySelection: + ^$ + moveToBeginningOfDocumentAndModifySelection: + ^$ + moveToEndOfDocumentAndModifySelection: + ^' + insertSingleQuoteIgnoringSubstitution: + ^/ + insertRightToLeftSlash: + ^1 + popLoadedFilesPopUp: + ^2 + popSymbolsPopUp: + ^3 + popIncludedHeadersPopUp: + ^4 + popBookmarksPopUp: + ^5 + popBreakpointsPopUp: + ^6 + popNavigatorPopUp: + ^> + previousCompletion: + ^? + completionPlaceholderSelectPrevious: + ^@ + makeBaseWritingDirectionNatural: + ^@ + makeBaseWritingDirectionRightToLeft: + ^@ + makeBaseWritingDirectionLeftToRight: + ^\ + setMark: + ^\@ + setMark: + ^a + moveToBeginningOfParagraph: + ^b + moveBackward: + ^c + PBX_interrupt: + ^d + deleteForward: + ^e + moveToEndOfParagraph: + ^f + moveForward: + ^h + deleteBackward: + ^i + indentSelection: + ^k + deleteToEndOfParagraph: + ^l + centerSelectionInVisibleArea: + ^n + moveDown: + ^p + moveUp: + ^r + PBX_executeSelectionInsertingOutput: + ^t + transpose: + ^v + pageDown: + ^w + deleteToMark: + ^x + + ^m + selectToMark: + ^x + swapWithMark: + + ^y + yank: + ^~$B + moveWordBackwardAndModifySelection: + ^~$F + moveWordForwardAndModifySelection: + ^~$r + PBX_executeSelectionAlertOutput: + ^~1 + popLoadedFilesPopUp: + ^~2 + popSymbolsPopUp: + ^~@ + makeTextWritingDirectionNatural: + ^~@ + makeTextWritingDirectionRightToLeft: + ^~@ + makeTextWritingDirectionLeftToRight: + ^~b + moveWordBackward: + ^~f + moveWordForward: + ^~r + PBX_executeSelectionReplacingOutput: + ^~ + deleteWordBackward: + ^ + deleteSubWordBackward: + ^ + scrollLineUp: + ^ + scrollLineDown: + ^ + moveSubWordBackward: + ^ + moveSubWordForward: + ^ + deleteSubWordForward: + ^ + moveToBeginningOfDocument: + ^ + moveToEndOfDocument: + ^ + scrollPageUp: + ^ + scrollPageDown: + ~ + insertNewlineIgnoringFieldEditor: + ~ + deleteWordBackward: + ~ + insertTabIgnoringFieldEditor: + ~ + + insertNewlineIgnoringFieldEditor: + ~ + insertNewlineIgnoringFieldEditor: + ~$ + 
moveParagraphBackwardAndModifySelection: + ~$ + moveParagraphForwardAndModifySelection: + ~$ + moveWordLeftAndModifySelection: + ~$ + moveWordRightAndModifySelection: + ~@" + splitCurrentNavigatorHorizontally: + ~ + deleteWordBackward: + ~ + moveWordLeft: + ~ + moveWordRight: + ~ + deleteWordForward: + ~ + pageUp: + ~ + pageDown: +  + deleteBackward: + + moveUp: + + moveDown: + + moveLeft: + + moveRight: + + deleteForward: + + moveToBeginningOfLine: + + moveToEndOfLine: + + pageUp: + + pageDown: + + delete: + + + diff --git a/Shannon.xcodeproj/project.pbxproj b/Shannon.xcodeproj/project.pbxproj new file mode 100644 index 0000000..064e940 --- /dev/null +++ b/Shannon.xcodeproj/project.pbxproj @@ -0,0 +1,459 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 363C70F70EF70F4400866D2C /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 363C70ED0EF70F4400866D2C /* main.cpp */; }; + 363C710D0EF70FBE00866D2C /* main-ut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 363C710C0EF70FBE00866D2C /* main-ut.cpp */; }; + 3646492C11CF89A500853339 /* vminfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3646492B11CF89A500853339 /* vminfo.cpp */; }; + 3646492D11CF89A500853339 /* vminfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3646492B11CF89A500853339 /* vminfo.cpp */; }; + 365B4A71100283E40023D0FD /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 365B4A70100283E40023D0FD /* common.cpp */; }; + 365B4A72100283E40023D0FD /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 365B4A70100283E40023D0FD /* common.cpp */; }; + 3698665C12A322D50028FB7A /* sysmodule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3698665A12A322D50028FB7A /* sysmodule.cpp */; }; + 3698665D12A322D50028FB7A /* sysmodule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3698665A12A322D50028FB7A /* sysmodule.cpp */; }; + 369DE917119447A600BBD287 /* runtime.cpp in Sources */ = 
{isa = PBXBuildFile; fileRef = 369DE915119447A600BBD287 /* runtime.cpp */; }; + 369DE918119447A600BBD287 /* runtime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 369DE915119447A600BBD287 /* runtime.cpp */; }; + 36A45F4F11D68D3E0010EBAC /* compexpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36A45F4E11D68D3E0010EBAC /* compexpr.cpp */; }; + 36A45F5011D68D3E0010EBAC /* compexpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36A45F4E11D68D3E0010EBAC /* compexpr.cpp */; }; + 36B6ACEC1195E0A3001EF4C0 /* rtio.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6ACEB1195E0A3001EF4C0 /* rtio.cpp */; }; + 36B6ACED1195E0A3001EF4C0 /* rtio.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6ACEB1195E0A3001EF4C0 /* rtio.cpp */; }; + 36B6AD201195F0C7001EF4C0 /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD1E1195F0C7001EF4C0 /* parser.cpp */; }; + 36B6AD211195F0C7001EF4C0 /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD1E1195F0C7001EF4C0 /* parser.cpp */; }; + 36B6AD3C1195F863001EF4C0 /* typesys.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD3A1195F863001EF4C0 /* typesys.cpp */; }; + 36B6AD3D1195F863001EF4C0 /* typesys.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD3A1195F863001EF4C0 /* typesys.cpp */; }; + 36B6AD4811960432001EF4C0 /* vm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD4511960432001EF4C0 /* vm.cpp */; }; + 36B6AD4911960432001EF4C0 /* vmcodegen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD4711960432001EF4C0 /* vmcodegen.cpp */; }; + 36B6AD4A11960432001EF4C0 /* vm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD4511960432001EF4C0 /* vm.cpp */; }; + 36B6AD4B11960432001EF4C0 /* vmcodegen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD4711960432001EF4C0 /* vmcodegen.cpp */; }; + 36B6AD551196061B001EF4C0 /* compiler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD531196061B001EF4C0 /* compiler.cpp */; }; + 36B6AD561196061B001EF4C0 /* 
compiler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 36B6AD531196061B001EF4C0 /* compiler.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 363C71060EF70F6000866D2C /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + 8DD76F690486A84900D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 3609125811CA87BB0091B8AA /* test.shn */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = test.shn; path = src/tests/test.shn; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.shannon; }; + 361AF9621000E38D00E12EAE /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = common.h; path = src/common.h; sourceTree = ""; }; + 3626BA9E0EFAE7E2005644AF /* SYNTAX */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = SYNTAX; sourceTree = ""; }; + 362E29FD0F0D6C58005E05B4 /* TODO */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = TODO; path = src/TODO; sourceTree = ""; }; + 363C70ED0EF70F4400866D2C /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = src/main.cpp; sourceTree = ""; }; + 363C710A0EF70F6000866D2C /* Shannon */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Shannon; sourceTree = BUILT_PRODUCTS_DIR; }; + 363C710C0EF70FBE00866D2C /* main-ut.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "main-ut.cpp"; path = "src/main-ut.cpp"; sourceTree = 
""; }; + 3646492B11CF89A500853339 /* vminfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vminfo.cpp; path = src/vminfo.cpp; sourceTree = ""; }; + 36510CA511C9731500071AB4 /* shannon.syntax */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = shannon.syntax; path = src/shannon.syntax; sourceTree = ""; }; + 365B4A70100283E40023D0FD /* common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = common.cpp; path = src/common.cpp; sourceTree = ""; }; + 3698665A12A322D50028FB7A /* sysmodule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sysmodule.cpp; path = src/sysmodule.cpp; sourceTree = ""; }; + 3698665B12A322D50028FB7A /* sysmodule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sysmodule.h; path = src/sysmodule.h; sourceTree = ""; }; + 369DE915119447A600BBD287 /* runtime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = runtime.cpp; path = src/runtime.cpp; sourceTree = ""; }; + 369DE916119447A600BBD287 /* runtime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = runtime.h; path = src/runtime.h; sourceTree = ""; }; + 36A45F4E11D68D3E0010EBAC /* compexpr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compexpr.cpp; path = src/compexpr.cpp; sourceTree = ""; }; + 36A4D54012A86DA900B5D16C /* Shannon.xclangspec */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xclangspec; name = Shannon.xclangspec; path = src/Shannon.xclangspec; sourceTree = ""; }; + 36B6ACEB1195E0A3001EF4C0 /* rtio.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rtio.cpp; path = src/rtio.cpp; sourceTree = ""; }; + 36B6AD1E1195F0C7001EF4C0 /* parser.cpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parser.cpp; path = src/parser.cpp; sourceTree = ""; }; + 36B6AD1F1195F0C7001EF4C0 /* parser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = parser.h; path = src/parser.h; sourceTree = ""; }; + 36B6AD3A1195F863001EF4C0 /* typesys.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = typesys.cpp; path = src/typesys.cpp; sourceTree = ""; }; + 36B6AD3B1195F863001EF4C0 /* typesys.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = typesys.h; path = src/typesys.h; sourceTree = ""; }; + 36B6AD4511960432001EF4C0 /* vm.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vm.cpp; path = src/vm.cpp; sourceTree = ""; }; + 36B6AD4611960432001EF4C0 /* vm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = vm.h; path = src/vm.h; sourceTree = ""; }; + 36B6AD4711960432001EF4C0 /* vmcodegen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vmcodegen.cpp; path = src/vmcodegen.cpp; sourceTree = ""; }; + 36B6AD531196061B001EF4C0 /* compiler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compiler.cpp; path = src/compiler.cpp; sourceTree = ""; }; + 36B6AD541196061B001EF4C0 /* compiler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compiler.h; path = src/compiler.h; sourceTree = ""; }; + 36C8E962100EA94D00117341 /* version.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = version.h; path = src/version.h; sourceTree = ""; }; + 8DD76F6C0486A84900D96B5E /* Shannon */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Shannon; sourceTree = BUILT_PRODUCTS_DIR; 
}; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 363C71050EF70F6000866D2C /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 8DD76F660486A84900D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* Shannon */ = { + isa = PBXGroup; + children = ( + 08FB7795FE84155DC02AAC07 /* Source */, + C6859E8C029090F304C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = Shannon; + sourceTree = ""; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 36C8E962100EA94D00117341 /* version.h */, + 361AF9621000E38D00E12EAE /* common.h */, + 365B4A70100283E40023D0FD /* common.cpp */, + 369DE916119447A600BBD287 /* runtime.h */, + 369DE915119447A600BBD287 /* runtime.cpp */, + 36B6ACEB1195E0A3001EF4C0 /* rtio.cpp */, + 36B6AD1F1195F0C7001EF4C0 /* parser.h */, + 36B6AD1E1195F0C7001EF4C0 /* parser.cpp */, + 36B6AD3B1195F863001EF4C0 /* typesys.h */, + 36B6AD3A1195F863001EF4C0 /* typesys.cpp */, + 3698665B12A322D50028FB7A /* sysmodule.h */, + 3698665A12A322D50028FB7A /* sysmodule.cpp */, + 36B6AD4611960432001EF4C0 /* vm.h */, + 36B6AD4511960432001EF4C0 /* vm.cpp */, + 36B6AD4711960432001EF4C0 /* vmcodegen.cpp */, + 3646492B11CF89A500853339 /* vminfo.cpp */, + 36B6AD541196061B001EF4C0 /* compiler.h */, + 36A45F4E11D68D3E0010EBAC /* compexpr.cpp */, + 36B6AD531196061B001EF4C0 /* compiler.cpp */, + 363C710C0EF70FBE00866D2C /* main-ut.cpp */, + 363C70ED0EF70F4400866D2C /* main.cpp */, + ); + name = Source; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76F6C0486A84900D96B5E /* Shannon */, + 363C710A0EF70F6000866D2C /* Shannon */, + 
); + name = Products; + sourceTree = ""; + }; + C6859E8C029090F304C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + 362E29FD0F0D6C58005E05B4 /* TODO */, + 3626BA9E0EFAE7E2005644AF /* SYNTAX */, + 36510CA511C9731500071AB4 /* shannon.syntax */, + 36A4D54012A86DA900B5D16C /* Shannon.xclangspec */, + 3609125811CA87BB0091B8AA /* test.shn */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 363C70FC0EF70F6000866D2C /* Shannon Unit Tests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 363C71070EF70F6000866D2C /* Build configuration list for PBXNativeTarget "Shannon Unit Tests" */; + buildPhases = ( + 363C70FD0EF70F6000866D2C /* Sources */, + 363C71050EF70F6000866D2C /* Frameworks */, + 363C71060EF70F6000866D2C /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "Shannon Unit Tests"; + productInstallPath = "$(HOME)/bin"; + productName = Shannon; + productReference = 363C710A0EF70F6000866D2C /* Shannon */; + productType = "com.apple.product-type.tool"; + }; + 8DD76F620486A84900D96B5E /* Shannon */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "Shannon" */; + buildPhases = ( + 8DD76F640486A84900D96B5E /* Sources */, + 8DD76F660486A84900D96B5E /* Frameworks */, + 8DD76F690486A84900D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = Shannon; + productInstallPath = "$(HOME)/bin"; + productName = Shannon; + productReference = 8DD76F6C0486A84900D96B5E /* Shannon */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "Shannon" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + knownRegions 
= ( + English, + Japanese, + French, + German, + ); + mainGroup = 08FB7794FE84155DC02AAC07 /* Shannon */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8DD76F620486A84900D96B5E /* Shannon */, + 363C70FC0EF70F6000866D2C /* Shannon Unit Tests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 363C70FD0EF70F6000866D2C /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 363C710D0EF70FBE00866D2C /* main-ut.cpp in Sources */, + 365B4A72100283E40023D0FD /* common.cpp in Sources */, + 369DE917119447A600BBD287 /* runtime.cpp in Sources */, + 36B6ACEC1195E0A3001EF4C0 /* rtio.cpp in Sources */, + 36B6AD201195F0C7001EF4C0 /* parser.cpp in Sources */, + 36B6AD3C1195F863001EF4C0 /* typesys.cpp in Sources */, + 36B6AD4811960432001EF4C0 /* vm.cpp in Sources */, + 36B6AD4911960432001EF4C0 /* vmcodegen.cpp in Sources */, + 36B6AD551196061B001EF4C0 /* compiler.cpp in Sources */, + 3646492C11CF89A500853339 /* vminfo.cpp in Sources */, + 36A45F4F11D68D3E0010EBAC /* compexpr.cpp in Sources */, + 3698665C12A322D50028FB7A /* sysmodule.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 8DD76F640486A84900D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 363C70F70EF70F4400866D2C /* main.cpp in Sources */, + 365B4A71100283E40023D0FD /* common.cpp in Sources */, + 369DE918119447A600BBD287 /* runtime.cpp in Sources */, + 36B6ACED1195E0A3001EF4C0 /* rtio.cpp in Sources */, + 36B6AD211195F0C7001EF4C0 /* parser.cpp in Sources */, + 36B6AD3D1195F863001EF4C0 /* typesys.cpp in Sources */, + 36B6AD4A11960432001EF4C0 /* vm.cpp in Sources */, + 36B6AD4B11960432001EF4C0 /* vmcodegen.cpp in Sources */, + 36B6AD561196061B001EF4C0 /* compiler.cpp in Sources */, + 3646492D11CF89A500853339 /* vminfo.cpp in Sources */, + 36A45F5011D68D3E0010EBAC /* compexpr.cpp in Sources */, + 3698665D12A322D50028FB7A /* sysmodule.cpp in Sources */, + ); + 
runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1DEB923208733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = Shannon; + ZERO_LINK = YES; + }; + name = Debug; + }; + 1DEB923308733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = Shannon; + }; + name = Release; + }; + 1DEB923608733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(NATIVE_ARCH)"; + GCC_ENABLE_CPP_RTTI = YES; + GCC_ENABLE_OBJC_EXCEPTIONS = NO; + GCC_OPTIMIZATION_LEVEL = 2; + GCC_PREPROCESSOR_DEFINITIONS = ""; + GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES; + GCC_TREAT_NONCONFORMANT_CODE_ERRORS_AS_WARNINGS = YES; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; + GCC_VERSION = 4.2; + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; + GCC_WARN_ABOUT_GLOBAL_CONSTRUCTORS = NO; + GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_CHECK_SWITCH_STATEMENTS = YES; + GCC_WARN_EFFECTIVE_CPLUSPLUS_VIOLATIONS = NO; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = YES; + GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; + GCC_WARN_INHIBIT_ALL_WARNINGS = NO; + GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES; + GCC_WARN_MISSING_PARENTHESES = YES; + GCC_WARN_MULTIPLE_DEFINITION_TYPES_FOR_SELECTOR = YES; + GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; + GCC_WARN_PEDANTIC = NO; + GCC_WARN_PROTOTYPE_CONVERSION = YES; + GCC_WARN_SHADOW = YES; + GCC_WARN_SIGN_COMPARE = 
YES; + GCC_WARN_STRICT_SELECTOR_MATCH = YES; + GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_LABEL = YES; + GCC_WARN_UNUSED_PARAMETER = YES; + GCC_WARN_UNUSED_VALUE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + OTHER_CFLAGS = ""; + OTHER_CPLUSPLUSFLAGS = ( + "-DDEBUG", + "-DXCODE", + ); + PREBINDING = NO; + SDKROOT = macosx10.5; + SKIP_INSTALL = YES; + STANDARD_C_PLUS_PLUS_LIBRARY_TYPE = dynamic; + WARNING_CFLAGS = ""; + }; + name = Debug; + }; + 1DEB923708733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(NATIVE_ARCH)"; + GCC_ENABLE_CPP_RTTI = YES; + GCC_ENABLE_OBJC_EXCEPTIONS = NO; + GCC_PREPROCESSOR_DEFINITIONS = ""; + GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES; + GCC_TREAT_NONCONFORMANT_CODE_ERRORS_AS_WARNINGS = YES; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; + GCC_VERSION = 4.2; + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; + GCC_WARN_ABOUT_GLOBAL_CONSTRUCTORS = NO; + GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_CHECK_SWITCH_STATEMENTS = YES; + GCC_WARN_EFFECTIVE_CPLUSPLUS_VIOLATIONS = NO; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = YES; + GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; + GCC_WARN_INHIBIT_ALL_WARNINGS = NO; + GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES; + GCC_WARN_MISSING_PARENTHESES = YES; + GCC_WARN_MULTIPLE_DEFINITION_TYPES_FOR_SELECTOR = YES; + GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; + GCC_WARN_PEDANTIC = NO; + GCC_WARN_PROTOTYPE_CONVERSION = YES; + GCC_WARN_SHADOW = YES; + GCC_WARN_SIGN_COMPARE = YES; + GCC_WARN_STRICT_SELECTOR_MATCH = YES; + GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNKNOWN_PRAGMAS = YES; + GCC_WARN_UNUSED_FUNCTION 
= YES; + GCC_WARN_UNUSED_LABEL = YES; + GCC_WARN_UNUSED_PARAMETER = YES; + GCC_WARN_UNUSED_VALUE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + OTHER_CFLAGS = ""; + OTHER_CPLUSPLUSFLAGS = ( + "-DNDEBUG", + "-DXCODE", + ); + PREBINDING = NO; + SDKROOT = macosx10.5; + SKIP_INSTALL = YES; + STANDARD_C_PLUS_PLUS_LIBRARY_TYPE = dynamic; + }; + name = Release; + }; + 363C71080EF70F6000866D2C /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = Shannon; + ZERO_LINK = YES; + }; + name = Debug; + }; + 363C71090EF70F6000866D2C /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = Shannon; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "Shannon" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923208733DC60010E9CD /* Debug */, + 1DEB923308733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "Shannon" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923608733DC60010E9CD /* Debug */, + 1DEB923708733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 363C71070EF70F6000866D2C /* Build configuration list for PBXNativeTarget "Shannon Unit Tests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 363C71080EF70F6000866D2C /* Debug */, + 363C71090EF70F6000866D2C /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End 
#
# Build rules for the Shannon compiler/VM.
#
#   shannon     debug build of the main binary
#   shannon-ut  debug build of the unit-test binary
#   shn         optimized, stripped release binary
#
# Optional knobs (uncomment or pass on the command line):
# ARCH = -arch i386
# ARCH = -arch x86_64
# SHBITS = -DSHN_64
# SHTHR = -DSHN_THR

CXXDOPTS = $(ARCH) $(SHBITS) $(SHTHR) -Wall -Wextra -Werror -DDEBUG -g
CXXROPTS = $(ARCH) $(SHBITS) $(SHTHR) -Wall -Wextra -Werror -Wno-strict-aliasing -DNDEBUG -O2
LDLIBS = -ldl

DOBJS = debug/common.o debug/runtime.o debug/rtio.o \
    debug/parser.o debug/typesys.o debug/vm.o debug/vmcodegen.o \
    debug/vminfo.o debug/compexpr.o debug/compiler.o \
    debug/sysmodule.o

ROBJS = release/common.o release/runtime.o release/rtio.o \
    release/parser.o release/typesys.o release/vm.o release/vmcodegen.o \
    release/vminfo.o release/compexpr.o release/compiler.o \
    release/sysmodule.o

SRCS = common.cpp runtime.cpp rtio.cpp \
    parser.cpp typesys.cpp vm.cpp vmcodegen.cpp \
    vminfo.cpp compexpr.cpp compiler.cpp \
    sysmodule.cpp \
    main.cpp main-ut.cpp

HDRS = version.h common.h runtime.h typesys.h parser.h vm.h compiler.h sysmodule.h


all: dirs shannon shannon-ut release

# None of these names a real output file ("release" is also a directory).
.PHONY: all release ut unit-test dirs depend clean

release: dirs shn

# Propagate a unit-test failure to make's exit status so that scripts and
# CI runs notice it; previously the failure was only echoed.
ut unit-test: dirs shannon-ut
	@echo
	@if ./shannon-ut ; then echo "Unit tests succeeded." ; else echo "***** Unit tests failed *****" ; echo ; exit 1 ; fi
	@echo

dirs:
	@mkdir -p debug release

depend: Makefile.dep.debug Makefile.dep.release


Makefile.dep.debug: $(SRCS) $(HDRS)
	@touch $@
	@makedepend -pdebug/ -f$@ -Y $(SRCS) 2>/dev/null
	@if diff $@.bak $@ > /dev/null ; then echo "Dependencies unchanged" ; else echo "****** DEBUG dependencies have changed" ; fi
	@rm -f $@.bak

include Makefile.dep.debug


Makefile.dep.release: $(SRCS) $(HDRS)
	@touch $@
	@makedepend -prelease/ -f$@ -Y $(SRCS) 2>/dev/null
	@if diff $@.bak $@ > /dev/null ; then echo "Dependencies unchanged" ; else echo "****** RELEASE dependencies have changed" ; fi
	@rm -f $@.bak

include Makefile.dep.release


# Libraries go after the objects that reference them: linkers resolve
# symbols left to right, so a leading -ldl can be dropped as unused.
shannon: $(DOBJS) debug/main.o
	$(CXX) $(CXXDOPTS) $^ $(LDLIBS) -o $@

shannon-ut: $(DOBJS) debug/main-ut.o
	$(CXX) $(CXXDOPTS) $^ $(LDLIBS) -o $@

debug/%.o: %.cpp Makefile
	$(CXX) $(CXXDOPTS) -c $< -o $@

shn: $(ROBJS) release/main.o
	$(CXX) $(CXXROPTS) $^ $(LDLIBS) -o $@
	@strip $@

release/%.o: %.cpp Makefile
	$(CXX) $(CXXROPTS) -c $< -o $@

# Assembly listing of a single source file, built with the release options.
%.s: %.cpp Makefile
	$(CXX) $(CXXROPTS) -S -fverbose-asm $< -o $@

clean:
	@rm -f $(DOBJS) debug/main.o debug/main-ut.o
	@rm -f $(ROBJS) release/main.o
	@rm -f shannon shannon-ut shn
	@rm -f core *.core
+debug/compexpr.o: vm.h common.h version.h runtime.h parser.h typesys.h +debug/compexpr.o: compiler.h +debug/compiler.o: vm.h common.h version.h runtime.h parser.h typesys.h +debug/compiler.o: compiler.h +debug/sysmodule.o: sysmodule.h runtime.h common.h version.h typesys.h vm.h +debug/sysmodule.o: parser.h compiler.h +debug/main.o: common.h version.h runtime.h parser.h typesys.h vm.h compiler.h +debug/main-ut.o: common.h version.h runtime.h parser.h typesys.h vm.h +debug/main-ut.o: compiler.h diff --git a/src/Makefile.dep.release b/src/Makefile.dep.release new file mode 100644 index 0000000..201e4af --- /dev/null +++ b/src/Makefile.dep.release @@ -0,0 +1,21 @@ +# DO NOT DELETE + +release/common.o: common.h version.h +release/runtime.o: runtime.h common.h version.h typesys.h +release/rtio.o: runtime.h common.h version.h +release/parser.o: parser.h common.h version.h runtime.h +release/typesys.o: sysmodule.h runtime.h common.h version.h typesys.h vm.h +release/typesys.o: parser.h +release/vm.o: vm.h common.h version.h runtime.h parser.h typesys.h compiler.h +release/vmcodegen.o: vm.h common.h version.h runtime.h parser.h typesys.h +release/vminfo.o: vm.h common.h version.h runtime.h parser.h typesys.h +release/compexpr.o: vm.h common.h version.h runtime.h parser.h typesys.h +release/compexpr.o: compiler.h +release/compiler.o: vm.h common.h version.h runtime.h parser.h typesys.h +release/compiler.o: compiler.h +release/sysmodule.o: sysmodule.h runtime.h common.h version.h typesys.h vm.h +release/sysmodule.o: parser.h compiler.h +release/main.o: common.h version.h runtime.h parser.h typesys.h vm.h +release/main.o: compiler.h +release/main-ut.o: common.h version.h runtime.h parser.h typesys.h vm.h +release/main-ut.o: compiler.h diff --git a/src/Shannon.xclangspec b/src/Shannon.xclangspec new file mode 100644 index 0000000..0c3c8c2 --- /dev/null +++ b/src/Shannon.xclangspec @@ -0,0 +1,160 @@ +// Syntax coloring for XCode +( + { + Identifier = 
"xcode.lang.shannon.identifier"; + Syntax = { + StartChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"; + Chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; + Words = ( + and, + as, + assert, + begin, + break, + case, + class, + const, + continue, + def, + default, + del, + dump, + elif, + else, + exit, + for, + if, + in, + ins, + is, + not, + or, + return, + shl, + shr, + switch, + this, + typeof, + var, + while, + xor, + ); + Type = "xcode.syntax.keyword"; + AltType = "xcode.syntax.identifier"; + }; + }, + { + Identifier = "xcode.lang.shannon"; + Description = "Shannon Coloring"; + BasedOn = "xcode.lang.simpleColoring"; + IncludeInMenu = YES; + Name = Shannon; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + IncludeRules = ( + "xcode.lang.shannon.function", + "xcode.lang.shannon.block", + "xcode.lang.shannon.bracketexpr", + "xcode.lang.shannon.parenexpr", + ); + Type = "xcode.syntax.plain"; + }; + }, + { + Identifier = "xcode.lang.shannon.lexer"; + Syntax = { + IncludeRules = ( + "xcode.lang.comment", + "xcode.lang.comment.singleline", + "xcode.lang.string", + "xcode.lang.character", + "xcode.lang.shannon.identifier", + "xcode.lang.number", + ); + }; + }, + { + Identifier = "xcode.lang.shannon.function"; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + Rules = ( + "xcode.lang.shannon.function.declarator", + "xcode.lang.shannon.block", + ); + Type = "xcode.syntax.definition.function"; + }; + }, + { + Identifier = "xcode.lang.shannon.function.declarator"; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + Rules = ( + def, + "xcode.lang.shannon.identifier", + "xcode.lang.shannon.function.name", + "xcode.lang.shannon.parenexpr", + ); + }; + }, + { + Identifier = "xcode.lang.shannon.function.name"; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + Rules = ( + "xcode.lang.shannon.identifier", + "xcode.lang.shannon.function.name.more*", + ); + Type = "xcode.syntax.name.partial"; + }; + }, + { + 
Identifier = "xcode.lang.shannon.function.name.more"; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + Rules = ( + ".", + "xcode.lang.shannon.identifier", + ); + Type = "xcode.syntax.name.partial"; + }; + }, + { + Identifier = "xcode.lang.shannon.block"; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + Start = "{"; + End = "}"; + Foldable = YES; + Recursive = YES; + IncludeRules = ( + "xcode.lang.shannon.function", + "xcode.lang.shannon.bracketexpr", + "xcode.lang.shannon.parenexpr", + ); + }; + }, + { + Identifier = "xcode.lang.shannon.parenexpr"; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + Start = "("; + End = ")"; + Recursive = YES; + IncludeRules = ( + "xcode.lang.shannon.bracketexpr", + ); + }; + }, + { + Identifier = "xcode.lang.shannon.bracketexpr"; + Syntax = { + Tokenizer = "xcode.lang.shannon.lexer"; + Start = "["; + End = "]"; + Recursive = YES; + IncludeRules = ( + "xcode.lang.shannon.parenexpr", + ); + }; + }, +) \ No newline at end of file diff --git a/src/TODO b/src/TODO new file mode 100644 index 0000000..41bfd2a --- /dev/null +++ b/src/TODO @@ -0,0 +1,93 @@ + +* Static directive: def static int inc()... + +* Inheritance, virtual calls, compatibility rules. Syntax? + class point3d(int x, int y, int z) .point(x, y) + +* Properties + +* Operator 'in' for vectors? 'lin'? + +* Static calls at compile time: objects constructed at compile time + can be modified with method calls, so either restrict static calls + or make sure def objects are not modified through method calls + +* Procedures, pipes. Procedures are declared as, e.g. + def void convert() ... + +* Exceptions: there is no 'throw' keyword, just calling an exception ctor + throws one + +* Alternate syntax for string keys in dictionaries: dict->key. This will + work for dynamic states too (?). Also, the ability to iterate over all + state members with the 'for' operator - how? + +* Nullable types: patented by Microsoft. 
Instead, maybe just use 'any' + and operator 'default', e.g. v = v default 0 (if null then assign 0) + +* Read-only variables (const) and const subexpressions? A bit problematic: + const should be part of type, not var definition. + +* 'public' and 'forward' blocks + +* External function defs: syntax? def int v() #extern + +* Small ordinals with the 's' prefix? Custom prefixes can be used for + measurement units as well. Meas. units are compatible with ints but not + compatible with each other. + +* finally { ... } is a nested block which is executed if the enclosing + block reaches its end, i.e. the finally label. For states this is the + destructor. Not the same as try ... finally construct. + +* Structs are almost like vectors, except element types are specified + individually. Can be used for returning multiple values from functions. + So a struct descriptor is a vector of types? + +* Operator 'split': a combination of 'case' and 'while' for fifos (?) + Or maybe switch i = ... with semantics similar to 'for'? + +* For loop for fifos? + +* Ordinal set operations: inversion (not), union (or), intersection + (and), subtraction (-) + +* Run-time range checking (as well as compile-time) + +* Tiny set implementation? + +* HTML/XML embedded mode, also arbitrary text embedded mode + +* Compiler directives: DEBUG ON|OFF RANGE_CHECK ON|OFF ASSERT ON|OFF + DUMP ON|OFF MARKUP ON|OFF (syntax?) + +* Two versions of the binary: debug and release. Debug helps to debug + both the program (assertions, dumps and range checking are on) and the + compiler itself + +* Abstract module and module inheritance paradigm. Modules can be loaded + at run time, but they should be inherited from some known module type. + Database drivers can be written this way. + +* Have the code generator evaluate some operations at generation time if + possible. + +* Dequeue operator >> -- does it make sense when there's deq()? Don't + think so. 
#!/bin/bash
#
# Prints, for every source file that has a release object, the ratio of
# object-file bytes to source lines, followed by the same ratio for the
# whole release binary against all counted sources plus headers.

SRCDIR="."
OBJDIR="release"
EXT="cpp"

make release || exit 1

cpp_names=()

for src in "$SRCDIR"/*."$EXT" ; do
    [ -f "$src" ] || continue
    # basename strips both the directory and the extension. The previous sed
    # expression expanded to 's/\.\cpp$//', where GNU sed reads '\cp' as
    # Control-P, so the extension was never removed.
    name=$(basename "$src" ".$EXT")
    objfile="$OBJDIR/$name.o"
    [ -f "$objfile" ] || continue
    lines=$(wc -l "$src" | awk '{print $1}')
    # An empty source file would otherwise cause a division by zero
    [ "$lines" -gt 0 ] || continue
    bytes=$(wc -c "$objfile" | awk '{print $1}')
    coeff=$((bytes / lines))
    echo "$coeff - $name ($bytes/$lines)"
    cpp_names+=("$src")
done

# With several files the last line of wc output is the grand total
lines=$(wc -l "${cpp_names[@]}" "$SRCDIR"/*.h | tail -1 | awk '{print $1}')
objfile="shn"
bytes=$(wc -c "$objfile" | awk '{print $1}')
if [ "${lines:-0}" -gt 0 ] ; then
    coeff=$((bytes / lines))
    echo "$coeff - *.cpp *.h ($bytes/$lines)"
fi
#ifdef SHN_THR

#if defined(__GNUC__) && (defined(__i386__) || defined(__I386__)|| defined(__x86_64__))
// multi-threaded version with GCC on i386/x86_64

// Atomically adds 1 to *target and returns the new value.
// `lock xadd` leaves the previous value of *target in `temp`.
// *target is declared as a read-write memory operand and "memory" is
// clobbered, so the compiler knows the location changes and does not move
// other memory accesses across the operation.
atomicint pincrement(atomicint* target) throw()
{
    atomicint temp = 1;
    __asm__ __volatile__ ("lock ; xaddl %0,%1"
        : "+r" (temp), "+m" (*target)
        :
        : "memory", "cc");
    return temp + 1;
}


// Atomically subtracts 1 from *target and returns the new value.
// Same operand and clobber declarations as pincrement().
atomicint pdecrement(atomicint* target) throw()
{
    atomicint temp = -1;
    __asm__ __volatile__ ("lock ; xaddl %0,%1"
        : "+r" (temp), "+m" (*target)
        :
        : "memory", "cc");
    return temp - 1;
}


#else

#error Undefined architecture: atomic functions are not available

#endif

#endif
// Returns the greater of x and y (y when they compare equal).
template <class T>
    inline T imax(T x, T y)  { return (x > y) ? x : y; }

// Returns the lesser of x and y (y when they compare equal).
template <class T>
    inline T imin(T x, T y)  { return (x < y) ? x : y; }

// Stores `value` in `target` and returns the value `target` held before.
template <class T>
    inline T exchange(T& target, const T& value)
        { T temp = target; target = value; return temp; }


// Converts x to type T. T must be given explicitly; X is deduced.
// DEBUG builds use dynamic_cast; other builds use an unchecked C-style cast.
template <class T, class X>
    inline T cast(const X& x)
#ifdef DEBUG
        { return dynamic_cast<T>(x); }
#else
        { return (T)x; }
#endif
pdecrement(atomicint* target) throw(); +#endif + + +#endif // __COMMON_H diff --git a/src/compexpr.cpp b/src/compexpr.cpp new file mode 100644 index 0000000..7b4d07d --- /dev/null +++ b/src/compexpr.cpp @@ -0,0 +1,791 @@ + +#include "vm.h" +#include "compiler.h" + + +// --- EXPRESSION ---------------------------------------------------------- // + +/* + + + @ + unary- ? # as is + | + * / mod + + – + == <> < > <= >= in + not + and + or xor + range, enum +*/ + + +Type* Compiler::getEnumeration(const str& firstIdent) +{ + Enumeration* enumType = state->registerType(new Enumeration()); + enumType->addValue(state, scope, firstIdent); + expect(tokComma, "','"); + do + { + enumType->addValue(state, scope, getIdentifier()); + } + while (skipIf(tokComma)); + return enumType; +} + + +Type* Compiler::getStateDerivator(Type* retType, bool allowProto) +{ + FuncPtr* proto = state->registerType(new FuncPtr(retType)); + if (!skipIf(tokRParen)) + { + do + { + bool isPtr = skipIf(tokVar); + Type* argType = getTypeValue(/* true */); + str ident; + if (token == tokIdent) + { + ident = getIdentifier(); + argType = getTypeDerivators(argType); + } + if (skipIf(tokAssign)) + { + if (isPtr) + error("Var arguments can not have a default value"); + variant defValue; + getConstValue(argType, defValue); + proto->addFormalArg(ident, argType, false, &defValue); + } + else + proto->addFormalArg(ident, argType, isPtr, NULL); + } + while (skipIf(tokComma)); + skipRParen(); + } + if (skipIf(tokEllipsis)) + { + if (!allowProto) + error("Function pointer type not allowed here"); + return proto; + } + else + { + State* newState = state->registerType(new State(state, proto)); + stateBody(newState); + return newState; + } +} + + +Type* Compiler::getTypeDerivators(Type* type) +{ + // TODO: anonymous functions are static, named ones are not + if (skipIf(tokLSquare)) // container + { + if (skipIf(tokRSquare)) + return getTypeDerivators(type)->deriveVec(state); + + else if (skipIf(tokRange)) + { + if 
(!type->isAnyOrd()) + error("Range can be derived from ordinal type only"); + expect(tokRSquare, "']'"); + return POrdinal(type)->getRangeType(); + } + + else + { + Type* indexType = getTypeValue(/* false */); + expect(tokRSquare, "']'"); + if (indexType->isVoid()) + return getTypeDerivators(type)->deriveVec(state); + else + return getTypeDerivators(type)->deriveContainer(state, indexType); + } + } + + else if (skipIf(tokLessThan)) // fifo + { + expect(tokGreaterThan, "'>'"); + return getTypeDerivators(type)->deriveFifo(state); + } + + else if (skipIf(tokLParen)) // prototype/function + return getStateDerivator(type, true); + + else if (skipIf(tokCaret)) // ^ + { + type = getTypeDerivators(type); + if (type->isReference()) + error("Double reference"); + if (!type->isDerefable()) + error("Reference can not be derived from this type"); + return type->getRefType(); + } + + return type; +} + + +void Compiler::builtin(Builtin* b, bool skipFirst) +{ + if (b->prototype) + { + skipLParen(); + actualArgs(b->prototype, skipFirst); + } + if (b->compile) + b->compile(this, b); + else + codegen->staticCall(b->staticFunc); +} + + +void Compiler::identifier(str ident) +{ + // Go up the current scope hierarchy within the module. Currently not + // everything is accessible even if found by the code below: an error + // will be thrown by the CodeGen in case a symbol can not be accessed. 
+ if (token == tokPrevIdent) + redoIdent(); + else + next(); + Scope* sc = scope; + do + { + // TODO: implement loading from outer scopes + Symbol* sym = sc->find(ident); + if (sym) + { + if (sym->isBuiltin()) + builtin(PBuiltin(sym)); + else + codegen->loadSymbol(sym); + return; + } + sc = sc->outer; + } + while (sc != NULL); + + // Look up in used modules; search backwards + for (memint i = module->usedModuleVars.size(); i--; ) + { + InnerVar* m = module->usedModuleVars[i]; + Symbol* sym = m->getModuleType()->find(ident); + if (sym) + { + if (sym->isBuiltin()) + builtin(PBuiltin(sym)); + else if (codegen->isCompileTime()) + codegen->loadSymbol(sym); + else + { + codegen->loadVariable(m); + codegen->loadMember(sym->host, sym); + } + return; + } + } + + throw EUnknownIdent(ident); +} + + +void Compiler::dotIdentifier(str ident) +{ + Type* type = codegen->getTopType(); + Builtin* b = queenBee->findBuiltin(ident); + if (b) + { + // See if the first formal arg of the builtin matches the current top stack item + if (b->prototype->formalArgs.size() == 0) + error("Invalid builtin call"); + Type* firstArgType = b->prototype->formalArgs[0]->type; + if (firstArgType) + codegen->implicitCast(firstArgType, "Builtin not applicable to this type"); + builtin(b, true); + } + else if (type->isFuncPtr()) + { + // Scope resolution: we have a state name followed by '.', but because + // state names are by default transformed into function pointers, we + // need to roll it back + codegen->implicitCast(defTypeRef, "Invalid member selection"); + codegen->loadSymbol(codegen->undoStateRef()->findShallow(ident)); + } + else if (type->isAnyState()) + // State object (variable or subexpr) on the stack followed by '.' 
and member: + codegen->loadMember(PState(type), PState(type)->findShallow(ident)); + else + error("Invalid member selection"); +} + + +void Compiler::vectorCtor(Type* typeHint) +{ + // if (typeHint && !typeHint->isAnyVec()) + // error("Vector constructor not expected here"); + + Type* elemType = NULL; + if (typeHint) + { + if (typeHint->isAnyCont()) + elemType = PContainer(typeHint)->elem; + else if (typeHint->isRange()) + elemType = PRange(typeHint)->elem; + } + + if (skipIf(tokRSquare)) + { + // Since typeHint can be anything, empty vector [] can actually be an + // empty constant for any type: + codegen->loadEmptyConst(typeHint ? typeHint : queenBee->defNullCont); + return; + } + + expression(elemType); + + if (skipIf(tokRange)) + { + expression(elemType); + codegen->mkRange(); + } + + else + { + Container* contType = codegen->elemToVec( + typeHint && typeHint->isAnyCont() ? PContainer(typeHint) : NULL); + while (skipIf(tokComma)) + { + expression(contType->elem); + codegen->elemCat(); + } + } + expect(tokRSquare, "]"); +} + + +void Compiler::fifoCtor(Type* typeHint) +{ + if (typeHint && !typeHint->isAnyFifo()) + error("Fifo constructor not expected here"); + Fifo* fifoType = PFifo(typeHint); + if (skipIf(tokRAngle)) + { + if (fifoType == NULL) + codegen->loadEmptyConst(queenBee->defNullCont); + else + codegen->loadFifo(fifoType); + return; + } + expression(fifoType ? fifoType->elem : NULL); + fifoType = codegen->elemToFifo(); + while (skipIf(tokComma)) + { + expression(fifoType->elem); + codegen->fifoEnq(); + } + expect(tokRAngle, "'>'"); +} + + +void Compiler::dictCtor(Type* typeHint) +{ + if (skipIf(tokRCurly)) + { + codegen->loadEmptyConst(typeHint ? typeHint : queenBee->defNullCont); + return; + } + + if (typeHint && !typeHint->isAnySet() && !typeHint->isAnyDict()) + error("Set/dict constructor not expected here"); + Container* type = PContainer(typeHint); + + expression(type ? 
// Compiles the expression form of 'if': a parenthesized list of three
// comma-separated expressions (condition, first branch, second branch).
// Called from atom() after the 'if' token has been consumed.
void Compiler::ifFunc()
{
    skipLParen();
    // The condition is compiled with the boolean type as the expected type
    expression(queenBee->defBool);
    // NOTE(review): presumably emits a jump taken when the condition is
    // false; it is resolved below, just before the second branch
    memint jumpFalse = codegen->boolJumpForward(opJumpFalse);
    expect(tokComma, "','");
    // First branch: no type expectation
    expression(NULL);
    Type* exprType = codegen->getTopType();
    codegen->justForget(); // will get the expression type from the second branch
    memint jumpOut = codegen->jumpForward();
    codegen->resolveJump(jumpFalse);
    expect(tokComma, "','");
    // Second branch is compiled against the type of the first one
    expression(exprType);
    codegen->resolveJump(jumpOut);
    skipRParen();
}
(skipIf(tokComma)); + } + skipRParen(); + while (i < proto->formalArgs.size()) + { + FormalArg* arg = proto->formalArgs[i]; + if (!arg->hasDefValue) + error("Too few arguments"); + codegen->loadConst(arg->type, arg->defValue); + i++; + } +} + + +void Compiler::atom(Type* typeHint) +{ + if (token == tokPrevIdent) // from partial (typeless) definition + identifier(getPrevIdent()); + + else if (token == tokIntValue) + { + codegen->loadConst(queenBee->defInt, integer(intValue)); + next(); + } + + else if (token == tokStrValue) + { + str value = strValue; + if (value.size() == 1) + codegen->loadConst(queenBee->defChar, value[0]); + else + { + module->registerString(value); + codegen->loadConst(queenBee->defStr, value); + } + next(); + } + + else if (token == tokIdent) + identifier(strValue); + + else if (skipIf(tokLParen)) + { + if (codegen->isCompileTime() && token == tokIdent) // Enumeration? + { + str ident = strValue; + if (next() == tokComma) + { + codegen->loadTypeRef(getEnumeration(ident)); + goto skipExpr; + } + undoIdent(ident); + } + expression(typeHint); +skipExpr: + skipRParen(); + } + + else if (skipIf(tokLSquare)) + vectorCtor(typeHint); + + else if (skipIf(tokLAngle)) + fifoCtor(typeHint); + + else if (skipIf(tokLCurly)) + dictCtor(typeHint); + + else if (skipIf(tokIf)) + ifFunc(); + + else if (skipIf(tokTypeOf)) + typeOf(); + + else if (skipIf(tokThis)) + codegen->loadThis(); + + else + error("Expression syntax"); + + while (token == tokWildcard && codegen->tryImplicitCast(defTypeRef)) + { + next(); // * + codegen->loadTypeRef(getTypeDerivators(codegen->undoTypeRef())); + } +} + + +void Compiler::designator(Type* typeHint) +{ + bool isAt = skipIf(tokAt); + Type* refTypeHint = typeHint && typeHint->isReference() ? PReference(typeHint)->to : NULL; + + atom(refTypeHint ? 
refTypeHint : typeHint); + + while (1) + { + if (skipIf(tokPeriod)) + { + codegen->deref(); + dotIdentifier(getIdentifier()); + } + + else if (skipIf(tokLSquare)) + { + codegen->deref(); + expression(NULL); + if (skipIf(tokRange)) + { + if (token == tokRSquare) + codegen->loadConst(defVoid, variant()); + else + expression(codegen->getTopType()); + codegen->loadSubvec(); + } + else + codegen->loadContainerElem(); + expect(tokRSquare, "]"); + } + + else if (skipIf(tokLParen)) + { + Type* type = codegen->getTopType(); + if (type->isFuncPtr()) + { + actualArgs(PFuncPtr(type)); + codegen->call(PFuncPtr(type)); // May throw evoidfunc() + } + else + error("Invalid function call"); + } + + else + break; + } + + if (isAt || refTypeHint) + codegen->mkref(); + else + codegen->deref(); +} + + +void Compiler::factor(Type* typeHint) +{ + bool isNeg = skipIf(tokMinus); + + designator(typeHint); + + if (skipIf(tokQuestion)) + codegen->nonEmpty(); + if (isNeg) + codegen->arithmUnary(opNeg); + if (skipIf(tokAs)) + { + Type* type = getTypeValue(/* true */); + // TODO: default value in parens? + codegen->explicitCast(type); + } + if (skipIf(tokIs)) + codegen->isType(getTypeValue(/* true */)); +} + + +void Compiler::concatExpr(Container* contType) +{ + factor(contType); + if (skipIf(tokCat)) + { + Type* top = codegen->getTopType(); + if (top->isAnyVec()) + if (contType) + codegen->implicitCast(contType); + else + contType = PContainer(top); + else + contType = codegen->elemToVec(contType); + do + { + factor(contType); + if (codegen->tryImplicitCast(contType)) + codegen->cat(); + else + codegen->elemCat(); + } + while (skipIf(tokCat)); + } +} + + +void Compiler::term() +{ + concatExpr(NULL); + while (token == tokMul || token == tokDiv || token == tokMod) + { + OpCode op = token == tokMul ? opMul + : token == tokDiv ? 
opDiv : opMod; + next(); + factor(NULL); + codegen->arithmBinary(op); + } +} + + +void Compiler::arithmExpr() +{ + term(); + while (token == tokPlus || token == tokMinus) + { + OpCode op = token == tokPlus ? opAdd : opSub; + next(); + term(); + codegen->arithmBinary(op); + } +} + + +void Compiler::relation() +{ + arithmExpr(); + if (skipIf(tokIn)) + { + arithmExpr(); + Type* right = codegen->getTopType(); + if (right->isTypeRef()) + codegen->inBounds(); + else if (right->isAnyCont()) + codegen->inCont(); + else if (right->isRange()) + codegen->inRange(); + else if (right->isAnyOrd() && skipIf(tokRange)) + { + arithmExpr(); + codegen->inRange2(); + } + else + error("Operator 'in' expects container, numeric range, or ordinal type ref"); + } + else if (token >= tokEqual && token <= tokGreaterEq) + { + OpCode op = OpCode(opEqual + int(token - tokEqual)); + next(); + arithmExpr(); + codegen->cmp(op); + } +} + + +void Compiler::notLevel() +{ + bool isNot = skipIf(tokNot); + relation(); + if (isNot) + codegen->_not(); +} + + +void Compiler::andLevel() +{ + notLevel(); + while (token == tokShl || token == tokShr || token == tokAnd) + { + Type* type = codegen->getTopType(); + if (type->isBool() && skipIf(tokAnd)) + { + memint offs = codegen->boolJumpForward(opJumpAnd); + andLevel(); + codegen->resolveJump(offs); + break; + } + else // if (type->isInt()) + { + OpCode op = token == tokShl ? opBitShl + : token == tokShr ? opBitShr : opBitAnd; + next(); + notLevel(); + codegen->arithmBinary(op); + } + } +} + + +void Compiler::orLevel() +{ + andLevel(); + while (token == tokOr || token == tokXor) + { + Type* type = codegen->getTopType(); + // TODO: boolean XOR? Beautiful thing, but not absolutely necessary + if (type->isBool() && skipIf(tokOr)) + { + memint offs = codegen->boolJumpForward(opJumpOr); + orLevel(); + codegen->resolveJump(offs); + break; + } + else // if (type->isInt()) + { + OpCode op = token == tokOr ? 
opBitOr : opBitXor; + next(); + andLevel(); + codegen->arithmBinary(op); + } + } +} + + +void Compiler::caseValue(Type* ctlType) +{ + expression(ctlType); + if (skipIf(tokRange)) + { + expression(ctlType); + codegen->caseInRange(); + } + else + codegen->caseCmp(); + if (skipIf(tokComma)) + { + memint offs = codegen->boolJumpForward(opJumpOr); + caseValue(ctlType); + codegen->resolveJump(offs); + } +} + + +void Compiler::expression(Type* expectType) +{ + // Some tricks to shorten the path whenever possible: + if (expectType == NULL || expectType->isBool()) + orLevel(); + else if (expectType->isAnyCont()) + // expectType will propagate all the way down to vectorCtor()/dictCtor(): + concatExpr(PContainer(expectType)); + else if (expectType->isReference() || expectType->isAnyState()) + designator(expectType); + else + arithmExpr(); + if (expectType) + codegen->implicitCast(expectType); +} + + +Type* Compiler::getConstValue(Type* expectType, variant& result) +{ + CodeSeg constCode(NULL); + CodeGen constCodeGen(constCode, module, state, true); + CodeGen* prevCodeGen = exchange(codegen, &constCodeGen); + Type* resultType = NULL; + try + { + // We don't pass expectType here because we may have a subrange type + // expression, in which case expression() below evaluates to an Ordinal + expression(expectType == NULL || expectType->isTypeRef() ? 
NULL : expectType); + + if (skipIf(tokRange)) + { + expression(codegen->getTopType()); + codegen->mkRange(); + resultType = constCodeGen.runConstExpr(constStack, result); + if (expectType && !expectType->isTypeRef()) + error("Subrange type not expected here"); + result = state->registerType(PRange(resultType)->elem->createSubrange(result._range())); + resultType = defTypeRef; + } + else + { + if (expectType && expectType->isTypeRef()) + codegen->implicitCast(defTypeRef, "Type mismatch in const expression"); + else if (codegen->getTopType()->isFuncPtr()) + codegen->implicitCast(defTypeRef, "Invalid use of function in const expression"); + resultType = constCodeGen.runConstExpr(constStack, result); + } + } + catch(exception&) + { + codegen = prevCodeGen; + throw; + } + + codegen = prevCodeGen; + return resultType; +} + + +Type* Compiler::getTypeValue() +{ + variant result; + getConstValue(defTypeRef, result); + return cast(result._rtobj()); +} + diff --git a/src/compiler.cpp b/src/compiler.cpp new file mode 100644 index 0000000..bc4e843 --- /dev/null +++ b/src/compiler.cpp @@ -0,0 +1,720 @@ + +#include "vm.h" +#include "compiler.h" + + +Compiler::AutoScope::AutoScope(Compiler* c) throw() + : BlockScope(c->scope, c->codegen), compiler(c) + { compiler->scope = this; } + + +Compiler::AutoScope::~AutoScope() throw() + { compiler->scope = outer; } + + +StkVar* Compiler::AutoScope::addInitStkVar(const str& name, Type* type) +{ + StkVar* var = addStkVar(name, type); + compiler->codegen->initStkVar(var); + return var; +} + + +Compiler::ReturnInfo::ReturnInfo(Compiler& c) throw() + : compiler(c), prev(c.returnInfo), topLevelReturned(false), jumps() + { compiler.returnInfo = this; } + + +Compiler::ReturnInfo::~ReturnInfo() throw() + { compiler.returnInfo = prev; } + + +void Compiler::ReturnInfo::resolveJumps() +{ + for (memint i = 0; i < jumps.size(); i++) + compiler.codegen->resolveJump(jumps[i]); + jumps.clear(); +} + + +Compiler::LoopInfo::LoopInfo(Compiler& c) throw() + : 
compiler(c), prev(c.loopInfo), + stackLevel(c.codegen->getStackLevel()), + continueTarget(c.codegen->getCurrentOffs()), + jumps() + { compiler.loopInfo = this; } + + +Compiler::LoopInfo::~LoopInfo() throw() + { compiler.loopInfo = prev; } + + +void Compiler::LoopInfo::resolveJumps() +{ + for (memint i = 0; i < jumps.size(); i++) + compiler.codegen->resolveJump(jumps[i]); + jumps.clear(); +} + + +Compiler::Compiler(Context& c, Module* mod, buffifo* f) + : Parser(f), context(c), constStack(c.options.stackSize), + module(mod), scope(NULL), state(NULL), + loopInfo(NULL), returnInfo(NULL) { } + + +Compiler::~Compiler() + { } + + +Type* Compiler::getTypeAndIdent(str* ident) +{ + Type* type = NULL; + if (token == tokIdent) + { + *ident = strValue; + if (next() == tokAssign || isEos()) + goto ICantBelieveIUsedAGotoStatement; + undoIdent(*ident); + } + type = getTypeValue(/* true */); + *ident = getIdentifier(); + type = getTypeDerivators(type); +ICantBelieveIUsedAGotoStatement: + return type; +} + + +void Compiler::definition() +{ + str ident; + Type* type = getTypeAndIdent(&ident); + if (type && type->isState()) + state->addDefinition(ident, defTypeRef, PState(type), scope); + else + { + expect(tokAssign, "'='"); + variant value; + Type* valueType = getConstValue(type, value); + if (type == NULL) + type = valueType; + if (type->isAnyOrd() && !POrdinal(type)->isInRange(value.as_ord())) + error("Constant out of range"); + state->addDefinition(ident, type, value, scope); + skipEos(); + } +} + + +void Compiler::classDef() +{ + str ident = getIdentifier(); + skipLParen(); + State* type = cast(getStateDerivator(queenBee->defSelfStub, false)); + state->addDefinition(ident, defTypeRef, type, scope); +} + + +void Compiler::variable() +{ + str ident; + Type* type = getTypeAndIdent(&ident); + if (isEos()) + { + // Argument reclamation + if (!isStateScope()) + error("Argument reclamation not allowed here"); + Symbol* sym = state->findShallow(ident); + if (!sym->isArgVar()) + 
error("Only function arguments can be reclaimed"); + ArgVar* arg = PArgVar(sym); + if (type == NULL) + type = arg->type; + codegen->loadArgVar(arg); + InnerVar* var = state->reclaimArg(arg, type); + codegen->initInnerVar(var); + } + else + { + expect(tokAssign, "'='"); + expression(type); + if (type == NULL) + type = codegen->getTopType(); + if (type->isNullCont()) + error("Type undefined (null container)"); + if (isLocalScope()) + { + StkVar* var = PBlockScope(scope)->addStkVar(ident, type); + codegen->initStkVar(var); + } + else if (isStateScope()) + { + InnerVar* var = state->addInnerVar(ident, type); + codegen->initInnerVar(var); + } + else + notimpl(); + } + skipEos(); +} + + +void Compiler::statementList() +{ + while (!isBlockEnd() && !(isModuleScope() && eof())) + { + singleStatement(); + skipWsSeps(); + } +} + + +void Compiler::singleOrMultiBlock() +{ + if (skipIf(tokColon)) + { + skipWsSeps(); + singleStatement(); + } + else + { + skipMultiBlockBegin("':' or '{'"); + statementList(); + skipMultiBlockEnd(); + } +} + + +void Compiler::nestedBlock() +{ + AutoScope local(this); + singleOrMultiBlock(); + skipWsSeps(); + local.deinitLocals(); +} + + +void Compiler::singleStatement() +{ + if (skipIf(tokSemi)) + return; + + if (isStateScope() && returnInfo->topLevelReturned) + error("Statement after 'return'"); + if (context.options.lineNumbers) + codegen->linenum(getLineNum()); + + if (skipIf(tokDef)) + definition(); + else if (skipIf(tokClass)) + classDef(); + else if (skipIf(tokVar)) + variable(); + else if (skipIf(tokBegin)) + nestedBlock(); + else if (skipIf(tokIf)) + ifBlock(); + else if (skipIf(tokSwitch)) + switchBlock(); + else if (skipIf(tokWhile)) + whileBlock(); + else if (skipIf(tokFor)) + forBlock(); + else if (skipIf(tokContinue)) + doContinue(); + else if (skipIf(tokBreak)) + doBreak(); + else if (skipIf(tokReturn)) + doReturn(); + else if (skipIf(tokDel)) + doDel(); + else if (skipIf(tokIns)) + doIns(); + else if (token == tokAssert) + 
assertion(); + else if (token == tokDump) + dumpVar(); + else if (skipIf(tokExit)) + programExit(); + else + otherStatement(); + + codegen->endStatement(); +} + + +void Compiler::assertion() +{ + assert(token == tokAssert); + if (context.options.enableAssert) + { + integer ln = getLineNum(); + beginRecording(); + next(); + expression(NULL); + str s = endRecording(); + module->registerString(s); + if (!context.options.lineNumbers) + codegen->linenum(ln); + codegen->assertion(ln, s); + } + else + skipToEos(); + skipEos(); +} + + +void Compiler::dumpVar() +{ + assert(token == tokDump); + if (context.options.enableDump) + do + { + beginRecording(); + next(); + expression(NULL); + str s = endRecording(); + module->registerString(s); + codegen->dumpVar(s); + } + while (token == tokComma); + else + skipToEos(); + skipEos(); +} + + +void Compiler::programExit() +{ + expression(NULL); + codegen->programExit(); + skipEos(); +} + + +void Compiler::otherStatement() +{ + // TODO: pipes + memint stkLevel = codegen->getStackLevel(); + try + { + designator(NULL); + } + catch (evoidfunc&) + { + skipEos(); + return; + } + + if (skipIf(tokAssign)) + { + str storerCode = codegen->lvalue(); + expression(codegen->getTopType()); + codegen->assign(storerCode); + } + + else if (token >= tokAddAssign && token <= tokModAssign) + { + str storerCode = codegen->arithmLvalue(token); + next(); + expression(codegen->getTopType()); + codegen->assign(storerCode); + } + + else if (skipIf(tokCatAssign)) + { + codegen->catLvalue(); + expression(NULL); + codegen->catAssign(); + } + + else if (skipIf(tokPush)) + { + do + { + expression(NULL); + codegen->fifoPush(); + } + while (skipIf(tokPush)); + codegen->popValue(); + } + + // TODO: for fifoPull(): store the fifo in a local var so that designators can be + // parsed and assigned properly + + skipEos(); + + if (codegen->getStackLevel() == stkLevel + 1) + { + if (codegen->canDiscardValue()) + codegen->popValue(); + else + error("Unused value in 
statement"); + } + assert(codegen->getStackLevel() == stkLevel); +} + + +void Compiler::doIns() +{ + designator(NULL); + expect(tokAssign, "'='"); + str inserterCode = codegen->insLvalue(); + expression(NULL); + codegen->insAssign(inserterCode); + skipEos(); +} + + +void Compiler::doDel() +{ + designator(NULL); + codegen->deleteContainerElem(); + skipEos(); +} + + +void Compiler::ifBlock() +{ + expression(queenBee->defBool); + memint out = codegen->boolJumpForward(opJumpFalse); + nestedBlock(); + if (token == tokElif || token == tokElse) + { + memint t = codegen->jumpForward(); + codegen->resolveJump(out); + out = t; + if (skipIf(tokElif)) + ifBlock(); + else if (skipIf(tokElse)) + nestedBlock(); + } + codegen->resolveJump(out); +} + + +void Compiler::caseLabel(Type* ctlType) +{ + // Expects the case control variable to be the top stack element + expect(tokCase, "'case' or 'default'"); + caseValue(ctlType); + memint out = codegen->boolJumpForward(opJumpFalse); + nestedBlock(); + if (!isBlockEnd()) + { + memint t = codegen->jumpForward(); + codegen->resolveJump(out); + out = t; + if (skipIf(tokDefault)) + nestedBlock(); + else + caseLabel(ctlType); + } + codegen->resolveJump(out); +} + + +void Compiler::switchBlock() +{ + AutoScope local(this); + expression(NULL); + Type* ctlType = codegen->getTopType(); + local.addInitStkVar("__switch", ctlType); + skipMultiBlockBegin("'{'"); + caseLabel(ctlType); + local.deinitLocals(); + skipMultiBlockEnd(); +} + + +void Compiler::whileBlock() +{ + LoopInfo loop(*this); + expression(queenBee->defBool); + memint out = codegen->boolJumpForward(opJumpFalse); + nestedBlock(); + codegen->jump(loop.continueTarget); + codegen->resolveJump(out); + loop.resolveJumps(); +} + + +void Compiler::forBlockTail(StkVar* ctlVar, memint outJumpOffs, memint incJumpOffs) +{ + if (incJumpOffs >= 0) + codegen->resolveJump(incJumpOffs); + codegen->incStkVar(ctlVar); + codegen->jump(loopInfo->continueTarget); + codegen->resolveJump(outJumpOffs); + 
loopInfo->resolveJumps(); +} + + +void Compiler::forBlock() +{ + AutoScope local(this); + str ident = getIdentifier(); + str ident2; + if (skipIf(tokComma)) + ident2 = getIdentifier(); + expect(tokAssign, "'='"); + expression(NULL); + Type* iterType = codegen->getTopType(); + + // Simple integer range iteration + if (iterType->isAnyOrd()) + { + if (!ident2.empty()) + error("Key/value pair is not allowed for range loops"); + StkVar* ctlVar = local.addInitStkVar(ident, iterType); + { + LoopInfo loop(*this); + expect(tokRange, "'..'"); + expression(iterType); + codegen->stkVarCmp(ctlVar, opGreaterThan); + memint out = codegen->boolJumpForward(opJumpTrue); + nestedBlock(); + forBlockTail(ctlVar, out); + } + } + + // Vector iterator + else if (iterType->isAnyVec() || iterType->isNullCont()) + { + StkVar* vecVar = local.addInitStkVar(LOCAL_ITERATOR_NAME, iterType); + codegen->loadConst(queenBee->defInt, 0); + StkVar* ctlVar = local.addInitStkVar(ident, queenBee->defInt); + { + LoopInfo loop(*this); + codegen->stkVarCmpLength(ctlVar, vecVar); + memint out = codegen->boolJumpForward(opJumpTrue); + if (!ident2.empty()) + { + AutoScope inner(this); + if (iterType->isNullCont()) + { + // For a null container we don't know the element type, neither + // do we care, because this code below is never executed, however + // we want the ident2 variable to exist within the block + codegen->loadConst(defVoid, variant()); + inner.addInitStkVar(ident2, defVoid); + } + else + { + // TODO: optimize this? 
+ codegen->loadStkVar(vecVar); + codegen->loadStkVar(ctlVar); + codegen->loadContainerElem(); + inner.addInitStkVar(ident2, PContainer(iterType)->elem); + } + nestedBlock(); + inner.deinitLocals(); + } + else + nestedBlock(); + forBlockTail(ctlVar, out); + } + } + + // Byte set and byte dict + else if (iterType->isByteSet() || iterType->isByteDict()) + { + if (iterType->isByteSet() && !ident2.empty()) + error("Key/value pair is not allowed for set loops"); + Container* contType = PContainer(iterType); + Ordinal* idxType = POrdinal(contType->index); + StkVar* contVar = local.addInitStkVar(LOCAL_ITERATOR_NAME, contType); + codegen->loadConst(idxType, idxType->left); + StkVar* ctlVar = local.addInitStkVar(ident, idxType); + { + LoopInfo loop(*this); + if (iterType->isByteSet()) + { + codegen->loadConst(idxType, idxType->right); + codegen->stkVarCmp(ctlVar, opGreaterThan); + } + else + codegen->stkVarCmpLength(ctlVar, contVar); + memint out = codegen->boolJumpForward(opJumpTrue); + // TODO: optimize this? + codegen->loadStkVar(ctlVar); + codegen->loadStkVar(contVar); + codegen->inCont(); + memint inc = codegen->boolJumpForward(opJumpFalse); + if (!ident2.empty()) // dict only + { + AutoScope inner(this); + // TODO: optimize this? 
+ codegen->loadStkVar(contVar); + codegen->loadStkVar(ctlVar); + codegen->loadContainerElem(); + inner.addInitStkVar(ident2, contType->elem); + nestedBlock(); + inner.deinitLocals(); + } + else + nestedBlock(); + forBlockTail(ctlVar, out, inc); + } + } + + // Other sets and dictionaries + else if (iterType->isAnySet() || iterType->isAnyDict()) + { + if (iterType->isAnySet() && !ident2.empty()) + error("Key/value pair is not allowed for set loops"); + Container* contType = PContainer(iterType); + StkVar* contVar = local.addInitStkVar(LOCAL_ITERATOR_NAME, iterType); + codegen->loadConst(queenBee->defInt, 0); + StkVar* idxVar = local.addInitStkVar(LOCAL_INDEX_NAME, queenBee->defInt); + { + LoopInfo loop(*this); + codegen->stkVarCmpLength(idxVar, contVar); + memint out = codegen->boolJumpForward(opJumpTrue); + { + AutoScope inner(this); + codegen->loadStkVar(contVar); + codegen->loadStkVar(idxVar); + codegen->loadKeyByIndex(); + inner.addInitStkVar(ident, contType->index); + if (!ident2.empty()) // dict only + { + codegen->loadStkVar(contVar); + codegen->loadStkVar(idxVar); + codegen->loadDictElemByIndex(); + inner.addInitStkVar(ident2, contType->elem); + } + nestedBlock(); + inner.deinitLocals(); + } + forBlockTail(idxVar, out); + } + } + + else + error("Invalid iterator type in 'for' statement"); + local.deinitLocals(); +} + + +void Compiler::doContinue() +{ + if (loopInfo == NULL) + error("'continue' not within loop"); + codegen->deinitFrame(loopInfo->stackLevel); + codegen->jump(loopInfo->continueTarget); + skipEos(); +} + + +void Compiler::doBreak() +{ + if (loopInfo == NULL) + error("'break' not within loop"); + codegen->deinitFrame(loopInfo->stackLevel); + loopInfo->jumps.push_back(codegen->jumpForward()); + skipEos(); +} + + +void Compiler::doReturn() +{ + if (!state->isCtor && state->prototype->returns && !isEos()) + { + expression(state->prototype->returnType); + codegen->storeResultVar(); + } + skipEos(); + if (isStateScope()) + // Don't generate a jump at 
the end of a function body (and make sure + // there are no statements beyond this point) + returnInfo->topLevelReturned = true; + else + { +#ifdef DEBUG + codegen->deinitFrame(state->varCount); +#endif + returnInfo->jumps.push_back(codegen->jumpForward()); + } +} + + +void Compiler::stateBody(State* newState) +{ + CodeGen newCodeGen(*newState->getCodeSeg(), module, newState, false); + CodeGen* saveCodeGen = exchange(codegen, &newCodeGen); + State* saveState = exchange(state, newState); + Scope* saveScope = exchange(scope, cast(newState)); + try + { + ReturnInfo ret(*this); + codegen->prolog(); + singleOrMultiBlock(); + ret.resolveJumps(); + codegen->epilog(); + } + catch (exception&) + { + scope = saveScope; + state = saveState; + codegen = saveCodeGen; + throw; + } + scope = saveScope; + state = saveState; + codegen = saveCodeGen; + newState->setComplete(); + module->registerCodeSeg(newState->getCodeSeg()); +} + + +void Compiler::compileModule() +{ + // The system module is always added implicitly + module->addUsedModule(queenBee); + // Start parsing and code generation + scope = state = module; + CodeGen mainCodeGen(*module->getCodeSeg(), module, state, false); + codegen = &mainCodeGen; + loopInfo = NULL; + try + { + try + { + ReturnInfo ret(*this); + codegen->prolog(); + next(); + skipWsSeps(); + statementList(); + expect(tokEof, "End of file"); + ret.resolveJumps(); + codegen->epilog(); + } + catch (EDuplicate& e) + { + strValue.clear(); // don't need the " near..." part in error message + error("'" + e.ident + "' is already defined within this scope"); + } + catch (EUnknownIdent& e) + { + strValue.clear(); // don't need the " near..." 
part in error message + error("'" + e.ident + "' is unknown in this context"); + } + } + catch (exception& e) + { + str s; + if (!getFileName().empty()) + { + s += getFileName() + '(' + to_string(getLineNum()) + ')'; + if (!strValue.empty() || token == tokStrValue) // may be an empty string literal + s += " near '" + to_displayable(to_printable(strValue)) + '\''; + s += ": "; + } + s += e.what(); + error(s); + } + module->setComplete(); + module->registerCodeSeg(module->getCodeSeg()); +} + diff --git a/src/compiler.h b/src/compiler.h new file mode 100644 index 0000000..d451fee --- /dev/null +++ b/src/compiler.h @@ -0,0 +1,127 @@ +#ifndef __COMPILER_H +#define __COMPILER_H + +#include "parser.h" +#include "typesys.h" + + +class Compiler: public Parser +{ + friend class Context; + friend class AutoScope; + + struct AutoScope: public BlockScope + { + Compiler* compiler; + + AutoScope(Compiler* c) throw(); + ~AutoScope() throw(); + StkVar* addInitStkVar(const str&, Type*); + }; + + struct ReturnInfo + { + Compiler& compiler; + ReturnInfo* prev; + bool topLevelReturned; + podvec jumps; + ReturnInfo(Compiler&) throw(); + ~ReturnInfo() throw(); + void resolveJumps(); + }; + + struct LoopInfo + { + Compiler& compiler; + LoopInfo* prev; + memint stackLevel; + memint continueTarget; + podvec jumps; + LoopInfo(Compiler& c) throw(); + ~LoopInfo() throw(); + void resolveJumps(); + }; + +public: + Context& context; + rtstack constStack; + Module* const module; + CodeGen* codegen; + Scope* scope; // for looking up symbols, can be local or state scope + State* state; // for this-vars, type objects and definitions + LoopInfo* loopInfo; + ReturnInfo* returnInfo; + + bool isLocalScope() const + { return scope != state; } + bool isStateScope() const + { return scope == state; } + bool isModuleScope() const + { return scope == module; } + + // in compexpr.cpp + Type* getStateDerivator(Type*, bool allowProto); + Type* getTypeDerivators(Type*); + Type* getEnumeration(const str& 
firstIdent); + void builtin(Builtin*, bool skipFirst = false); + void identifier(str); + void dotIdentifier(str); + void vectorCtor(Type* type); + void fifoCtor(Type* type); + void dictCtor(Type* type); + void typeOf(); + void ifFunc(); + void actualArgs(FuncPtr*, bool skipFirst = false); + void atom(Type*); + void designator(Type*); + void factor(Type*); + void concatExpr(Container*); + void term(); + void arithmExpr(); + void relation(); + void notLevel(); + void andLevel(); + void orLevel(); + void expression(Type*); + Type* getConstValue(Type* resultType, variant& result); + Type* getTypeValue(); + + // in compiler.cpp + Type* getTypeAndIdent(str* ident); + void definition(); + void classDef(); + void variable(); + void assertion(); + void dumpVar(); + void programExit(); + void otherStatement(); + void doDel(); + void doIns(); + void singleOrMultiBlock(); + void nestedBlock(); + void singleStatement(); + void statementList(); + void ifBlock(); + void caseValue(Type*); + void caseLabel(Type*); + void switchBlock(); + void whileBlock(); + void forBlockTail(StkVar*, memint outJumpOffs, memint incJumpOffs = -1); + void forBlock(); + void doContinue(); + void doBreak(); + void doReturn(); + void stateBody(State*); + + void compileModule(); + + Compiler(Context&, Module*, buffifo*); + ~Compiler(); +}; + + +#define LOCAL_ITERATOR_NAME "__iter" +#define LOCAL_INDEX_NAME "__idx" + + +#endif // __COMPILER_H diff --git a/src/main-ut.cpp b/src/main-ut.cpp new file mode 100644 index 0000000..7dead28 --- /dev/null +++ b/src/main-ut.cpp @@ -0,0 +1,750 @@ + +#include "common.h" +#include "runtime.h" +#include "parser.h" +#include "typesys.h" +#include "vm.h" +#include "compiler.h" + + +static void ut_fail(unsigned line, const char* e) +{ + fprintf(stderr, "%s:%u: test failed `%s'\n", __FILE__, line, e); + exit(200); +} + +#define fail(e) ut_fail(__LINE__, e) +#define check(e) { if (!(e)) fail(#e); } + +#define check_throw(a) \ + { bool chk_throw = false; try { a; } 
catch(exception&) { chk_throw = true; } check(chk_throw); } + + +#define XSTR(s) _STR(s) +#define _STR(s) #s + +#ifdef SHN_64 +# define INTEGER_MAX_STR "9223372036854775807" +# define INTEGER_MAX_STR_PLUS "9223372036854775808" +# define INTEGER_MIN_STR "-9223372036854775808" +#else +# define INTEGER_MAX_STR "2147483647" +# define INTEGER_MAX_STR_PLUS "2147483648" +# define INTEGER_MIN_STR "-2147483648" +#endif + + +static void test_common() +{ + int i = 1; + check(pincrement(&i) == 2); + check(pdecrement(&i) == 1); +} + + +struct testobj: public object +{ + testobj() { } +}; + + +static void test_object() +{ + { + object* b = (new testobj())->grab(); + check(b->isunique()); + object* c = b->grab(); + check(!b->isunique()); + c->release(); + check(b->isunique()); + b->release(); + b = (new testobj())->grab(); + b->release(); + } + { + objptr p3 = new testobj(); + objptr p4 = p3; + check(!p3.empty()); + check(!p4.empty()); + } +} + + +static void test_ordset() +{ + ordset s1; + check(s1.empty()); + s1.find_insert(129); + check(s1.find(129)); + check(!s1.find(1)); + check(!s1.empty()); + ordset s2 = s1; + check(s2.find(129)); + check(!s2.find(1)); + check(!s2.empty()); + s1.find_erase(129); + check(s1.empty()); + check(!s2.empty()); + ordset s3; + s3 = s1; +} + + +void test_bytevec() +{ + // TODO: check the number of reallocations + bytevec c1; + check(c1.begin() == NULL); + check(c1.empty()); + check(c1._isunique()); + check(c1.size() == 0); + check(c1.capacity() == 0); + + bytevec c2("ABC", 3); + check(!c2.empty()); + check(c2.size() == 3); + check(c2.capacity() == 3); + + check(c1._isunique()); + check(c2._isunique()); + c1 = c2; + check(!c2._isunique()); + check(!c1._isunique()); + c2.clear(); + check(c1._isunique()); + check(c2._isunique()); + check(c2.empty()); + check(!c1.empty()); + check(c1.size() == 3); + c1 = c1; + + c2 = c1; + check(!c2._isunique()); + check(!c1._isunique()); + *c2.atw(0) = 'a'; + check(c2._isunique()); + check(c1._isunique()); + 
check(c2.data()[0] == 'a'); + check(c1.data()[0] == 'A'); + *c2.atw(0) = 'A'; + check(c2.data()[0] == 'A'); + + bytevec c2a("", 0); + check(c2a.begin() == NULL); + check(c2a.empty()); + check(c2a.size() == 0); + check(c2a.capacity() == 0); + + bytevec c3("DEFG", 4); + c1.insert(3, c3); + check(c1._isunique()); + check(c1.size() == 7); + check(c1.capacity() > 7); + check(memcmp(c1.data(), "ABCDEFG", 7) == 0); + + bytevec c4 = c1; + check(!c1._isunique()); + c1.insert(3, "ab", 2); + check(c1._isunique()); + check(c1.size() == 9); + check(c1.capacity() == 9); + check(memcmp(c1.data(), "ABCabDEFG", 9) == 0); + c1.insert(0, "@", 1); + check(c1._isunique()); + check(c1.size() == 10); + check(memcmp(c1.data(), "@ABCabDEFG", 10) == 0); + c1.insert(10, "0123456789", 10); + check(c1.size() == 20); + check(memcmp(c1.data(), "@ABCabDEFG0123456789", 20) == 0); + + c2.append(c2); + check(memcmp(c2.data(), "ABCABC", 6) == 0); + check(c2.size() == 6); + c2.append("abcd", 4); + check(c2.size() == 10); + check(memcmp(c2.data(), "ABCABCabcd", 10) == 0); + c4 = c2; + check(!c2._isunique()); + c2.append(c3); + check(c2._isunique()); + check(c2.size() == 14); + check(memcmp(c2.data(), "ABCABCabcdDEFG", 14) == 0); + + c1.erase(4, 2); + check(memcmp(c1.data(), "@ABCDEFG0123456789", 18) == 0); + c4 = c1; + c1.erase(8, 5); + check(c1.size() == 13); + check(memcmp(c1.data(), "@ABCDEFG56789", 13) == 0); + c1.erase(8, 5); + check(c1.size() == 8); + check(memcmp(c1.data(), "@ABCDEFG", 8) == 0); + + c1.pop(2); + check(c1.size() == 6); + check(memcmp(c1.data(), "@ABCDE", 6) == 0); + c1.pop(2); + check(c1.size() == 4); + c1.pop(4); + check(c1.empty()); + + c1.append("@AB", 3); + check(c1.size() == 3); + check(memcmp(c1.data(), "@AB", 3) == 0); + c1.resize(6, '!'); + check(c1.size() == 6); + check(memcmp(c1.data(), "@AB!!!", 6) == 0); // compare all 6 bytes so the '!' fill written by resize() is actually verified + c1.resize(0); + check(c1.empty()); + check(c1.begin() == NULL); +} + + +void test_string() +{ + str s1; + check(s1.empty()); + check(s1.size() == 0); +
check(s1.c_str()[0] == 0); + check(s1.begin() == NULL); + str s2 = "Kuku"; + check(!s2.empty()); + check(s2.size() == 4); + check(s2.capacity() == 4); + check(s2 == "Kuku"); + str s3 = s1; + check(s3.empty()); + str s4 = s2; + check(s4 == s2); + check(s4 == "Kuku"); + check(!s4._isunique()); + check(!s2._isunique()); + str s5 = "!"; + check(s5.size() == 1); + check(s5.c_str()[0] == '!'); + check(s5.c_str()[1] == 0); + check(s5.size() == 1); + str s6 = ""; + check(s6.empty()); + check(s6.c_str()[0] == 0); + s6 = s5; + check(s6 == s5); + s5 = s6; + check(s6 == "!"); + s4 = "Mumu"; + check(s4 == "Mumu"); + check(*s4.data(2) == 'm'); + + str s7 = "ABC"; + s7 += "DEFG"; + check(s7.size() == 7); + check(s7 == "ABCDEFG"); + s7 += "HIJKL"; + check(s7.size() == 12); + check(s7 == "ABCDEFGHIJKL"); + check(s7.back() == 'L'); + s7 += s4; + check(s7.size() == 16); + check(s7 == "ABCDEFGHIJKLMumu"); + s1 += "Bubu"; + check(s1 == "Bubu"); + check(s1.size() == 4); + s1.append("Tutu", 4); + check(s1.size() == 8); + check(s1 == "BubuTutu"); + s1.erase(2, 4); + check(s1.size() == 4); + check(s1 == "Butu"); + check(s1.substr(1, 2) == "ut"); + check(s1.substr(1) == "utu"); + check(s1.substr(0) == "Butu"); + + check(s1.find('u') == 1); + check(s1.find('v') == str::npos); + check(s1.rfind('u') == 3); + check(s1.rfind('t') == 2); + check(s1.rfind('B') == 0); + check(s1.rfind('v') == str::npos); + + s1.clear(); + check(s1.begin() == NULL); + + str s8; + s8 += ""; + check(s8 == ""); + check(s8.empty()); + s8 += "ABC"; + check(s8 == "ABC"); + s8.clear(); + s8.clear(); + s8.insert(0, "DEF"); + check(s8 == "DEF"); +} + + +static void test_strutils() +{ + // string conversion + check(to_string(integer(0)) == "0"); + check(to_string(integer(-1)) == "-1"); + check(to_string(INTEGER_MAX) == INTEGER_MAX_STR); + check(to_string(INTEGER_MIN) == INTEGER_MIN_STR); + check(to_string(1, 10, 4, '0') == "0001"); + check(to_string(integer(123456789)) == "123456789"); + check(to_string(-123, 10, 7, '0') == 
"-000123"); + check(to_string(0xabcde, 16, 6) == "0ABCDE"); + + bool e = true, o = true; + check(from_string("0", &e, &o) == 0); + check(!o); check(!e); + check(from_string(INTEGER_MAX_STR, &e, &o) == INTEGER_MAX); + check(from_string(INTEGER_MAX_STR_PLUS, &e, &o) == uinteger(INTEGER_MAX) + 1); + check(from_string("92233720368547758070", &e, &o) == 0); check(o); + check(from_string("-1", &e, &o) == 0 ); check(e); + check(from_string("89abcdef", &e, &o, 16) == 0x89abcdef); + check(from_string("afg", &e, &o, 16) == 0); check(e); + + check(remove_filename_path("/usr/bin/true") == "true"); + check(remove_filename_path("usr/bin/true") == "true"); + check(remove_filename_path("/true") == "true"); + check(remove_filename_path("true") == "true"); + check(remove_filename_path("c:\\Windows\\false") == "false"); + check(remove_filename_path("\\Windows\\false") == "false"); + check(remove_filename_path("Windows\\false") == "false"); + check(remove_filename_path("\\false") == "false"); + check(remove_filename_path("false") == "false"); + + check(remove_filename_ext("/usr/bin/true.exe") == "/usr/bin/true"); + check(remove_filename_ext("true.exe") == "true"); + check(remove_filename_ext("true") == "true"); + + check(to_printable('a') == "a"); + check(to_printable('\\') == "\\\\"); + check(to_printable('\'') == "\\'"); + check(to_printable('\x00') == "\\x00"); + check(to_printable('\x7f') == "\\x7F"); + check(to_printable("abc \x01'\\") == "abc \\x01\\'\\\\"); +} + + +void test_podvec() +{ + podvec v1; + check(v1.empty()); + podvec v2 = v1; + check(v1.empty() && v2.empty()); + v1.push_back(10); + v1.push_back(20); + v1.push_back(30); + v1.push_back(40); + check(v1.size() == 4); + check(v2.empty()); + check(v1[0] == 10); + check(v1[1] == 20); + check(v1[2] == 30); + check(v1[3] == 40); + v2 = v1; + check(!v1._isunique() && !v2._isunique()); + check(v2.size() == 4); + v1.erase(2); + check(v1.size() == 3); + check(v2.size() == 4); + check(v1[0] == 10); + check(v1[1] == 20); + 
check(v1[2] == 40); + v1.erase(2); + check(v1.size() == 2); + v1.insert(0, 50); + check(v1.size() == 3); + check(v1[0] == 50); + check(v1[1] == 10); + check(v1[2] == 20); + v2.clear(); + check(v2.empty()); + check(!v1.empty()); + check(v1.back() == 20); + int t; + v1.pop_back(t); + check(t == 20); +} + + +static void test_vector() +{ + vector v1; + v1.push_back("ABC"); + check(v1[0] == "ABC"); + vector v2 = v1; + check(v2[0] == "ABC"); + v1.push_back("DEF"); + v1.push_back("GHI"); + v1.push_back("JKL"); + vector v3 = v1; + check(v1.size() == 4); + check(v2.size() == 1); + check(v3.size() == 4); + str s1 = "ABC"; + check(v1[0] == s1); + check(v1[1] == "DEF"); + check(v1[2] == "GHI"); + check(v1[3] == "JKL"); + v1.erase(2); + check(v1[0] == "ABC"); + check(v1[1] == "DEF"); + check(v1[2] == "JKL"); + check(v1.back() == "JKL"); + v3 = v1; + v1.replace(2, "MNO"); + check(v1[2] == "MNO"); + check(v3[2] == "JKL"); +} + + +static void test_dict() +{ + dict d1; + d1.find_replace("three", 3); + d1.find_replace("one", 1); + d1.find_replace("two", 2); + check(d1.size() == 3); + check(d1.at(0).key == "one"); + check(d1.at(1).key == "three"); + check(d1.key(2) == "two"); + check(d1.at(0).value == 1); + check(d1.value(1) == 3); + check(d1.value(2) == 2); + dict d2 = d1; + d1.find_erase("three"); + check(d1.size() == 2); + check(d1.key(0) == "one"); + check(d1.key(1) == "two"); + check(*d1.find("one") == 1); + check(d1.find_key("one")); + check(d1.find("three") == NULL); + check(!d1.find_key("three")); + dict d3; + d3 = d2; + check(d2 == d3); + d3.replace(0, 0); + check(d2 != d3); + check(d2.size() == 3); +} + + +static void test_set() +{ + vector s1; + check(s1.find_insert("GHI")); + check(s1.find_insert("ABC")); + check(s1.find_insert("DEF")); + check(!s1.find_insert("ABC")); + check(s1.size() == 3); + check(s1[0] == "ABC"); + check(s1[1] == "DEF"); + check(s1[2] == "GHI"); + s1.find_erase("DEF"); + check(s1.size() == 2); + check(s1[0] == "ABC"); + check(s1[1] == "GHI"); + 
// Unit test for symtbl_impl: a single symbol inserted at position 0 must be
// reachable through operator[], at() and back().
static void test_symtbl()
{
    symtbl_impl s1;
    // p1 holds the symbol; the table is given the raw pointer via get().
    objptr p1 = new symbol("abc");
    s1.insert(0, p1.get());
    check(s1[0] == p1.get());
    check(s1.at(0) == p1.get());
    check(s1.back() == p1.get());
}
check(fc.preview() == '0'); + check(fc.get() == '0'); + variant v; + fc.var_deq(v); + check(v.as_uchar() == '1'); + v.clear(); + fc.var_preview(v); + check(v.as_uchar() == '2'); + check(fc.deq(16) == "23456789abcdefgh"); + check(fc.deq(memfifo::CHAR_ALL) == "ijklmnopqrstuvwxyz./"); + check(fc.empty()); + + fc.enq("0123456789"); + fc.enq("abcdefghijklmnopqrstuvwxyz"); + check(fc.get() == '0'); + while (!fc.empty()) + fc.deq(fifo::CHAR_SOME); + + fc.enq("0123456789abcdefghijklmnopqrstuvwxyz"); + check(fc.deq("0-9") == "0123456789"); + check(fc.deq("a-z") == "abcdefghijklmnopqrstuvwxyz"); + check(fc.empty()); +} + + +static void test_fifos() +{ +#ifdef DEBUG + memfifo::CHUNK_SIZE = 2 * sizeof(variant); +#endif + + memfifo f(NULL, false); + varvec t; + t.push_back(0); + f.var_enq(t); + f.var_enq("abc"); + f.var_enq("def"); + variant w = varset(); + f.var_enq(w); + // f.dump(std::cout); std::cout << std::endl; + variant x; + f.var_deq(x); + check(x.is(variant::VEC)); + f.var_deq(w); + check(w.is(variant::STR)); + f.var_eat(); + variant vr; + f.var_preview(vr); + check(vr.is(variant::SET)); + + memfifo fc(NULL, true); + test_bidir_char_fifo(fc); + + strfifo fs(NULL); + test_bidir_char_fifo(fs); +} + + +static void test_parser() +{ + { + Parser p(new strfifo(NULL, + INTEGER_MAX_STR"\n "INTEGER_MAX_STR_PLUS"\n if\n aaa" + " 'asd\n'[\\t\\r\\n\\x41\\\\]' '\\xz'")); + check(p.next() == tokIntValue); + check(p.intValue == INTEGER_MAX); + check(p.getLineNum() == 1); + check(p.next() == tokSep); + check(p.getLineNum() == 1); + check(p.next() == tokIntValue); + check(p.getLineNum() == 2); + check(p.next() == tokSep); + check(p.getLineNum() == 2); + check(p.next() == tokIf); + check(p.getLineNum() == 3); + check(p.next() == tokSep); + check(p.getLineNum() == 3); + check(p.next() == tokIdent); + check_throw(p.next()); // unexpected end of line + check(p.next() == tokStrValue); + check(p.strValue == "[\t\r\nA\\]"); + check_throw(p.next()); // bad hex sequence + } + { +#ifdef XCODE + 
const char* filePath = "../../src/tests/stmtest.txt"; +#else + const char* filePath = "tests/stmtest.txt"; +#endif +#ifdef DEBUG + intext::BUF_SIZE = 16; +#endif + intext f(NULL, filePath); + f.deq(11); + InputRecorder rec; + f.set_bufevent(&rec); + f.deq(32); + f.set_bufevent(NULL); + check(rec.data == "careful with terms like readable"); // Yep! + } +} + + +void test_typesys() +{ +/* + { + State state(Type::MODULE, NULL, NULL); + objptr d1 = new Definition("abc", NULL, 0); + check(d1->name == "abc"); + state.addDefinition("def", NULL, 1, &state); + state.addDefinition("ghi", NULL, 2, &state); + Symbol* s = state.find("def"); + check(s != NULL); + check(s->isDef()); + check(s->name == "def"); + state.findShallow("def"); + } +*/ + strfifo s(NULL); + queenBee->defBool->dump(s); + check(s.all() == "(enum false, true)"); + check(defTypeRef->isTypeRef()); + check(defTypeRef->getType() == defTypeRef); + check(defVoid->isVoid()); + check(defVoid->getType() == defTypeRef); + check(queenBee->defInt->isInt()); + check(queenBee->defInt->getType() == defTypeRef); + check(queenBee->defInt->isAnyOrd()); + check(queenBee->defBool->isBool()); + check(queenBee->defBool->getType() == defTypeRef); + check(queenBee->defBool->isEnum()); + check(queenBee->defBool->isAnyOrd()); + check(queenBee->defBool->left == 0 && queenBee->defBool->right == 1); + check(queenBee->defChar->isChar()); + check(queenBee->defChar->getType() == defTypeRef); + check(queenBee->defChar->isAnyOrd()); + check(queenBee->defStr->hasByteElem()); + check(queenBee->defStr->getType() == defTypeRef); + check(queenBee->defStr->isAnyVec()); + + Symbol* b = queenBee->find("true"); + check(b != NULL && b->isDef()); + check(PDefinition(b)->value.as_ord() == 1); + check(PDefinition(b)->type->isBool()); +} + + +int main() +{ + sio << "short: " << sizeof(short) << " long: " << sizeof(long) + << " long long: " << sizeof(long long) << " int: " << sizeof(int) + << " void*: " << sizeof(void*) << " float: " << sizeof(float) + << " 
double: " << sizeof(double) << '\n'; + sio << "integer: " << sizeof(integer) << " memint: " << sizeof(memint) + << " real: " << sizeof(real) << " variant: " << sizeof(variant) + << " object: " << sizeof(object) << " rtobject: " << sizeof(rtobject) << '\n'; + sio << "stateobj: " << sizeof(stateobj) << " Type: " << sizeof(Type) + << " State: " << sizeof(State) << " opcodes: " << opMaxCode << '\n'; + + check(sizeof(memint) == sizeof(void*)); + check(sizeof(memint) == sizeof(size_t)); + +#ifdef SHN_64 + check(sizeof(integer) == 8); + check(sizeof(variant) <= 16); +#else + check(sizeof(integer) == 4); + check(sizeof(variant) <= 12); +#endif + + initRuntime(); + initTypeSys(); + initVm(); + + int exitcode = 0; + try + { + test_common(); + test_object(); + test_ordset(); + test_bytevec(); + test_string(); + test_strutils(); + test_podvec(); + test_vector(); + test_dict(); + test_set(); + test_symtbl(); + test_variant(); + test_fifos(); + test_parser(); +// test_typesys(); +// test_codegen(); + } + catch (exception& e) + { + fprintf(stderr, "Exception: %s\n", e.what()); + exitcode = 201; + } + + doneVm(); + doneTypeSys(); + doneRuntime(); + + if (object::allocated != 0) + { + fprintf(stderr, "Error: object::allocated = %d\n", object::allocated); + exitcode = 202; + } + + return exitcode; +} + diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..11e1ffc --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,87 @@ + +#include "common.h" +#include "runtime.h" +#include "parser.h" +#include "typesys.h" +#include "vm.h" +#include "compiler.h" + + +// ------------------------------------------------------------------------- // + + +#ifdef XCODE + const char* filePath = "../../src/tests/test.shn"; +#else + const char* filePath = "tests/test.shn"; +#endif + + +int main() +{ + sio << "Shannon " << SHANNON_VERSION_MAJOR << '.' << SHANNON_VERSION_MINOR << '.' 
<< SHANNON_VERSION_FIX + << " (int" << sizeof(integer) * 8 << ')' + << ' ' << SHANNON_COPYRIGHT << endl << endl; + + int exitcode = 0; + + initRuntime(); + initTypeSys(); + initVm(); + + { + Context context; + + try + { + // context.options.setDebugOpts(false); + // context.options.compileOnly = true; + context.loadModule(filePath); + } + catch (exception& e) + { + serr << "Error: " << e.what() << endl; + exitcode = 201; + } + + if (exitcode == 0) + { + try + { + variant result = context.execute(); + if (result.is_null()) + exitcode = 0; + else if (result.is(variant::ORD)) + exitcode = int(result._int()); + else if (result.is(variant::STR)) + { + serr << result._str() << endl; + exitcode = 102; + } + else + exitcode = 103; + } + catch (exception& e) + { + serr << "Runtime error: " << e.what() << endl; + exitcode = 104; + } + } + } + + doneVm(); + doneTypeSys(); + doneRuntime(); + +#ifdef DEBUG + // TODO: make this a compiler option + if (object::allocated != 0) + { + fprintf(stderr, "object::allocated: %d\n", object::allocated); + _fatal(0xff01); + } +#endif + + return exitcode; +} + diff --git a/src/opcodes.sh b/src/opcodes.sh new file mode 100644 index 0000000..7bc2fa0 --- /dev/null +++ b/src/opcodes.sh @@ -0,0 +1,8 @@ + +# Make sure declaration of opcdes is in sync with the implementation in vm.cpp; +# display diffferences if any + +grep '^ *op[A-Za-z0-9]*,' vm.h|sed 's/^ *//;s/,.*$//' > OPS.decl +grep '^ *case *op[A-Za-z0-9]*:' vm.cpp|sed 's/^ *case *//;s/:.*$//' > OPS.impl +diff OPS.decl OPS.impl +rm OPS.impl OPS.decl diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..0ae6dcf --- /dev/null +++ b/src/parser.cpp @@ -0,0 +1,520 @@ + +#include "parser.h" + + +static class Keywords +{ + struct kwinfo { const char* kw; Token token; }; + static kwinfo keywords[]; + + int count; + + bool bsearch(const char* key, int& idx) + { + idx = 0; + int low = 0; + int high = count - 1; + while (low <= high) + { + idx = (low + high) / 2; + int comp = 
strcmp(keywords[idx].kw, key); + if (comp < 0) + low = idx + 1; + else if (comp > 0) + high = idx - 1; + else + return true; + } + idx = low; + return false; + } + +public: + Keywords() + { + for (kwinfo* k = keywords; k->kw != NULL; k++) + { +#ifdef DEBUG + if (count > 0) + if (strcmp(k->kw, (k - 1)->kw) <= 0) + fatal(0x4001, "Keyword verification failed"); +#endif + count++; + } + } + + Token find(const char* s) + { + int index; + if (bsearch(s, index)) + return keywords[index].token; + else + return tokUndefined; + } + +} keywords; + + +Keywords::kwinfo Keywords::keywords[] = + { + // NOTE: this list must be kept in sorted order + {"and", tokAnd}, + {"as", tokAs}, + {"assert", tokAssert}, + {"begin", tokBegin}, + {"break", tokBreak}, + {"case", tokCase}, + {"class", tokClass}, + {"const", tokConst}, + {"continue", tokContinue}, + {"def", tokDef}, + {"default", tokDefault}, + {"del", tokDel}, + {"dump", tokDump}, + {"elif", tokElif}, + {"else", tokElse}, + {"exit", tokExit}, + {"for", tokFor}, + {"if", tokIf}, + {"in", tokIn}, + {"ins", tokIns}, + {"is", tokIs}, + {"not", tokNot}, + {"or", tokOr}, + {"return", tokReturn}, + {"shl", tokShl}, + {"shr", tokShr}, + {"switch", tokSwitch}, + {"this", tokThis}, + {"typeof", tokTypeOf}, + {"var", tokVar}, + {"while", tokWhile}, + {"xor", tokXor}, + {NULL, tokUndefined} + }; + + +InputRecorder::InputRecorder() + : buf(NULL), offs(0), prevpos(0) { } + +InputRecorder::~InputRecorder() throw() + { } + +void InputRecorder::event(char* newbuf, memint newtail, memint) +{ + if (newbuf == buf && newtail > offs) + { + data.append(buf + offs, newtail - offs); + offs = newtail; + } + else { + buf = newbuf; + offs = newtail; + } +} + + +void InputRecorder::clear() +{ + buf = NULL; + offs = 0; + prevpos = 0; + data.clear(); +} + + +Parser::Parser(buffifo* inp) + : input(inp), linenum(1), + prevIdent(), saveToken(tokUndefined), + token(tokUndefined), strValue(), intValue(0) { } + + +Parser::~Parser() + { } + + +void Parser::error(const 
// Reads the remainder of a single-quoted string literal into strValue.
//
// The opening quote has already been consumed by the caller (next() reads the
// first character of every special token before dispatching here). On return
// the closing quote has been consumed and strValue holds the decoded text.
//
// Recognised escapes: \t, \r, \n and \x followed by one or two hex digits.
// Any other character after a backslash is taken literally, which is how
// \\ and \' are expressed.
//
// Reports an error (via error(), which throws) on end of file or end of line
// inside the literal, on \x without a hex digit, and on a character outside
// the printable set.
void Parser::parseStringLiteral()
{
    // Characters that can be copied verbatim: printable, except the quote and
    // the backslash, which need individual handling below.
    static const charset stringChars = printableChars - charset("'\\");
    static const charset hexDigits = "0-9A-Fa-f";
    strValue.clear();
    while (true)
    {
        // Bulk-copy the run of ordinary characters, then examine whatever
        // stopped the run.
        strValue += input->token(stringChars);
        if (input->eof())
            error("Unexpected end of file in string literal");
        char c = input->get();
        if (is_eol_char(c))
            error("Unexpected end of line in string literal");
        if (c == '\'')
            return;
        else if (c == '\\')
        {
            switch (c = input->get())
            {
            case 't': strValue += '\t'; break;
            case 'r': strValue += '\r'; break;
            case 'n': strValue += '\n'; break;
            case 'x':
                {
                    // One mandatory hex digit, optionally followed by a second.
                    str s;
                    if (hexDigits[input->preview()])
                    {
                        s += input->get();
                        if (hexDigits[input->preview()])
                            s += input->get();
                        // The error/overflow flags are not inspected: s is
                        // known to contain only one or two hex digits.
                        bool e, o;
                        ularge value = from_string(s.c_str(), &e, &o, 16);
                        strValue += char(value);
                    }
                    else
                        error("Bad hex sequence");
                }
                break;
            default: strValue += c; break;
            }
        }
        else
            error("Illegal character in string literal " + to_printable(c));
    }
}
// Scans the next token from the input, stores it in `token` and returns it.
//
// Side effects:
//   - strValue receives the token text (identifier/keyword/number spelling,
//     the decoded contents of a string literal, or the first character of a
//     special token); it is set to "" for end of file and for separators.
//   - intValue receives the value of an integer literal, 0 otherwise.
//   - linenum is advanced by skipEol() for every line break consumed.
//   - while recording is active, recorder.prevpos is set to the input
//     position before this token is scanned.
//
// Blank lines, comments and backslash line continuations produce no token of
// their own; scanning restarts after them. Any error is reported through
// error(), which throws.
//
// Must not be called while an identifier is "undone" (token == tokPrevIdent).
Token Parser::next()
{
    assert(token != tokPrevIdent);

    if (recorder.active())
        recorder.prevpos = input->tellg();

restart:
    strValue.clear();
    intValue = 0;

    skipWs();
    int c = input->preview();

    // --- EOF ---
    if (c == -1)
    {
        strValue = "";
        return token = tokEof;
    }

    // --- EOL ---
    else if (is_eol_char(c))
    {
        skipEol();
        skipWs();
        // A run of empty lines yields a single separator: if the following
        // line is empty as well, start over and let it produce the token.
        if (input->eol())
            goto restart;
        strValue = "";
        return token = tokSep;
    }

    // --- Identifier or keyword ---
    if (identFirst[c])
    {
        strValue = input->get();
        strValue += input->token(identRest);
        Token tok = keywords.find(strValue.c_str());
        if (tok != tokUndefined)
            return token = tok;
        else
            return token = tokIdent;
    }

    // --- Number ---
    else if (digits[c])
    {
        bool e, o;
        // Take the whole alphanumeric run so that malformed literals such as
        // 12ab are rejected as one unit rather than split into two tokens.
        strValue = input->token(identRest);
        str s = strValue;
        bool isHex = s.size() > 2 && s[0] == '0' && s[1] == 'x';
        if (isHex)
            s.erase(0, 2);
        ularge v = from_string(s.c_str(), &e, &o, isHex ? 16 : 10);
        if (e)
            error("'" + strValue + "' is not a valid number");
        // The largest accepted magnitude is INTEGER_MAX + 1, one more than
        // INTEGER_MAX itself.
        if (o || (v > ularge(INTEGER_MAX) + 1))
            error("Numeric overflow (" + strValue + ")");
        intValue = uinteger(v);
        return token = tokIntValue;
    }

    // --- Special chars and sequences ---
    else
    {
        strValue = input->get();
        switch (c)
        {
        case '\\':
            // Line continuation: only whitespace may follow the backslash.
            input->skip(wsChars);
            if (!input->eol())
                error("New line expected after '\\'");
            skipEol();
            goto restart;
        case ',': return token = tokComma;
        case '.':
            // ".", ".." and "..." are three distinct tokens.
            if (input->get_if('.'))
            {
                if (input->get_if('.'))
                    return token = tokEllipsis;
                return token = tokRange;
            }
            return token = tokPeriod;
        case '\'': parseStringLiteral(); return token = tokStrValue;
        case ';': return token = tokSemi;
        case ':': return token = tokColon;
        case '+': return token = (input->get_if('=') ? tokAddAssign : tokPlus);
        case '-': return token = (input->get_if('=') ? tokSubAssign : tokMinus);
        case '*': return token = (input->get_if('=') ? tokMulAssign : tokMul);
        case '/':
            // "//" and "/*" start comments; otherwise "/=" or "/".
            if (input->get_if('/'))
            {
                skipSinglelineComment();
                goto restart;
            }
            else if (input->get_if('*'))
            {
                skipMultilineComment();
                goto restart;
            }
            return token = (input->get_if('=') ? tokDivAssign : tokDiv);
        case '%': return token = (input->get_if('=') ? tokModAssign : tokMod);
        case '[': return token = tokLSquare;
        case ']': return token = tokRSquare;
        case '(': return token = tokLParen;
        case ')': return token = tokRParen;
        case '{': return token = tokLCurly;
        case '}': return token = tokRCurly;
        case '<':
            if (input->get_if('='))
                return token = tokLessEq;
            else if (input->get_if('<'))
                return token = tokPush;
            // else if (input->get_if('>'))
            //     return token = tokNotEq;
            else
                return token = tokLAngle;
        case '>':
            if (input->get_if('='))
                return token = tokGreaterEq;
            else if (input->get_if('>'))
                return token = tokPull;
            else
                return token = tokRAngle;
        case '=': return token = (input->get_if('=') ? tokEqual : tokAssign);
        case '|': return token = (input->get_if('=') ? tokCatAssign : tokCat);
        case '^': return token = tokCaret;
        case '@': return token = tokAt;
        // case '#': return token = tokHash;
        case '?': return token = tokQuestion;
        case '!': return token = (input->get_if('=') ? tokNotEq : tokExclam);
        }
    }

    // Reached only when the switch above had no case for the character.
    error("Illegal character " + to_printable(c));

    // Not reached: error() throws. Present to give every path a return value.
    return tokUndefined;
}
always ahead by one token, we need to trim it + recorder.data.pop(input->tellg() - recorder.prevpos); + str result = recorder.data; + recorder.clear(); + return result; +} + + +bool isValidIdent(const str& s) +{ + if (s.empty()) + return false; + if (!identFirst[s[0]]) + return false; + for (memint i = 1; i < s.size(); i++) + if (!identRest[s[i]]) + return false; + return true; +} + diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..2315a09 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,134 @@ +#ifndef __PARSER_H +#define __PARSER_H + +#include "common.h" +#include "runtime.h" + + +enum Token +{ + tokUndefined = -1, + tokEof, tokSep, tokSemi, + tokIdent, tokPrevIdent, tokIntValue, tokStrValue, + + tokConst, tokDef, tokVar, tokClass, + tokDump, tokAssert, tokBegin, tokIf, tokElif, tokElse, tokDefault, + tokWhile, tokBreak, tokContinue, tokSwitch, tokCase, tokReturn, tokExit, + tokFor, + tokTypeOf, tokDel, tokIns, tokThis, + + // Term level + tokMul, tokDiv, tokMod, + // Arithm level + tokPlus, tokMinus, + // Cat level (simple expr) + tokCat, + // Rel level: the order should be in sync with comparison opcodes, except tokIn + tokEqual, tokNotEq, tokLessThan, tokLessEq, tokGreaterThan, tokGreaterEq, + tokIn, + // NOT level + tokNot, + // AND level + tokAnd, tokShl, tokShr, + // OR level + tokOr, tokXor, + + // Special chars and sequences + tokComma, tokPeriod, tokRange, tokEllipsis, tokCaret, tokAt, tokQuestion, tokExclam, + tokLSquare, tokRSquare, tokLParen, tokRParen, tokLCurly, tokRCurly, tokColon, + tokIs, tokAs, + + tokAssign, + // In-place operators, order is important, in sync with opAddAssign etc + tokAddAssign, tokSubAssign, tokMulAssign, tokDivAssign, tokModAssign, + // Other operators + tokCatAssign, tokPush, tokPull, + + // "Soft" keywords + + // Aliases; don't define new consts after this + tokLAngle = tokLessThan, tokRAngle = tokGreaterThan, + tokWildcard = tokMul, +}; + + +class InputRecorder: public bufevent +{ + friend class 
// Tokenizer with one token of lookahead.
//
// The current token is exposed through the public `token`, `strValue` and
// `intValue` members; next() advances to the following one. The remaining
// public methods are small helpers built on top of that for the compiler.
class Parser: noncopyable
{
protected:
    objptr input;               // source of characters
    integer linenum;            // 1-based; advanced by skipEol()

    str prevIdent;              // identifier stored by undoIdent()
    Token saveToken;            // token that was current when undoIdent() was called

    InputRecorder recorder;     // raw input recorder, for assert and dump

    str errorLocation() const;
    void parseStringLiteral();
    void skipMultilineComment();
    void skipSinglelineComment();
    void skipWs();              // skips blanks and tabs only, not line breaks
    void skipEol();             // consumes one line break and increments linenum

public:
    Token token;                // current token
    str strValue;               // text associated with the current token
    uinteger intValue;          // value of the current token if it is tokIntValue

    Parser(buffifo*);
    ~Parser();

    // Scans the next token, makes it current and returns it.
    Token next();
    bool eof() const
        { return token == tokEof; }
    // undoIdent() stores `ident` and the current token and makes tokPrevIdent
    // current; redoIdent() restores the stored token and drops the identifier.
    void undoIdent(const str& ident);
    void redoIdent();
    const str& getPrevIdent()
        { return prevIdent; }
    // Both overloads throw emessage with the given text.
    void error(const str& msg);
    void error(const char*);
    // End of statement: a separator, a semicolon, end of file or '}'.
    bool isEos();
    // Consumes a separator or semicolon; end of file and '}' are accepted but
    // left in place; anything else is an error.
    void skipEos();
    // Advances until the current token is an end of statement (see isEos()).
    void skipToEos();
    // Skips any number of consecutive separator tokens.
    void skipWsSeps()
        { while (skipIf(tokSep)) ; }
    // Requires the current token to be `tok` and advances past it; otherwise
    // reports "<errName> expected".
    void expect(Token tok, const char* errName);
    void skipLParen();
    void skipRParen();
    // Advances and returns true if the current token is `tok`.
    bool skipIf(Token tok)
        { if (token == tok) { next(); return true; } return false; }
    // Same, for any token within the inclusive enum range [tokMin, tokMax].
    bool skipIf(Token tokMin, Token tokMax)
        { if (token >= tokMin && token <= tokMax) { next(); return true; } return false; }
    // Skips optional separators, a mandatory '{' (errmsg names what was
    // expected) and optional separators again.
    void skipMultiBlockBegin(const char* errmsg);
    // Skips optional separators and a mandatory '}'.
    void skipMultiBlockEnd();
    bool isBlockEnd()
        { return token == tokRCurly; }
    // Requires the current token to be an identifier, returns its text and
    // advances.
    str getIdentifier();

    str getFileName() const { return input->get_name(); }
    // Line of the current token; a separator is attributed to the line it
    // ends (linenum - 1), since linenum has already been advanced past it.
    integer getLineNum() const;
    // beginRecording() attaches the recorder to the input; endRecording()
    // detaches it and returns the recorded text with the already-scanned
    // lookahead trimmed off the end.
    void beginRecording();
    str endRecording();
};
non_eol_chars = ~charset("\r\n"); + + +fifo::fifo(Type* rt, bool is_char) throw() + : rtobject(rt), max_token(0), _is_char_fifo(is_char) { } + +fifo::~fifo() throw() + { } + +void fifo::dump(fifo& stm) const { stm << "fifo:" << get_name(); } +void fifo::_empty_err() { throw efifo("FIFO empty"); } +void fifo::_full_err() { throw efifo("FIFO full"); } +void fifo::_wronly_err() { throw efifo("FIFO is write-only"); } +void fifo::_rdonly_err() { throw efifo("FIFO is read-only"); } +void fifo::_fifo_type_err() { fatal(0x2001, "FIFO type mismatch"); } +void fifo::_token_err() { throw efifo("Token too long"); } +const char* fifo::get_tail() { _wronly_err(); return NULL; } +const char* fifo::get_tail(memint*) { _wronly_err(); return NULL; } +void fifo::deq_bytes(memint) { _wronly_err(); } +variant* fifo::enq_var() { _rdonly_err(); return NULL; } +void fifo::enq_char(char) { _rdonly_err(); } +memint fifo::enq_chars(const char*, memint) { _rdonly_err(); return 0; } +bool fifo::empty() const { _rdonly_err(); return true; } +void fifo::flush() { } + + +void fifo::_req_non_empty() const +{ + if (empty()) + _empty_err(); +} + + +void fifo::_req_non_empty(bool ch) const +{ + _req(ch); + if (empty()) + _empty_err(); +} + + +int fifo::preview() +{ + _req(true); + const char* p = get_tail(); + if (p == NULL) + return -1; + return *p; +} + + +uchar fifo::get() +{ + int c = preview(); + if (c == -1) + _empty_err(); + deq_bytes(1); + return c; +} + + +bool fifo::get_if(char c) +{ + int d = preview(); + if (d != -1 && d == c) + { + deq_bytes(1); + return true; + } + return false; +} + + +bool fifo::eol() +{ + _req(true); + const char* p = get_tail(); + if (p == NULL) + return true; + return *p == '\r' || *p == '\n'; +} + + +void fifo::skip_eol() +{ + // Support all 3 models: DOS, UNIX and MacOS + int c = preview(); + if (c == '\r') + { + get(); + c = preview(); + } + if (c == '\n') + get(); +} + + +void fifo::deq_var(variant* v) +{ + _req_non_empty(false); + *(podvar*)v = 
*(podvar*)get_tail(); + deq_bytes(_varsize); +} + + +#ifdef DEBUG +// Used only in unit tests +void fifo::var_eat() +{ + if (is_char_fifo()) + get(); + else + { + _req_non_empty(); + ((variant*)get_tail())->~variant(); + deq_bytes(_varsize); + } +} + + +void fifo::var_preview(variant& v) +{ + if (empty()) + v.clear(); + else if (is_char_fifo()) + v = *get_tail(); + else + v = *(variant*)get_tail(); +} + + +void fifo::var_deq(variant& v) +{ + if (is_char_fifo()) + v = get(); + else + { + v.clear(); + deq_var(&v); + } +} + + +void fifo::var_enq(const variant& v) +{ + if (is_char_fifo()) + { + if (v.is(variant::STR)) + enq(v._str()); + else if (v.is(variant::ORD)) + enq(v._uchar()); + else + variant::_type_err(); + } + else + ::new(enq_var()) variant(v); +} +#endif + + +str fifo::deq(memint count) +{ + _req_non_empty(true); + str result; + while (count > 0) + { + memint avail; + const char* p = get_tail(&avail); + if (p == NULL) + break; + if (count < avail) + avail = count; + result.append(p, avail); + deq_bytes(avail); + if (count == CHAR_SOME) + break; + count -= avail; + } + return result; +} + + +void fifo::_token(const charset& chars, str* result) +{ + _req(true); + memint total = 0; + while (1) + { + memint avail; + const char* b = get_tail(&avail); + if (b == NULL) + break; + const char* p = b; + const char* e = b + avail; + while (p < e && chars[*p]) + p++; + memint count = p - b; + if (count == 0) + break; + if (max_token > 0) + { + total += count; + if (total > max_token) + _token_err(); + } + if (result != NULL) + result->append(b, count); + deq_bytes(count); + if (count < avail) + break; + } +} + + +str fifo::line() +{ + str result; + _token(non_eol_chars, &result); + skip_eol(); + return result; +} + + +void fifo::enq(const char* s) { if (s != NULL) enq(s, strlen(s)); } +void fifo::enq(const str& s) { enq_chars(s.data(), s.size()); } +void fifo::enq(large i) { enq(to_string(i)); } + + +void fifo::enq(const varvec& v) +{ + _req(false); + for (memint i = 
0; i < v.size() - 1; i++) + new(enq_var()) variant(v[i]); +} + + +// --- memfifo ------------------------------------------------------------- // + + +memfifo::memfifo(Type* rt, bool ch) throw() + : fifo(rt, ch), head(NULL), tail(NULL), head_offs(0), tail_offs(0) { } + + +memfifo::~memfifo() throw() { try { clear(); } catch(exception&) { } } +inline const char* memfifo::get_tail() { return tail ? (tail->data + tail_offs) : NULL; } +inline bool memfifo::empty() const { return tail == NULL; } +inline variant* memfifo::enq_var() { _req(false); return (variant*)enq_space(_varsize); } +str memfifo::get_name() const { return ""; } + + +void memfifo::clear() +{ + // TODO: also define fifos for POD variant types for faster destruction + if (is_char_fifo()) + { + while (tail != NULL) + { +#ifdef DEBUG + head_offs = tail_offs = CHUNK_SIZE; +#endif + deq_chunk(); + } + } + else + { + while (tail != NULL) + { + ((variant*)get_tail())->~variant(); + deq_bytes(_varsize); + } + } +} + + +void memfifo::deq_chunk() +{ + assert(tail != NULL && head != NULL); + chunk* c = tail; + tail = tail->next; + delete c; + if (tail == NULL) + { + assert(head_offs == tail_offs); + head = NULL; + head_offs = tail_offs = 0; + } + else + { + assert(tail_offs == CHUNK_SIZE); + tail_offs = 0; + } +} + + +void memfifo::enq_chunk() +{ + chunk* c = new chunk(); + if (head == NULL) + { + assert(tail == NULL && head_offs == 0); + head = tail = c; + } + else + { + assert(head_offs == CHUNK_SIZE); + head->next = c; + head = c; + head_offs = 0; + } +} + + +const char* memfifo::get_tail(memint* count) +{ + if (tail == NULL) + { + *count = 0; + return NULL; + } + if (tail == head) + *count = head_offs - tail_offs; + else + *count = CHUNK_SIZE - tail_offs; + assert(*count <= CHUNK_SIZE); + return tail->data + tail_offs; +} + + +void memfifo::deq_bytes(memint count) +{ + assert(tail != NULL && (tail_offs + count) <= ((tail == head) ? 
head_offs : CHUNK_SIZE)); + tail_offs += int(count); + if (tail_offs == ((tail == head) ? head_offs : CHUNK_SIZE)) + deq_chunk(); +} + + +memint memfifo::enq_avail() +{ + if (head == NULL || head_offs == CHUNK_SIZE) + return CHUNK_SIZE; + return CHUNK_SIZE - head_offs; +} + + +char* memfifo::enq_space(memint count) +{ + if (head == NULL || head_offs == CHUNK_SIZE) + enq_chunk(); + assert(count <= CHUNK_SIZE - head_offs); + char* result = head->data + head_offs; + head_offs += int(count); + return result; +} + + +void memfifo::enq_char(char c) +{ + _req(true); + *enq_space(1) = c; +} + + +memint memfifo::enq_chars(const char* p, memint count) +{ + _req(true); + memint save_count = count; + while (count > 0) + { + memint avail = enq_avail(); + if (count < avail) + avail = count; + memcpy(enq_space(avail), p, avail); + count -= avail; + p += avail; + } + return save_count; +} + + +// --- buffifo ------------------------------------------------------------- // + + +buffifo::buffifo(Type* rt, bool is_char) throw() + : fifo(rt, is_char), buffer(NULL), bufsize(0), bufhead(0), buftail(0), buforig(0), + event(NULL) { } + +buffifo::~buffifo() throw() { } +bool buffifo::empty() const { _wronly_err(); return true; } +void buffifo::flush() { _rdonly_err(); } + + +const char* buffifo::get_tail() +{ + assert(buftail <= bufhead && bufhead <= bufsize); + if (buftail == bufhead && empty()) + return NULL; + assert(bufhead > buftail); + return buffer + buftail; +} + + +const char* buffifo::get_tail(memint* count) +{ + assert(buftail <= bufhead && bufhead <= bufsize); + if (buftail == bufhead && empty()) + { + *count = 0; + return NULL; + } + *count = bufhead - buftail; + assert(*count >= (is_char_fifo() ? 
1 : _varsize)); + return buffer + buftail; +} + + +void buffifo::deq_bytes(memint count) +{ + assert(buftail <= bufhead && bufhead <= bufsize); + assert(count <= bufhead - buftail); + buftail += count; +} + + +variant* buffifo::enq_var() +{ + _req(false); + assert(buftail <= bufhead && bufhead <= bufsize); + if (bufhead + _varsize > bufsize) + flush(); + assert(bufhead + _varsize <= bufsize); + variant* result = (variant*)(buffer + bufhead); + bufhead += _varsize; + return result; +} + + +memint buffifo::enq_avail() +{ + assert(buftail <= bufhead && bufhead <= bufsize); + if (bufhead == bufsize) + flush(); + assert(bufhead < bufsize); + return bufsize - bufhead; +} + + +char* buffifo::enq_space(memint count) +{ + assert(buftail <= bufhead && bufhead <= bufsize); + assert(count <= bufsize - bufhead); + char* result = buffer + bufhead; + bufhead += count; + return result; +} + + +void buffifo::enq_char(char c) +{ + _req(true); + assert(buftail <= bufhead && bufhead <= bufsize); + if (bufhead + 1 > bufsize) + flush(); + assert(bufhead + 1 <= bufsize); + buffer[bufhead] = c; + bufhead++; +} + + +memint buffifo::enq_chars(const char* p, memint count) +{ + _req(true); + memint save_count = count; + while (count > 0) + { + memint avail = enq_avail(); + if (count < avail) + avail = count; + memcpy(enq_space(avail), p, avail); + count -= avail; + p += avail; + } + return save_count; +} + + +void buffifo::call_bufevent() const +{ + if (event) + event->event(buffer, buftail, bufhead); +} + + +bufevent* buffifo::set_bufevent(bufevent* e) +{ + bufevent* prev = event; + call_bufevent(); + event = e; + call_bufevent(); + return prev; +} + + +// --- strfifo ------------------------------------------------------------- // + + +strfifo::strfifo(Type* rt) throw() : buffifo(rt, true), string() {} +strfifo::~strfifo() throw() { } +str strfifo::get_name() const { return ""; } + + +strfifo::strfifo(Type* rt, const str& s) throw() + : buffifo(rt, true), string(s) +{ + buffer = 
(char*)s.data(); + bufhead = bufsize = s.size(); +} + + +void strfifo::clear() +{ + string.clear(); + buffer = NULL; + buftail = bufhead = bufsize = 0; +} + + +bool strfifo::empty() const +{ + if (buftail == bufhead) + { + call_bufevent(); + if (!string.empty()) + ((strfifo*)this)->clear(); + return true; + } + return false; +} + + +void strfifo::flush() +{ + assert(bufhead == bufsize); + string.resize(string.size() + memfifo::CHUNK_SIZE); + buffer = (char*)string.data(); + bufsize += memfifo::CHUNK_SIZE; +} + + +str strfifo::all() const +{ + if (string.empty() || buftail == bufhead) + return str(); + return string.substr(buftail, bufhead - buftail); +} + + +// --- intext -------------------------------------------------------------- // + + +// *BSD/Darwin hack +#ifndef O_LARGEFILE +# define O_LARGEFILE 0 +#endif + + +intext::intext(Type* rt, const str& fn) throw() + : buffifo(rt, true), file_name(fn), _fd(-1), _eof(false) { } + +intext::~intext() throw() { if (_fd > 2) ::close(_fd); } +void intext::error(int code) { _eof = true; throw esyserr(code, file_name); } +str intext::get_name() const { return file_name; } + + +void intext::doopen() +{ + _fd = ::open(file_name.c_str(), O_RDONLY | O_LARGEFILE); + if (_fd < 0) + error(errno); + bufsize = bufhead = buftail = 0; +} + + +void intext::doread() +{ + call_bufevent(); + filebuf.resize(intext::BUF_SIZE); + buffer = (char*)filebuf.data(); + memint result = ::read(_fd, buffer, intext::BUF_SIZE); + if (result < 0) + error(errno); + buforig += bufhead; + buftail = 0; + bufsize = bufhead = result; + _eof = result == 0; + call_bufevent(); +} + + +bool intext::empty() const +{ + if (_eof) + return true; + if (_fd < 0) + ((intext*)this)->doopen(); + if (buftail == bufhead) + ((intext*)this)->doread(); + return _eof; +} + + +// --- outtext -------------------------------------------------------------- // + + +outtext::outtext(Type* rt, const str& fn) throw() + : buffifo(rt, true), file_name(fn), _fd(-1), _err(false) +{ + 
filebuf.resize(outtext::BUF_SIZE); + buffer = (char*)filebuf.data(); + bufsize = outtext::BUF_SIZE; +} + + +outtext::~outtext() throw() +{ + try + { flush(); } + catch (exception&) + { } + if (_fd > 2) + ::close(_fd); +} + + +void outtext::error(int code) + { _err = true; throw esyserr(code, file_name); } + + +str outtext::get_name() const + { return file_name; } + + +void outtext::flush() +{ + if (_err) + return; + if (bufhead > 0) + { + if (_fd < 0) + { + _fd = ::open(file_name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); + if (_fd < 0) + error(errno); + } + memint ret = ::write(_fd, buffer, bufhead); + if (ret < 0) + error(errno); + buforig += bufhead; + bufhead = 0; + } +} + + +// --- stdfile ------------------------------------------------------------- // + + +stdfile::stdfile(int infd, int outfd) throw() + : intext(NULL, ""), _ofd(outfd) +{ + _fd = infd; + if (infd == -1) + _eof = true; + _mkstatic(); +} + +stdfile::~stdfile() throw() + { } + +void stdfile::enq_char(char c) + { if (::write(_ofd, &c, 1) < 1) _full_err(); } + +memint stdfile::enq_chars(const char* p, memint count) + { return ::write(_ofd, p, count); } + + + +stdfile sio(STDIN_FILENO, STDOUT_FILENO); +stdfile serr(-1, STDERR_FILENO); + + +// --- System utilities ---------------------------------------------------- // + + +enum FileType +{ + FT_FILE, + FT_DIRECTORY, + FT_OTHER, // device or pipe + FT_ERROR = -1 +}; + + +static FileType getFileType(const char* path) +{ + struct stat st; + if (stat(path, &st) != 0) + return FT_ERROR; + if ((st.st_mode & S_IFDIR) == S_IFDIR) + return FT_DIRECTORY; + if ((st.st_mode & S_IFREG) == S_IFREG) + return FT_FILE; + return FT_OTHER; +} + + +bool isFile(const char* path) + { return getFileType(path) == FT_FILE; } + diff --git a/src/runtime.cpp b/src/runtime.cpp new file mode 100644 index 0000000..27d5b84 --- /dev/null +++ b/src/runtime.cpp @@ -0,0 +1,1340 @@ + + +#include "runtime.h" +#include "typesys.h" // circular reference + + +// --- 
charset ------------------------------------------------------------- // + + +static unsigned char lbitmask[8] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80}; +static unsigned char rbitmask[8] = {0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; + +const char charsetesc = '~'; + + +void charset::include(int min, int max) throw() +{ + if (uchar(min) > uchar(max)) + return; + int lidx = uchar(min) / 8; + int ridx = uchar(max) / 8; + uchar lbits = lbitmask[uchar(min) % 8]; + uchar rbits = rbitmask[uchar(max) % 8]; + + if (lidx == ridx) + { + data[lidx] |= lbits & rbits; + } + else + { + data[lidx] |= lbits; + for (int i = lidx + 1; i < ridx; i++) + data[i] = uchar(-1); + data[ridx] |= rbits; + } +} + + +static unsigned hex4(unsigned c) +{ + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= '0' && c <= '9') + return c - '0'; + return 0; +} + + +static unsigned parsechar(const char*& p) +{ + unsigned ret = *p; + if (ret == unsigned(charsetesc)) + { + p++; + ret = *p; + if ((ret >= '0' && ret <= '9') || (ret >= 'a' && ret <= 'f') || (ret >= 'A' && ret <= 'F')) + { + ret = hex4(ret); + p++; + if (*p != 0) + ret = (ret << 4) | hex4(*p); + } + } + return ret; +} + + +void charset::assign(const char* p) throw() +{ + if (*p == '*' && *(p + 1) == 0) + fill(); + else + { + clear(); + for (; *p != 0; p++) { + uchar left = parsechar(p); + if (*(p + 1) == '-') + { + p += 2; + uchar right = parsechar(p); + include(left, right); + } + else + include(left); + } + } +} + + +void charset::assign(const charset& s) throw() + { memcpy(data, s.data, BYTES); } + + +bool charset::empty() const throw() +{ + for(int i = 0; i < WORDS; i++) + if (((word*)data)[i] != 0) + return false; + return true; +} + + +void charset::unite(const charset& s) +{ + for(int i = 0; i < WORDS; i++) + ((word*)data)[i] |= ((word*)s.data)[i]; +} + + +void charset::subtract(const charset& s) +{ + for(int i = 0; i < WORDS; i++) + ((word*)data)[i] &= 
~((word*)s.data)[i]; +} + + +void charset::intersect(const charset& s) +{ + for(int i = 0; i < WORDS; i++) + ((word*)data)[i] &= ((word*)s.data)[i]; +} + + +void charset::invert() +{ + for(int i = 0; i < WORDS; i++) + ((word*)data)[i] = ~((word*)data)[i]; +} + + +bool charset::le(const charset& s) const +{ + for (int i = 0; i < WORDS; i++) + { + word w1 = ((word*)data)[i]; + word w2 = ((word*)s.data)[i]; + if ((w2 | w1) != w2) + return false; + } + return true; +} + + +// --- object -------------------------------------------------------------- // + + +atomicint object::allocated = 0; + + +object::~object() throw() { } + + +void _del_obj(object* o) +{ + delete o; +} + + +#ifndef SHN_FASTER +atomicint object::release() throw() +{ + if (this == NULL) + return 0; + assert(_refcount > 0); + atomicint r = pdecrement(&_refcount); + if (r == 0) + _del_obj(this); + return r; +} +#endif + + +void object::_assignto(object*& p) throw() +{ + if (p != this) + { + p->release(); + p = this; + if (this) + this->grab(); + } +} + + +void* object::operator new(size_t self) +{ + void* p = ::pmemalloc(self); +#ifdef DEBUG + pincrement(&object::allocated); +#endif + return p; +} + + +void* object::operator new(size_t self, memint extra) +{ + assert(self + extra > 0); + void* p = ::pmemalloc(self + extra); +#ifdef DEBUG + pincrement(&object::allocated); +#endif + return p; +} + + +void object::operator delete(void* p) +{ + assert(((object*)p)->_refcount == 0); +#ifdef DEBUG + pdecrement(&object::allocated); +#endif + ::pmemfree(p); +} + + +object* object::_dup(size_t self, memint extra) +{ + assert(self + extra > 0); + assert(self >= sizeof(*this)); + object* o = (object*)::pmemalloc(self + extra); +#ifdef DEBUG + pincrement(&object::allocated); +#endif + memcpy(o, this, self); + o->_refcount = 0; + return o; +} + + +object* object::reallocate(object* p, size_t self, memint extra) +{ + assert(p->_refcount == 1); + assert(self > 0 && extra >= 0); + return (object*)::pmemrealloc(p, self + 
extra); +} + + +rtobject::~rtobject() throw() + { } + + +// --- container ----------------------------------------------------------- // + + +void container::overflow() + { throw econtainer("Container overflow"); } + + +void container::idxerr() + { throw econtainer( "Container index error"); } + + +void container::keyerr() + { throw econtainer( "Dictionary key error"); } + + +container::~container() throw() + { } // must call finalize() in descendant classes + + +void container::finalize(void*, memint) throw() + { } + + +inline void container::copy(void* dest, const void* src, memint len) throw() + { ::memcpy(dest, src, len); } + + +container* container::allocate(memint cap, memint siz) throw() +{ + assert(siz <= cap); + assert(siz >= 0); + if (cap == 0) + return NULL; + return new(cap) container(cap, siz); +} + + +inline memint container::_calc_prealloc(memint newsize) +{ + if (newsize <= memint(8 * sizeof(memint))) + return 12 * sizeof(memint); // 96 on 64-bit systems, maybe too much? + else + return newsize + newsize / 2; +} + + +container* container::reallocate(container* p, memint newsize) +{ + if (newsize < 0) + overflow(); + if (newsize == 0) + { + delete p; + return NULL; + } + assert(p); + assert(p->isunique()); + assert(newsize > p->_capacity || newsize < p->_size); + p->_capacity = newsize > p->_capacity ? 
_calc_prealloc(newsize) : newsize;
    if (p->_capacity <= 0)
        overflow();
    p->_size = newsize;
    // The header (sizeof(*p)) is followed by _capacity bytes of payload.
    return (container*)object::reallocate(p, sizeof(*p), p->_capacity);
}


// Duplicates the container header via object::_dup() into a new block with
// room for `cap` payload bytes and sets the new capacity and size. The payload
// itself is not copied here; see the callers, which copy it afterwards.
container* container::_dup(memint cap, memint siz)
{
    assert(cap > 0);
    assert(siz > 0 && siz <= cap);
    container* c = (container*)object::_dup(sizeof(container), cap);
    c->_capacity = cap;
    c->_size = siz;
    return c;
}


// --- bytevec ------------------------------------------------------------- //


// Initializes the vector with `len` bytes of storage whose contents are left
// for the caller to fill in. Returns a pointer to the storage, or NULL when
// len is 0.
char* bytevec::_init(memint len) throw()
{
    chknonneg(len);
    if (len == 0)
    {
        obj._init();
        return NULL;
    }
    else
    {
        obj._init(container::allocate(len, len));
        return obj->data();
    }
}


// Initializes the vector with `len` copies of `fill`. Nothing is done when
// len is 0.
void bytevec::_init(memint len, char fill) throw()
{
    if (len)
        ::memset(_init(len), fill, len);
}


// Initializes the vector with a copy of `len` bytes at `buf`. Nothing is done
// when len is 0.
void bytevec::_init(const char* buf, memint len) throw()
{
    if (len)
        ::memcpy(_init(len), buf, len);
}


// Initializes the vector with a copy of v[pos .. pos + len), using `alloc` to
// create the container. Both ends of the range are checked against `v`;
// nothing is allocated when len <= 0.
void bytevec::_init(const bytevec& v, memint pos, memint len, alloc_func alloc) throw()
{
    v.chkidx(pos);
    v.chkidxa(pos + len);
    if (len <= 0)
        return;
    obj._init(alloc(len, len));
    obj->copy(obj->data(), v.data(pos), len);
}


// Replaces the shared container with a private copy of the same size.
void bytevec::_dounique()
{
    // Called only on non-empty, non-unique objects
    assert(!_isunique());
    memint siz = obj->size();
    container* c = obj->_dup(siz, siz);
    c->copy(c->data(), obj->data(), siz);
    obj = c;
}


// Replaces the contents with a copy of `len` bytes at `buf`.
void bytevec::assign(const char* buf, memint len)
    { obj._fin(); _init(buf, len); }


// Empties the vector.
void bytevec::clear()
{
    if (!empty())
    {
        // finalize() is not needed for POD data, but we put it here so that clear() works
        // for descendant non-POD containers. Same applies to insert()/append().
obj->finalize(obj->data(), obj->size());
        obj.clear();
    }
}


// Opens a gap of `len` bytes at position `pos` and returns a pointer to it;
// the gap's contents are left for the caller to fill in. When the vector is
// empty or shared, a new container is created with `alloc` and the existing
// bytes are copied around the gap; a unique container is resized and its tail
// is moved in place.
char* bytevec::_insert(memint pos, memint len, alloc_func alloc)
{
    assert(len > 0);
    chkidxa(pos);
    memint oldsize = size();
    memint newsize = oldsize + len;
    memint remain = oldsize - pos;      // bytes that end up after the gap
    if (empty() || !_isunique())
    {
        // Note: first allocation sets capacity = size
        container* c = alloc(newsize, newsize);  // _cont()->dup(newsize, newsize);
        if (pos > 0)  // copy the first chunk, before 'pos'
            c->copy(c->data(), obj->data(), pos);
        if (remain)  // copy the remainder
            c->copy(c->data(pos + len), obj->data(pos), remain);
        obj = c;
    }
    else // if unique
    {
        if (newsize > capacity())
            obj._reinit(container::reallocate(obj, newsize));
        else
            obj->set_size(newsize);
        if (remain)
            ::memmove(obj->data(pos + len), obj->data(pos), remain);
    }
    return obj->data(pos);
}


// Inserts the contents of `v` at position `pos`. Inserting into an empty
// vector is only valid at position 0 (anything else raises an index error).
void bytevec::_insert(memint pos, const bytevec& v, alloc_func alloc)
{
    if (empty())
    {
        if (pos)
            container::idxerr();
        _init(v);
    }
    else if (!v.empty())
    {
        memint len = v.size();
        // Note: should be done in two steps so that the case (v == *this) works
        char* p = _insert(pos, len, alloc);
        obj->copy(p, v.data(), len);
    }
}


// Grows the vector by `len` bytes at the end and returns a pointer to the new
// bytes, whose contents are left for the caller to fill in.
char* bytevec::_append(memint len, alloc_func alloc)
{
    // _insert(0, len) would do, but we want a faster function
    assert(len > 0);
    memint oldsize = size();
    memint newsize = oldsize + len;
    if (empty() || !_isunique())
    {
        // Note: first allocation sets capacity = size
        container* c = alloc(newsize, newsize);  // _cont()->dup(newsize, newsize);
        if (oldsize > 0)
            c->copy(c->data(), obj->data(), oldsize);
        obj = c;
    }
    else // if unique
    {
        if (newsize > capacity())
            obj._reinit(container::reallocate(obj, newsize));
        else
            obj->set_size(newsize);
    }
    return obj->data(oldsize);
}


// Removes `len` bytes starting at `pos`; both ends of the range are checked.
void bytevec::_erase(memint pos, memint len)
{
    assert(len > 0);
    chkidx(pos);
    memint oldsize = size();
    memint epos = pos + len;            // one past the last erased byte
    chkidxa(epos);
memint newsize = oldsize - len;
    memint remain = oldsize - epos;     // bytes that follow the erased range
    if (newsize == 0) // also if empty, because newsize < oldsize
        clear();
    else if (!_isunique())
    {
        // Shared: build a private container from the two surviving pieces.
        container* c = obj->_dup(newsize, newsize);
        if (pos)
            obj->copy(c->data(), obj->data(), pos);
        if (remain)
            obj->copy(c->data(pos), obj->data(epos), remain);
        obj = c;
    }
    else // if unique
    {
        // Finalize the erased bytes, then close the gap in place.
        char* p = obj->data(pos);
        obj->finalize(p, len);
        if (remain)
            ::memmove(p, p + len, remain);
        obj->set_size(newsize);
    }
}


// Removes the last `len` bytes.
void bytevec::_pop(memint len)
{
    assert(len > 0);
    memint oldsize = size();
    memint newsize = oldsize - len;
    chkidx(newsize);
    if (newsize == 0)
        clear();
    else if (!_isunique())
    {
        container* c = obj->_dup(newsize, newsize);
        c->copy(c->data(), obj->data(), newsize);
        obj = c;
    }
    else // if unique
    {
        obj->finalize(obj->data(newsize), len);
        obj->set_size(newsize);
    }
}


// Inserts `len` bytes from `buf` at position `pos`; no-op when len <= 0.
void bytevec::insert(memint pos, const char* buf, memint len)
{
    if (len > 0)
    {
        char* p = _insert(pos, len, container::allocate);
        obj->copy(p, buf, len);
    }
}


// Appends `len` bytes from `buf`; no-op when len <= 0.
void bytevec::append(const char* buf, memint len)
{
    if (len > 0)
    {
        char* p = _append(len, container::allocate);
        obj->copy(p, buf, len);
    }
}


// Appends the contents of `v`.
void bytevec::append(const bytevec& v)
{
    if (empty())
        _init(v);
    else if (!v.empty())
    {
        memint len = v.size();
        // Note: should be done in two steps so that the case (v == *this) works
        char* p = _append(len, container::allocate);
        obj->copy(p, v.data(), len);
    }
}


// Removes `len` bytes starting at `pos`; only `pos` is checked when len <= 0.
void bytevec::erase(memint pos, memint len)
{
    chkidxa(pos);
    if (len > 0)
        _erase(pos, len);
}


// Changes the size to `newsize`. When the vector grows, returns a pointer to
// the newly added bytes (left for the caller to fill in); returns NULL when
// the size is unchanged or reduced.
char* bytevec::_resize(memint newsize, alloc_func alloc)
{
    chknonneg(newsize);
    memint oldsize = size();
    if (newsize == oldsize)
        return NULL;
    else if (newsize == 0)
    {
        clear();
        return NULL;
    }
    else if (newsize < oldsize)
    {
        _pop(oldsize - newsize);
        return NULL;
    }
    else
        return _append(newsize - oldsize, alloc);
}


void
bytevec::resize(memint newsize, char fill) +{ + memint oldsize = size(); + char* p = resize(newsize); + if (p) + ::memset(p, fill, newsize - oldsize); +} + + +// --- str ----------------------------------------------------------------- // + + +void str::_init(const char* buf) throw() + { bytevec::_init(buf, pstrlen(buf)); } + + +const char* str::c_str() +{ + if (empty()) + return ""; + if (obj->isunique() && obj->size() < obj->capacity()) + *obj->end() = 0; + else + { + push_back(char(0)); + obj->dec_size(); + } + return data(); +} + + +void str::operator= (const char* s) + { obj._fin(); _init(s); } + +void str::operator= (char c) + { obj._fin(); _init(c); } + + +memint str::find(char c) const +{ + if (empty()) + return npos; + const char* p = data(); + const char* f = (const char*)::memchr(p, c, size()); + if (f == NULL) + return npos; + return f - p; +} + + +memint str::rfind(char c) const +{ + if (empty()) + return npos; + const char* b = data(); + const char* p = b + size() - 1; + do + { + if (*p == c) + return p - b; + p--; + } + while (p >= b); + return npos; +} + + +memint str::compare(const char* s, memint blen) const +{ + memint alen = size(); + memint len = imin(alen, blen); + if (len == 0) + return alen - blen; + int result = ::memcmp(data(), s, len); + if (result == 0) + return alen - blen; + else + return result; +} + + +bool str::operator== (const char* s) const + { return compare(s, pstrlen(s)) == 0; } + + +void str::operator+= (const char* s) + { append(s, pstrlen(s)); } + + +void str::insert(memint pos, const char* s) + { bytevec::insert(pos, s, pstrlen(s)); } + + +void str::replace(memint pos, memint len, const str& s) +{ + erase(pos, len); + insert(pos, s); +} + + +str str::substr(memint pos, memint len) const +{ + if (pos == 0 && len == size()) + return *this; + if (len <= 0) + return str(); + chkidx(pos); + chkidxa(pos + len); + return str(data(pos), len); +} + + +str str::substr(memint pos) const +{ + if (pos == 0) + return *this; + 
chkidxa(pos); + return str(data(pos), size() - pos); +} + + +// --- string utilities ---------------------------------------------------- // + + +static const char* _itobase(large value, char* buf, int base, int& len, bool _signed) +{ + // internal conversion routine: converts the value to a string + // at the end of the buffer and returns a pointer to the first + // character. this is to get rid of copying the string to the + // beginning of the buffer, since finally the string is supposed + // to be copied to a dynamic string in itostring(). the buffer + // must be at least 65 bytes long. + + static char digits[65] = + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + char* pdigits; + if (base > 36) + pdigits = digits; // start from '.' + else + pdigits = digits + 2; // start from '0' + + int i = 64; + buf[i] = 0; + + bool neg = false; + ularge v = value; + if (_signed && base == 10 && value < 0) + { + v = -value; + // since we can't handle the lowest signed value, we just return a built-in string. 
+ if (large(v) < 0) // a minimum value negated results in the same value + { + if (sizeof(value) == 8) + { + len = 20; + return "-9223372036854775808"; + } + else + abort(); + } + neg = true; + } + + do + { + buf[--i] = pdigits[unsigned(v % base)]; + v /= base; + } while (v > 0); + + if (neg) + buf[--i] = '-'; + + len = 64 - i; + return buf + i; +} + + +static void _itobase2(str& result, large value, int base, int width, char padchar, bool _signed) +{ + result.clear(); + + if (base < 2 || base > 64) + return; + + char buf[65]; // the longest possible string is when base=2 + int reslen; + const char* p = _itobase(value, buf, base, reslen, _signed); + + if (width > reslen) + { + if (padchar == 0) + { + // default pad char + if (base == 10) + padchar = ' '; + else if (base > 36) + padchar = '.'; + else + padchar = '0'; + } + + bool neg = *p == '-'; + if (neg) { p++; reslen--; } + width -= reslen; + if (width > 0) + result.resize(width, padchar); + result.append(p, reslen); + if (neg) + result.replace(0, '-'); + } + else + result.assign(p, reslen); +} + + +str _to_string(large value, int base, int width, char padchar) +{ + str result; + _itobase2(result, value, base, width, padchar, true); + return result; + ; +} + + +str _to_string(large value) +{ + str result; + _itobase2(result, value, 10, 0, ' ', true); + return result; +} + +/* +str _to_string(memint value) +{ + str result; + _itobase2(result, value, 10, 0, ' ', false); + return result; +} +*/ + +ularge from_string(const char* p, bool* error, bool* overflow, int base) +{ + *error = false; + *overflow = false; + + if (p == 0 || *p == 0 || base < 2 || base > 64) + { *error = true; return 0; } + + ularge result = 0; + + do + { + int c = *p++; + + if (c >= 'a') + { + // for the numeration bases that use '.', '/', digits and + // uppercase letters the letter case is insignificant. 
+ if (base <= 38) + c -= 'a' - '9' - 1; + else // others use both upper and lower case letters + c -= ('a' - 'Z' - 1) + ('A' - '9' - 1); + } + else if (c > 'Z') + { *error = true; return 0; } + else if (c >= 'A') + c -= 'A' - '9' - 1; + else if (c > '9') + { *error = true; return 0; } + + c -= (base > 36) ? '.' : '0'; + if (c < 0 || c >= base) + { *error = true; return 0; } + + ularge t = result * unsigned(base); + if (t / base != result) + { *overflow = true; return 0; } + result = t; + t = result + unsigned(c); + if (t < result) + { *overflow = true; return 0; } + result = t; + + } + while (*p != 0); + + return result; +} + + +str remove_filename_path(const str& fn) +{ + memint i = fn.rfind('/'); + if (i == str::npos) + { + i = fn.rfind('\\'); + if (i == str::npos) + return fn; + } + return fn.substr(i + 1); +} + + +str remove_filename_ext(const str& fn) +{ + memint i = fn.rfind('.'); + if (i == str::npos) + return fn; + return fn.substr(0, i); +} + + +static const charset printable_chars = "~20-~7E~80-~FE"; + + +static void _to_printable(char c, str& s) +{ + if (c == '\\') + s += "\\\\"; + else if (c == '\'') + s += "\\\'"; + else if (printable_chars[c]) + s.append(&c, 1); + else + { + s += "\\x"; + s += to_string(uchar(c), 16, 2, '0'); + } +} + + +str to_printable(char c) +{ + str result; + _to_printable(c, result); + return result; +} + + +str to_printable(const str& s) +{ + str result; + for (memint i = 0; i < s.size(); i++) + _to_printable(s[i], result); + return result; +} + + +str to_quoted(char c) + { return "'" + to_printable(c) + "'"; } + + +str to_quoted(const str& s) + { return "'" + to_printable(s) + "'"; } + + +str to_displayable(const str& s) +{ + if (s.size() > 40) + return s.substr(0, 37) + "..."; + else + return s; +} + + +// --- ordset -------------------------------------------------------------- // + + +charset ordset::empty_charset; + +ordset::ordset(integer v) throw() + : obj(new setobj()) { obj->set.include(int(v)); } + + 
+ordset::ordset(integer l, integer r) throw() + : obj(new setobj()) { obj->set.include(int(l), int(r)); } + + +charset& ordset::_getunique() +{ + if (obj.empty()) + obj = new setobj(); + else if (!obj.isunique()) + obj = new setobj(*obj); + return obj->set; +} + + +memint ordset::compare(const ordset& s) const +{ + if (empty()) + return s.empty() ? 0 : -1; + else if (s.empty()) + return 1; + else + return obj->set.compare(s.obj->set); +} + + +void ordset::find_insert(integer v) { _getunique().include(int(v)); } +void ordset::find_insert(integer l, integer h) { _getunique().include(int(l), int(h)); } +void ordset::find_erase(integer v) { if (!empty()) _getunique().exclude(int(v)); } + + +// --- range --------------------------------------------------------------- // + + +range::range(integer l, integer r) throw() + : obj(l > r ? NULL : new rangeobj(l, r)) { } + +range::~range() throw() + { } + + +memint range::compare(const range& r) const +{ + if (empty()) + return r.empty() ? 0 : -1; + if (r.empty()) + return 1; + integer d = obj->left - r.obj->left; + if (d < 0) + return -1; + else if (d > 0) + return 1; + else + { + d = obj->right - r.obj->right; + return d < 0 ? -1 : d > 0 ? 
1 : 0; + } +} + + +bool range::operator ==(const range& r) const +{ + if (empty()) + return r.empty(); + if (r.empty()) + return false; + return obj->left == r.obj->left && obj->right == r.obj->right; +} + + +// --- object collections -------------------------------------------------- // + + +// template class podvec; + + +void objvec_impl::release_all() throw() +{ + // TODO: more optimal destruction + for (memint i = size(); i--; ) + operator[](i)->release(); +} + + +symbol::~symbol() throw() { } + + +symbol* symtbl_impl::find(const str& name) const +{ + memint i; + if (bsearch(name, i)) + return operator[](i); + else + return NULL; +} + + +bool symtbl_impl::add(symbol* s) +{ + if (s->name.empty()) + fatal(0x1003, "Empty symbol in symbol table"); + memint i; + if (bsearch(s->name, i)) + return false; + insert(i, s); + return true; +} + + +bool symtbl_impl::replace(symbol* s) +{ + memint i; + if (!bsearch(s->name, i)) + return false; + parent::replace(i, s); + return true; +} + + +bool symtbl_impl::bsearch(const str& key, memint& idx) const +{ + idx = 0; + memint low = 0; + memint high = size() - 1; + while (low <= high) + { + idx = (low + high) / 2; + memint comp = operator[](idx)->name.compare(key); + if (comp < 0) + low = idx + 1; + else if (comp > 0) + high = idx - 1; + else + return true; + } + idx = low; + return false; +} + + +// --- Exceptions ---------------------------------------------------------- // + + +emessage::emessage(const str& _msg) throw(): msg(_msg) { } +emessage::emessage(const char* _msg) throw(): msg(_msg) { } +emessage::~emessage() throw() { } +const char* emessage::what() throw() { return msg.c_str(); } + + +static str sysErrorStr(int code, const str& arg) +{ + // For some reason strerror_r() returns garbage on my 64-bit Ubuntu. That's unfortunately + // not the only strange thing about this computer and OS. Could be me, could be hardware + // or could be libc. Or all. 
+ // Upd: so I updated both hardware and OS, still garbage on 64 bit, but OK on 32-bit. + // What am I doing wrong? +// char buf[1024]; +// strerror_r(code, buf, sizeof(buf)); + str result = strerror(code); + if (!arg.empty()) + result += " (" + arg + ")"; + return result; +} + + +esyserr::esyserr(int code, const str& arg) throw() + : emessage(sysErrorStr(code, arg)) { } + + +esyserr::~esyserr() throw() { } + + +void nullptrerr() + { throw emessage("Uninitialized object"); } + + +// --- variant ------------------------------------------------------------- // + +/* +template class vector; +template class set; +template class dict; +template class podvec; +*/ + +variant::_Void variant::null; + + +void variant::_type_err() { throw evariant("Variant type mismatch"); } +void variant::_range_err() { throw evariant("Variant range error"); } + + +#ifndef SHN_FASTER +void variant::_init(const variant& v) throw() +{ + type = v.type; + val = v.val; + if (is_anyobj() && val._obj) + val._obj->grab(); +} + + +void variant::operator= (const variant& v) throw() +{ + if (type != v.type || val._all != v.val._all) + { _fin(); _init(v); } +} +#endif + + +memint variant::compare(const variant& v) const +{ + if (type == v.type) + { + switch(type) + { + case VOID: + return 0; + case ORD: + { + integer d = val._ord - v.val._ord; + return d < 0 ? -1 : d > 0 ? 1 : 0; + } + case REAL: + return val._real < v.val._real ? -1 : (val._real > v.val._real ? 1 : 0); + case VARPTR: + return val._ptr - v.val._ptr; + case STR: + return _str().compare(v._str()); + case RANGE: + return _range().compare(v._range()); + // TODO: define "deep" comparison, at least for vectors? 
+ case VEC: + case SET: + case ORDSET: + case DICT: + case REF: + case RTOBJ: + return memint(_anyobj()) - memint(v._anyobj()); + } + } + return int(type - v.type); +} + + +bool variant::operator== (const variant& v) const +{ + if (type == v.type) + { + switch(type) + { + case VOID: return true; + case ORD: return val._ord == v.val._ord; + case REAL: return val._real == v.val._real; + case VARPTR: return val._ptr == v.val._ptr; + case STR: return _str() == v._str(); + case RANGE: return _range() == v._range(); + case VEC: return _vec() == v._vec(); + case SET: return _set() == v._set(); + case ORDSET: return _ordset() == v._ordset(); + case DICT: return _dict() == v._dict(); + case REF: return _ref() == v._ref(); + case RTOBJ: return _rtobj() == v._rtobj(); + } + } + return false; +} + + +bool variant::empty() const +{ + switch(type) + { + case VOID: return true; + case ORD: return val._ord == 0; + case REAL: return val._real == 0; + case VARPTR: return val._ptr == NULL; + case STR: return _str().empty(); + case RANGE: return _range().empty(); + case VEC: return _vec().empty(); + case SET: return _set().empty(); + case ORDSET: return _ordset().empty(); + case DICT: return _dict().empty(); + case REF: return _ref()->var.empty(); + case RTOBJ: return _rtobj() == NULL || _rtobj()->empty(); + } + return false; +} + + +// --- runtime objects ----------------------------------------------------- // + + +#ifdef DEBUG +void stateobj::idxerr() + { fatal(0x1005, "Object access error"); } +#endif + + +reference::~reference() throw() + { } + + +stateobj::~stateobj() throw() + { collapse(); } + + +bool stateobj::empty() const + { return false; } + + +void stateobj::dump(fifo& stm) const +{ + // TODO: full dump + stm << ""; +} + + +void stateobj::collapse() +{ + // TODO: this is not thread-safe. An atomic exchnage for pointers is needed. 
+ if (getType() != NULL) + { + for (memint count = getType()->varCount; count--; ) + member(count)->clear(); + clearType(); +#ifdef DEBUG + varcount = 0; +#endif + } +} + + +funcptr::funcptr(stateobj* d, stateobj* o, State* s) throw() + : rtobject(s->prototype), dataseg(d), outer(o), state(s) { } + +funcptr::~funcptr() throw() + { } + +bool funcptr::empty() const + { return state == NULL; } + + +void funcptr::dump(fifo& stm) const +{ + // TODO: full dump + stm << (empty() ? "" : ""); +} + + +rtstack::rtstack(memint maxSize) throw() +{ + if (maxSize) + _init(maxSize * sizeof(variant)); +} + + +// ------------------------------------------------------------------------- // + + +// template class vector; + + +void initRuntime() +{ + // Some critical build integrity tests, unfortunately can't be done with macros: + if ( + // Make sure all containers occupy exactly one pointer statically + sizeof(str) == sizeof(void*) && sizeof(symtbl_impl) == sizeof(void*) + && sizeof(vardict) == sizeof(void*) && sizeof(range) == sizeof(void*) + // memint is equivalent of ssize_t + && sizeof(memint) == sizeof(void*) + // Container indexes are memint, we keep them in integer vars, thus: + && sizeof(memint) <= sizeof(integer) + // the following is needed because we initialize the variant to 0 via `integer _all` + && sizeof(variant::_val_union) == sizeof(integer)) + ; + else + fatal(0x1004, "Broken build"); +} + + +void doneRuntime() +{ +} + diff --git a/src/runtime.h b/src/runtime.h new file mode 100644 index 0000000..e363caa --- /dev/null +++ b/src/runtime.h @@ -0,0 +1,1604 @@ +#ifndef __RUNTIME_H +#define __RUNTIME_H + + +#include "common.h" + + +// --- charset ------------------------------------------------------------- // + + +class charset +{ +public: + typedef uinteger word; + enum + { + BITS = 256, + BYTES = BITS / 8, + WORDS = BYTES / int(sizeof(word)) + }; + +protected: + typedef uint8_t uchar; + + uchar data[BYTES]; + +public: + charset() throw() { clear(); } + charset(const 
charset& s) throw() { assign(s); } + charset(const char* setinit) throw() { assign(setinit); } + + void assign(const charset& s) throw(); + void assign(const char* setinit) throw(); + bool empty() const throw(); + void clear() throw() { memset(data, 0, BYTES); } + void fill() { memset(data, -1, BYTES); } + void include(int b) throw() { data[uchar(b) / 8] |= uchar(1 << (uchar(b) % 8)); } + void include(int min, int max) throw(); + void exclude(int b) { data[uchar(b) / 8] &= uchar(~(1 << (uchar(b) % 8))); } + void unite(const charset& s); + void subtract(const charset& s); + void intersect(const charset& s); + void invert(); + bool contains(int b) const { return (data[uchar(b) / 8] & (1 << (uchar(b) % 8))) != 0; } + bool compare(const charset& s) const { return memcmp(data, s.data, BYTES); } + bool eq(const charset& s) const { return compare(s) == 0; } + bool le(const charset& s) const; + + charset& operator= (const charset& s) { assign(s); return *this; } + charset& operator+= (const charset& s) { unite(s); return *this; } + charset& operator+= (int b) { include(b); return *this; } + charset operator+ (const charset& s) const { charset t = *this; return t += s; } + charset operator+ (int b) const { charset t = *this; return t += b; } + charset& operator-= (const charset& s) { subtract(s); return *this; } + charset& operator-= (int b) { exclude(b); return *this; } + charset operator- (const charset& s) const { charset t = *this; return t -= s; } + charset operator- (int b) const { charset t = *this; return t -= b; } + charset& operator*= (const charset& s) { intersect(s); return *this; } + charset operator* (const charset& s) const { charset t = *this; return t *= s; } + charset operator~ () const { charset t = *this; t.invert(); return t; } + bool operator== (const charset& s) const { return compare(s) == 0; } + bool operator!= (const charset& s) const { return compare(s) != 0; } + bool operator<= (const charset& s) const { return le(s); } + bool operator>= (const 
charset& s) const { return s.le(*this); } + bool operator[] (int b) const { return contains(b); } +}; + + +// --- object -------------------------------------------------------------- // + + +// object: reference-counted memory block with a virtual destructor + +class object +{ + object(const object&) throw(); + void operator= (const object&) throw(); + +protected: + atomicint _refcount; + + bool _release(); + +public: + + void _mkstatic() + { + // Prevent this object from being free'd by release() and also from + // being counted against memory leaks. + _refcount = 1; +#ifdef DEBUG + pdecrement(&object::allocated); +#endif + } + + void* operator new(size_t self); + void* operator new(size_t self, memint extra); + void operator delete(void*); + + // Dirty trick that duplicates an object and hopefully preserves the + // dynamic type (actually the VMT). Only 'self' bytes is copied; 'extra' + // remains uninitialized. + object* _dup(size_t self, memint extra); + + void _assignto(object*& p) throw(); + static object* reallocate(object* p, size_t self, memint extra); + + static atomicint allocated; // used only in DEBUG mode + + bool isunique() const { return _refcount == 1; } + atomicint release() throw(); + object* grab() throw() { pincrement(&_refcount); return this; } + template + T* grab() { object::grab(); return (T*)(this); } + template + void assignto(T*& p) throw() { _assignto((object*&)p); } + + object() throw(): _refcount(0) { } + virtual ~object() throw(); +}; + + +void _del_obj(object* o); + + +#ifdef SHN_FASTER +inline atomicint object::release() +{ + if (this == NULL) + return 0; + assert(_refcount > 0); + atomicint r = pdecrement(&_refcount); + if (r == 0) + _del_obj(this); + return r; +} +#endif + + +// objptr: "smart" pointer + +template +class objptr +{ +protected: + T* obj; +public: + objptr() : obj(NULL) { } + objptr(const objptr& p) : obj(p.obj) { if (obj) obj->grab(); } + objptr(T* o) : obj(o) { if (o) o->grab(); } + ~objptr() { obj->release(); } 
+ /* Drops the held reference, if any, and becomes empty. */ + void clear() { if (obj) { obj->release(); obj = NULL; } } + bool empty() const { return obj == NULL; } + bool isunique() const { return empty() || obj->isunique(); } + bool operator== (const objptr& p) { return obj == p.obj; } + bool operator!= (const objptr& p) { return obj != p.obj; } + bool operator== (T* o) { return obj == o; } + bool operator!= (T* o) { return obj != o; } + /* Assignment: a non-NULL source is installed through assignto() as before; a NULL source simply clears this pointer, so no member function is ever called through a NULL pointer. */ + void operator= (const objptr& p) throw() { if (p.obj) p.obj->assignto(obj); else clear(); } + void operator= (T* o) throw() { if (o) o->assignto(obj); else clear(); } + T& operator* () { return *obj; } + const T& operator* () const { return *obj; } + T* operator-> () const { return obj; } + operator T*() const { return obj; } + T* get() const { return obj; } + + // Internal + void _init() { obj = NULL; } + void _init(T* o) { obj = o; if (o) o->grab(); } + /* Releases the held reference without resetting obj; does nothing when empty. */ + void _fin() { if (obj) obj->release(); } + void _reinit(T* o) { obj = o; } +}; + + +class Type; // defined in typesys.h +class fifo; + +// rtobject: a ref-counted object with runtime type information + +class rtobject: public object +{ +private: + Type* _type; +public: + rtobject(Type* t) throw(): _type(t) { } + ~rtobject() throw(); + Type* getType() const { return _type; } + void setType(Type* t) { assert(_type == NULL); _type = t; } + void clearType() throw() { _type = NULL; } + virtual bool empty() const = 0; + virtual void dump(fifo&) const = 0; +}; + + +// --- container ----------------------------------------------------------- // + + +// container: resizable ref-counted container of POD data; also base for +// non-POD containers that override finalize() and copy() (and the dtor) + +class container: public object +{ +protected: + memint _capacity; + memint _size; + // char _data[0]; + +public: + // Note: allocate() creates an instance of 'container' while reallocate() + // never does that and thus it can be used for descendant classes too. 
+ static container* allocate(memint cap, memint siz) throw(); // (*) + static container* reallocate(container* p, memint newsize); + + // Creates a duplicate of a given container without copying the data; + // leaves actual copying to the virtual method copy() + container* _dup(memint cap, memint siz); + + // TODO: compact() + + static memint _calc_prealloc(memint); + container(memint cap, memint siz) throw() + : object(), _capacity(cap), _size(siz) { } + + static void overflow(); + static void idxerr(); + static void keyerr(); + + ~container() throw(); + virtual void finalize(void*, memint) throw(); + virtual void copy(void* dest, const void* src, memint) throw(); + + /* The payload starts immediately after the container header, hence (this + 1); offsets passed to data(i) are in bytes. */ + char* data() const { return (char*)(this + 1); } + char* data(memint i) const { return data() + i; } + char* end() const { return data(_size); } + /* Inverse of data(): steps back one header from a payload pointer. */ + static container* cont(char* d) { return ((container*)d) - 1; } + memint size() const { return _size; } + void set_size(memint newsize) + { assert(newsize > 0 && newsize <= _capacity); _size = newsize; } + void dec_size() { assert(_size > 0); _size--; } + memint capacity() const { return _capacity; } +}; + + +// --- bytevec ------------------------------------------------------------- // + + +// bytevec: byte vector, implements copy-on-write; the structure itself +// occupies only sizeof(void*); base class for strings and vectors + +class bytevec +{ + friend class variant; + friend class CodeGen; + + friend void test_bytevec(); + friend void test_podvec(); + +protected: + objptr obj; + + typedef container* (*alloc_func)(memint cap, memint siz); + + /* Index checks compare as unsigned, so a negative index fails the same test as one that is too large. chkidx() requires i < size(); chkidxa() also accepts i == size(). */ + void chkidx(memint i) const { if (umemint(i) >= umemint(size())) container::idxerr(); } + void chkidxa(memint i) const { if (umemint(i) > umemint(size())) container::idxerr(); } + static void chknonneg(memint v) { if (v < 0) container::overflow(); } + void chknz() const { if (empty()) container::idxerr(); } + bool _isunique() const { return empty() || obj->isunique(); } + void _dounique(); + char* mkunique() { 
if (!obj->isunique()) _dounique(); return obj->data(); } + char* _init(memint len) throw(); // (*) + void _init(memint len, char fill) throw(); // (*) + void _init(const char*, memint) throw(); // (*) + void _init(const bytevec& v) throw() { obj._init(v.obj); } + char* _init(memint pos, memint len, alloc_func) throw(); + void _init(const bytevec& v, memint pos, memint len, alloc_func) throw(); + + char* _insert(memint pos, memint len, alloc_func); + void _insert(memint pos, const bytevec&, alloc_func); + char* _append(memint len, alloc_func); + void _erase(memint pos, memint len); + void _pop(memint len); + char* _resize(memint newsize, alloc_func); + +public: + bytevec() throw(): obj() { } + bytevec(const bytevec& v) throw() { _init(v); } + bytevec(const char* buf, memint len) throw() { _init(buf, len); } // (*) + bytevec(memint len, char fill) throw() { _init(len, fill); } // (*) + ~bytevec() throw() { } + + /* Assignment shares the container of v; operator== compares the container pointers only, not the stored bytes. */ + void operator= (const bytevec& v) throw() { obj = v.obj; } + bool operator== (const bytevec& v) const { return obj == v.obj; } + void assign(const char*, memint); + void clear(); + + bool empty() const { return obj.empty(); } + memint size() const { return empty() ? 0 : obj->size(); } + memint capacity() const { return empty() ? 0 : obj->capacity(); } + /* data() performs no index or emptiness check; at() and atw() validate the index through chkidx() first, and atw() goes through mkunique() before handing out a writable pointer. */ + const char* data() const { return obj->data(); } + const char* data(memint i) const { return obj->data(i); } + const char* at(memint i) const { chkidx(i); return obj->data(i); } + char* atw(memint i) { chkidx(i); return mkunique() + i; } + const char* begin() const { return empty() ? NULL : obj->data(); } + const char* end() const { return empty() ? 
NULL : obj->end(); } + /* back(i) addresses the byte i positions before the end; i == size() is accepted and yields the first byte. */ + const char* back(memint i) const { chkidxa(i); return obj->end() - i; } + const char* back() const { return back(1); } + /* Writable variant: like atw(), it must obtain a private copy of the buffer through mkunique() before returning the pointer, otherwise a write would also change every vector sharing the container. mkunique() returns the start of the (possibly new) buffer, so the address is recomputed from it. */ + char* backw(memint i) { chkidxa(i); return mkunique() + size() - i; } + char* backw() { return backw(1); } + + void insert(memint pos, const char* buf, memint len); // (*) + void insert(memint pos, const bytevec& s) // (*) + { _insert(pos, s, container::allocate); } + void append(const char* buf, memint len); // (*) + void append(const bytevec& s); + void erase(memint pos, memint len); + void pop(memint len) { if (len > 0) _pop(len); } + char* resize(memint newsize) { return _resize(newsize, container::allocate); } // (*) + void resize(memint, char); // (*) + + // Mostly used internally + template + const T* data(memint i) const { return (T*)data(i * sizeof(T)); } + template + const T* at(memint i) const { return (T*)at(i * sizeof(T)); } + template + T* atw(memint i) { return (T*)atw(i * sizeof(T)); } + template + const T* begin() const { return (T*)begin(); } + template + const T* end() const { return (T*)end(); } + template + const T* back() const { return (T*)back(sizeof(T)); } + template + const T* back(memint i) const { return (T*)back(sizeof(T) * i); } + template + T* backw() { return (T*)backw(sizeof(T)); } + template + T* backw(memint i) { return (T*)backw(sizeof(T) * i); } + template + void pop_back() { pop(sizeof(T)); } + template + void pop_back(T& t) { t = *back(); pop_back(); } +}; + +// (*) -- works only with the POD container; should be overridden or hidden in +// descendant classes. The rest works magically on any descendant of +// 'container'. Pure magic. I like this! 
+ + +// --- str ----------------------------------------------------------------- // + + +class str: public bytevec +{ +protected: + friend void test_string(); + + void _init(const char*) throw(); + void _init(char c) throw() { bytevec::_init(&c, 1); } + +public: + str() throw(): bytevec() { } + str(const str& s)throw(): bytevec(s) { } + str(const char* buf, memint len) throw(): bytevec(buf, len) { } + str(const char* s) throw() { _init(s); } + str(memint len, char fill) throw() { bytevec::_init(len, fill); } + str(char c) throw() { _init(c); } + + const char* c_str(); // can actually modify the object + /* push_back/push_front/insert(pos, char) open a one-byte gap through the POD allocator and store c into it. */ + void push_back(char c) { *_append(1, container::allocate) = c; } + void push_front(char c) { *_insert(0, 1, container::allocate) = c; } + /* operator[] reads through the unchecked data(i); at() goes through the index-checked bytevec::at(). */ + char operator[] (memint i) const { return *data(i); } + char at(memint i) const { return *bytevec::at(i); } + char back() const { return *bytevec::back(); } + void replace(memint pos, char c) { *bytevec::atw(pos) = c; } + void insert(memint pos, char c) { *_insert(pos, 1, container::allocate) = c; } + void insert(memint pos, const str& s) { bytevec::insert(pos, s); } + void insert(memint pos, const char* s); + void operator= (const char* c); + void operator= (char c); + void replace(memint pos, memint len, const str& s); + + enum { npos = -1 }; + memint find(char c) const; + memint rfind(char c) const; + + memint compare(const char*, memint) const; + memint compare(const str& s) const { return compare(s.data(), s.size()); } + bool operator== (const char* s) const; + bool operator== (const str& s) const { return compare(s.data(), s.size()) == 0; } + /* Equal to a single character only when the string holds exactly one byte and that byte is c. */ + bool operator== (char c) const { return size() == 1 && *data() == c; } + bool operator!= (const char* s) const { return !(*this == s); } + bool operator!= (const str& s) const { return !(*this == s); } + bool operator!= (char c) const { return !(*this == c); } + + void operator+= (const char* s); + void operator+= (const str& s) { append(s); } + void operator+= (char c) { push_back(c); 
} + str operator+ (const char* s) const { str r = *this; r += s; return r; } + str operator+ (const str& s) const { str r = *this; r += s; return r; } + str operator+ (char c) const { str r = *this; r += c; return r; } + str substr(memint pos, memint len) const; + str substr(memint pos) const; +}; + + +inline str operator+ (const char* s1, const str& s2) + { str r = s1; r += s2; return r; } + +inline str operator+ (char c, const str& s2) + { str r = c; r += s2; return r; } + + +// --- string utilities ---------------------------------------------------- // + + +str _to_string(large value, int base, int width, char fill); +str _to_string(large); +template + inline str to_string(const T& value, int base, int width = 0, char fill = '0') + { return _to_string(large(value), base, width, fill); } +template + inline str to_string(const T& value) + { return _to_string(large(value)); } + +ularge from_string(const char*, bool* error, bool* overflow, int base = 10); + +str remove_filename_path(const str&); +str remove_filename_ext(const str&); +str to_printable(char); +str to_printable(const str&); +str to_quoted(char c); +str to_quoted(const str&); +str to_displayable(const str&); // shortens to 40 chars + "..." + + +// --- podvec -------------------------------------------------------------- // + + +template + struct comparator + { memint operator() (const T& a, const T& b) { return memint(a - b); } }; + +template <> + struct comparator + { memint operator() (const str& a, const str& b) { return a.compare(b); } }; + +template <> + struct comparator + { memint operator() (const char* a, const char* b) { return strcmp(a, b); } }; + + +// Vector template for POD elements (int, pointers, et al). Used internally +// by the compiler itself. Also podvec is a basis for the universal vector. +// This hopefully generates minimal static code. 
+ +template +class podvec: protected bytevec +{ + friend void test_podvec(); + +protected: + enum { Tsize = int(sizeof(T)) }; + typedef bytevec parent; + +public: + podvec() throw(): parent() { } + podvec(const podvec& v) throw(): parent(v) { } + + /* Sizes and positions in this interface are counted in elements; they are scaled by Tsize where the byte-oriented bytevec primitives are called directly. */ + bool empty() const { return parent::empty(); } + memint size() const { return parent::size() / Tsize; } + bool operator== (const podvec& v) const { return parent::operator==(v); } + const T& operator[] (memint i) const { return *parent::data(i); } + const T& at(memint i) const { return *parent::at(i); } + T& atw(memint i) { return *parent::atw(i); } + const T& back() const { return *parent::back(); } + const T& back(memint i) const { return *parent::back(i); } + T& backw() { return *parent::backw(); } + T& backw(memint i) { return *parent::backw(i); } + const T* begin() const { return parent::begin(); } + const T* end() const { return parent::end(); } + void clear() { parent::clear(); } + void operator= (const podvec& v) throw() { parent::operator= (v); } + void push_back(const T& t) { new(_append(Tsize, container::allocate)) T(t); } // (*) + void pop_back() { parent::pop_back(); } + void pop_back(T& t) { parent::pop_back(t); } + void append(const podvec& v) { parent::append(v); } + void insert(memint pos, const T& t) { new(parent::_insert(pos * Tsize, Tsize, container::allocate)) T(t); } // (*) + void insert(memint pos, const podvec& v) { parent::_insert(pos * Tsize, v, container::allocate); } // (*) + void replace(memint pos, const T& t) { *parent::atw(pos) = t; } + void erase(memint pos, memint len) { parent::_erase(pos * Tsize, len * Tsize); } + void erase(memint pos) { parent::_erase(pos * Tsize, Tsize); } + + // If you keep the vector sorted, the following will provide set-like + // functionality: + bool find(const T& item) const + { + memint index; + return bsearch(item, index); + } + + /* Inserts item at the position reported by bsearch() unless an equal element is already present; returns true only when it was inserted. */ + bool find_insert(const T& item) // (*) + { + memint index; + if (!bsearch(item, index)) + { + insert(index, item); + return true; 
+ } + else + return false; + } + + /* Erases the element equal to item, or calls container::keyerr() when there is none. */ + void find_erase(const T& item) + { + memint index; + if (bsearch(item, index)) + erase(index); + else + container::keyerr(); + } + + // Internal method, but should be public for technical reasons + /* Binary search using comparator. Returns true with idx at the match; on a miss returns false with idx set to 'low', the position find_insert() inserts at. */ + bool bsearch(const T& elem, memint& idx) const + { + comparator comp; + idx = 0; + memint low = 0; + memint high = size() - 1; + while (low <= high) + { + idx = (low + high) / 2; + memint c = comp(operator[](idx), elem); + if (c < 0) + low = idx + 1; + else if (c > 0) + high = idx - 1; + else + return true; + } + idx = low; + return false; + } +}; + + +// --- vector -------------------------------------------------------------- // + + +template +class vector: public podvec +{ +protected: + enum { Tsize = int(sizeof(T)) }; + typedef podvec parent; + typedef T* Tptr; + typedef Tptr& Tref; + + class cont: public container + { + protected: + + /* Runs ~T() on every element of the byte range, starting at the last element and walking backwards; len is in bytes. */ + void finalize(void* p, memint len) throw() + { + (char*&)p += len - Tsize; + for ( ; len; len -= Tsize, Tref(p)--) + Tptr(p)->~T(); + } + + /* Copy-constructs each element of src into dest with placement new; len is in bytes. */ + void copy(void* dest, const void* src, memint len) throw() + { + for ( ; len; len -= Tsize, Tref(dest)++, Tref(src)++) + new(dest) T(*Tptr(src)); + } + + cont(memint cap, memint siz) throw(): container(cap, siz) { } + + public: + static container* allocate(memint cap, memint siz) throw() + { return new(cap) cont(cap, siz); } + + ~cont() throw() + { if (_size) { finalize(data(), _size); _size = 0; } } + }; + + vector(const vector& v, memint pos, memint len) + { parent::_init(v, pos * Tsize, len * Tsize, cont::allocate); } + +public: + vector() throw(): parent() { } + + // Override stuff that requires allocation of 'vector::cont' + void insert(memint pos, const T& t) + { new(bytevec::_insert(pos * Tsize, Tsize, cont::allocate)) T(t); } + void insert(memint pos, const vector& v) + { bytevec::_insert(pos * Tsize, v, cont::allocate); } + void push_back(const T& t) + { new(bytevec::_append(Tsize, cont::allocate)) T(t); } + void resize(memint); // not implemented + + 
void replace(memint pos, memint len, const vector& v) + { + parent::erase(pos, len); + insert(pos, v); + } + + /* Resizes the vector by extra_items elements and zero-fills extra_mem bytes at the pointer returned by _resize(); no T constructors are run for the added elements. */ + void grow(memint extra_items) + { + memint extra_mem = extra_items * Tsize; + char* p = bytevec::_resize(bytevec::size() + extra_mem, cont::allocate); + memset(p, 0, extra_mem); + } + + /* Returns a copy of *this when the requested range is the whole vector, otherwise a new vector built from the given element range. */ + vector subvec(memint pos, memint len) const + { + if (pos == 0 && len == parent::size()) + return *this; + return vector(*this, pos, len); + } + + // Give a chance to alternative constructors, e.g. str can be constructed + // from (const char*). Without these templates below temp objects are + // created and then copied into the vector. Though these are somewhat + // dangerous too. + template + void insert(memint pos, const U& u) + { new(bytevec::_insert(pos * Tsize, Tsize, cont::allocate)) T(u); } + template + void push_back(const U& u) + { new(bytevec::_append(Tsize, cont::allocate)) T(u); } + template + void replace(memint i, const U& u) + { parent::atw(i) = u; } + + /* An empty vector takes the item through this class's push_back(); otherwise the inherited find_insert() is used. */ + bool find_insert(const T& item) + { + if (parent::empty()) + { push_back(item); return true; } + else + return parent::find_insert(item); + } +}; + + +// This is a clone of vector<> but declared separately for overloaded variant +// constructors. (Is there a better way?) +template +class set: public vector +{ +protected: + enum { Tsize = int(sizeof(T)) }; + typedef vector parent; + typedef T* Tptr; + typedef Tptr& Tref; +public: + set() throw(): parent() { } +}; + + +// --- dict ---------------------------------------------------------------- // + +// dict: internally a dict variable is a pointer to dictobj which in its turn +// contains two separate vectors for keys and for values. 
This way we +// (1) re-use the existing instances of certain templates +// (2) more importantly, we simplify methods of getting the keys or values +// as vectors and reusing them on the ref-count basis + +template +class dict +{ + friend class variant; + +protected: + + void chkidx(memint i) const { if (umemint(i) >= umemint(size())) container::idxerr(); } + + class dictobj: public object + { + public: + vector keys; + vector values; + dictobj(): keys(), values() { } + dictobj(const dictobj& d): object(), keys(d.keys), values(d.values) { } + }; + + objptr obj; + + void _mkunique() + { if (!obj.empty() && !obj.isunique()) obj = new dictobj(*obj); } + + bool _bsearch(const Tkey& k, memint& i) const + { i = 0; return !empty() && obj->keys.bsearch(k, i); } + +public: + dict() throw() : obj() { } + dict(const dict& d) throw() : obj(d.obj) { } + ~dict() throw() { } + + dict(const Tkey& k, const Tval& v) throw() + : obj(new dictobj()) + { + obj->keys.push_back(k); + obj->values.push_back(v); + } + + bool empty() const { return obj.empty(); } + memint size() const { return !empty() ? obj->keys.size() : 0; } + bool operator== (const dict& d) const { return obj == d.obj; } + bool operator!= (const dict& d) const { return obj != d.obj; } + + void clear() { obj.clear(); } + void operator= (const dict& d) { obj = d.obj; } + + const Tkey& key(memint i) const { chkidx(i); return obj->keys[i]; } + const Tval& value(memint i) const { chkidx(i); return obj->values[i]; } + + const vector& keys() const { return empty() ? vector() : obj->keys; } + const vector& values() const { return empty() ? 
vector() : obj->values; } + + void replace(memint i, const Tval& v) + { + chkidx(i); + _mkunique(); + obj->values.replace(i, v); + } + + void erase(memint i) + { + chkidx(i); + _mkunique(); + obj->keys.erase(i); + obj->values.erase(i); + if (obj->keys.empty()) + clear(); + } + + const Tval* find(const Tkey& k) const + { + memint i; + if (_bsearch(k, i)) + return &obj->values[i]; + else + return NULL; + } + + bool find_key(const Tkey& k) const + { memint i; return _bsearch(k, i); } + + void find_replace(const Tkey& k, const Tval& v) + { + memint i; + if (!_bsearch(k, i)) + { + if (empty()) + obj = new dictobj(); + else + _mkunique(); + obj->keys.insert(i, k); + obj->values.insert(i, v); + } + else + replace(i, v); + assert(obj->keys.size() == obj->values.size()); + } + + void find_erase(const Tkey& k) + { + memint i; + if (_bsearch(k, i)) + erase(i); + else + container::keyerr(); + } + +#ifdef DEBUG + // for unit tests only + struct item_type + { + const Tkey& key; + Tval& value; + item_type(const Tkey& k, Tval& v): key(k), value(v) { } + }; + + item_type at(memint i) const + { chkidx(i); return item_type(obj->keys[i], obj->values.atw(i)); } +#endif +}; + + +// --- ordset -------------------------------------------------------------- // + + +class ordset +{ + friend class variant; +protected: + static charset empty_charset; + struct setobj: public object + { + charset set; + setobj(): set() { } + setobj(const setobj& s): object(), set(s.set) { } + }; + objptr obj; + charset& _getunique(); +public: + ordset() throw() : obj() { } + ordset(const ordset& s) throw() : obj(s.obj) { } + ordset(integer v) throw(); + ordset(integer l, integer r) throw(); + ~ordset() throw() { } + bool empty() const { return obj.empty() || obj->set.empty(); } + memint compare(const ordset& s) const; + bool operator== (const ordset& s) const { return compare(s) == 0; } + bool operator!= (const ordset& s) const { return compare(s) != 0; } + void clear() { obj.clear(); } + void operator= (const 
ordset& s) { obj = s.obj; } + bool find(integer v) const { return !obj.empty() && obj->set[int(v)]; } + void find_insert(integer v); + void find_insert(integer l, integer h); + void find_erase(integer v); + /* An ordset without a set object reads as the shared static empty_charset. */ + const charset& get_charset() const { return obj.empty() ? empty_charset : obj->set; } +}; + + +// --- range --------------------------------------------------------------- // + + +class range +{ + friend class variant; +protected: + struct rangeobj: public object + { + integer left; + integer right; + rangeobj(integer l, integer r): left(l), right(r) { } + }; + objptr obj; +public: + range(integer, integer) throw(); + ~range() throw(); + bool empty() const + { return obj.empty(); } + /* left() and right() dereference obj without checking for an empty range. */ + integer left() const + { return obj->left; } + integer right() const + { return obj->right; } + /* True when v lies within [left, right]; an empty range contains nothing. Declared const (it only reads) so that it can be called on a const range. */ + bool contains(integer v) const + { return !obj.empty() && (v >= obj->left && v <= obj->right); } + memint compare(const range& r) const; + bool operator ==(const range& r) const; +}; + + +// --- object collections -------------------------------------------------- // + + +// extern template class podvec; + + +class objvec_impl: public podvec +{ +protected: + typedef podvec parent; +public: + objvec_impl() throw(): parent() { } + objvec_impl(const objvec_impl& s) throw(): parent(s) { } + void release_all() throw(); +}; + + +template +class objvec: public objvec_impl +{ +protected: + typedef objvec_impl parent; +public: + objvec() throw(): parent() { } + objvec(const objvec& s) throw(): parent(s) { } + T* operator[] (memint i) const { return cast(parent::operator[](i)); } + T* at(memint i) const { return cast(parent::at(i)); } + T* back() const { return cast(parent::back()); } + T* back(memint i) const { return cast(parent::back(i)); } + T* push_back(T* t) { parent::push_back(t); return t; } + void insert(memint pos, T* t) { parent::insert(pos, t); } + void replace(memint pos, T* t) { parent::replace(pos, t); } +}; + + +class symbol: public object +{ +public: + str const name; + symbol(const str& 
s) throw(): name(s) { } + symbol(const char* s) throw(): name(s) { } + ~symbol() throw(); +}; + + +class symtbl_impl: public objvec +{ +protected: + typedef objvec parent; + bool bsearch(const str& key, memint& index) const; +public: + symtbl_impl() throw(): parent() { } + symtbl_impl(const symtbl_impl& s) throw(); + symbol* find(const str& name) const; // NULL or symbol* + bool add(symbol*); + bool replace(symbol*); +}; + + + /* Typed facade over symtbl_impl: every method forwards to the base class and casts results to T*. */ +template +class symtbl: public symtbl_impl +{ +protected: + typedef symtbl_impl parent; +public: + symtbl() throw(): parent() { } + memint size() const { return parent::size(); } + T* find(const str& name) const { return cast(parent::find(name)); } + bool add(T* t) { return parent::add(t); } + bool replace(T* t) { return parent::replace(t); } + void release_all() throw() { parent::release_all(); } +}; + + +// --- Exceptions ---------------------------------------------------------- // + + +// For dynamically generated strings + /* NOTE(review): what() is declared without const here - confirm whether it is meant to override a const what() of the base class; the definition is outside this header. */ +class emessage: public exception +{ +public: + str msg; + emessage(const emessage&) throw(); // not defined + emessage(const str& _msg) throw(); + emessage(const char* _msg) throw(); + ~emessage() throw(); + const char* what() throw(); +}; + + +// TODO: define these as separate classes +typedef emessage econtainer; +typedef emessage evariant; +typedef emessage efifo; + + +// UNIX system errors +class esyserr: public emessage +{ +public: + esyserr(int icode, const str& iArg = "") throw(); + ~esyserr() throw(); +}; + + +void nullptrerr(); + + /* Passes p through unchanged, calling nullptrerr() first when it is NULL. */ +template + inline T* CHKPTR(T* p) + { if (p == NULL) nullptrerr(); return p; } + + +// --- variant ------------------------------------------------------------- // + + +class variant; +class reference; +class funcptr; +class stateobj; +struct podvar; + +typedef vector varvec; +typedef set varset; +typedef dict vardict; + + +class variant +{ + friend void test_variant(); + friend void initRuntime(); + +private: + void _init(void*); // compiler traps + void _init(const void*); + void 
_init(object*); + void _init(bool); + +public: + /* Type tags. ANYOBJ aliases STR, so STR and every tag listed after it form the group whose value is an object pointer. */ + enum Type + { VOID, ORD, REAL, VARPTR, + STR, RANGE, VEC, SET, ORDSET, DICT, REF, RTOBJ, + ANYOBJ = STR }; + + struct _Void { int dummy; }; + static _Void null; + +protected: + Type type; + union _val_union + { + integer _all; // should be the biggest in this union + integer _ord; // int, char and bool + real _real; // not implemented in the VM yet + variant* _ptr; // POD pointer to a variant + object* _obj; // str, vector, set, map and their variants + reference* _ref; // reference object + rtobject* _rtobj; // runtime objects with the "type" field + } val; + + /* _req() always verifies the tag and reports a mismatch through _type_err(); _dbg() does the same in DEBUG builds only and is a no-op otherwise. */ + void _req(Type t) const { if (type != t) _type_err(); } + void _req_anyobj() const { if (!is_anyobj()) _type_err(); } +#ifdef DEBUG + void _dbg(Type t) const { _req(t); } + void _dbg_anyobj() const { _req_anyobj(); } +#else + void _dbg(Type) const { } + void _dbg_anyobj() const { } +#endif + void _init() throw() { type = VOID; val._all = 0; } + void _init(_Void) throw() { _init(); } + void _init(Type t) throw() { type = t; val._all = 0; } + /* A char is converted through uchar before being stored as an ordinal. */ + void _init(char v) throw() { type = ORD; val._ord = uchar(v); } + void _init(uchar v) throw() { type = ORD; val._ord = v; } + void _init(int v) throw() { type = ORD; val._ord = v; } +#ifdef SHN_64 + void _init(large v) throw() { type = ORD; val._ord = v; } +#endif + void _init(real v) throw() { type = REAL; val._real = v; } + void _init(variant* v) throw() { type = VARPTR; val._ptr = v; } + /* Stores the object pointer under tag t and takes a reference to it when it is non-NULL. */ + void _init(Type t, object* o) throw() { type = t; val._obj = o; if (o) o->grab(); } + void _init(const str& v) throw() { _init(STR, v.obj); } + void _init(const char* s) throw() { type = STR; ::new(&val._obj) str(s); } + void _init(const range& v) throw() { _init(RANGE, v.obj); } + void _init(integer l, integer r) throw() { type = RANGE; ::new(&val._obj) range(l, r); } + void _init(const varvec& v) throw() { _init(VEC, v.obj); } + void _init(const varset& v) throw() { _init(SET, v.obj); } + void _init(const ordset& v) throw() { 
_init(ORDSET, v.obj); } + void _init(const vardict& v) throw() { _init(DICT, v.obj); } + void _init(reference* o) throw(); + void _init(rtobject* o) throw() { _init(RTOBJ, o); } + void _init(fifo* o) throw(); + void _init(stateobj* o) throw(); + void _init(const variant& v) throw(); + void _init(const podvar* v) throw(); + + void _fin() throw() { if (is_anyobj()) val._obj->release(); } + +public: + variant() throw() { _init(); } + variant(Type t) throw() { _init(t); } + variant(integer l, integer r) throw() { _init(l, r); } + variant(const variant& v) throw() { _init(v); } + template + variant(const T& v) throw() { _init(v); } + variant(Type t, object* o) throw() { _init(t, o); } + ~variant() throw() { _fin(); } + + template + void operator= (const T& v) throw() { _fin(); _init(v); } + void operator= (const variant& v) throw(); + void clear() throw() { _fin(); _init(); } + bool empty() const; + + memint compare(const variant&) const; + bool operator== (const variant&) const; + bool operator!= (const variant& v) const { return !(operator==(v)); } + + Type getType() const { return Type(type); } + bool is(Type t) const { return type == t; } + bool is_str() const { return type == STR; } + bool is_vec() const { return type == VEC; } + bool is_null() const { return type == VOID; } + bool is_anyobj() const throw() { return type >= ANYOBJ; } + bool is_null_obj() const { return is_anyobj() && val._obj == NULL; } + + // Fast "unsafe" access methods; checked for correctness only in DEBUG mode + bool _bool() const { _dbg(ORD); return val._ord; } + uchar _uchar() const { _dbg(ORD); return (uchar)val._ord; } + integer _int() const { _dbg(ORD); return val._ord; } + variant* _ptr() const { _dbg(VARPTR); return val._ptr; } + const str& _str() const { _dbg(STR); return *(str*)&val._obj; } + const range& _range() const { _dbg(RANGE); return *(range*)&val._obj; } + const varvec& _vec() const { _dbg(VEC); return *(varvec*)&val._obj; } + const varset& _set() const { _dbg(SET); return 
*(varset*)&val._obj; } + const ordset& _ordset() const { _dbg(ORDSET); return *(ordset*)&val._obj; } + const vardict& _dict() const { _dbg(DICT); return *(vardict*)&val._obj; } + /* _ref() additionally rejects a NULL pointer through CHKPTR(); as_ref() further down returns val._ref unchecked. */ + reference* _ref() const { _dbg(REF); return CHKPTR(val._ref); } + rtobject* _rtobj() const { _dbg(RTOBJ); return val._rtobj; } + stateobj* _stateobj() const; + funcptr* _funcptr() const; + fifo* _fifo() const; // checks for NULL + object* _anyobj() const { _dbg_anyobj(); return val._obj; } + /* Non-const overloads: the container accessors reinterpret the val._obj slot itself as the container object and return a reference to it. */ + integer& _int() { _dbg(ORD); return val._ord; } + str& _str() { _dbg(STR); return *(str*)&val._obj; } + range& _range() { _dbg(RANGE); return *(range*)&val._obj; } + varvec& _vec() { _dbg(VEC); return *(varvec*)&val._obj; } + varset& _set() { _dbg(SET); return *(varset*)&val._obj; } + ordset& _ordset() { _dbg(ORDSET); return *(ordset*)&val._obj; } + vardict& _dict() { _dbg(DICT); return *(vardict*)&val._obj; } + + // Safer access methods; may throw + /* Each as_*() verifies the tag with _req() in every build and then delegates to the matching fast accessor or reads the union member directly. */ + bool as_bool() const { _req(ORD); return _bool(); } + uchar as_uchar() const { _req(ORD); return _uchar(); } + integer as_ord() const { _req(ORD); return _int(); } + variant* as_ptr() const { _req(VARPTR); return val._ptr; } + const str& as_str() const { _req(STR); return _str(); } + const range& as_range() const { _req(RANGE); return _range(); } + const varvec& as_vec() const { _req(VEC); return _vec(); } + const varset& as_set() const { _req(SET); return _set(); } + const ordset& as_ordset() const { _req(ORDSET); return _ordset(); } + const vardict& as_dict() const { _req(DICT); return _dict(); } + reference* as_ref() const { _req(REF); return val._ref; } + rtobject* as_rtobj() const { _req(RTOBJ); return _rtobj(); } + object* as_anyobj() const { _req_anyobj(); return val._obj; } + integer& as_ord() { _req(ORD); return _int(); } + str& as_str() { _req(STR); return _str(); } + range& as_range() { _req(RANGE); return _range(); } + varvec& as_vec() { _req(VEC); return _vec(); } + varset& as_set() { _req(SET); return _set(); } + ordset& as_ordset() { 
_req(ORDSET); return _ordset(); } + vardict& as_dict() { _req(DICT); return _dict(); } + + static void _type_err(); + static void _range_err(); +}; + + +#ifdef SHN_FASTER +inline void variant::_init(const variant& v) throw() +{ + type = v.type; + val = v.val; + if (is_anyobj() && val._obj) + val._obj->grab(); +} + + +inline void variant::operator= (const variant& v) throw() +{ + if (type != v.type || val._all != v.val._all) + { _fin(); _init(v); } +} +#endif + + +struct podvar { char data[sizeof(variant)]; }; + +inline void variant::_init(const podvar* v) throw() + { *(podvar*)this = *v; } + +template <> + struct comparator + { memint operator() (const variant& a, const variant& b) { return a.compare(b); } }; + +/* +extern template class vector; +extern template class set; +extern template class dict; +extern template class podvec; +*/ + +// --- runtime objects ----------------------------------------------------- // + + +// reference: is a ref-counted object that encapsulates a variant; this way a +// variant can be shared between multiple other variables and mimic +// "references" as commonly defined in other languages + +class reference: public object +{ +public: + variant var; + reference() throw() { } + reference(const variant& v) throw(): var(v) { } + reference(const podvar* v) throw(): var(v) { } + ~reference() throw(); +}; + + +inline void variant::_init(reference* o) throw() { _init(REF, o); } + + +class State; // defined in typesys.h +class CodeSeg; // defined in vm.h + + +// stateobj: a run-time ref-counted object, actually a structure with variant +// member fields. The actual size is passed to operator new() defined below. 
+ +class stateobj: public rtobject +{ + friend class State; + typedef rtobject parent; + friend void runRabbitRun(variant*, stateobj*, stateobj*, variant*, CodeSeg*); + +protected: +#ifdef DEBUG + memint varcount; + static void idxerr(); +#endif + stateobj(State*) throw(); // defined in typesys.h as an inline function + + // Get zeroed memory so that the destructor works correctly even if the + // constructor failed in the middle. A zeroed variant is a null variant. + void* operator new(size_t s, memint extra) + { +#ifdef DEBUG + pincrement(&object::allocated); +#endif + return pmemcalloc(s + extra * sizeof(variant)); + } + + // In place operator new for stateobj: for creating pseudo-objects on the stack + void* operator new(size_t, void* p) + { +#ifdef DEBUG + pincrement(&object::allocated); +#endif + // memset(pchar(p) + s, 0, extra * sizeof(variant)); + return p; + } + +public: + ~stateobj() throw(); + State* getType() const { return (State*)parent::getType(); } + + bool empty() const; // override + void dump(fifo&) const; // override + + variant* member(memint index) + { +#ifdef DEBUG + if (umemint(index) >= umemint(varcount)) + idxerr(); +#endif + return (variant*)(this + 1) + index; + } + + void collapse(); +}; + + +inline void variant::_init(stateobj* o) throw() { _init(RTOBJ, o); } +inline stateobj* variant::_stateobj() const { return cast(_rtobj()); } + + +class funcptr: public rtobject +{ +public: + stateobj* dataseg; + objptr outer; + State* state; + funcptr(stateobj* dataseg, stateobj* outer, State* state) throw(); + ~funcptr() throw(); + bool empty() const; + void dump(fifo&) const; +}; + +inline funcptr* variant::_funcptr() const { return cast(_rtobj()); } + + +class rtstack: protected bytevec +{ +public: + rtstack(memint maxSize) throw(); + variant* base() + { return (variant*)begin(); } +}; + + +// --- FIFO ---------------------------------------------------------------- // + + +const int _varsize = int(sizeof(variant)); + + +// The abstract FIFO 
interface. There are 2 modes of operation: variant FIFO +// and character FIFO. Destruction of variants is basically not handled by +// this class to give more flexibility to implementations (e.g. there may be +// buffers shared between 2 fifos or other container objects). If you implement, +// say, only input methods, the default output methods will throw an exception +// with a message "FIFO is read-only", and vice versa. Iterators may be +// implemented in descendant classes but are not supported by default. +// Powerful text parsing methods are provided that work on any derived FIFO +// implementation (see "Character FIFO operations" below). +class fifo: public rtobject +{ + friend void runRabbitRun(variant*, stateobj*, stateobj*, variant*, CodeSeg*); + + fifo& operator<< (bool); // compiler traps + fifo& operator<< (void*); + fifo& operator<< (object*); + fifo& operator<< (rtobject* o); // { o->dump(*this); return *this; } + +protected: + memint max_token; // 4096 + bool _is_char_fifo; + + static void _empty_err(); + static void _full_err(); + static void _wronly_err(); + static void _rdonly_err(); + static void _fifo_type_err(); + static void _token_err(); + void _req(bool req_char) const { if (req_char != _is_char_fifo) _fifo_type_err(); } + void _req_non_empty() const; + void _req_non_empty(bool _char) const; + + // Minimal set of methods required for both character and variant FIFO + // operations. Implementations should guarantee variants will never be + // fragmented, so that a buffer returned by get_tail() always contains at + // least sizeof(variant) bytes (8, 12 or 16 bytes depending on the config. + // and platform) in variant mode, or at least 1 byte in character mode. + virtual const char* get_tail(); // Get a pointer to tail data + virtual const char* get_tail(memint*); // ... 
also return the length + virtual void deq_bytes(memint); // Discard n consecutive bytes returned by get_tail() + virtual variant* enq_var(); // Reserve uninitialized space for a variant + virtual void enq_char(char); // Push one char, char fifo only + virtual memint enq_chars(const char*, memint); // Push arbitrary number of bytes, return actual number, char fifo only + + void _token(const charset& chars, str* result); + void deq_var(variant*); // dequeue variant to uninitialized area, for internal use + +public: + fifo(Type*, bool is_char) throw(); + ~fifo() throw(); + + enum { CHAR_ALL = MEMINT_MAX - 2, CHAR_SOME = MEMINT_MAX - 1 }; + + void dump(fifo&) const; + + bool empty() const; // override, throws + virtual void flush(); // empty, overridden in file fifos + virtual str get_name() const = 0; + + // Main FIFO operations, work on both char and variant fifos; for char + // fifos the variant is read as either a char or a string. + void var_enq(const variant&); + void var_preview(variant&); + void var_deq(variant&); + void var_eat(); + + // Character FIFO operations + bool is_char_fifo() const { return _is_char_fifo; } + int preview(); // returns -1 on eof + char look() + { int c = preview(); if (c == -1) _empty_err(); return c; } + uchar get(); + bool get_if(char c); + str deq(memint); // CHAR_ALL, CHAR_SOME can be specified + str deq(const charset& c) { str s; _token(c, &s); return s; } + str token(const charset& c) { return deq(c); } // alias + void eat(const charset& c) { _token(c, NULL); } + void skip(const charset& c) { eat(c); } // alias + str line(); + bool eol(); + void skip_eol(); + bool eof() const { return empty(); } + + memint enq(const char* p, memint count) { return enq_chars(p, count); } + void enq(const char* s); + void enq(const str& s); + void enq(char c) { enq_char(c); } + void enq(uchar c) { enq_char(c); } + void enq(large i); + void enq(const varvec&); + + fifo& operator<< (const char* s) { enq(s); return *this; } + fifo& operator<< (const 
str& s) { enq(s); return *this; } + fifo& operator<< (char c) { enq(c); return *this; } + fifo& operator<< (uchar c) { enq(c); return *this; } + fifo& operator<< (large i) { enq(large(i)); return *this; } + fifo& operator<< (int i) { enq(large(i)); return *this; } + fifo& operator<< (long i) { enq(large(i)); return *this; } + fifo& operator<< (size_t i) { enq(large(i)); return *this; } +}; + +const char endl = '\n'; +extern charset non_eol_chars; + +inline void variant::_init(fifo* f) throw() { _init(RTOBJ, f); } +inline fifo* variant::_fifo() const { return cast(CHKPTR(_rtobj())); } + + +// The memfifo class implements a linked list of "chunks" in memory, where +// each chunk is the size of 32 * sizeof(variant). Both enqueue and dequeue +// operations are O(1), and memory usage is better than that of a plain linked +// list of elements, as "next" pointers are kept for bigger chunks of elements +// rather than for each element. Can be used both for variants and chars. This +// class "owns" variants, i.e. proper construction and destruction is done. 
+class memfifo: public fifo +{ +public: +#ifdef DEBUG + static int CHUNK_SIZE; // settable from unit tests +#else + enum { CHUNK_SIZE = 32 * _varsize }; +#endif + +protected: + struct chunk: noncopyable + { + chunk* next; + char data[0]; +#ifdef DEBUG + chunk() throw(): next(NULL) { pincrement(&object::allocated); } + ~chunk() throw() { pdecrement(&object::allocated); } +#else + chunk() throw(): next(NULL) { } +#endif + void* operator new(size_t) { return ::pmemalloc(sizeof(chunk) + CHUNK_SIZE); } + void operator delete(void* p) { ::pmemfree(p); } + }; + + chunk* head; // in + chunk* tail; // out + int head_offs; + int tail_offs; + + void enq_chunk(); + void deq_chunk(); + + // Overrides + const char* get_tail(); + const char* get_tail(memint*); + void deq_bytes(memint); + variant* enq_var(); + void enq_char(char); + memint enq_chars(const char*, memint); + + char* enq_space(memint); + memint enq_avail(); + +public: + memfifo(Type*, bool is_char) throw(); + ~memfifo() throw(); + + void clear(); + bool empty() const; // override + str get_name() const; // override +}; + + +// Buffer read event handler (write events aren't implemented yet) +class bufevent: public object +{ +public: + virtual void event(char* buf, memint tail, memint head) = 0; +}; + + +// This is an abstract buffered fifo class. Implementations should validate the +// buffer in the overridden empty() and flush() methods, for input and output +// fifos respectively. To simplify things, buffifo objects are not supposed to +// be reusable, i.e. once the end of file is reached, the implementation is not +// required to reset its state. Variant fifo implementations should guarantee +// at least sizeof(variant) bytes in calls to get_tail() and enq_var(). 
+class buffifo: public fifo +{ +protected: + char* buffer; + memint bufsize; + memint bufhead; + memint buftail; + memint buforig; + + bufevent* event; + + const char* get_tail(); + const char* get_tail(memint*); + void deq_bytes(memint); + variant* enq_var(); + void enq_char(char); + memint enq_chars(const char*, memint); + + char* enq_space(memint); + memint enq_avail(); + + void call_bufevent() const; + +public: + buffifo(Type*, bool is_char) throw(); + ~buffifo() throw(); + + bool empty() const; // throws efifowronly + void flush(); // throws efifordonly + + memint tellg() const { return buforig + buftail; } + memint tellp() const { return buforig + bufhead; } + bufevent* set_bufevent(bufevent*); +}; + + +// Analog of std::strstream; a buffifo-based implementation that uses +// a 'str' object for storing data. +class strfifo: public buffifo +{ +protected: + str string; + void clear(); +public: + strfifo(Type*) throw(); + strfifo(Type*, const str&) throw(); + ~strfifo() throw(); + bool empty() const; // override + void flush(); // override + str get_name() const; // override + str all() const; +}; + + +// TODO: varfifo, a variant vector wrapper based on buffifo + +class intext: public buffifo +{ +public: +#ifdef DEBUG + static int BUF_SIZE; // settable from unit tests +#else + enum { BUF_SIZE = 4096 * sizeof(integer) }; +#endif + +protected: + str file_name; + str filebuf; + int _fd; + bool _eof; + + void error(int code); // throws esyserr + void doopen(); + void doread(); + +public: + intext(Type*, const str& fn) throw(); + ~intext() throw(); + + bool empty() const; // override + str get_name() const; // override + void open() { empty(); /* attempt to open */ } +}; + + +class outtext: public buffifo +{ +protected: + enum { BUF_SIZE = 2048 * sizeof(integer) }; + + str file_name; + str filebuf; + int _fd; + bool _err; + + void error(int code); // throws esyserr + +public: + outtext(Type*, const str& fn) throw(); + ~outtext() throw(); + + void flush(); // override 
+ str get_name() const; // override + void open() { flush(); /* attempt to open */ } +}; + + +// Standard input/output object, a two-way fifo. In case of stderr it is write-only. +class stdfile: public intext +{ +protected: + int _ofd; + void enq_char(char); + memint enq_chars(const char*, memint); +public: + stdfile(int infd, int outfd) throw(); + ~stdfile() throw(); +}; + + +extern stdfile sio; +extern stdfile serr; + + +// System utilities + + +bool isFile(const char*); + + +// ------------------------------------------------------------------------- // + + +typedef vector strvec; +// extern template class vector; + + +void initRuntime(); +void doneRuntime(); + + +#endif // __RUNTIME_H diff --git a/src/shannon.syntax b/src/shannon.syntax new file mode 100644 index 0000000..966d521 --- /dev/null +++ b/src/shannon.syntax @@ -0,0 +1,137 @@ +# +# Syntax highlighting rules for Midnight Commander +# +# +# Add these lines to /etc/mc/Syntax : +# file ..\*\\.shn$ Shannon\sProgram +# include shn.syntax +# +# The syntax files themselves are usually in /usr/share/mc/syntax +# + +context default + # Keywords + keyword whole and yellow + keyword whole as yellow + keyword whole assert yellow + keyword whole begin yellow + keyword whole break yellow + keyword whole case yellow + keyword whole class yellow + keyword whole const yellow + keyword whole continue yellow + keyword whole def yellow + keyword whole default yellow + keyword whole del yellow + keyword whole dump yellow + keyword whole elif yellow + keyword whole else yellow + keyword whole exit yellow + keyword whole for yellow + keyword whole if yellow + keyword whole in yellow + keyword whole ins yellow + keyword whole is yellow + keyword whole not yellow + keyword whole or yellow + keyword whole return yellow + keyword whole shl yellow + keyword whole shr yellow + keyword whole switch yellow + keyword whole this yellow + keyword whole typeof yellow + keyword whole var yellow + keyword whole while yellow + keyword 
whole xor yellow + + # Most common functions + + # Constants + keyword whole false brightgreen + keyword whole null brightgreen + keyword whole true brightgreen + + # Comments + keyword /\* brown + keyword \*/ brown + keyword // brown + + # Numbers + wholechars abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_. + + keyword whole 0\{xX\}\{0123456789abcdefABCDEF\}\[0123456789abcdefABCDEF\] green + keyword whole \{0123456789\}\[0123456789\] green + keyword whole \[0123456789\]\.\{0123456789\}\[0123456789\] green + keyword whole \{0123456789\}\[0123456789\]\.\[0123456789\] green + keyword whole \{0123456789\}\[0123456789\]\{eE\}\{0123456789\}\[0123456789\] green + keyword whole \{0123456789\}\[0123456789\]\{eE\}\{\-\+\}\{0123456789\}\[0123456789\] green + keyword whole \{0123456789\}\[0123456789\]\.\{0123456789\}\[0123456789\]\{eE\}\{0123456789\}\[0123456789\] green + keyword whole \{0123456789\}\[0123456789\]\.\{0123456789\}\[0123456789\]\{eE\}\{\-\+\}\{0123456789\}\[0123456789\] green + + # Special symbols + keyword \\ yellow + keyword \. yellow + keyword \* yellow + keyword \+ yellow + keyword - yellow + keyword / yellow + keyword % yellow + keyword = yellow + keyword & yellow + keyword | yellow + keyword ^ yellow + keyword @ yellow + keyword > yellow + keyword < yellow + keyword # yellow + keyword ? yellow + keyword ! 
yellow + + # Separators + keyword { brightcyan + keyword } brightcyan + keyword ( brightcyan + keyword ) brightcyan + keyword [ brightcyan + keyword ] brightcyan + keyword , brightcyan + keyword : brightcyan + keyword ; brightmagenta + + +# Comments + +context exclusive /\* \*/ brown + spellcheck + keyword whole BUG brightred + keyword whole FixMe brightred + keyword whole FIXME brightred + keyword whole Note brightred + keyword whole NOTE brightred + keyword whole ToDo brightred + keyword whole TODO brightred + keyword !!\[!\] brightred + keyword ??\[?\] brightred + + +context exclusive // \n brown + spellcheck + keyword whole BUG brightred + keyword whole FixMe brightred + keyword whole FIXME brightred + keyword whole Note brightred + keyword whole NOTE brightred + keyword whole ToDo brightred + keyword whole TODO brightred + keyword !!\[!\] brightred + keyword ??\[?\] brightred + + +# Strings + +context ' ' green + spellcheck + keyword \\\{\\'"tnr\} brightgreen + keyword \\\{0123\}\{01234567\}\{01234567\} brightgreen + keyword \\x\{0123456789abcdef\}\{0123456789abcdef\} brightgreen + diff --git a/src/sysmodule.cpp b/src/sysmodule.cpp new file mode 100644 index 0000000..e65fa85 --- /dev/null +++ b/src/sysmodule.cpp @@ -0,0 +1,82 @@ + +#include "sysmodule.h" +#include "vm.h" +#include "compiler.h" + + +void compileLen(Compiler* c, Builtin*) + { c->codegen->length(); } + +void compileLo(Compiler* c, Builtin*) + { c->codegen->lo(); } + +void compileHi(Compiler* c, Builtin*) + { c->codegen->hi(); } + +void compileToStr(Compiler* c, Builtin*) + { c->codegen->toStr(); } + +void compileEnq(Compiler* c, Builtin*) + { c->codegen->fifoEnq(); } + +void compileDeq(Compiler* c, Builtin*) + { c->codegen->fifoDeq(); } + +void compileToken(Compiler* c, Builtin*) + { c->codegen->fifoToken(); } + + +void compileSkip(Compiler* c, Builtin* b) +{ + // TODO: maybe more possibilities, e.g. 
skip(n), skip({...}) for any fifo + Type* fifoType = c->codegen->getTopType(2); + if (!fifoType->isByteFifo()) + c->error("'skip' is only applicable to small ordinal fifos"); + Type* setType = c->codegen->getTopType(); + if (!setType->isByteSet()) + c->error("Small ordinal set expected"); + if (!PContainer(setType)->index->canAssignTo(PFifo(fifoType)->elem)) + c->error("Incompatible set element type"); + c->codegen->staticCall(b->staticFunc); +} + + +void shn_skipset(variant*, stateobj*, variant args[]) +{ + args[-2]._fifo()->skip(args[-1]._ordset().get_charset()); +} + + +void shn_eol(variant* result, stateobj*, variant args[]) +{ + new(result) variant((int)args[-1]._fifo()->eol()); +} + + +void shn_line(variant* result, stateobj*, variant args[]) +{ + new(result) variant(args[-1]._fifo()->line()); +} + + +void shn_skipln(variant*, stateobj*, variant args[]) +{ + fifo* f = args[-1]._fifo(); + int c = f->preview(); + if (c != -1 && c != '\r' && c != '\n') + f->skip(non_eol_chars); + f->skip_eol(); +} + + +void shn_look(variant* result, stateobj*, variant args[]) +{ + new(result) variant((uchar)args[-1]._fifo()->look()); +} + +void shn_strfifo(variant* result, stateobj*, variant args[]) +{ + new(result) variant(new strfifo(queenBee->defCharFifo, args[-1]._str())); +} + + diff --git a/src/sysmodule.h b/src/sysmodule.h new file mode 100644 index 0000000..0303a51 --- /dev/null +++ b/src/sysmodule.h @@ -0,0 +1,35 @@ +#ifndef __BUILTINS_H +#define __BUILTINS_H + +#include "runtime.h" +#include "typesys.h" + + +// Defined in typesys.h: +// typedef void (*CompileFunc)(Compiler*, Builtin*); -- for builtins +// typedef void (*ExternFuncProto)(variant* result, stateobj* outerobj, variant args[]); + + +// --- BUILTINS ------------------------------------------------------------ // + +class Compiler; + +void compileLen(Compiler*, Builtin*); +void compileLo(Compiler*, Builtin*); +void compileHi(Compiler*, Builtin*); +void compileToStr(Compiler*, Builtin*); +void 
compileEnq(Compiler*, Builtin*); +void compileDeq(Compiler*, Builtin*); +void compileToken(Compiler*, Builtin*); +void compileSkip(Compiler*, Builtin*); + +void shn_skipset(variant*, stateobj*, variant[]); +void shn_eol(variant*, stateobj*, variant[]); +void shn_line(variant*, stateobj*, variant[]); +void shn_skipln(variant*, stateobj*, variant[]); +void shn_look(variant*, stateobj*, variant[]); + +void shn_strfifo(variant*, stateobj*, variant[]); + + +#endif // __BUILTINS_H diff --git a/src/tests/.svnignore b/src/tests/.svnignore new file mode 100644 index 0000000..0c1c0f1 --- /dev/null +++ b/src/tests/.svnignore @@ -0,0 +1 @@ +test.lst diff --git a/src/tests/stmtest.txt b/src/tests/stmtest.txt new file mode 100644 index 0000000..695e785 --- /dev/null +++ b/src/tests/stmtest.txt @@ -0,0 +1,10 @@ +We must be careful with terms like readable, user-friendly, and so +forth. They are vague at best, and often refer to taste and established +habits. But what is conventional need not also be convenient. In the +context of programming languages, perhaps "readable" should be replaced +by "amenable to formal reasoning." For example, mathematical formulas +are hardly what we might praise as easily readable, but they allow the +formal derivation of properties that could not be obtained from a vague, +fuzzy, informal, user-friendly circumscription. + +- Niklaus Wirth - From an interview in Software Development, June 1997. 
diff --git a/src/tests/test.shn b/src/tests/test.shn new file mode 100644 index 0000000..5856b93 --- /dev/null +++ b/src/tests/test.shn @@ -0,0 +1,900 @@ +// EXPRESSION +assert 1 > 0 +assert system.true +assert 1 + 2 * 2 == 5 +assert 10 % 3 == 1 +assert 10 / 3 == 3 +assert 'o' | 'ne' | ' tw' | 'o' == 'one two' +assert 'three' | ' four' == 'three four' +assert not true == false +assert not 1 == -2 +assert not (1 == 1 and 2 == 1) +assert 1 == 1 and 2 == 2 and 3 == 2 + 1 and 'a' == 'a' +assert true or false +assert not (false or false) +assert (1 xor 3) == 2 +assert (1 or 2) == 3 + +// DEFINITIONS +def type nums = (one, two, three) +// error: def nums badnum = 5 as nums +def dow = (Mon, Tue, Wed, Thu, Fri, Sat, Sun) +def intvec = int *[] +def numvec = int *[nums] +def charset = void *^[char] +def matrix0 = int *[str]^[int] +def type charset2 = void *[char] +def CharIntMap = int *[char] +def charfifo = char *<> +def sign = -1..1 + +// CONSTANTS +def n0 = two +def s0 = 'abc' +def s1 = '' +def i0 = 2 * 3 + 4 +def v1 = 'a' | 'b' +def v2 = 'abc' | 'def' | 'g' +def v3 = 3 | 4 +def v4 = [] +def v5 = [5] +def v5a = v5 +def v6 = [5, 6] +def vnull = [] +def int *[] v7 = [] +def v8 = v7 | 10 | 11 +def v9 = 12 | v4 | 13 | vnull +def sign b0 = -1 +def t0 = {1} +def t1 = {1, 5, 6, 7, 10} +def t3 = {} +def identChars = {'A'..'Z', 'a'..'z', '0'..'9', '_'} +def t4 = {(5 as byte)..15, 17} +def t5 = {two..three} +def d0 = {'one' = 1, 'two' = 2} +def d1 = {'a' = 'Letter A', 'b' = 'Letter B'} +def byte cbv[] = [0, 1] | 2 | 3 | [] | [4] + +assert true ; assert s0 == 'abc' +assert i0 == 10 +assert v5 == v5a +assert v1 == 'ab' and len(v1) == 2 +assert v2[0] == 'a' and v2[1] == 'b' and v2[3] == 'd' and v2[6] == 'g' +assert v2.len() == 7 +assert -len(v2) == -7 +assert len(v4) == 0 and vnull.len() == 0 +assert v6[0] == 5 and 6 == v6[1] and len(v6) == 2 +assert v8[0] == 10 and v8[1] == 11 +assert v9[0] == 12 and v9[1] == 13 +assert d0['one'] == 1 and d0['two'] == 2 +assert d1['a'] == 'Letter A' 
and d1['b'] == 'Letter B' +assert i0? and v5? and n0? and not one? and not false? and not s1? and v2? +assert 'a' in identChars and not '?' in identChars +assert (int *[] *[str]) == (intvec *[str]) +assert lo(v1) == 0 and v1.hi() == 1 +assert lo(char) == 0 and char.hi() == 255 +assert [].hi() == -1 + +def cc1 = 1 +def cc2 = __result.cc1 +assert cc2 == 1 + +// ASSIGNMENTS, DEL, INS + +var a = 2 +var int b = 1 +assert a == 2 and b == 1 +var char ch1 = 'z' +var c = 'abcd' +var d = [1, 2, 3] +var (byte *[][]) e = [[], [1, 2, 3], [4, 5]] +var int *[][] er = [[6, 7], [8, 9, 10], []] +var byte ei[] = [0, 1] | 2 | 3 | [] | [4] +var int*^ r1 = 2 +var r2 = @3 +var str r3^ = 'abc' +var typeof r1 r4 = 10 +a = r1 +assert a == 2 +r1 = 12 +assert r1 == 12 +@r2 = r1 +r1 = 1 +assert r2 == 1 +assert len(d) == 3 +del d[1] +assert len(d) == 2 and d[0] == 1 and d[1] == 3 +// error: del d[2] +del r3[0] +assert r3 == 'bc' + +def int r5^ = 4 +// error: r5 = 5 +// error: @r5 = 5 + +var str s3 = 'XYZxyz' +del s3[0..1] +assert s3 == 'Zxyz' +del s3[2..] +assert s3 == 'Zx' +ins s3[1] = 'a' +assert s3 == 'Zax' +ins s3[3] = 'bc' +assert s3 == 'Zaxbc' +ins s3[1..1] = 'so' +assert s3 == 'Zsoxbc' +ins s3[2..3] = 'p' +assert s3 == 'Zspbc' +ins s3[3..] = 'def' +assert s3 == 'Zspdef' +ins s3[2..4] = '' +assert s3 == 'Zsf' + +a = 3 +a += 1 +assert a == 4 +a -= 2 +assert a == 2 +a *= 6 +assert a == 12 +a /= 2 +assert a == 6 +a %= 4 +assert a == 2 + +var inpcat = 'ab' +inpcat |= 'c' +assert inpcat == 'abc' +inpcat |= 'de' +assert inpcat == 'abcde' +var inpcat2 = [1] +inpcat2 |= 2 +assert len(inpcat2) == 2 and inpcat2[1] == 2 +inpcat2 |= [3, 4] +assert len(inpcat2) == 4 and inpcat2[3] == 4 + +var v11 = ['abc', 'def', 'ghi', 'jkl', 'mno'] +del v11[1..2] +assert len(v11) == 3 and v11[0] == 'abc' and v11[1] == 'jkl' and v11[2] == 'mno' +del v11[1..] 
+assert len(v11) == 1 and v11[0] == 'abc' +ins v11[0] = ['pqr', 'stu'] +assert len(v11) == 3 and v11[0] == 'pqr' and v11[1] == 'stu' and v11[2] == 'abc' +ins v11[len(v11)] = 'vwx' +assert len(v11) == 4 and v11[0] == 'pqr' and v11[1] == 'stu' and v11[2] == 'abc' \ + and v11[3] == 'vwx' + +ins v11[1..] = ['def', 'ghi'] +assert len(v11) == 3 and v11[0] == 'pqr' and v11[1] == 'def' \ + and v11[2] == 'ghi' +ins v11[2..2] = [] +assert len(v11) == 2 and v11[0] == 'pqr' and v11[1] == 'def' + +assert typeof c == str and typeof d == (int *[]) and typeof e == byte *[][] \ + and typeof er == int *[][] + +var void words[str] = {'as', 'is'} +var void chars1[char] = {'a'..'z'} +var dic1 = {'one' = 1, 'two' = 0, 'three' = 3} +var byte dic2[str] = {'o' = 1, 'two' = 0, 'three' = 3} +var int ints1[]^[] = [[], [10, 11, 12], @[13, 14], @[]] +var int dics1[][str] = [dic1, {}] +var int dics2[]^[str] = [dic1, {'e' = 0}, {}] +var int dic3[char] = {'a' = 10, 'b' = 20} + +// Should give range check errors: +var char dic4[0..2] = {0 = 'a', 2 = 'c', 255 = 'd'} +var void set2[0..2] = {-1, 0..2, 3, 4, 10} + +assert typeof dic1 == int *[str] and typeof dic1['www'] == int and \ + typeof ints1[100] == int *[] and \ + typeof ints1[1 + 2 * 3][4 + 5 * 6] == int + +a = 10 + b +assert a == 11 +assert len(e[0]) == 0 and e[1][1] == 2 and e[2][0] == 4 +assert len(ei) == 5 and ei[1] == 1 +assert dic1['one'] == 1 and dic1['two'] == 0 + +system.__program_result = 'OK' +assert __program_result == 'OK' +__program_result = null +c[1] = ':' +assert c == 'a:cd' +(c[1 + 1]) = '$' +assert c == 'a:$d' +var didx = 3 +d[12 - didx * 4] = 4 +assert d[0] == 4 +// error: e[0][1] = 6 +// error: 1 = 2 +// error: s0 = '' +assert dic1['one'] == 1 and dic1['two'] == 0 +dic1['two'] = 2 +assert dic1['one'] == 1 and dic1['two'] == 2 +ints1[1][2] = 111 +assert ints1[1][2] == 111 +dics2[0]['three'] = 33 +assert dics2[0]['three'] == 33 +assert 'as' in words and not 'kuku' in words +assert dic2['o'] == 1 and dic2['two'] == 0 and 
dic2['three'] == 3 +dic2['two'] = 2 +assert 'two' in dic2 and dic2['two'] == 2 +del dic2['two'] +assert not 'two' in dic2 +assert not 'z' in dic3 and 'a' in dic3 and dic3['a'] == 10 +dic3['z'] = 30 +assert 'z' in dic3 and dic3['z'] == 30 and dic3['b'] == 20 +del dic3['z'] +del dic3['a'] +assert not 'a' in dic3 and not 'z' in dic3 +assert 'as' in words and 'is' in words +del words['is'] +assert not 'is' in words +assert typeof words['as'] == void +assert 'a' in chars1 +del chars1['a'] +assert not 'a' in chars1 + +assert 2 in nums and 96 in char and not 256 in char +assert 1 in 0..2 and ints1[1][2] in 110..111 and 10 in 0..a and not 12 in 0..a +assert if(true, true, false) and if(a in 10..1000, 'abc', 'def') == 'abc' + +var rng1 = [0..999] +var rng2 = rng1 +assert 1 in rng1 and not -1 in rng1 and not 1000 in rng1 +assert 1 in 0..10 and 1 in [0..10] +assert not -1 in 0..10 and not -1 in [0..10] +assert rng1 == [0..999] +assert 1 in rng2 +assert rng1.hi() == 999 and lo(rng2) == 0 + +var char rng3[..] = ['A'..'Z'] +assert 'B' in rng3 +def rng5 = [1..7] +assert 1 in rng5 + +var str1 = 'abc' +assert str1[0..1] == 'ab' and str1[0..2] == 'abc' and str1[2..2] == 'c' +assert str1[0..] == 'abc' and str1[1..] == 'bc' and str1[3..] == '' +var vec1 = ['abc', 'def', 'ghi'] +assert len(vec1[0..1]) == 2 and vec1[0..1][0] == 'abc' and vec1[0..1][1] == 'def' +assert len(vec1[1..]) == 2 and vec1[1..][1] == 'ghi' +assert vec1[0..] == vec1 + +var void ncont1()... = {} + +assert 1._str() == '1' +assert {'A'..'Z'}._str() == '{\'A\'..\'Z\'}' +assert rng1._str() == '[0..999]' + +var char rng4[..] = [] +assert not 'a' in rng4 +// exception: dump rng4.lo() +var char ref4^ = [] +assert (ref4 as int) == 0 +var (void *()...) 
funcp4 = [] +// exception: funcp4() + +// TYPECASTS +begin: assert (1 as char) == '\x01' +begin: + assert ('a' as int) == 97 +var any v10 = 0 +begin +// Hm +{ + assert (v10 as nums) == one + assert (1 is system.int) and ('abc' is str) + assert not (1 is str) and not ('abc' is int) + assert (v10 is int) and not (v10 is str) + assert v10 is any + assert (v10 as nums) is nums + begin { assert ('a' as str) == 'a' and (ch1 as str) == 'z' } + + var any anyv = [0, 1, 2] + var int intv[] = anyv as int*[] + assert intv[1] == 1 + def enumv = int *[(hoo, haa, hee)] + def enumv enumvv = {hoo = 1, haa = 0, hoo = 3} + assert typeof enumvv[hoo] == int +} + +// exit 'Hmmmmmm' + +if true: + ; +else: + assert false + +// LOCAL BLOCKS +var b1 = 0 +begin { + var b1 = 1 + assert b1 == 1 +} +begin +{ + var b1 = 2 + var b2 = 3 + def s0 = 'xyz' + def nums = (zero, one, two, three, four) + assert b1 == 2 and b2 == 3 and s0 == 'xyz' and one == 1 as nums +} +assert b1 == 0 and s0 == 'abc' and one == 0 as nums + +// BRANCHING +if b1 == 0: + assert true +if b1 != 0: assert false +if true +{ + var b1 = 4 + assert b1 == 4 +} +assert b1 == 0 + +if b1 == 0: +// Huh? + assert true +else: assert false +if b1 != 0 + { assert false } +// Hmmmm +else { + +// Hm? + assert true + } + +if b1 == 0: assert true +elif b1 == 1: assert false +elif b1 == 2 +{ + // Hm... 
+ + assert false; +} +else: + assert false + +if b1 == 1: assert false +elif b1 == 0: assert true +elif b1 == 2: assert false +else: + assert false + +if b1 == 1: assert false +elif b1 == 2: assert false +elif b1 == 0: assert true +else: assert false + +if b1 == 1: assert false +elif b1 == 2: assert false +elif b1 == 3: assert false +else: assert true + +if b1 == 1: assert false +elif b1 == 0: assert true +elif b1 == 3: assert false + +switch b1 +{ + case 1: + assert false + case 0 + { + assert true + } + default + { + assert false + } +} + +switch b1 +{ + case 1 + { + assert false + } + case 0, 2: + assert true + default: + assert false +} + +switch b1 +{ + case 0..2, 5: assert true + case 1: assert false +} + +switch b1 +{ + case 1: assert false + case 2 + { assert false } + default: assert true +} + +switch s0 +{ + case 'abc': assert true + case 'xyz': assert false +} + +switch typeof s0 +{ + case str: assert true + case int: assert false + default { + assert false + } +} + + +// WHILE LOOP + +var wi = 0 +while wi < 3 +{ + wi = wi + 1 + continue + assert false +} +assert wi == 3 + +while wi < 3: + assert false + +while wi < 6 +{ + var wl1 = 'asd' + begin + { + var wl2 = wi + wi = wi + 1 + continue + } + assert false +} + +assert wi == 6 + +while wi < 100 +{ + var wl1 = 'zxc' + if wi == 8 + { + var wl2 = 'qwe' + break + assert false + } + wi = wi + 1 +} + +assert wi == 8 + + +// FOR LOOP + +var fori = 10 +for i = 10..20 +{ + assert fori == i + fori += 1 +} +assert fori == 21 + +for i = 1..1: + fori += 1 +assert fori == 22 +for i = 0..-1: assert false + +for i = one..three: + fori += 1 +assert fori == 25 + +for i = 'A'..'Z': + fori += 1 +assert fori == 51 + +var fors = 'ABCDEF' +for i = fors +{ + if i == 4: assert fors[i] == 'E' + elif i == 5: assert fors[i] == 'F' + fori += 1 +} +assert fori == 57 +for i = '': assert false +for i = []: assert false + +for i, j = 'GHIJKL' +{ + // TODO: collect the string and compare + if i == 4 { assert j == 'K' } + fori += 1 +} 
+assert fori == 63 +for i, j = '': assert false + +for i, j = [10, 20, 30] +{ + if i == 1 { assert j == 20 } + fori += 1 +} +assert fori == 66 + +var int fornullvec[] = [] +for i, j = fornullvec: assert false +for i, j = []: assert false + +for i = {'A'..'C'} +{ + fori += 1 +} +assert fori == 69 +for i = {}: assert false + +for i, j = {} +{ assert false; dump typeof j } + +for i = {one..three} +{ + fori += 1 +} +assert fori == 72 + + +for i = {'A' = 10, 'B' = 20} +{ + fori += 1 +} +assert fori == 74 + +for i, j = {'A' = 10, 'B' = 20} +{ + fori += 1 + if i == 'B': assert j == 20 +} +assert fori == 76 + +for i = {1000, 2000, 3000} +{ + fori += 1 +} +assert fori == 79 + +for i = {'one' = one, 'two' = two, 'three' = three} +{ + fori += 1 +} +assert fori == 82 + +for i, j = {'one' = one, 'two' = two, 'three' = three} +{ + if i == 'two': assert j == two + fori += 1 +} + + +// STATES + +def proto1 = int *(int a, int b) ... + +var inc_call_count = 0 + +def inc = int *(int i) +{ + __result = i + 1 + inc_call_count = inc_call_count + 1 +} + +var incv = int *(int i) +{ + inc_call_count = inc_call_count + 1 + return i + 1 + // error: i = 0 +} + +var rr1 = inc(5) +assert rr1 == 6 +assert inc_call_count == 1 + +inc(9) +assert inc_call_count == 2 + +def void nested_inc_test() +{ + var t = inc(10) + assert t == 11 + assert inc_call_count == 3 +} + +nested_inc_test() + +incv(10) +assert inc_call_count == 4 + +def confusing_proto = int *[]() + { } + +var str2 = '' + +def void vfunc(str s) + { str2 = s } + +def int avg(int a, int b, str s) +{ + var t = wi + assert t == wi + str2 = s + return (a + b) / 2 +} + + +vfunc('Hey') +assert str2 == 'Hey' + +var rr2 = avg(5, 10, 'Um') +assert rr2 == 7 +assert str2 == 'Um' + +var loc2 = 1 +begin +{ + var loc1 = 0 + def void vfunc() { } + def void vfunc2() + { + assert this.loc2 == 1 + assert loc2 == 1 + // error: loc1 = 1 + } + vfunc2() + loc2 = inc(loc2) +} +assert loc2 == 2 + +var int loc3^ = 3 +def void vinc(int i^) { i = i + 1 } +vinc(loc3) 
+assert loc3 == 4 + + +var statv = 1 +def int nonstatf(): + return statv +assert nonstatf() == 1 + +def void shouldnt_be_statf(): assert nonstatf() == 1 +shouldnt_be_statf() + +// Nested functions + +var vn1 = 1 + +def void nested1(int vn2) +{ + var vn6 = __program_result + var vn3 = 3 + def void nested2(int vn4) + { + var vn5 = vn1 + assert vn5 == 1 + vn1 = 2 + inc(1) + } + nested2(5) + // this.vfunc(); +} +nested1(4) +assert vn1 == 2 + +// Return +def char rettest1() +{ + var x = 1 + begin { + var y = 2 + return 'a' + } + if x == 1: + return +} + +// Default args + +def void defarg1(int i, int j = 1, str k = 'hoohoo') +{ + if j == 1: + assert k == 'hoohoo' + else: + assert k == 'moomoo' +} + +defarg1(0, 0, 'moomoo') +defarg1(0, ,) +defarg1(0, 1) +defarg1(0) + +// Argument reclamation + +var argrec1 = 0 +def void argrec(int a) +{ + var a + argrec1 = a +} +argrec(149) +assert argrec1 == 149 + + +// Function pointers + +var fpstr = '' + +def intfunc = int *(int, int)... +def voidfunc = void *(str)... + +def int min(int a, int b) { return if(a < b, a, b) } +def int max(int a, int b): __result = if(a > b, a, b) +def void assignstr(str s) { fpstr = s } + +assert min(2, 1) == 1 +var intfp = min +var intfunc intfp1 = max +assert intfp(20, 10) == 10 +assert intfp1(20, 10) == 20 + +assignstr('klm') +assert fpstr == 'klm' +var voidfp = assignstr +voidfp('nop') +assert fpstr == 'nop' + +var cpint = 0 +class intclass(int i) +{ + var i + cpint = i +} + +def classptr = intclass *(int)... +var classptr cp = intclass +assert cpint == 0 +var intclass ic = cp(1) +assert cpint == 1 + +def incfunc = int *(int)... 
+def incd = int *(int i) { return i + 1 } +var incvar = int *(int i) { __result = i + 1 } +assert incvar(100) == 101 +assert incvar is incfunc + +// static call via an object +assert __result.incd(2) == 3 + +// static call at compile time +def ctcall = max(10, 20) +assert ctcall == 20 + +// var args + +def void varf2(var int a): a = 2 +var varfv2 = 1 +varf2(varfv2) +assert(varfv2 == 2) + +def void varf1(str a, var str b, str c) +{ + b = a | c +} + +var varf1v = 'd?' +varf1('aa', varf1v, 'bb') +assert(varf1v == 'aabb') + + +// OOP + +var pst = 2 + +class point(int x, int y) +{ + def cpt = 50 + var x + var y + + begin { + var x = 0 + } + + def void move(int dx, int dy) + { + x += dx + y += dy + } + + def void move2(int dx, int dy) + { + def int double(int i) + { return i * pst; } + + move(double(dx), double(dy)) + } + + def int pstatic(int): + return 10 +} + +var p = point(10, 20) +var point p2 = point(100, 200) +var point p3 = __result.point(1000, 2000) + +assert p.x == 10 and p.y == 20 +assert p2.x == 100 and p2.y == 200 +assert p3.x == 1000 and p3.y == 2000 +assert p.cpt == 50 and p2.cpt == 50 +def cpt = point.cpt +assert cpt == 50 +// error: var pmove = point.move +// error: point.move(0, 1) + +var point pnull = {} +assert not pnull? and p? + +p.move(5, 10) +assert p.x == 15 and p.y == 30 +// point.move(5, 5) +p.move2(5, 10) +assert p.x == 25 and p.y == 50 + +def static_point = point(1, 5) +// error: assert static_point.x == 1 and static_point.y == 5 +// error: static_point.move(1, 2) + +assert point.pstatic(0) == 10 +assert p.pstatic(0) == 10 + + +// FIFOs + +var char chfz<> = <> +var chf = <'a', 'b', 'c'> +// dump chfz, chf, sio +assert not chfz? and chf? +assert chf.deq() == 'a' and deq(chf) == 'b' +chf.enq('d').enq('e') +assert chf.deq() == 'c' and deq(chf) == 'd' and chf.deq() == 'e' +assert not chf? 
+chf << 'fgh' << 'i' << 'jk' +assert chf.deq() == 'f' + +// "fifo empty" error: var deqv = chfz.deq() +var strf = <'one', 'two', 'three'> +assert strf.deq() == 'one' and strf.deq() == 'two' + +var chf2 = strfifo('Hello, FIFO!\nWell, hello, Hovik.\nHey! Howdy man?\nLike a FIFO') +assert chf2.deq() == 'H' +assert chf2.token({'a'..'z'}) == 'ello' +chf2.skip({',', ' '}) +assert chf2.deq() == 'F' +assert chf2.token({'A'..'Z', '!'}) == 'IFO!' +assert chf2.eol() +chf2.skipln() +assert chf2.line() == 'Well, hello, Hovik.' +assert chf2.deq() == 'H' +chf2.skipln() +assert chf2.look() == 'L' +assert chf2.line() == 'Like a FIFO' +assert not chf2? + +var numf = +assert numf.deq() == one and numf.deq() == three +var numft = numf.token({three}) +assert numft.len() == 1 and numft[0] == three + +dump system.__program_result diff --git a/src/typesys.cpp b/src/typesys.cpp new file mode 100644 index 0000000..11501df --- /dev/null +++ b/src/typesys.cpp @@ -0,0 +1,1376 @@ + + +#include "sysmodule.h" +#include "typesys.h" +#include "vm.h" + + +static void error(const char* msg) + { throw emessage(msg); } + + +// --- Symbols & Scope ----------------------------------------------------- // + + +Symbol::Symbol(const str& n, SymbolId id, Type* t, State* h) throw() + : symbol(n), symbolId(id), type(t), host(h) { } + + +Symbol::~Symbol() throw() + { } + + +void Symbol::fqName(fifo& stm) const +{ + if (host && host != queenBee) + { + host->fqName(stm); + stm << '.'; + } + stm << name; +} + + +void Symbol::dump(fifo& stm) const +{ + if (type) + type->dumpDef(stm); + else + stm << "var"; + if (!name.empty()) + stm << ' ' << name; +} + + +// --- // + + +Definition::Definition(const str& n, Type* t, const variant& v, State* h) throw() + : Symbol(n, DEFINITION, t, h), value(v) { } + + +Definition::~Definition() throw() + { } + + +Type* Definition::getAliasedType() const +{ + if (type->isTypeRef()) + return cast(value._rtobj()); + else + return NULL; +} + + +// --- // + + +Variable::Variable(const 
str& n, SymbolId sid, Type* t, memint i, State* h) throw() + : Symbol(n, sid, t, h), id(i) { } + +Variable::~Variable() throw() + { } + +StkVar::StkVar(const str& n, Type* t, memint i, State* h) throw() + : Variable(n, STKVAR, t, i, h) { assert(i >= 0); } + +ArgVar::ArgVar(const str& n, Type* t, memint i, State* h) throw() + : Variable(n, ARGVAR, t, i, h) { assert(i >= 1); } + +PtrVar::PtrVar(const str& n, Type* t, memint i, State* h) throw() + : Variable(n, PTRVAR, t, i, h) { assert(i >= 1); } + +ResultVar::ResultVar(Type* t, State* h) throw() + : Variable("__result", RESULTVAR, t, 0, h) { } + +InnerVar::InnerVar(const str& n, Type* t, memint i, State* h) throw() + : Variable(n, INNERVAR, t, i, h) { assert(i >= 0); } + +FormalArg::FormalArg(const str& n, Type* t, memint i, bool p, variant* d) throw() + : Symbol(n, FORMALARG, t, NULL), id(i), isPtr(p), hasDefValue(d), defValue() +{ + assert(i >= 0); + assert(!isPtr || !hasDefValue); + if (d) + defValue = *d; +} + +FormalArg::~FormalArg() throw() + { } + + +// --- // + + +Builtin::Builtin(const str& n, CompileFunc f, FuncPtr* p, State* h) throw() + : Symbol(n, BUILTIN, NULL, h), compile(f), staticFunc(NULL), prototype(p) { } + +Builtin::Builtin(const str& n, CompileFunc f, State* s, State* h) throw() + : Symbol(n, BUILTIN, NULL, h), compile(f), staticFunc(s), prototype(s->prototype) { } + +Builtin::~Builtin() throw() + { } + + +void Builtin::dump(fifo& stm) const +{ + if (prototype) + { + prototype->dump(stm); + stm << " {}"; + } + else + stm << "builtin." 
<< name; +} + + + +// --- // + + +EDuplicate::EDuplicate(const str& _ident) throw(): ident(_ident) { } +EDuplicate::~EDuplicate() throw() { } +const char* EDuplicate::what() throw() { return "Duplicate identifier"; } + +EUnknownIdent::EUnknownIdent(const str& _ident) throw(): ident(_ident) { } +EUnknownIdent::~EUnknownIdent() throw() { } +const char* EUnknownIdent::what() throw() { return "Unknown identifier"; } + + +// --- // + + +void Scope::addUnique(Symbol* s) +{ + if (!symbols.add(s)) + throw EDuplicate(s->name); +} + + +void Scope::replaceSymbol(Symbol* s) +{ + if (!symbols.replace(s)) + throw EUnknownIdent(s->name); +} + + +Symbol* Scope::findShallow(const str& ident) const +{ + Symbol* s = find(ident); + if (s == NULL) + throw EUnknownIdent(ident); + return s; +} + + + +// --- // + + +BlockScope::BlockScope(Scope* _outer, CodeGen* _gen) throw() + : Scope(_outer), startId(_gen->getLocals()), varCount(0), gen(_gen) { } + + +BlockScope::~BlockScope() throw() + { stkVars.release_all(); } + + +void BlockScope::deinitLocals() +{ + for (memint i = stkVars.size(); i--; ) + gen->deinitLocalVar(stkVars[i]); +} + + +StkVar* BlockScope::addStkVar(const str& n, Type* t) +{ + memint varid = startId + varCount; + varCount += t->getMemSize(); + if (varCount > 254) + error("Too many local variables"); + objptr v = new StkVar(n, t, varid, gen->getCodeOwner()); + addUnique(v); // may throw + stkVars.push_back(v->grab()); + return v; +} + + +// --- Type ---------------------------------------------------------------- // + + +Type::Type(TypeId id) throw() + : rtobject(id == TYPEREF ? 
this : defTypeRef), refType(NULL), // ptrType(NULL), + host(NULL), defName(), typeId(id) +{ + if (id != REF) + refType = new Reference(this); +} + + +Type::~Type() throw() + { } + + +bool Type::isByte() const + { return isAnyOrd() && POrdinal(this)->isByte(); } + +bool Type::isBit() const + { return isAnyOrd() && POrdinal(this)->isBit(); } + +bool Type::isFullChar() const + { return isChar() && POrdinal(this)->isFullChar(); } + +bool Type::isByteVec() const + { return isAnyVec() && PContainer(this)->hasByteElem(); } + +bool Type::isByteSet() const + { return isAnySet() && PContainer(this)->hasByteIndex(); } + +bool Type::isByteDict() const + { return isAnyDict() && PContainer(this)->hasByteIndex(); } + +bool Type::isByteFifo() const + { return isAnyFifo() && PFifo(this)->elem->isByte(); } + +bool Type::isFifo(Type* elem) const + { return isAnyFifo() && elem->identicalTo(PFifo(this)->elem); } + +bool Type::isContainer(Type* idx, Type* elem) const + { return isAnyCont() && elem->identicalTo(PContainer(this)->elem) + && idx->identicalTo(PContainer(this)->index); } + +bool Type::isVectorOf(Type* elem) const + { return isAnyVec() && elem->identicalTo(PContainer(this)->elem); } + +bool Type::identicalTo(Type* t) const + { return t == this; } + +bool Type::canAssignTo(Type* t) const + { return identicalTo(t); } + + +bool Type::isCompatibleWith(const variant& v) +{ + switch (v.getType()) + { + case variant::VOID: return isVoid(); + case variant::ORD: return isAnyOrd(); + case variant::REAL: notimpl(); return false; + case variant::VARPTR: return false; + case variant::STR: return isByteVec(); + case variant::RANGE: return isRange(); + case variant::VEC: return (isAnyVec() && !isByteVec()) || isByteDict(); + case variant::SET: return isAnySet() && !isByteSet(); + case variant::ORDSET: return isByteSet(); + case variant::DICT: return isAnyDict() && !isByteDict(); + case variant::REF: return isReference(); + case variant::RTOBJ: + rtobject* o = v._rtobj(); + return (o == 
NULL) || o->getType()->canAssignTo(this); + } + return false; +} + + +bool Type::empty() const + { return false; } + + +void Type::dump(fifo& stm) const +{ + if (defName.empty()) + fatal(0x3003, "Invalid type alias"); + stm << "builtin." << defName; +} + + +void Type::dumpDef(fifo& stm) const +{ + if (defName.empty()) + dump(stm); + else + { + if (host && host != queenBee) + { + host->fqName(stm); + stm << '.'; + } + stm << defName; + } +} + + +Container* Type::deriveVec(State* h) +{ + if (isVoid()) + return queenBee->defNullCont; + else if (isFullChar()) + return queenBee->defStr; + else + return h->getContainerType(defVoid, this); +} + + +Container* Type::deriveSet(State* h) +{ + if (isReference()) + error("Reference type not allowed in set"); + if (isVoid()) + return queenBee->defNullCont; + else if (isFullChar()) + return queenBee->defCharSet; + else + return h->getContainerType(this, defVoid); +} + + +Container* Type::deriveContainer(State* h, Type* idx) +{ + if (idx->isReference()) + error("Reference type not allowed in dict/set"); + if (isVoid()) + return idx->deriveSet(h); + else if (idx->isVoid()) + return deriveVec(h); + else + return h->getContainerType(idx, this); +} + + +Fifo* Type::deriveFifo(State* h) +{ + if (isFullChar()) + return queenBee->defCharFifo; + else + // TODO: lookup existing fifo types in h + return h->getFifoType(this); +} + + +// --- Printing ------------------------------------------------------------ // + + +void Type::dumpValue(fifo& stm, const variant& v) const +{ + // Default is to print raw variant value + dumpVariant(stm, v, NULL); +} + + +static void dumpVec(fifo& stm, const varvec& vec, bool curly, Type* elemType = NULL) +{ + stm << (curly ? '{' : '['); + for (memint i = 0; i < vec.size(); i++) + { + if (i) stm << ", "; + dumpVariant(stm, vec[i], elemType); + } + stm << (curly ? 
'}' : ']'); +} + + +static void dumpOrdVec(fifo& stm, const str& s, Type* elemType = NULL) +{ + stm << '['; + for (memint i = 0; i < s.size(); i++) + { + if (i) stm << ", "; + dumpVariant(stm, s[i], elemType); + } + stm << ']'; +} + + +static void dumpOrdDict(fifo& stm, const varvec& v, Type* keyType = NULL, Type* elemType = NULL) +{ + stm << '{'; + int count = 0; + for (memint i = 0; i < v.size(); i++) + { + if (v[i].is_null()) continue; + if (count++) stm << ", "; + dumpVariant(stm, integer(i), keyType); + stm << " = "; + dumpVariant(stm, v[i], elemType); + } + stm << '}'; +} + + +static void dumpOrdSet(fifo& stm, const ordset& s, Ordinal* elemType = NULL) +{ + stm << '{'; + if (!s.empty()) + { + int i = elemType ? int(imin(elemType->left, 0)) : 0; + int right = elemType ? int(imax(elemType->right, 255)) : 255; + int count = 0; + while (i <= right) + { + if (s.find(i)) + { + if (count++) + stm << ", "; + dumpVariant(stm, i, elemType); + int l = ++i; + while (i <= right && s.find(i)) + i++; + if (i > l) + { + stm << ".."; + dumpVariant(stm, i - 1, elemType); + } + } + else + i++; + } + } + stm << '}'; +} + + +static void dumpDict(fifo& stm, const vardict& d, Type* keyType = NULL, Type* valType = NULL) +{ + stm << '{'; + for (memint i = 0; i < d.size(); i++) + { + if (i) stm << ", "; + dumpVariant(stm, d.key(i), keyType); + stm << " = "; + dumpVariant(stm, d.value(i), valType); + } + stm << '}'; +} + + +void dumpVariant(fifo& stm, const variant& v, Type* type) +{ + if (v.is_null_obj()) + stm << "[]"; + else if (type) + type->dumpValue(stm, v); + else + { + switch (v.getType()) + { + case variant::VOID: stm << "null"; break; + case variant::ORD: stm << v._int(); break; + case variant::REAL: notimpl(); break; + case variant::VARPTR: stm << "@@"; if (v._ptr()) dumpVariant(stm, v._ptr()); break; + case variant::STR: stm << to_quoted(v._str()); break; + case variant::RANGE: stm << v._range().left() << ".." 
<< v._range().right(); break; + case variant::VEC: dumpVec(stm, v._vec(), false); break; + case variant::SET: dumpVec(stm, v._set(), true); break; + case variant::ORDSET: dumpOrdSet(stm, v._ordset()); break; + case variant::DICT: dumpDict(stm, v._dict()); break; + case variant::REF: stm << '@'; dumpVariant(stm, v._ref()->var); break; + case variant::RTOBJ: if (v._rtobj()) v._rtobj()->dump(stm); else stm << "{}"; break; + } + } +} + + +// --- General Types ------------------------------------------------------- // + + +TypeReference::TypeReference() throw(): Type(TYPEREF) { } +TypeReference::~TypeReference() throw() { } + +void TypeReference::dumpValue(fifo& stm, const variant& v) const +{ + Type* type = cast(v.as_rtobj()); + type->dump(stm); +} + + +Void::Void() throw(): Type(VOID) { } +Void::~Void() throw() { } + + +Variant::Variant() throw(): Type(VARIANT) { } +Variant::~Variant() throw() { } + + +Reference::Reference(Type* _to) throw() + : Type(REF), to(_to) { } + + +Reference::~Reference() throw() + { } + + +void Reference::dump(fifo& stm) const + { stm << '('; to->dumpDef(stm); stm << " *^)"; } + + +void Reference::dumpValue(fifo& stm, const variant& v) const + { stm << '@'; dumpVariant(stm, v.as_ref()->var, to); } + + +bool Reference::identicalTo(Type* t) const + { return this == t || (t->isReference() + && to->identicalTo(PReference(t)->to)); } + + +bool Reference::canAssignTo(Type* t) const + { return this == t || (t->isReference() + && to->canAssignTo(PReference(t)->to)); } + + +// --- Ordinals ------------------------------------------------------------ // + + +Ordinal::Ordinal(TypeId id, integer l, integer r) throw() + : Type(id), rangeType(new Range(this)), left(l), right(r) + { assert(isAnyOrd()); } + + +Ordinal::~Ordinal() throw() + { } + + +Ordinal* Ordinal::_createSubrange(integer l, integer r) + { return new Ordinal(typeId, l, r); } + + +Ordinal* Ordinal::createSubrange(integer l, integer r) +{ + if (l == left && r == right) + return this; + if (l 
< left || r > right) + error("Subrange can't be bigger than original"); + return _createSubrange(l, r); +} + + +void Ordinal::dump(fifo& stm) const +{ + if (isInt()) + stm << '(' << to_string(left) << ".." << to_string(right) << ')'; + else if (isChar()) + stm << '(' << to_quoted(uchar(left)) << ".." << to_quoted(uchar(right)) << ')'; + else + notimpl(); +} + + +void Ordinal::dumpValue(fifo& stm, const variant& v) const +{ + if (isInt()) + stm << v.as_ord(); + else if (isChar()) + stm << to_quoted(uchar(v.as_ord())); + else + notimpl(); +} + + +bool Ordinal::identicalTo(Type* t) const + { return this == t || (t->typeId == typeId + && left == POrdinal(t)->left && right == POrdinal(t)->right); } + + +bool Ordinal::canAssignTo(Type* t) const + { return t->typeId == typeId; } + + +// --- // + + +Enumeration::Enumeration(TypeId id) + : Ordinal(id, 0, -1) { } + + +Enumeration::Enumeration(const EnumValues& v, integer l, integer r) + : Ordinal(ENUM, l, r), values(v) { } + + +Enumeration::Enumeration() throw() + : Ordinal(ENUM, 0, -1) { } + + +Enumeration::~Enumeration() throw() + { } + + +Ordinal* Enumeration::_createSubrange(integer l, integer r) + { return new Enumeration(values, l, r); } + + +void Enumeration::addValue(State* state, Scope* scope, const str& ident) +{ + integer n = integer(values.size()); + if (n >= 256) // TODO: maybe this is not really necessary + error("Maximum number of enum constants reached"); + Definition* d = state->addDefinition(ident, this, n, scope); + values.push_back(d); + reassignRight(n); +} + + +void Enumeration::dump(fifo& stm) const +{ + if (left > 0 || right < values.size() - 1) // subrange? + stm << '(' << values[0]->name << ".." << values[memint(right)]->name << ')'; + else + { + stm << '('; + for (memint i = 0; i < values.size(); i++) + stm << (i ? 
", " : "") << values[i]->name; + stm << ')'; + } +} + + +void Enumeration::dumpValue(fifo& stm, const variant& v) const +{ + integer i = v.as_ord(); + if (isInRange(i)) + stm << values[memint(i)]->name; + else + stm << i; +} + + +bool Enumeration::identicalTo(Type* t) const + { return this == t; } + + +bool Enumeration::canAssignTo(Type* t) const + { return t->typeId == typeId && values == PEnumeration(t)->values; } + + +// --- // + + +Range::Range(Ordinal* e) throw() + : Type(RANGE), elem(e) { } + +Range::~Range() throw() + { } + + +void Range::dump(fifo& stm) const +{ + stm << '('; + elem->dumpDef(stm); + stm << " *[..])"; +} + + +void Range::dumpValue(fifo& stm, const variant& v) const +{ + stm << '['; + elem->dumpValue(stm, v.as_range().left()); + stm << ".."; + elem->dumpValue(stm, v.as_range().right()); + stm << ']'; +} + + +bool Range::identicalTo(Type* t) const + { return t->isRange() && elem->identicalTo(PRange(t)->elem); } + +bool Range::canAssignTo(Type* t) const + { return t->isRange() && elem->canAssignTo(PRange(t)->elem); } + + +// --- Containers ---------------------------------------------------------- // + + +Type::TypeId Type::contType(Type* i, Type* e) throw() +{ + if (i->isVoid()) + if (e->isVoid()) + return NULLCONT; + else + return VEC; + else if (e->isVoid()) + return SET; + else + return DICT; +} + + +Container::Container(Type* i, Type* e) throw() + : Type(contType(i, e)), index(i), elem(e) { } + + +Container::~Container() throw() + { } + + +void Container::dump(fifo& stm) const +{ + stm << '('; + elem->dumpDef(stm); + stm << " *["; + if (!isAnyVec()) + index->dumpDef(stm); + stm << "])"; +} + + +void Container::dumpValue(fifo& stm, const variant& v) const +{ + if (isNullCont()) + stm << "[]"; + else if (isAnyVec()) + { + if (elem->isChar()) + stm << to_quoted(v.as_str()); + else if (isByteVec()) + dumpOrdVec(stm, v.as_str(), elem); + else + dumpVec(stm, v.as_vec(), false, elem); + } + else if (isAnySet()) + { + if (isByteSet()) + 
dumpOrdSet(stm, v.as_ordset(), POrdinal(index)); + else + dumpVec(stm, v.as_set(), true, index); + } + else if (isAnyDict()) + { + if (isByteDict()) + dumpOrdDict(stm, v.as_vec(), index, elem); + else + dumpDict(stm, v.as_dict(), index, elem); + } + else + notimpl(); +} + + +bool Container::identicalTo(Type* t) const +{ + return this == t || (t->isAnyCont() + && elem->identicalTo(PContainer(t)->elem) + && index->identicalTo(PContainer(t)->index)); +} + + +// --- Fifo ---------------------------------------------------------------- // + + +Fifo::Fifo(Type* e) throw() + : Type(FIFO), elem(e) { } + + +Fifo::~Fifo() throw() + { } + + +void Fifo::dump(fifo& stm) const + { stm << '('; elem->dumpDef(stm); stm << " *<>)"; } + + +bool Fifo::identicalTo(Type* t) const + { return this == t || (t->isAnyFifo() && elem->identicalTo(PFifo(t)->elem)); } + + +// --- Prototype ----------------------------------------------------------- // + + +FuncPtr::FuncPtr(Type* r) throw() + : Type(FUNCPTR), returnType(r), popArgCount(0), returns(!r->isVoid()) { } + + +FuncPtr::~FuncPtr() throw() + { formalArgs.release_all(); } + + +void FuncPtr::dump(fifo& stm) const +{ + stm << '('; + returnType->dumpDef(stm); + stm << " *("; + for (int i = 0; i < formalArgs.size(); i++) + { + if (i) + stm << ", "; + formalArgs[i]->dump(stm); + } + stm << ")...)"; +} + + +bool FuncPtr::identicalTo(Type* t) const + { return this == t || (t->isFuncPtr() && identicalTo(PFuncPtr(t))); } + + +bool FuncPtr::identicalTo(FuncPtr* t) const +{ + if (this == t) + return true; + if (!returnType->identicalTo(t->returnType) + || formalArgs.size() != t->formalArgs.size()) + return false; + for (memint i = formalArgs.size(); i--; ) + if (!t->formalArgs[i]->type->identicalTo(formalArgs[i]->type)) + return false; + return true; +} + + +bool FuncPtr::canAssignTo(Type* t) const + { return this == t || (t->isFuncPtr() && canAssignTo(PFuncPtr(t))); } + + +bool FuncPtr::canAssignTo(FuncPtr* t) const +{ + if (this == t) + return 
true; + if (!returnType->canAssignTo(t->returnType) + || formalArgs.size() != t->formalArgs.size()) + return false; + for (memint i = formalArgs.size(); i--; ) + // Note how canAssignTo() check is reversed for arguments + if (!t->formalArgs[i]->type->canAssignTo(formalArgs[i]->type)) + return false; + return true; +} + + +FormalArg* FuncPtr::addFormalArg(const str& n, Type* t, bool isPtr, variant* defValue) +{ + FormalArg* arg = new FormalArg(n, t, popArgCount, isPtr, defValue); + formalArgs.push_back(arg->grab()); + popArgCount += arg->getMemSize(); + if (popArgCount > 254) + error("Too many formal arguments defined"); + return arg; +} + + +// --- SelfStub ------------------------------------------------------------ // + + +SelfStub::SelfStub() throw() + : Type(SELFSTUB) { } + +SelfStub::~SelfStub() throw() + { } + +bool SelfStub::identicalTo(Type*) const + { error("'self' incomplete"); return false; } + +bool SelfStub::canAssignTo(Type*) const + { error("'self' incomplete"); return false; } + + +// --- State --------------------------------------------------------------- // + + +State::State(State* par, FuncPtr* proto, State* b) throw() + : Type(STATE), Scope(par), + complete(false), innerObjUsed(0), outsideObjectsUsed(0), + parent(par), parentModule(getParentModule(this)), + prototype(proto), resultVar(NULL), + codeseg(new CodeSeg(this)), externFunc(NULL), base(b), + varCount(0) { _setup(); } + + +State::State(State* par, FuncPtr* proto, ExternFuncProto func, State* b) throw() + : Type(STATE), Scope(par), + complete(true), innerObjUsed(0), outsideObjectsUsed(0), + parent(par), parentModule(getParentModule(this)), + prototype(proto), resultVar(NULL), + codeseg(), externFunc(func), base(b), + varCount(0) { _setup(); } + + +void State::_setup() +{ + // Is this a 'self' state? 
+ isCtor = prototype->returnType->isSelfStub() || prototype->returnType == this; + if (isCtor) + { + useInnerObj(); + prototype->resolveSelfType(this); + } + if (externFunc == NULL) + { + // Register all formal args as actual args within the local scope, + // including the return var (not needed for external functions) + if (prototype->returns) + addResultVar(prototype->returnType); + for (memint i = prototype->formalArgs.size(); i--; ) + addArgument(prototype->formalArgs[i]); + } + if (base) + { + if (!base->isComplete()) + error("Base type incomplete"); + if (!base->isCtor) + error("Base type should be constructor"); + varCount = base->varCount; + } +} + + +State::~State() throw() +{ + args.release_all(); + innerVars.release_all(); + defs.release_all(); + types.release_all(); +} + + +void State::fqName(fifo& stm) const +{ + if (parent) + { + parent->fqName(stm); + stm << '.'; + } + if (defName.empty()) + stm << '*'; + else + stm << defName; +} + + +Module* State::getParentModule(State* m) throw() +{ + while (m->parent) + m = m->parent; + // assert(m->isModule()); + return PModule(m); // TODO: cast<> doesn't work! why? 
+} + + +void State::dump(fifo& stm) const +{ + stm << '('; + prototype->dump(stm); + stm << " {})"; +} + + +void State::dumpAll(fifo& stm) const +{ + // Print all registered types (except states) in comments + for (memint i = 0; i < types.size(); i++) + { + Type* type = types[i]; + if (type->isAnyState() || type->isReference()) + continue; + stm << "type "; + types[i]->dump(stm); + stm << endl; + } + // Print definitions + for (memint i = 0; i < defs.size(); i++) + { + Definition* def = defs[i]; + stm << "def "; + def->type->dumpDef(stm); + stm << ' '; + def->fqName(stm); + stm << " = "; + Type* typeDef = def->getAliasedType(); + if (typeDef && (def->name != typeDef->defName || typeDef->host != this)) + typeDef->dumpDef(stm); // just the name if this is not the definition of this type + else + dumpVariant(stm, def->value, def->type); + stm << endl; + } + for (memint i = 0; i < innerVars.size(); i++) + { + InnerVar* var = innerVars[i]; + stm << "var "; + var->type->dumpDef(stm); + stm << ' '; + var->fqName(stm); + stm << endl; + } +} + + +bool State::canAssignTo(Type* t) const + { return t == this || (t->isAnyState() && canAssignTo(PState(t))); } + + +bool State::canAssignTo(State* s) const +{ + return s == this || (base && base->canAssignTo(s)); +} + + +Type* State::_registerType(Type* t, Definition* d) throw() +{ + if (t->host == NULL) + { + types.push_back(t->grab()); + t->host = this; + // Also register the bundled reference type, or in case this is a reference, + // register its bundled value type. 
+ if (isReference()) + _registerType(t->getValueType(), NULL); + else + _registerType(t->getRefType(), NULL); + } + // Also assign the diagnostic type alias, if appropriate + if (t->host == this && d && t->defName.empty()) + t->defName = d->name; + return t; +} + + +Definition* State::addDefinition(const str& n, Type* t, const variant& v, Scope* scope) +{ + if (n.empty()) + fatal(0x3001, "Empty identifier"); + objptr d = new Definition(n, t, v, this); + scope->addUnique(d); // may throw + defs.push_back(d->grab()); + if (t->isTypeRef()) + { + // In case this def is a type definition, also register the type with this state, + // and bind the type object to this def for better diagnostic output (dump() family). + _registerType(cast(v._rtobj()), d); + } + return d; +} + + +void State::addTypeAlias(const str& n, Type* t) + { addDefinition(n, t->getType(), t, this); } + + +Variable* State::addArgument(FormalArg* f) +{ + // Notice how var id becomes a relative offset + memint id = prototype->popArgCount - f->id; + objptr arg; + if (f->isPtr) + arg = new PtrVar(f->name, f->type, id, this); + else + arg = new ArgVar(f->name, f->type, id, this); + if (!f->name.empty()) + addUnique(arg); + args.push_back(arg->grab()); + return arg; +} + + +void State::addResultVar(Type* t) +{ + assert(resultVar.empty()); + resultVar = new ResultVar(t, this); + if (!resultVar->name.empty()) // currently set to "__result" + addUnique(resultVar); +} + + +InnerVar* State::addInnerVar(InnerVar* var) +{ + varCount += var->getMemSize(); + if (varCount > 254) + error("Too many variables"); + return innerVars.push_back(var->grab()); +} + + +InnerVar* State::addInnerVar(const str& n, Type* t) +{ + if (n.empty()) + fatal(0x3002, "Empty identifier"); + objptr v = new InnerVar(n, t, varCount, this); + addUnique(v); + return addInnerVar(v); +} + + +InnerVar* State::reclaimArg(ArgVar* arg, Type* t) +{ + objptr v = new InnerVar(arg->name, t, varCount, this); + replaceSymbol(v); + return addInnerVar(v); +} + 
+ +stateobj* State::newInstance() +{ + if (varCount == 0) + return NULL; + stateobj* obj = new(varCount) stateobj(this); + return obj; +} + + +Container* State::getContainerType(Type* idx, Type* elem) +{ + assert(!idx->isReference()); + // TODO: replace linear search with something faster? + for (memint i = 0; i < types.size(); i++) + { + Type* t = types[i]; + if (t->isContainer(idx, elem)) + return PContainer(t); + } + return registerType(new Container(idx, elem)); +} + + +Fifo* State::getFifoType(Type* elem) +{ + // TODO: replace linear search with something faster? + for (memint i = 0; i < types.size(); i++) + { + Type* t = types[i]; + if (t->isFifo(elem)) + return PFifo(t); + } + return registerType(new Fifo(elem)); +} + + +FuncPtr* State::registerProto(Type* ret) +{ + return registerType(new FuncPtr(ret)); +} + + +FuncPtr* State::registerProto(Type* ret, Type* arg1) +{ + FuncPtr* proto = registerProto(ret); + proto->addFormalArg("", arg1, false, NULL); + return proto; +} + + +FuncPtr* State::registerProto(Type* ret, Type* arg1, Type* arg2) +{ + FuncPtr* proto = registerProto(ret, arg1); + proto->addFormalArg("", arg2, false, NULL); + return proto; +} + + +// --- Module -------------------------------------------------------------- // + + +Module::Module(const str& n, const str& f) throw() + : State(NULL, new FuncPtr(this)), filePath(f) +{ + defName = n; + registerType(prototype); +} + + +Module::~Module() throw() +{ + codeSegs.release_all(); +} + + +void Module::dump(fifo& stm) const +{ + stm << endl << "#MODULE_DUMP " << getName() << endl << endl; + dumpAll(stm); + for (memint i = 0; i < codeSegs.size(); i++) + { + CodeSeg* c = codeSegs[i]; + stm << endl << "#CODE_DUMP "; + c->getStateType()->fqName(stm); + stm << endl << endl; + c->dump(stm); + } +} + + +void Module::addUsedModule(Module* m) + { usedModuleVars.push_back(addInnerVar(m->getName(), m)); } + + +InnerVar* Module::findUsedModuleVar(Module* m) +{ + for (memint i = usedModuleVars.size(); i--; ) + { 
+ InnerVar* var = usedModuleVars[i]; + if (var->type == m) + return var; + } + return NULL; +} + + +void Module::registerString(str& s) +{ + if (s.empty()) + return; + constStrings.push_back(s); + // TODO: make finding duplicates a compiler option? +/* + memint i; + if (constStrings.bsearch(s, i)) + s = constStrings[i]; + else + constStrings.insert(i, s); +*/ +} + + +void Module::registerCodeSeg(CodeSeg* c) + { codeSegs.push_back(c->grab()); } + + +// --- QueenBee ------------------------------------------------------------ // + + +QueenBee::QueenBee() + : Module("system", ""), + defVariant(new Variant()), + defInt(new Ordinal(Type::INT, INTEGER_MIN, INTEGER_MAX)), + defChar(new Ordinal(Type::CHAR, 0, 255)), + defByte(new Ordinal(Type::INT, 0, 255)), + defBool(new Enumeration(Type::BOOL)), + defNullCont(new Container(defVoid, defVoid)), + defStr(new Container(defVoid, defChar)), + defCharSet(new Container(defChar, defVoid)), + defCharFifo(new Fifo(defChar)), + defSelfStub(new SelfStub()) +{ + // Fundamentals + addTypeAlias("type", defTypeRef); + addTypeAlias("void", defVoid); + addDefinition("null", defVoid, variant::null, this); + addTypeAlias("any", defVariant); + addTypeAlias("int", defInt); + addTypeAlias("char", defChar); + addTypeAlias("byte", defByte); + addTypeAlias("bool", defBool); + defBool->addValue(this, this, "false"); + defBool->addValue(this, this, "true"); + addTypeAlias("voidc", defNullCont); + addTypeAlias("str", defStr); + addTypeAlias("chars", defCharSet); + addTypeAlias("charf", registerType(defCharFifo)->getRefType()); + addTypeAlias("self", defSelfStub); + + // Constants + addDefinition("__VER_MAJOR", defInt, SHANNON_VERSION_MAJOR, this); + addDefinition("__VER_MINOR", defInt, SHANNON_VERSION_MINOR, this); + addDefinition("__VER_FIX", defInt, SHANNON_VERSION_FIX, this); + + // Variables + resultVar = addInnerVar("__program_result", defVariant); + sioVar = addInnerVar("sio", defCharFifo); + serrVar = addInnerVar("serr", defCharFifo); + + // 
Built-ins: + // NULL argument means anything goes, the builtin parser will take care of + // type checking. Return type doesn't matter; the builtin parser functions + // leave the actual result types on the simulation stack anyway. + // TODO: fmt() read() write() + // TODO: infile() outfile() + FuncPtr* proto1 = registerProto(defVariant, NULL); + FuncPtr* proto2 = registerProto(defVariant, NULL, NULL); + addBuiltin("len", compileLen, proto1); + addBuiltin("lo", compileLo, proto1); + addBuiltin("hi", compileHi, proto1); + addBuiltin("_str", compileToStr, proto1); + addBuiltin("enq", compileEnq, proto2); + addBuiltin("deq", compileDeq, proto1); + addBuiltin("token", compileToken, proto2); + + addBuiltin("skip", compileSkip, + registerState(registerProto(defVoid, NULL, NULL), shn_skipset)); + addBuiltin("eol", NULL, + registerState(registerProto(defBool, defCharFifo), shn_eol)); + addBuiltin("line", NULL, + registerState(registerProto(defStr, defCharFifo), shn_line)); + addBuiltin("skipln", NULL, + registerState(registerProto(defVoid, defCharFifo), shn_skipln)); + addBuiltin("look", NULL, + registerState(registerProto(defChar, defCharFifo), shn_look)); + + addTypeAlias("strfifo", + registerState(registerProto(defCharFifo, defStr), shn_strfifo)); + + getCodeSeg()->close(); + setComplete(); +} + + +QueenBee::~QueenBee() throw() + { builtins.release_all(); } + + +stateobj* QueenBee::newInstance() +{ + assert(getCodeSeg()->closed); + assert(complete); + stateobj* inst = parent::newInstance(); + sio.setType(defCharFifo); + serr.setType(defCharFifo); + *inst->member(sioVar->id) = &sio; + *inst->member(serrVar->id) = &serr; + return inst; +} + + +Builtin* QueenBee::addBuiltin(Builtin* b) +{ + builtins.push_back(b->grab()); + addUnique(b); + builtinScope.add(b); // note: doesn't throw + return b; +} + + +Builtin* QueenBee::addBuiltin(const str& n, Builtin::CompileFunc f, FuncPtr* p) +{ + return addBuiltin(new Builtin(n, f, p, this)); +} + + +Builtin* 
QueenBee::addBuiltin(const str& n, Builtin::CompileFunc f, State* s) +{ + return addBuiltin(new Builtin(n, f, s, this)); +} + + +State* QueenBee::registerState(FuncPtr* proto, ExternFuncProto ext) +{ + return registerType(new State(this, proto, ext)); +} + + +// --- Globals ------------------------------------------------------------- // + + +objptr defTypeRef; +objptr defVoid; +objptr queenBee; + + +void initTypeSys() +{ + // Because all Type objects are also runtime objects, they all have a + // runtime type of "type reference". The initial typeref object refers to + // itself and should be created before anything else in the type system. + defTypeRef = new TypeReference(); + + // Void is used in deriving vectors and sets, so we need it before some of + // the default types are created in QueenBee + defVoid = new Void(); + + // The "system" module that defines default types; some of them have + // recursive definitions and other kinds of weirdness, and therefore should + // be defined in C code rather than in Shannon code + queenBee = new QueenBee(); +} + + +void doneTypeSys() +{ + queenBee = NULL; + defVoid = NULL; + defTypeRef = NULL; +} + diff --git a/src/typesys.h b/src/typesys.h new file mode 100644 index 0000000..4beb8bf --- /dev/null +++ b/src/typesys.h @@ -0,0 +1,717 @@ +#ifndef __TYPESYS_H +#define __TYPESYS_H + +#include "runtime.h" + + +class Symbol; +class Variable; +class InnerVar; +class StkVar; +class ArgVar; +class PtrVar; +class ResultVar; +class Definition; +class Builtin; +class Scope; +class BlockScope; +class Type; +class Reference; +class Ordinal; +class Enumeration; +class Range; +class Container; +class Fifo; +class SelfStub; +class State; +class FuncPtr; +class Module; + +typedef Symbol* PSymbol; +typedef Variable* PVariable; +typedef InnerVar* PInnerVar; +typedef StkVar* PStkVar; +typedef ArgVar* PArgVar; +typedef PtrVar* PPtrVar; +typedef ResultVar* PResultVar; +typedef Definition* PDefinition; +typedef Builtin* PBuiltin; +typedef 
Scope* PScope; +typedef BlockScope* PBlockScope; +typedef Type* PType; +typedef Reference* PReference; +typedef Ordinal* POrdinal; +typedef Enumeration* PEnumeration; +typedef Range* PRange; +typedef Container* PContainer; +typedef Fifo* PFifo; +typedef State* PState; +typedef FuncPtr* PFuncPtr; +typedef Module* PModule; + + +class CodeSeg; // defined in vm.h +class CodeGen; + +class Compiler; // defined in compiler.h + + +// --- Symbols & Scope ----------------------------------------------------- // + + +class Symbol: public symbol +{ +public: + enum SymbolId { STKVAR, ARGVAR, PTRVAR, RESULTVAR, INNERVAR, + FORMALARG, DEFINITION, BUILTIN }; + + SymbolId const symbolId; + Type* const type; + State* const host; + + Symbol(const str&, SymbolId, Type*, State*) throw(); + ~Symbol() throw(); + + void fqName(fifo&) const; + void dump(fifo&) const; + + bool isAnyVar() const { return symbolId <= INNERVAR; } + bool isStkVar() const { return symbolId == STKVAR; } + bool isArgVar() const { return symbolId == ARGVAR; } + bool isPtrVar() const { return symbolId == PTRVAR; } + bool isResultVar() const { return symbolId == RESULTVAR; } + bool isInnerVar() const { return symbolId == INNERVAR; } + bool isFormalArg() const { return symbolId == FORMALARG; } + bool isDef() const { return symbolId == DEFINITION; } + bool isBuiltin() const { return symbolId == BUILTIN; } +}; + + +class Definition: public Symbol +{ +public: + variant const value; + Definition(const str&, Type*, const variant&, State*) throw(); + ~Definition() throw(); + Type* getAliasedType() const; +}; + + +class Variable: public Symbol +{ +protected: + Variable(const str&, SymbolId, Type*, memint, State*) throw(); +public: + memint const id; + ~Variable() throw(); + memint getMemSize(); +}; + + +class StkVar: public Variable +{ +public: + StkVar(const str&, Type*, memint, State*) throw(); +}; + + +class ArgVar: public Variable +{ +public: + ArgVar(const str&, Type*, memint, State*) throw(); +}; + + +class PtrVar: 
public Variable +{ +public: + PtrVar(const str&, Type*, memint, State*) throw(); +}; + + +class ResultVar: public Variable +{ +public: + ResultVar(Type*, State*) throw(); +}; + + +class InnerVar: public Variable +{ +public: + InnerVar(const str&, Type*, memint, State*) throw(); + Module* getModuleType() const + { return cast(type); } +}; + + +class FormalArg: public Symbol +{ +public: + memint const id; + bool const isPtr; + bool const hasDefValue; + variant /*const*/ defValue; + FormalArg(const str&, Type*, memint, bool isPtr, variant*) throw(); + ~FormalArg() throw(); + memint getMemSize() + { return 1 + int(isPtr); } +}; + + +class Builtin: public Symbol +{ +public: + typedef void (*CompileFunc)(Compiler*, Builtin*); + + CompileFunc const compile; // either call the compiler function + State* const staticFunc; // ... or generate a static call to this function + FuncPtr* const prototype; // optional + + Builtin(const str&, CompileFunc, FuncPtr*, State*) throw(); + Builtin(const str&, CompileFunc, State*, State*) throw(); + ~Builtin() throw(); + void dump(fifo&) const; +}; + + +struct EDuplicate: public exception +{ + str const ident; + EDuplicate(const str& _ident) throw(); + ~EDuplicate() throw(); + const char* what() throw(); // shouldn't be called +}; + + +struct EUnknownIdent: public exception +{ + str const ident; + EUnknownIdent(const str& _ident) throw(); + ~EUnknownIdent() throw(); + const char* what() throw(); // shouldn't be called +}; + + +class Scope +{ + friend void test_typesys(); +protected: + symtbl symbols; // symbol table for search +public: + Scope* const outer; + Scope(Scope* _outer) throw() + : outer(_outer) { } + ~Scope() throw() { } + void addUnique(Symbol*); + void replaceSymbol(Symbol*); + Symbol* find(const str& ident) const // returns NULL or Symbol + { return symbols.find(ident); } + Symbol* findShallow(const str& _name) const; // throws EUnknown +}; + + +class BlockScope: public Scope +{ +protected: + objvec stkVars; // owned + memint 
startId; + memint varCount; + CodeGen* gen; +public: + BlockScope(Scope* outer, CodeGen*) throw(); + ~BlockScope() throw(); + StkVar* addStkVar(const str&, Type*); + void deinitLocals(); // generates POPs via CodeGen (currently used only in AutoScope) +}; + + +// --- Type ---------------------------------------------------------------- // + +// Note: type objects (all descendants of Type) should not be modified once +// created. This will allow to reuse loaded modules in a multi-threaded server +// environment for serving concurrent requests without actually re-compiling +// or reloading used modules. + +class Type: public rtobject +{ + friend class State; + friend class Reference; // for access to dump() +public: + +#if defined(BOOL) || defined(CHAR) || defined(INT) +# error "I don't like your macro names and I'm not going to change mine." +#endif + + enum TypeId { + TYPEREF, VOID, VARIANT, REF, RANGE, + BOOL, CHAR, INT, ENUM, // ordinal types; see isAnyOrd() + NULLCONT, VEC, SET, DICT, // containers; see isAnyCont() + FIFO, SELFSTUB, FUNCPTR, + STATE, MODULE // note isAnyState() + }; + +protected: + objptr refType; + State* host; // State that "owns" a given type + str defName; // for more readable diagnostics output, but not really needed + + Type(TypeId) throw(); + static TypeId contType(Type* i, Type* e) throw(); + // void setTypeId(TypeId id) + // { const_cast(typeId) = id; } + +public: + TypeId const typeId; + + ~Type() throw(); + + bool isTypeRef() const { return typeId == TYPEREF; } + bool isVoid() const { return typeId == VOID; } + bool isVariant() const { return typeId == VARIANT; } + bool isReference() const { return typeId == REF; } + bool isDerefable() const { return !isAnyState() && !isAnyFifo(); } + bool isRange() const { return typeId == RANGE; } + + bool isBool() const { return typeId == BOOL; } + bool isChar() const { return typeId == CHAR; } + bool isInt() const { return typeId == INT; } + bool isEnum() const { return typeId == ENUM || isBool(); 
} + bool isAnyOrd() const { return typeId >= BOOL && typeId <= ENUM; } + bool isByte() const; + bool isBit() const; + bool isFullChar() const; + + bool isNullCont() const { return typeId == NULLCONT; } + bool isAnyVec() const { return typeId == VEC; } + bool isAnySet() const { return typeId == SET; } + bool isAnyDict() const { return typeId == DICT; } + bool isAnyCont() const { return typeId >= NULLCONT && typeId <= DICT; } + bool isByteVec() const; + bool isByteSet() const; + bool isByteDict() const; + bool isContainer(Type* idx, Type* elem) const; + bool isVectorOf(Type* elem) const; + + bool isAnyFifo() const { return typeId == FIFO; } + bool isByteFifo() const; + bool isFifo(Type*) const; + + bool isSelfStub() const { return typeId == SELFSTUB; } + bool isFuncPtr() const { return typeId == FUNCPTR; } + bool isAnyState() const { return typeId >= STATE; } + bool isState() const { return typeId == STATE; } + bool isModule() const { return typeId == MODULE; } + + bool isPod() const { return isAnyOrd() || isVoid(); } + memint getMemSize() const { return 1; } + + bool empty() const; // override + void dump(fifo&) const; // override + void dumpDef(fifo&) const; + virtual void dumpValue(fifo&, const variant&) const; + + // NOTE: the difference between identicalTo() and canAssignTo() is very subtle. + // Identicality is mostly (but not only) tested for container compatibility, + // when testing compatibility of function prototypes, etc. What can I say, + // "Careful with that axe, Eugene" + virtual bool identicalTo(Type*) const; + virtual bool canAssignTo(Type*) const; + bool isCompatibleWith(const variant&); + + Reference* getRefType() { return isReference() ? 
PReference(this) : refType.get(); } + Type* getValueType(); + Container* deriveVec(State*); + Container* deriveSet(State*); + Container* deriveContainer(State*, Type* idxType); + Fifo* deriveFifo(State*); +}; + + +void dumpVariant(fifo&, const variant&, Type* = NULL); + + +inline memint Variable::getMemSize() + { return type->getMemSize(); } + + + +// --- General Types ------------------------------------------------------- // + + +class TypeReference: public Type +{ + friend void initTypeSys(); +protected: + TypeReference() throw(); + ~TypeReference() throw(); + void dumpValue(fifo&, const variant&) const; +}; + + +class Void: public Type +{ + friend void initTypeSys(); +protected: + Void() throw(); + ~Void() throw(); +}; + + +class Variant: public Type +{ + friend class QueenBee; +protected: + Variant() throw(); + ~Variant() throw(); +}; + + +class Reference: public Type +{ + friend class Type; +protected: + Reference(Type*) throw(); +public: + Type* const to; + ~Reference() throw(); + bool canAssignTo(Type*) const; + bool identicalTo(Type* t) const; + void dump(fifo&) const; + void dumpValue(fifo&, const variant&) const; +}; + + +inline Type* Type::getValueType() + { return isReference() ? 
PReference(this)->to : this; } + + +// --- Ordinals ------------------------------------------------------------ // + + +class Ordinal: public Type +{ + friend class QueenBee; +protected: + Ordinal(TypeId, integer, integer) throw(); + ~Ordinal() throw(); + void reassignRight(integer r) + { assert(r == right + 1); (integer&)right = r; } + virtual Ordinal* _createSubrange(integer, integer); + + objptr rangeType; + +public: + integer const left; + integer const right; + + void dump(fifo&) const; + void dumpValue(fifo&, const variant&) const; + bool identicalTo(Type* t) const; + bool canAssignTo(Type*) const; + bool isInRange(integer v) const + { return v >= left && v <= right; } + bool isByte() const + { return left >= 0 && right <= 255; } + bool isBit() const + { return left == 0 && right == 1; } + bool isFullChar() const + { return isChar() && left == 0 && right == 255; } + integer getRange() const + { return right - left + 1; } + Ordinal* createSubrange(integer, integer); + Ordinal* createSubrange(const range& r) + { return createSubrange(r.left(), r.right()); } + Range* getRangeType() + { return rangeType; } +}; + + +class Enumeration: public Ordinal +{ + friend class QueenBee; +protected: + typedef objvec EnumValues; + EnumValues values; + Enumeration(TypeId _typeId); // built-in enums, e.g. 
bool + Enumeration(const EnumValues&, integer, integer); // subrange + Ordinal* _createSubrange(integer, integer); // override +public: + Enumeration() throw(); // user-defined enums + ~Enumeration() throw(); + void dump(fifo&) const; + void dumpValue(fifo&, const variant&) const; + bool identicalTo(Type* t) const; + bool canAssignTo(Type*) const; + void addValue(State*, Scope*, const str&); +}; + + +class Range: public Type +{ + friend class Ordinal; +protected: + Range(Ordinal*) throw(); + ~Range() throw(); +public: + Ordinal* const elem; + void dump(fifo&) const; + void dumpValue(fifo&, const variant&) const; + bool identicalTo(Type* t) const; + bool canAssignTo(Type*) const; +}; + + +// --- Containers ---------------------------------------------------------- // + + +class Container: public Type +{ + friend class State; + friend class QueenBee; + +protected: + Container(Type* i, Type* e) throw(); + +public: + Type* const index; + Type* const elem; + + ~Container() throw(); + void dump(fifo&) const; + void dumpValue(fifo&, const variant&) const; + bool identicalTo(Type*) const; + bool hasByteIndex() const + { return index->isByte(); } + bool hasByteElem() const + { return elem->isByte(); } +}; + + +// --- Fifo ---------------------------------------------------------------- // + + +class Fifo: public Type +{ + friend class State; + friend class QueenBee; +protected: + Fifo(Type*) throw(); +public: + Type* const elem; + ~Fifo() throw(); + void dump(fifo&) const; + bool identicalTo(Type*) const; + bool isByteFifo() const + { return elem->isByte(); } +}; + + +// --- Prototype/FuncPtr --------------------------------------------------- // + + +class FuncPtr: public Type +{ +public: + Type* returnType; + objvec formalArgs; // owned + memint popArgCount; + bool const returns; // VM helper + + FuncPtr(Type* retType) throw(); + ~FuncPtr() throw(); + void dump(fifo&) const; + bool identicalTo(Type*) const; // override + bool identicalTo(FuncPtr* t) const; + bool 
canAssignTo(Type*) const; // override + bool canAssignTo(FuncPtr* t) const; + FormalArg* addFormalArg(const str&, Type*, bool isPtr, variant*); + void resolveSelfType(State*); +}; + + +// --- SelfStub ------------------------------------------------------------ // + + +class SelfStub: public Type +{ + friend class QueenBee; +protected: + SelfStub() throw(); + ~SelfStub() throw(); +public: + bool identicalTo(Type*) const; + bool canAssignTo(Type*) const; +}; + + +// --- State --------------------------------------------------------------- // + + +typedef void (*ExternFuncProto)(variant* result, stateobj* outerobj, variant args[]); + +// "i" below is 1-based; arguments are numbered from right to left +#define SHN_ARG(i) (args-(i)) + + +class State: public Type, public Scope +{ +protected: + Type* _registerType(Type*, Definition* = NULL) throw(); + void addTypeAlias(const str&, Type*); + + objvec types; // owned + objvec defs; // owned + objvec args; // owned, copied from prototype + + void _setup(); + InnerVar* addInnerVar(InnerVar*); + static Module* getParentModule(State*) throw(); + + // Compiler helpers: + bool complete; + int innerObjUsed; + int outsideObjectsUsed; + +public: + objvec innerVars; // owned + + State* const parent; + Module* const parentModule; + FuncPtr* const prototype; + objptr resultVar; // may be NULL + + objptr const codeseg; + ExternFuncProto const externFunc; + State* const base; + + // VM helpers: + memint varCount; + bool isCtor; + + State(State* parent, FuncPtr*, State* base = NULL) throw(); + State(State* parent, FuncPtr*, ExternFuncProto, State* base = NULL) throw(); + ~State() throw(); + void fqName(fifo&) const; + void dump(fifo&) const; + void dumpAll(fifo&) const; + + bool isComplete() const + { return complete; } + void setComplete() + { assert(!complete); complete = true; } + bool isStatic() const + { return isComplete() && outsideObjectsUsed == 0; } + int isInnerObjUsed() const + { assert(complete); return innerObjUsed; } + bool 
isExternal() const + { return externFunc != NULL; } + void useInnerObj() + { innerObjUsed++; } + void useOutsideObject() + { outsideObjectsUsed++; } + + void setBase(State*); + bool canAssignTo(Type*) const; // override + bool canAssignTo(State* t) const; + + Definition* addDefinition(const str&, Type*, const variant&, Scope*); + Variable* addArgument(FormalArg*); + void addResultVar(Type*); + InnerVar* addInnerVar(const str&, Type*); + InnerVar* reclaimArg(ArgVar*, Type*); + virtual stateobj* newInstance(); + template + T* registerType(T* t) throw() + { return cast(_registerType(t)); } + Container* getContainerType(Type* idx, Type* elem); + Fifo* getFifoType(Type* elem); + FuncPtr* registerProto(Type* ret); + FuncPtr* registerProto(Type* ret, Type* arg1); + FuncPtr* registerProto(Type* ret, Type* arg1, Type* arg2); + CodeSeg* getCodeSeg() const; + const uchar* getCodeStart() const; +}; + + +inline void FuncPtr::resolveSelfType(State* state) + { returnType = state; } + + +inline stateobj::stateobj(State* t) throw() + : rtobject(t) +#ifdef DEBUG + , varcount(t->varCount) +#endif + { } + + +// --- Module -------------------------------------------------------------- // + + +class Module: public State +{ +protected: + strvec constStrings; + objvec codeSegs; // for dumps +public: + str const filePath; + objvec usedModuleVars; // used module instances are stored in static vars + Module(const str& name, const str& filePath) throw(); + ~Module() throw(); + void dump(fifo&) const; + str getName() const { return defName; } + void addUsedModule(Module*); + InnerVar* findUsedModuleVar(Module*); + void registerString(str&); // registers a string literal for use at run-time + void registerCodeSeg(CodeSeg* c); // collected here for dumps +}; + + +// --- QueenBee (system module) -------------------------------------------- // + + +class QueenBee: public Module +{ + typedef Module parent; + friend void initTypeSys(); +protected: + symtbl builtinScope; + objvec builtins; + 
QueenBee(); + ~QueenBee() throw(); + stateobj* newInstance(); // override + Builtin* addBuiltin(Builtin*); + Builtin* addBuiltin(const str&, Builtin::CompileFunc, FuncPtr*); + Builtin* addBuiltin(const str&, Builtin::CompileFunc, State*); + State* registerState(FuncPtr* proto, ExternFuncProto); +public: + Variant* const defVariant; + Ordinal* const defInt; + Ordinal* const defChar; + Ordinal* const defByte; + Enumeration* const defBool; + Container* const defNullCont; + Container* const defStr; + Container* const defCharSet; + Fifo* const defCharFifo; + SelfStub* const defSelfStub; + Variable* sioVar; + Variable* serrVar; + Variable* resultVar; + Builtin* findBuiltin(const str& ident) // returns Builtin* or NULL + { return builtinScope.find(ident); } +}; + + +// --- Globals ------------------------------------------------------------- // + + +void initTypeSys(); +void doneTypeSys(); + +extern objptr defTypeRef; +extern objptr defVoid; +extern objptr queenBee; + +#endif // __TYPESYS_H diff --git a/src/version.h b/src/version.h new file mode 100644 index 0000000..a99bd4d --- /dev/null +++ b/src/version.h @@ -0,0 +1,4 @@ +#define SHANNON_VERSION_MAJOR 0 +#define SHANNON_VERSION_MINOR 92 +#define SHANNON_VERSION_FIX 0 +#define SHANNON_COPYRIGHT "Copyright (c) 2009-2010 Hovik Melikyan" diff --git a/src/vm.cpp b/src/vm.cpp new file mode 100644 index 0000000..09fa9ee --- /dev/null +++ b/src/vm.cpp @@ -0,0 +1,1175 @@ + +#include "vm.h" +#include "compiler.h" + + +// --- VIRTUAL MACHINE ----------------------------------------------------- // + + +#ifdef DEBUG +static void invOpcode(int code) +{ + str s = str("Invalid opcode: ") + to_string(code); + _fatal(0x5002, s.c_str()); +#else +static void invOpcode(int) +{ + _fatal(0x5002); +#endif +} + +static void doExit(const variant& r) { throw eexit(r); } + + +static void failAssertion(const str& modname, integer linenum, const str& cond) + { throw emessage("Assertion failed \"" + cond + "\" at " + modname + " (" + 
to_string(linenum) + ')'); } + +static void typecastError() + { throw evariant("Invalid typecast"); } + +static void constExprErr() + { throw emessage("Variable used in const expression"); } + +static void funcPtrErr() + { throw emessage("Invalid use of function in const expression"); } + +static void localObjErr() + { throw emessage("Local object is locked"); } + + +static void dumpVar(const str& expr, const variant& var, Type* type) +{ + // TODO: dump to serr? + sio << "# " << expr; + if (type) + { + sio << ": "; + type->dumpDef(sio); + } + sio << " = "; + dumpVariant(sio, var, type); + sio << endl; +} + + +static void byteDictReplace(varvec& v, integer i, const variant& val) +{ + memint size = v.size(); + if (uinteger(i) > 255) + container::keyerr(); + if (i == size) + v.push_back(val); + else + { + if (i > size) + v.grow(i - size + 1); + v.replace(i, val); + } +} + + +static void byteDictDelete(varvec& v, integer i) +{ + memint size = v.size(); + if (uinteger(i) >= umemint(size) || v[i].is_null()) + container::keyerr(); + if (i == size - 1) + v.pop_back(); + else + v.replace(i, variant()); +} + + +inline void INITAT(variant* dest) + { ::new(dest) variant(); } + +template + inline void INITAT(variant* dest, const T& v) + { ::new(dest) variant(v); } + +template + inline void INITAT(variant* dest, const T& v1, const U& v2) + { ::new(dest) variant(v1, v2); } + +#define ADV(T) \ + (ip += sizeof(T), *(T*)(ip - sizeof(T))) // TODO: improve this? 
+ +#define PUSH0(v) \ + { INITAT(++stk); } + +#define PUSH(v) \ + { INITAT(stk + 1, v); stk++; } + +#define PUSH2(v1,v2) \ + { INITAT(stk + 1, v1, v2); stk++; } + +#define POP() \ + { (*stk--).~variant(); } + +#define POPPOD() \ + { assert(!stk->is_anyobj()); stk--; } + +#define INITPOP(dest) \ + { *(podvar*)(dest) = *(podvar*)stk; stk--; } // pop to uninitialized area + +#define INITPUSH(src) \ + { *(podvar*)(++stk) = *(podvar*)src; } // push without ctor + +#define POPTO(dest) \ + { variant* d = dest; d->~variant(); INITPOP(d); } + + +void runRabbitRun(variant* result, stateobj* dataseg, stateobj* outerobj, + variant* basep, CodeSeg* codeseg) +{ + // TODO: check for stack overflow + const uchar* ip = codeseg->getCode(); + State* state = codeseg->state; + variant* argp = basep; + stateobj* innerobj = NULL; + + if (state) + { + if (state->isCtor) + { + // Instantiate the class if not already done + if (result->is_null()) + INITAT(result, state->newInstance()); + innerobj = result->_stateobj(); + } + else if (state->varCount && state->isInnerObjUsed()) + { + innerobj = new(basep) stateobj(state); // note: doesn't initialize the vars + innerobj->_mkstatic(); +#ifdef DEBUG + innerobj->varcount = state->varCount; +#endif + basep = innerobj->member(0); + } + } + + variant* stk = basep - 1; + + // Function call helpers: + variant ax; // accumulator register, for function results + State* callee; + stateobj* callds; + stateobj* callobj; + int popArgCount; + try + { +loop: // We use goto instead of while(1) {} so that compilers never complain + switch(*ip++) + { + + // --- 1. MISC CONTROL ----------------------------------------------- + case opInv0: invOpcode(0); break; + case opEnd: goto exit; + case opExit: doExit(*stk); break; + + // --- 2. 
CONST LOADERS ---------------------------------------------- + case opLoadTypeRef: + PUSH(ADV(Type*)); + break; + case opLoadNull: + PUSH(variant::null); + break; + case opLoad0: + PUSH(integer(0)); + break; + case opLoad1: + PUSH(integer(1)); + break; + case opLoadByte: + PUSH(integer(ADV(uchar))); + break; + case opLoadOrd: + PUSH(ADV(integer)); + break; + case opLoadStr: + PUSH(ADV(str)); + break; + case opLoadEmptyVar: + PUSH(variant::Type(ADV(uchar))); + break; + case opLoadConstObj: + { + uchar t = ADV(uchar); + PUSH2(variant::Type(t), ADV(object*)); + } + break; + case opLoadOuterObj: + PUSH(outerobj); + break; + case opLoadDataSeg: + PUSH(dataseg); + break; + case opLoadOuterFuncPtr: + PUSH(new funcptr(dataseg, outerobj, ADV(State*))); + break; + case opLoadInnerFuncPtr: + PUSH(new funcptr(dataseg, innerobj, ADV(State*))); + break; + case opLoadStaticFuncPtr: + PUSH(new funcptr(NULL, NULL, ADV(State*))); + break; + case opLoadFuncPtrErr: + funcPtrErr(); + break; + case opLoadCharFifo: + PUSH(new memfifo(ADV(Fifo*), true)); + break; + case opLoadVarFifo: + PUSH(new memfifo(ADV(Fifo*), false)); + break; + + // --- 3. 
DESIGNATOR LOADERS ----------------------------------------- + case opLoadInnerVar: + PUSH(*(innerobj->member(ADV(uchar)))); + break; + case opLoadOuterVar: + PUSH(*(CHKPTR(outerobj)->member(ADV(uchar)))); + break; + case opLoadStkVar: + PUSH(*(basep + ADV(uchar))); + break; + case opLoadArgVar: + PUSH(*(argp - ADV(uchar))); + break; + case opLoadPtrVar: + PUSH(*(argp - ADV(uchar) + 1)->_ptr()); + break; + case opLoadResultVar: + PUSH(*result); + break; + case opLoadVarErr: + constExprErr(); + break; + + case opLoadMember: + *stk = *(CHKPTR(stk->_stateobj())->member(ADV(uchar))); + break; + case opDeref: + { + reference* r = stk->_ref(); + INITAT(stk, r->var); + r->release(); + } + break; + + case opLeaInnerVar: + PUSH((rtobject*)NULL); // no need to lock "self", should be locked anyway + PUSH(innerobj->member(ADV(uchar))); + break; + case opLeaOuterVar: + PUSH((rtobject*)NULL); // again, an outer var is "grounded" and thus locked too + PUSH(CHKPTR(outerobj)->member(ADV(uchar))); + break; + case opLeaStkVar: + PUSH((rtobject*)NULL); // same for stack-local vars + PUSH(basep + ADV(uchar)); + break; + case opLeaArgVar: + PUSH((rtobject*)NULL); // same for arguments + PUSH(argp - ADV(uchar)); + break; + case opLeaPtrVar: + { + variant* a = argp - ADV(uchar); + PUSH(*a); + PUSH(*(a + 1)); + } + break; + case opLeaResultVar: + PUSH((rtobject*)NULL); + PUSH(result); + break; + case opLeaMember: + PUSH(CHKPTR(stk->_stateobj())->member(ADV(uchar))); + break; + case opLeaRef: + PUSH(&(stk->_ref()->var)); + break; + + + // --- 4. 
STORERS ---------------------------------------------------- + case opInitInnerVar: + INITPOP(innerobj->member(ADV(uchar))); + break; + // case opInitStkVar: + // INITTO(basep + ADV(uchar)); + // break; + case opStoreInnerVar: + POPTO(innerobj->member(ADV(uchar))); + break; + case opStoreOuterVar: + POPTO(CHKPTR(outerobj)->member(ADV(uchar))); + break; + case opStoreStkVar: + POPTO(basep + ADV(uchar)); + break; + case opStoreArgVar: + POPTO(argp - ADV(uchar)); + break; + case opStorePtrVar: + POPTO((argp - ADV(uchar) + 1)->_ptr()); + break; + case opStoreResultVar: + POPTO(result); + break; + case opStoreMember: + POPTO(CHKPTR((stk - 1)->_stateobj())->member(ADV(uchar))); + POP(); + break; + case opStoreRef: + POPTO(&(stk - 1)->_ref()->var); + POP(); + break; + + case opIncStkVar: + ((basep + ADV(uchar))->_int())++; + break; + + // --- 5. DESIGNATOR OPS, MISC --------------------------------------- + case opMkRange: + { + INITAT(stk - 1, (stk - 1)->_int(), stk->_int()); + POPPOD(); + } + break; + case opMkRef: + INITAT(stk, new reference((podvar*)stk)); + break; + case opMkFuncPtr: + *stk = new funcptr(dataseg, stk->_stateobj(), ADV(State*)); + break; + case opMkFarFuncPtr: + callee = ADV(State*); + *stk = new funcptr(dataseg->member(ADV(uchar))->_stateobj(), stk->_stateobj(), callee); + break; + case opNonEmpty: + *stk = int(!stk->empty()); + break; + case opPop: + POP(); + break; + case opPopPod: + POPPOD(); + break; + case opCast: + if (!ADV(Type*)->isCompatibleWith(*stk)) + typecastError(); + break; + case opIsType: + *stk = int(ADV(Type*)->isCompatibleWith(*stk)); + break; + case opToStr: + { + strfifo f(NULL); + ADV(Type*)->dumpValue(f, *stk); + *stk = f.all(); + } + break; + + + // --- 6. 
STRINGS, VECTORS ------------------------------------------- + case opChrToStr: + *stk = str(stk->_int()); + break; + case opChrCat: + (stk - 1)->_str().push_back(stk->_uchar()); + POPPOD(); + break; + case opStrCat: + (stk - 1)->_str().append(stk->_str()); + POP(); + break; + case opVarToVec: + { varvec v; v.push_back(*stk); *stk = v; } + break; + case opVarCat: + (stk - 1)->_vec().push_back(*stk); + POP(); + break; + case opVecCat: + (stk - 1)->_vec().append(stk->_vec()); + POP(); + break; + case opStrLen: + *stk = integer(stk->_str().size()); + break; + case opVecLen: + *stk = integer(stk->_vec().size()); + break; + case opStrHi: + *stk = integer(stk->_str().size() - 1); + break; + case opVecHi: + *stk = integer(stk->_vec().size() - 1); + break; + case opStrElem: + *(stk - 1) = (stk - 1)->_str().at(stk->_int()); // *OVR + POPPOD(); + break; + case opVecElem: + *(stk - 1) = (stk - 1)->_vec().at(stk->_int()); // *OVR + POPPOD(); + break; + + case opSubstr: // -{int,void} -int -str +str + { + memint pos = (stk - 1)->_int(); // *OVR + str& s = (stk - 2)->_str(); + s = stk->is_null() ? s.substr(pos) + : s.substr(pos, stk->_int() - pos + 1); // *OVR + POPPOD(); POPPOD(); + } + break; + + case opSubvec: // -{int,void} -int -vec +vec + { + memint pos = (stk - 1)->_int(); // *OVR + varvec& v = (stk - 2)->_vec(); + v = v.subvec(pos, stk->is_null() ? 
v.size() - pos + : stk->_int() - pos + 1); // *OVR + POPPOD(); POPPOD(); + } + break; + + case opStoreStrElem: // -char -int -ptr -obj + (stk - 2)->_ptr()->_str().replace((stk - 1)->_int(), stk->_uchar()); // *OVR + POPPOD(); POPPOD(); POPPOD(); POP(); + break; + case opStoreVecElem: // -var -int -ptr -obj + (stk - 2)->_ptr()->_vec().replace((stk - 1)->_int(), *stk); // *OVR + POP(); POPPOD(); POPPOD(); POP(); + break; + case opDelStrElem: // -int -ptr -obj + (stk - 1)->_ptr()->_str().erase(stk->_int(), 1); // *OVR + POPPOD(); POPPOD(); POP(); + break; + case opDelVecElem: // -int -ptr -obj + (stk - 1)->_ptr()->_vec().erase(stk->_int()); // *OVR + POPPOD(); POPPOD(); POP(); + break; + + case opDelSubstr: // -{int,void} -int -ptr -obj + { + memint pos = (stk - 1)->_int(); // *OVR + str& s = (stk - 2)->_ptr()->_str(); + s.erase(pos, stk->is_null() ? s.size() - pos : stk->_int() - pos + 1); // *OVR + POPPOD(); POPPOD(); POPPOD(); POP(); + } + break; + + case opDelSubvec: // -{int,void} -int -ptr -obj + { + memint pos = (stk - 1)->_int(); // *OVR + varvec& v = (stk - 2)->_ptr()->_vec(); + v.erase(pos, stk->is_null() ? v.size() - pos : stk->_int() - pos + 1); // *OVR + POPPOD(); POPPOD(); POPPOD(); POP(); + } + break; + + case opStrIns: // -{char,str} -int -ptr -obj + { + str& s = (stk - 2)->_ptr()->_str(); + memint pos = (stk - 1)->_int(); // *OVR + if (stk->is_str()) + { s.insert(pos, stk->_str()); POP(); } + else // is_uchar() + { s.insert(pos, stk->_uchar()); POPPOD(); } + } + POPPOD(); POPPOD(); POP(); + break; + + case opVecIns: // -{var,vec} -int -ptr -obj + { + varvec& v = (stk - 2)->_ptr()->_vec(); + memint pos = (stk - 1)->_int(); // *OVR + if (stk->is_vec()) + v.insert(pos, stk->_vec()); + else + v.insert(pos, *stk); + } + POP(); POPPOD(); POPPOD(); POP(); + break; + + case opSubstrReplace: // -str -{int,void} -int -ptr -obj + { + str& s = (stk - 3)->_ptr()->_str(); + memint pos = (stk - 2)->_int(); // *OVR + s.replace(pos, + (stk - 1)->is_null() ? 
s.size() - pos : + (stk - 1)->_int() - pos + 1, + stk->_str()); + } + POP(); POPPOD(); POPPOD(); POPPOD(); POP(); + break; + + case opSubvecReplace: // -vec -{int,void} -int -ptr -obj + { + varvec& v = (stk - 3)->_ptr()->_vec(); + memint pos = (stk - 2)->_int(); // *OVR + v.replace(pos, + (stk - 1)->is_null() ? v.size() - pos : + (stk - 1)->_int() - pos + 1, + stk->_vec()); + } + POP(); POPPOD(); POPPOD(); POPPOD(); POP(); + break; + + // In-place vector concat + case opChrCatAssign: + (stk - 1)->_ptr()->_str().push_back(stk->_uchar()); + POPPOD(); POP(); POP(); + break; + case opStrCatAssign: + (stk - 1)->_ptr()->_str().append(stk->_str()); + POP(); POP(); POP(); + break; + case opVarCatAssign: + (stk - 1)->_ptr()->_vec().push_back(*stk); + POP(); POP(); POP(); + break; + case opVecCatAssign: + (stk - 1)->_ptr()->_vec().append(stk->_vec()); + POP(); POP(); POP(); + break; + // *OVR: integer type is reduced to memint in some configs + + + // --- 7. SETS ------------------------------------------------------- + case opElemToSet: + { varset s; s.push_back(*stk); *stk = s; } + break; + case opSetAddElem: + (stk - 1)->_set().find_insert(*stk); + POP(); + break; + case opElemToByteSet: + *stk = ordset(stk->_int()); + break; + case opRngToByteSet: + *(stk - 1) = ordset((stk - 1)->_int(), stk->_int()); + POPPOD(); + break; + case opByteSetAddElem: + (stk - 1)->_ordset().find_insert(stk->_int()); + POPPOD(); + break; + case opByteSetAddRng: + (stk - 2)->_ordset().find_insert((stk - 1)->_int(), stk->_int()); + POPPOD(); + POPPOD(); + break; + case opInSet: + *(stk - 1) = int(stk->_set().find(*(stk - 1))); + POP(); + break; + case opInByteSet: + (stk - 1)->_int() = int(stk->_ordset().find((stk - 1)->_int())); + POP(); + break; + case opInBounds: + stk->_int() = int(ADV(Ordinal*)->isInRange(stk->_int())); + break; + case opInRange: + (stk - 1)->_int() = stk->_range().contains((stk - 1)->_int()); + POP(); + break; + case opRangeLo: + CHKPTR(stk->_anyobj()); + *stk = 
stk->_range().left(); + break; + case opRangeHi: + CHKPTR(stk->_anyobj()); + *stk = stk->_range().right(); + break; + case opInRange2: + { + integer i = (stk - 2)->_int(); + (stk - 2)->_int() = int(i >= (stk - 1)->_int() && i <= stk->_int()); + POPPOD(); POPPOD(); + } + break; + case opSetElem: + POP(); POP(); PUSH0(); // see CodeGen::loadContainerElem() + break; + case opByteSetElem: + POPPOD(); POP(); PUSH0(); + break; + case opDelSetElem: // -var -ptr -obj + (stk - 1)->_ptr()->_set().find_erase(*stk); + POP(); POPPOD(); POP(); + break; + case opDelByteSetElem: // -int -ptr -obj + (stk - 1)->_ptr()->_ordset().find_erase(stk->_int()); + POPPOD(); POPPOD(); POP(); + break; + case opSetLen: + *stk = integer(stk->_set().size()); + break; + case opSetKey: + *(stk - 1) = (stk - 1)->_set().at(stk->_int()); // *OVR + POPPOD(); + break; + + + // --- 8. DICTIONARIES ----------------------------------------------- + case opPairToDict: + *(stk - 1) = vardict(*(stk - 1), *stk); + POP(); + break; + case opDictAddPair: + (stk - 2)->_dict().find_replace(*(stk - 1), *stk); + POP(); + POP(); + break; + case opPairToByteDict: + { + integer i = (stk - 1)->_int(); + INITAT(stk - 1, varvec()); + byteDictReplace((stk - 1)->_vec(), i, *stk); + POP(); + } + break; + case opByteDictAddPair: + byteDictReplace((stk - 2)->_vec(), (stk - 1)->_int(), *stk); + POP(); + POPPOD(); + break; + case opDictElem: + { + const variant* v = (stk - 1)->_dict().find(*stk); + POP(); + if (v) + *stk = *v; // potentially dangerous if dict has refcount=1, which it shouldn't + else + container::keyerr(); + } + break; + case opByteDictElem: + { + integer i = stk->_int(); + POPPOD(); + if (i < 0 || i >= stk->_vec().size()) + container::keyerr(); + const variant& v = stk->_vec()[i]; + if (v.is_null()) + container::keyerr(); + *stk = v; // same as for opDictElem + } + break; + case opInDict: + *(stk - 1) = int(stk->_dict().find_key(*(stk - 1))); + POP(); + break; + case opInByteDict: + { + integer i = (stk - 
1)->_int(); + const varvec& v = stk->_vec(); + (stk - 1)->_int() = int(i >= 0 && i < v.size() && !v[i].is_null()); + POP(); + } + break; + case opStoreDictElem: // -var -var -ptr -obj + (stk - 2)->_ptr()->_dict().find_replace(*(stk - 1), *stk); + POP(); POP(); POPPOD(); POP(); + break; + case opStoreByteDictElem: // -var -int -ptr -obj + byteDictReplace((stk - 2)->_ptr()->_vec(), (stk - 1)->_int(), *stk); + POP(); POPPOD(); POPPOD(); POP(); + break; + case opDelDictElem: // -var -ptr -obj + (stk - 1)->_ptr()->_dict().find_erase(*stk); + POP(); POPPOD(); POP(); + break; + case opDelByteDictElem: // -int -ptr -obj + byteDictDelete((stk - 1)->_ptr()->_vec(), stk->_int()); + POPPOD(); POPPOD(); POP(); + break; + case opDictLen: + *stk = integer(stk->_dict().size()); + break; + case opDictElemByIdx: + *(stk - 1) = (stk - 1)->_dict().value(stk->_int()); // *OVR + POPPOD(); + break; + case opDictKeyByIdx: + *(stk - 1) = (stk - 1)->_dict().key(stk->_int()); // *OVR + POPPOD(); + break; + + // --- 9. FIFOS ------------------------------------------------------ + case opElemToFifo: // used in the fifo ctor <...> + { + Fifo* t = ADV(Fifo*); + objptr f = new memfifo(t, t->isByteFifo()); + if (f->is_char_fifo()) + { f->enq_char(stk->_uchar()); POPPOD(); } + else + INITPOP(f->enq_var()); + PUSH(f.get()); + } + break; + case opFifoEnqChar: + (stk - 1)->_fifo()->enq_char(stk->_uchar()); + POPPOD(); + break; + case opFifoEnqVar: + INITPOP((stk - 1)->_fifo()->enq_var()); + break; + case opFifoEnqChars: + (stk - 1)->_fifo()->enq(stk->_str()); + POP(); + break; + case opFifoEnqVars: + (stk - 1)->_fifo()->enq(stk->_vec()); + POP(); + break; + case opFifoDeqChar: + *stk = stk->_fifo()->get(); + break; + case opFifoDeqVar: + { + variant f; + INITPOP(&f); + f._fifo()->deq_var(++stk); + } + break; + case opFifoCharToken: + *(stk - 1) = (stk - 1)->_fifo()->token(stk->_ordset().get_charset()); + POP(); + break; + + // --- 10. 
ARITHMETIC ------------------------------------------------ +#define BINARY_INT(op) { (stk - 1)->_int() op stk->_int(); POPPOD(); } +#define UNARY_INT(op) { stk->_int() = op stk->_int(); } +#define INPLACE_INT(op) { (stk - 1)->_ptr()->_int() op stk->_int(); \ + POPPOD(); POPPOD(); POP(); } + + // TODO: range checking in debug mode + case opAdd: BINARY_INT(+=); break; + case opSub: BINARY_INT(-=); break; + case opMul: BINARY_INT(*=); break; + case opDiv: BINARY_INT(/=); break; + case opMod: BINARY_INT(%=); break; + case opBitAnd: BINARY_INT(&=); break; + case opBitOr: BINARY_INT(|=); break; + case opBitXor: BINARY_INT(^=); break; + case opBitShl: BINARY_INT(<<=); break; + case opBitShr: BINARY_INT(>>=); break; + case opNeg: UNARY_INT(-); break; + case opBitNot: UNARY_INT(~); break; + case opNot: UNARY_INT(!); break; + case opAddAssign: INPLACE_INT(+=); break; + case opSubAssign: INPLACE_INT(-=); break; + case opMulAssign: INPLACE_INT(*=); break; + case opDivAssign: INPLACE_INT(/=); break; + case opModAssign: INPLACE_INT(%=); break; + + // --- 11. 
BOOLEAN --------------------------------------------------- + case opCmpOrd: + (stk - 1)->_int() -= stk->_int(); + POPPOD(); + break; + case opCmpStr: + *(stk - 1) = integer((stk - 1)->_str().compare(stk->_str())); + POP(); + break; + case opCmpVar: + *(stk - 1) = int(*(stk - 1) == *stk) - 1; + POP(); + break; + + case opEqual: stk->_int() = stk->_int() == 0; break; + case opNotEq: stk->_int() = stk->_int() != 0; break; + case opLessThan: stk->_int() = stk->_int() < 0; break; + case opLessEq: stk->_int() = stk->_int() <= 0; break; + case opGreaterThan: stk->_int() = stk->_int() > 0; break; + case opGreaterEq: stk->_int() = stk->_int() >= 0; break; + + case opCaseOrd: stk->_int() = int(stk->_int() == (stk - 1)->_int()); break; + case opCaseRange: + { + integer i = (stk - 2)->_int(); + (stk - 1)->_int() = int(i >= (stk - 1)->_int() && i <= stk->_int()); + POPPOD(); + } + break; + case opCaseStr: *stk = int(stk->_str() == (stk - 1)->_str()); break; + case opCaseVar: *stk = int(*stk == *(stk - 1)); break; + + // Loop helpers + case opStkVarGt: *stk = int((basep + ADV(uchar))->_int() > stk->_int()); break; + case opStkVarGe: *stk = int((basep + ADV(uchar))->_int() >= stk->_int()); break; + + + // --- 12. 
JUMPS, CALLS ---------------------------------------------- + case opJump: + { + // Beware of strange behavior of the GCC optimizer: this should be done in 2 steps + jumpoffs offs = ADV(jumpoffs); + ip += offs; + } + break; + case opJumpFalse: + { + jumpoffs offs = ADV(jumpoffs); + if (!stk->_int()) + ip += offs; + POP(); + } + break; + case opJumpTrue: + { + jumpoffs offs = ADV(jumpoffs); + if (stk->_int()) + ip += offs; + POP(); + } + break; + case opJumpAnd: + { + jumpoffs offs = ADV(jumpoffs); + if (!stk->_int()) + ip += offs; + else + POP(); + } + break; + case opJumpOr: + { + jumpoffs offs = ADV(jumpoffs); + if (stk->_int()) + ip += offs; + else + POP(); + } + break; + + // --- Function calls + case opChildCall: + callobj = innerobj; +nearCall: + callds = dataseg; +farCall: + callee = ADV(State*); + popArgCount = callee->prototype->popArgCount; +anyCall: + if (callee->isExternal()) + callee->externFunc(&ax, callobj, stk + 1); + else + runRabbitRun(&ax, callds, callobj, stk + 1, callee->getCodeSeg()); + while (popArgCount--) + POP(); + if (callee->prototype->returns) + { + INITPUSH(&ax); // no need for the variant ctor, just copy + INITAT(&ax, variant::VOID); + } + break; + + case opSiblingCall: + callobj = outerobj; + goto nearCall; + + case opStaticCall: + callobj = NULL; + callds = NULL; + goto farCall; + + case opMethodCall: + callee = ADV(State*); + callds = dataseg; +farMethodCall: + callobj = (stk - callee->prototype->popArgCount)->_stateobj(); + popArgCount = callee->prototype->popArgCount + 1; + goto anyCall; + + case opFarMethodCall: + callee = ADV(State*); + callds = dataseg->member(ADV(uchar))->_stateobj(); + goto farMethodCall; + + case opCall: + { + funcptr* callfp = (stk - ADV(uchar))->_funcptr(); + CHKPTR(callfp); + callee = callfp->state; + popArgCount = callee->prototype->popArgCount + 1; + callds = callfp->dataseg; + callobj = callfp->outer; + } + goto anyCall; + + + // --- 13. 
DEBUGGING, DIAGNOSTICS ------------------------------------ + case opLineNum: + ADV(integer); + break; + case opAssert: + { + integer linenum = ADV(integer); + str& cond = ADV(str); + if (!stk->_int()) + failAssertion(state->parentModule->filePath, linenum, cond); + POPPOD(); + } + break; + case opDump: + { + str& expr = ADV(str); + dumpVar(expr, *stk, ADV(Type*)); + POP(); + } + break; + + case opInv: // silence the opcode checkers (opcodes.sh in particular) + default: + invOpcode(uchar(*(ip - 1))); + break; + } + goto loop; +exit: + + if (state && !state->isCtor) + { + if (innerobj && !innerobj->isunique()) + localObjErr(); +#ifdef DEBUG + for (memint i = state->varCount; i--; ) + POP(); +#endif + } +#ifndef DEBUG + while (stk >= basep) + POP(); +#endif + assert(stk == basep - 1); + } + catch(exception&) + { + while (stk >= basep) + POP(); + throw; + } +} + + +eexit::eexit(const variant& r) throw(): exception(), result(r) {} +eexit::~eexit() throw() { } +const char* eexit::what() throw() { return "Exit called"; } + + +Type* CodeGen::runConstExpr(rtstack& constStack, variant& result) +{ + Type* resultType = stkPop(); + addOp(opStoreResultVar); + end(); + + runRabbitRun(&result, NULL, NULL, constStack.base(), &codeseg); + + return resultType; +} + + +// --- Execution Context --------------------------------------------------- // + + +ModuleInstance::ModuleInstance(Module* m) throw() + : symbol(m->getName()), module(m), obj() { } + +ModuleInstance::~ModuleInstance() throw() + { } + +void ModuleInstance::run(Context* context, rtstack& stack) +{ + assert(module->isComplete()); + + // Assign module vars. 
This allows to generate code that accesses module + // static data by variable id, so that code is context-independent + for (memint i = 0; i < module->usedModuleVars.size(); i++) + { + InnerVar* v = module->usedModuleVars[i]; + stateobj* o = context->getModuleObject(v->getModuleType()); + *obj->member(v->id) = o; + } + + // Run module initialization or main code + variant result = obj.get(); + runRabbitRun(&result, obj, obj, stack.base(), module->getCodeSeg()); +} + + +// Releases the module object, if there is one: collapse() runs first, then the +// reference is cleared. An exception thrown by either step ends in fatal(0x5006). +void ModuleInstance::finalize() +{ + if (!obj.empty()) + { + try + { + obj->collapse(); // destroy possible circular references first + obj.clear(); // now free the object itself + } + catch (exception&) + { + fatal(0x5006, "Exception in destructor"); + } + } +} + + +// Defaults: dump, assertions, line numbers and VM listing on; compileOnly off; +// stack size 8192; the module search path starts with "./". +CompilerOptions::CompilerOptions() throw() + : enableDump(true), enableAssert(true), lineNumbers(true), + vmListing(true), compileOnly(false), stackSize(8192) + { modulePath.push_back("./"); } + + +// Sets enableDump, enableAssert, lineNumbers and vmListing to the same value. +void CompilerOptions::setDebugOpts(bool flag) +{ + enableDump = flag; + enableAssert = flag; + lineNumbers = flag; + vmListing = flag; +} + + +// Module name = file name passed through remove_filename_ext() and then remove_filename_path(). +static str moduleNameFromFileName(const str& n) + { return remove_filename_path(remove_filename_ext(n)); } + + +Context::Context() + : queenBeeInst(addModule(queenBee)) { } + + +Context::~Context() + { instances.release_all(); } + + +// Wraps m in a new ModuleInstance and records it in instTable and instances. +// An empty module name is fatal (0x5007); emessage is thrown when instTable.add() rejects the instance. +ModuleInstance* Context::addModule(Module* m) +{ + if (m->getName().empty()) + fatal(0x5007, "Empty module name"); + objptr inst = new ModuleInstance(m); + if (!instTable.add(inst)) + throw emessage("Duplicate module name: " + inst->name); + instances.push_back(inst->grab()); + return inst; +} + + +// Creates a Module named after filePath, registers it via addModule() and compiles it; +// when enableDump or vmListing is set, a listing is then dumped to the same path with a ".lst" extension. +Module* Context::loadModule(const str& filePath) +{ + // TODO: store the current file name in a named const, say __FILE__ + str modName = moduleNameFromFileName(filePath); + objptr m = new Module(modName, filePath); + addModule(m); + Compiler compiler(*this, m, new intext(NULL, filePath)); + compiler.compileModule(); + if (options.enableDump || options.vmListing) +
dump(remove_filename_ext(filePath) + ".lst"); + return m; +} + + +str Context::lookupSource(const str& modName) +{ + for (memint i = 0; i < options.modulePath.size(); i++) + { + str t = options.modulePath[i] + "/" + modName + SOURCE_EXT; + if (isFile(t.c_str())) + return t; + } + throw emessage("Module not found: " + modName); +} + + +Module* Context::getModule(const str& modName) +{ + // TODO: find a module by full path, not just name (hash by path/name?) + // TODO: to have a global cache of compiled modules, not just within the context + // A name already present in instTable yields that instance's module; anything else is + // looked up on the module path and loaded + ModuleInstance* inst = instTable.find(modName); + if (inst != NULL) + return inst->module; + else + return loadModule(lookupSource(modName)); +} + + +// Returns the object of the instance whose module is m; fatal(0x5003) if no instance matches. +stateobj* Context::getModuleObject(Module* m) +{ + // TODO: Linear search, can be improved later + for (memint i = 0; i < instances.size(); i++) + if (instances[i]->module == m) + return instances[i]->obj; + fatal(0x5003, "Module not found"); + return NULL; +} + + +// Gives every registered instance a fresh object from Module::newInstance(); +// a module that is not complete is fatal (0x5004). +void Context::instantiateModules() +{ + for (memint i = 0; i < instances.size(); i++) + { + ModuleInstance* inst = instances[i]; + if (!inst->module->isComplete()) + fatal(0x5004, "Module not compiled"); + inst->obj = inst->module->newInstance(); + } +} + + +// Finalizes the instances from the highest index down to 0. +void Context::clear() +{ + for (memint i = instances.size(); i--; ) + instances[i]->finalize(); +} + + +// Writes the INT_SIZE and PTR_SIZE flag lines (in bits) followed by each module's dump to listingPath. +void Context::dump(const str& listingPath) +{ + outtext stm(NULL, listingPath); + stm << "#FLAG INT_SIZE " << sizeof(integer) * 8 << endl; + stm << "#FLAG PTR_SIZE " << sizeof(void*) * 8 << endl; + for (memint i = 0; i < instances.size(); i++) + instances[i]->module->dump(stm); +} + + +// Returns an empty variant in compileOnly mode; otherwise instantiates all modules and +// runs them in index order on a single rtstack of options.stackSize. +variant Context::execute() +{ + if (options.compileOnly) + return variant(); + + // Now that all modules are compiled and their dataseg sizes are known, we can + // instantiate the objects + instantiateModules(); + + // Run init code segments for all modules; the last one is the main program + rtstack stack(options.stackSize); + try + { + for (memint i = 0; i < instances.size(); i++) 
+ instances[i]->run(this, stack); + } + catch (eexit& e) + { + // Program exit variable (not necessarily int, can be anything) + *queenBeeInst->obj->member(queenBee->resultVar->id) = e.result; + } + catch (exception&) + { + clear(); + throw; + } + + variant result = *queenBeeInst->obj->member(queenBee->resultVar->id); + clear(); + return result; +} + + +void initVm() { if (opMaxCode > 255) fatal(0x5001, "Opcodes > 255"); } +void doneVm() { } + diff --git a/src/vm.h b/src/vm.h new file mode 100644 index 0000000..55f442c --- /dev/null +++ b/src/vm.h @@ -0,0 +1,641 @@ +#ifndef __VM_H +#define __VM_H + +#include "common.h" +#include "runtime.h" +#include "parser.h" +#include "typesys.h" + + +enum OpCode +{ + // NOTE: the relative order of many of these instructions in their groups is significant + // NOTE: no instruction should push more than one value onto the stack + // regardless of the number of pop's -- required for some optimization + // techniques (e.g. see CodeGen::undoSubexpr()) + + // --- 1. MISC CONTROL + opInv0, // -- help detect invalid code: don't execute 0 + opEnd, // end execution and return + opExit, // throws eexit() + + // --- 2. 
CONST LOADERS + // --- begin primary loaders (it's important that all loaders of this kind + // be grouped together or at least recognized by isPrimaryLoader()) + opLoadTypeRef, // [Type*] +obj + opLoadNull, // +null + opLoad0, // +int + opLoad1, // +int + opLoadByte, // [int:u8] +int + opLoadOrd, // [int] +int + opLoadStr, // [str] +str + opLoadEmptyVar, // [variant::Type:u8] +var + opLoadConstObj, // [variant::Type:u8, object*] +var + opLoadOuterObj, // +stateobj + opLoadDataSeg, // +module-obj + // opLoadInnerObj, // equivalent to opLoadStkVar 'result' + opLoadOuterFuncPtr, // [State*] +funcptr -- see also opMkFuncPtr + opLoadInnerFuncPtr, // [State*] +funcptr + opLoadStaticFuncPtr,// [State*] +funcptr + opLoadFuncPtrErr, // [State*] +funcptr + opLoadCharFifo, // [Fifo*] +fifo + opLoadVarFifo, // [Fifo*] +fifo + + // --- 3. DESIGNATOR LOADERS + // --- begin grounded loaders + opLoadInnerVar, // [inner.idx:u8] +var + opLoadOuterVar, // [outer.idx:u8] +var + opLoadStkVar, // [stk.idx:u8] +var + opLoadArgVar, // [arg.idx:u8] +var + opLoadPtrVar, // [arg.idx:u8] +var + opLoadResultVar, // +var + opLoadVarErr, // placeholder for var loaders to generate an error + // --- end primary loaders + opLoadMember, // [stateobj.idx:u8] -stateobj +var + opDeref, // -ref +var + // --- end grounded loaders + + opLeaInnerVar, // [inner.idx:u8] +obj(0) +ptr + opLeaOuterVar, // [outer.idx:u8] +obj(0) +ptr + opLeaStkVar, // [stk.idx:u8] +obj(0) +ptr + opLeaArgVar, // [arg.idx:u8] +obj(0) +ptr + opLeaPtrVar, // [arg.idx:u8] +obj +ptr + opLeaResultVar, // +obj(0) +ptr + opLeaMember, // [stateobj.idx:u8] -stateobj +stateobj +ptr + opLeaRef, // -ref +ref +ptr + + // --- 4. 
STORERS + opInitInnerVar, // [inner.idx:u8] -var + // opInitStkVar, // [stk.idx:u8] -var + // --- begin grounded storers + opStoreInnerVar, // [inner.idx:u8] -var + opStoreOuterVar, // [outer.idx:u8] -var + opStoreStkVar, // [stk.idx:u8] -var + opStoreArgVar, // [arg.idx:u8] -var + opStorePtrVar, // [arg.idx:u8] -var + opStoreResultVar, // -var + opStoreMember, // [stateobj.idx:u8] -var -stateobj + opStoreRef, // -var -ref + // --- end grounded storers + opIncStkVar, // [stk.idx:u8] -- for loop helper + + // --- 5. DESIGNATOR OPS, MISC + opMkRange, // -int -int +range -- currently used only in const expressions + opMkRef, // -var +ref + opMkFuncPtr, // [State*] -obj +funcptr -- args for opMk*FuncPtr should match respective caller ops + opMkFarFuncPtr, // [State*, datasegidx:u8] -obj +funcptr + opNonEmpty, // -var +bool + opPop, // -var + opPopPod, // -int + opCast, // [Type*] -var +var + opIsType, // [Type*] -var +bool + opToStr, // [Type*] -var +str + + // --- 6. STRINGS, VECTORS + opChrToStr, // -char +str + opChrCat, // -char -str +str + opStrCat, // -str -str +str + opVarToVec, // -var +vec + opVarCat, // -var -vec +vec + opVecCat, // -vec -vec +vec + opStrLen, // -str +int + opVecLen, // -vec +int + opStrHi, // -str +int + opVecHi, // -vec +int + opStrElem, // -int -str +char + opVecElem, // -int -vec +var + opSubstr, // -{int,void} -int -str +str + opSubvec, // -{int,void} -int -vec +vec + opStoreStrElem, // -char -int -ptr -obj + opStoreVecElem, // -var -int -ptr -obj + opDelStrElem, // -int -ptr -obj + opDelVecElem, // -int -ptr -obj + opDelSubstr, // -{int,void} -int -ptr -obj + opDelSubvec, // -{int,void} -int -ptr -obj + opStrIns, // -{char,str} -int -ptr -obj + opVecIns, // -{var,vec} -int -ptr -obj + opSubstrReplace, // -str -{int,void} -int -ptr -obj + opSubvecReplace, // -vec -{int,void} -int -ptr -obj + // In-place vector concat + opChrCatAssign, // -char -ptr -obj + opStrCatAssign, // -str -ptr -obj + opVarCatAssign, // -var -ptr -obj + opVecCatAssign, // -vec 
-ptr -obj + + // --- 7. SETS + opElemToSet, // -var +set + opSetAddElem, // -var -set +set + opElemToByteSet, // -int +set + opRngToByteSet, // -int -int +set + opByteSetAddElem, // -int -set +set + opByteSetAddRng, // -int -int -set +set + opInSet, // -set -var +bool + opInByteSet, // -set -int +bool + opInBounds, // [Ordinal*] -int +bool + opInRange, // -range -int +bool + opRangeLo, // -range +int + opRangeHi, // -range +int + opInRange2, // -int -int -int +bool + opSetElem, // -var -set +void + opByteSetElem, // -int -set +void + opDelSetElem, // -var -ptr -obj + opDelByteSetElem, // -int -ptr -obj + opSetLen, // -set +int + opSetKey, // -int -set +var + + // --- 8. DICTIONARIES + opPairToDict, // -var -var +dict + opDictAddPair, // -var -var -dict +dict + opPairToByteDict, // -var -int +vec + opByteDictAddPair, // -var -int -vec +vec + opDictElem, // -var -dict +var + opByteDictElem, // -int -dict +var + opInDict, // -dict -var +bool + opInByteDict, // -dict -int +bool + opStoreDictElem, // -var -var -ptr -obj + opStoreByteDictElem,// -var -int -ptr -obj + opDelDictElem, // -var -ptr -obj + opDelByteDictElem, // -int -ptr -obj + opDictLen, // -dict +int + opDictElemByIdx, // -int -dict +var + opDictKeyByIdx, // -int -dict +var + + // --- 9. FIFOS + // opLoadCharFifo and opLoadVarFifo are primary loaders, defined in section [2]. + opElemToFifo, // [Fifo*] -var +fifo + opFifoEnqChar, // -char -fifo +fifo + opFifoEnqVar, // -var -fifo +fifo + opFifoEnqChars, // -str -fifo +fifo + opFifoEnqVars, // -vec -fifo +fifo + opFifoDeqChar, // -fifo +char + opFifoDeqVar, // -fifo +var + opFifoCharToken, // -charset -fifo +str + + // --- 10. 
ARITHMETIC + opAdd, // -int -int +int + opSub, // -int -int +int + opMul, // -int -int +int + opDiv, // -int -int +int + opMod, // -int -int +int + opBitAnd, // -int -int +int + opBitOr, // -int -int +int + opBitXor, // -int -int +int + opBitShl, // -int -int +int + opBitShr, // -int -int +int + // Arithmetic unary + opNeg, // -int +int + opBitNot, // -int +int + opNot, // -bool +bool + // Arithmetic in-place, in sync with tokAddAssign etc + opAddAssign, // -int -ptr -obj + opSubAssign, // -int -ptr -obj + opMulAssign, // -int -ptr -obj + opDivAssign, // -int -ptr -obj + opModAssign, // -int -ptr -obj + + // --- 11. BOOLEAN + opCmpOrd, // -int -int +{-1,0,1} + opCmpStr, // -str -str +{-1,0,1} + opCmpVar, // -var -var +{-1,0} -- 0 if equal, -1 otherwise + // see isCmpOp() + opEqual, // -int +bool + opNotEq, // -int +bool + opLessThan, // -int +bool + opLessEq, // -int +bool + opGreaterThan, // -int +bool + opGreaterEq, // -int +bool + // case label helpers + opCaseOrd, // -int -int +int +bool + opCaseRange, // -int -int -int +int +bool + opCaseStr, // -str -str +str +bool + opCaseVar, // -var -var +var +bool + // for loop helpers + opStkVarGt, // [stk.idx:u8] -int +bool + opStkVarGe, // [stk.idx:u8] -int +bool + + // --- 12. JUMPS, CALLS + // Jumps; [dst] is a relative 16-bit offset + opJump, // [dst:s16] + opJumpFalse, // [dst:s16] -bool + opJumpTrue, // [dst:s16] -bool + // Short bool evaluation: leave the value on the stack if jumping, pop it otherwise + opJumpAnd, // [dst:s16] (-)bool + opJumpOr, // [dst:s16] (-)bool + + // don't forget isCaller() + opChildCall, // [State*] -var -var ... {+var} + opSiblingCall, // [State*] -var -var ... {+var} + opStaticCall, // [State*] -var -var ... {+var} + opMethodCall, // [State*] -var -var -obj ... {+var} + opFarMethodCall, // [State*, datasegidx:u8] -var -var -obj ... {+var} + opCall, // [argcount:u8] -var -var -funcptr {+var} + + // --- 13. 
DEBUGGING, DIAGNOSTICS + opLineNum, // [linenum:int] + opAssert, // [linenum:int, cond:str] -bool + opDump, // [expr:str, type:Type*] -var + + opInv, + opMaxCode = opInv, +}; + + +inline bool isPrimaryLoader(OpCode op) + { return (op >= opLoadTypeRef && op <= opLoadVarErr); } + +inline bool isGroundedLoader(OpCode op) + { return op >= opLoadInnerVar && op <= opDeref; } + +inline bool isCmpOp(OpCode op) + { return op >= opEqual && op <= opGreaterEq; } + +inline bool isJump(OpCode op) + { return op >= opJump && op <= opJumpOr; } + +inline bool isBoolJump(OpCode op) + { return op >= opJumpFalse && op <= opJumpOr; } + +inline bool isCaller(OpCode op) + { return op >= opChildCall && op <= opCall; } + +inline bool isDiscardable(OpCode op) + { return isCaller(op) || op == opFifoEnqChar || op == opFifoEnqVar; } + +inline bool hasTypeArg(OpCode op); + + +// --- OpCode Info + + +enum OpArgType + { argNone, + argType, argState, argFarState, argFifo, // order is important, see hasTypeArg() + argUInt8, argInt, argStr, argVarType8, argVarTypeObj, + argInnerIdx, argOuterIdx, argStkIdx, argArgIdx, argStateIdx, + argJump16, argLineNum, argAssert, argDump, + argMax }; + + +extern umemint opArgSizes[argMax]; + + +struct OpInfo +{ + const char* name; + OpCode op; + OpArgType arg; +}; + + +extern OpInfo opTable[]; + + +// --- Code Segment -------------------------------------------------------- // + + +#define DEFAULT_STACK_SIZE 8192 + + +class CodeSeg: public object +{ + typedef rtobject parent; + + str code; + + template + T& atw(memint i) { return *(T*)code.atw(i); } + template + T at(memint i) const { return *(T*)code.data(i); } + +public: + State* const state; + +#ifdef DEBUG + bool closed; +#endif + + // Code gen helpers + template + void append(const T& t) { code.append((const char*)&t, sizeof(T)); } + void append(const str& s) { code.append(s); } + void erase(memint from) { code.resize(from); } + void eraseOp(memint offs); + str cutOp(memint offs); + void replaceOpAt(memint i, 
OpCode op); + OpCode opAt(memint i) const { return OpCode(at(i)); } + memint opLenAt(memint offs) const; + + jumpoffs& jumpOffsAt(memint i) + { assert(isJump(opAt(i))); return atw(i + 1); } + + Type* typeArgAt(memint i) const; + State* stateArgAt(memint i) const { return cast(typeArgAt(i)); } + + static inline memint opLen(OpCode op) + { assert(op < opMaxCode); return memint(opArgSizes[opTable[op].arg]) + 1; } + + static inline OpArgType opArgType(OpCode op) + { assert(op < opMaxCode); return opTable[op].arg; } + + CodeSeg(State*) throw(); + ~CodeSeg() throw(); + + State* getStateType() const { return state; } + memint size() const { return code.size(); } + bool empty() const { return code.empty(); } + void close(); + + const uchar* getCode() const { assert(closed); return (uchar*)code.data(); } + void dump(fifo& stm) const; // in vminfo.cpp +}; + + +template<> inline void CodeSeg::append(const OpCode& op) + { append(uchar(op)); } + +// Compiler traps +template<> OpCode CodeSeg::at(memint i) const; +template<> OpCode& CodeSeg::atw(memint i); + + +inline CodeSeg* State::getCodeSeg() const { return cast(codeseg.get()); } +inline const uchar* State::getCodeStart() const { return getCodeSeg()->getCode(); } + + +// --- Code Generator ------------------------------------------------------ // + + +class CodeGen: noncopyable +{ +protected: + Module* const module; + State* const codeOwner; + State* const typeReg; // for calling registerType() + CodeSeg& codeseg; + + // TODO: keep at least ordinal consts so that some things can be evaluated + // at compile time or optimized + struct SimStackItem + { + Type* type; + memint loaderOffs; + SimStackItem(Type* t, memint o) + : type(t), loaderOffs(o) { } + }; + + podvec simStack; // exec simulation stack + memint locals; // number of local vars allocated + + template + void add(const T& t) { codeseg.append(t); } + void addOp(OpCode op) { codeseg.append(op); } + void addOp(Type*, OpCode op); + void addOp(Type*, const str& op); + 
template + void addOp(OpCode op, const T& a) { addOp(op); add(a); } + template + void addOp(Type* t, OpCode op, const T& a) { addOp(t, op); add(a); } + void stkPush(Type*, memint); + Type* stkPop(); + void stkReplaceType(Type* t); // only if the opcode is not changed + Type* stkType() + { return simStack.back().type; } + Type* stkType(memint i) + { return simStack.back(i).type; } + memint stkLoaderOffs() + { return simStack.back().loaderOffs; } + memint stkPrevLoaderOffs() + { return prevLoaderOffs; } + memint stkPrimaryLoaderOffs() + { return primaryLoaders.back(); } + static void error(const char*); + static void error(const str&); + + void _loadVar(Variable*, OpCode); + + memint prevLoaderOffs; + podvec primaryLoaders; + +public: + CodeGen(CodeSeg&, Module* m, State* treg, bool compileTime) throw(); + ~CodeGen() throw(); + + memint getStackLevel() { return simStack.size(); } + void endStatement() { primaryLoaders.clear(); } + bool isCompileTime() { return codeOwner == NULL; } + memint getLocals() { return locals; } + State* getCodeOwner() { return codeOwner; } + Type* getTopType() { return stkType(); } + Type* getTopType(memint i) { return stkType(i); } + void justForget() { stkPop(); } // for branching in the if() function + memint getCurrentOffs() { return codeseg.size(); } + void undoSubexpr(); + Type* undoTypeRef(); + State* undoStateRef(); + Ordinal* undoOrdTypeRef(); + bool canDiscardValue(); + void deinitLocalVar(Variable*); + void deinitFrame(memint baseLevel); // doesn't change the sim stack + void popValue(); + bool tryImplicitCast(Type*); + void implicitCast(Type*, const char* errmsg = NULL); + void explicitCast(Type*); + void isType(Type*); + void mkRange(); + void toStr(); + + bool deref(); + void mkref(); + void nonEmpty(); + void loadTypeRefConst(Type*); + void loadConst(Type* type, const variant&); + void loadTypeRef(Type*); + void loadDefinition(Definition*); + void loadEmptyConst(Type* type); + void loadSymbol(Symbol*); + void 
loadStkVar(StkVar* var) + { _loadVar(var, opLoadStkVar); } + void loadArgVar(ArgVar* var) + { _loadVar(var, opLoadArgVar); } + void loadPtrVar(PtrVar* var) + { _loadVar(var, opLoadPtrVar); } + void loadResultVar(ResultVar* var); + void loadInnerVar(InnerVar*); + void loadVariable(Variable*); + void loadMember(State*, Symbol*); + void loadMember(State*, State*); + void loadMember(State*, Variable*); + void loadThis(); + void loadDataSeg(); + void storeResultVar() + { stkPop(); addOp(opStoreResultVar); } + + void initStkVar(StkVar*); + void initInnerVar(InnerVar*); + void incStkVar(StkVar*); + + Container* elemToVec(Container*); + void elemCat(); + void cat(); + void loadContainerElem(); + void loadKeyByIndex(); + void loadDictElemByIndex(); + void loadSubvec(); + void length(); + void lo(); + void hi(); + Container* elemToSet(); + Container* rangeToSet(); + void setAddElem(); + void checkRangeLeft(); + void setAddRange(); + Container* pairToDict(); + void checkDictKey(); + void dictAddPair(); + void inCont(); + void inBounds(); + void inRange(); + void inRange2(bool isCaseLabel = false); + void loadFifo(Fifo*); + Fifo* elemToFifo(); + void fifoEnq(); + void fifoPush(); + void fifoDeq(); + void fifoToken(); + + void arithmBinary(OpCode op); + void arithmUnary(OpCode op); + void cmp(OpCode); + void caseCmp(); + void caseInRange() + { inRange2(true); } + void _not(); // 'not' is something reserved, probably only with Apple's GCC + + void stkVarCmp(StkVar*, OpCode); + void stkVarCmpLength(StkVar* var, StkVar* vec); + + void boolJump(memint target, OpCode op); + memint boolJumpForward(OpCode op); + memint jumpForward(OpCode = opJump); + void resolveJump(memint target); + void _jump(memint target, OpCode op = opJump); + void jump(memint target) + { _jump(target, opJump); } + void linenum(integer); + void assertion(integer linenum, const str& cond); + void dumpVar(const str& expr); + void programExit(); + + void toLea(); + void prevToLea(); + str lvalue(); + void 
assign(const str& storerCode); + str arithmLvalue(Token); + void catLvalue(); + void catAssign(); + str insLvalue(); + void insAssign(const str& storerCode); + void deleteContainerElem(); + + void prolog() { } + void epilog() { end(); } + void _popArgs(FuncPtr*); + void call(FuncPtr*); + void staticCall(State*); + + void end(); + Type* runConstExpr(rtstack& constStack, variant& result); // defined in vm.cpp +}; + + +struct evoidfunc: public exception +{ + evoidfunc() throw(); + ~evoidfunc() throw(); + const char* what() throw(); +}; + + +// --- Execution context --------------------------------------------------- // + + +struct CompilerOptions +{ + bool enableDump; + bool enableAssert; + bool lineNumbers; + bool vmListing; + bool compileOnly; + memint stackSize; + strvec modulePath; + + CompilerOptions() throw(); + void setDebugOpts(bool); +}; + + +class Context; + +class ModuleInstance: public symbol +{ +public: + objptr module; + objptr obj; + ModuleInstance(Module* m) throw(); + ~ModuleInstance() throw(); + void run(Context*, rtstack&); + void finalize(); +}; + + +class Context +{ +protected: + symtbl instTable; + objvec instances; + ModuleInstance* queenBeeInst; + + ModuleInstance* addModule(Module*); + str lookupSource(const str& modName); + void instantiateModules(); + void clear(); + void dump(const str& listingPath); + +public: + CompilerOptions options; + + Context(); + ~Context(); + + Module* getModule(const str& name); // for use by the compiler, "uses" clause + stateobj* getModuleObject(Module*); // for initializing module vars in ModuleInstance::run() + Module* loadModule(const str& filePath); + variant execute(); // after compilation only (loadModule()) +}; + + +// The Virtual Machine. This routine is used for both evaluating const +// expressions at compile time and, obviously, running runtime code. It is +// reentrant and can be launched concurrently in one process as long as +// the arguments are thread safe. It doesn't use any global/static data. 
+// Besides, code segments never have any relocatble data elements, so that any +// module can be reused in the multithreaded server environment too. + +void runRabbitRun(variant* result, stateobj* dataseg, stateobj* outerobj, + variant* basep, CodeSeg* codeseg); + + +struct eexit: public exception +{ + variant result; + eexit(const variant&) throw(); + ~eexit() throw(); + const char* what() throw(); +}; + + +void initVm(); +void doneVm(); + + +#endif // __VM_H diff --git a/src/vmcodegen.cpp b/src/vmcodegen.cpp new file mode 100644 index 0000000..9dd276a --- /dev/null +++ b/src/vmcodegen.cpp @@ -0,0 +1,1670 @@ + +#include "vm.h" + + +inline bool hasTypeArg(OpCode op) +{ + OpArgType arg = opTable[op].arg; + return arg >= argType && arg <= argFifo; +} + + +inline bool hasStateArg(OpCode op) +{ + OpArgType arg = opTable[op].arg; + return arg == argState || arg == argFarState; +} + + +// --- Code Segment -------------------------------------------------------- // + + +CodeSeg::CodeSeg(State* s) throw() + : object(), state(s) +#ifdef DEBUG + , closed(false) +#endif + { } + + +CodeSeg::~CodeSeg() throw() + { } + + +memint CodeSeg::opLenAt(memint offs) const + { return opLen(opAt(offs)); } + + +void CodeSeg::eraseOp(memint offs) + { code.erase(offs, opLenAt(offs)); } + + +str CodeSeg::cutOp(memint offs) +{ + memint len = opLenAt(offs); + str s = code.substr(offs, len); + code.erase(offs, len); + return s; +} + + +void CodeSeg::replaceOpAt(memint i, OpCode op) +{ + assert(opArgType(opAt(i)) == opArgType(op)); + *code.atw(i) = op; +} + + +Type* CodeSeg::typeArgAt(memint i) const +{ + assert(hasTypeArg(opAt(i))); + return at(i + 1); +} + + +void CodeSeg::close() +{ +#ifdef DEBUG + assert(!closed); + closed = true; +#endif + append(opEnd); +} + + +// --- Code Generator ------------------------------------------------------ // + + +evoidfunc::evoidfunc() throw() { } +evoidfunc::~evoidfunc() throw() { } +const char* evoidfunc::what() throw() { return "Void function called"; } + 
+ +CodeGen::CodeGen(CodeSeg& c, Module* m, State* treg, bool compileTime) throw() + : module(m), codeOwner(c.getStateType()), typeReg(treg), codeseg(c), locals(0), + prevLoaderOffs(-1), primaryLoaders() +{ + assert(treg != NULL); + if (compileTime != (codeOwner == NULL)) + fatal(0x6003, "CodeGen: invalid codeOwner"); +} + + +CodeGen::~CodeGen() throw() + { } + + +void CodeGen::error(const char* msg) + { throw emessage(msg); } + + +void CodeGen::error(const str& msg) + { throw emessage(msg); } + + +void CodeGen::stkPush(Type* type, memint offs) +{ + simStack.push_back(SimStackItem(type, offs)); + OpCode op = codeseg.opAt(offs); + if (isPrimaryLoader(op)) + primaryLoaders.push_back(offs); +} + + +void CodeGen::addOp(Type* type, OpCode op) +{ + memint offs = getCurrentOffs(); + addOp(op); + stkPush(type, offs); +} + + +void CodeGen::addOp(Type* type, const str& code) +{ + memint offs = getCurrentOffs(); + codeseg.append(code); + stkPush(type, offs); +} + + +Type* CodeGen::stkPop() +{ + const SimStackItem& s = simStack.back(); + prevLoaderOffs = s.loaderOffs; + if (!primaryLoaders.empty() && s.loaderOffs < primaryLoaders.back()) + primaryLoaders.pop_back(); + Type* result = s.type; + simStack.pop_back(); + return result; +} + + +void CodeGen::undoSubexpr() +{ + // This works based on the assumption that at any stack level there is a + // corresponding primary loader starting from which all code can be safely + // discarded. I think this should work provided that any instruction + // pushes not more than one value onto the stack (regardless of how many + // it pops off the stack). See also stkPop(). 
+ memint from; + primaryLoaders.pop_back(from); // get & pop + codeseg.erase(from); + simStack.pop_back(); + prevLoaderOffs = -1; +} + + +bool CodeGen::canDiscardValue() + { return isDiscardable(codeseg.opAt(stkLoaderOffs())); } + +void CodeGen::stkReplaceType(Type* t) + { simStack.backw().type = t; } + + +bool CodeGen::tryImplicitCast(Type* to) +{ + Type* from = stkType(); + + if (from == to) + return true; + + if (to->isVariant() || from->canAssignTo(to)) + { + // canAssignTo() should take care of polymorphic typecasts + stkReplaceType(to); + return true; + } + + // Vector elements are automatically converted to vectors when necessary, + // e.g. char -> str + if (to->isVectorOf(from)) + { + elemToVec(PContainer(to)); + return true; + } + + if (from->isNullCont()) + { + undoSubexpr(); + if (to->isAnyFifo()) + loadFifo(PFifo(to)); + else + loadEmptyConst(to); + return true; + } + + if (from->isFuncPtr() && to->isTypeRef()) + { + memint offs = stkLoaderOffs(); + if (hasStateArg(codeseg.opAt(offs))) + { + State* stateType = codeseg.stateArgAt(offs); + undoSubexpr(); + loadTypeRefConst(stateType); + return true; + } + } + + return false; +} + + +void CodeGen::implicitCast(Type* to, const char* errmsg) +{ + // TODO: better error message, something like expected; use Type::dump() + if (!tryImplicitCast(to)) + error(errmsg == NULL ? 
"Type mismatch" : errmsg); +} + + +void CodeGen::explicitCast(Type* to) +{ + if (tryImplicitCast(to)) + return; + + Type* from = stkType(); + + if (from->isAnyOrd() && to->isAnyOrd()) + stkReplaceType(to); + + else if (from->isVariant()) + { + stkPop(); + addOp(to, opCast, to); + } + + // TODO: better error message with type defs + else + error("Invalid explicit typecast"); +} + + +void CodeGen::isType(Type* to) +{ + Type* from = stkType(); + if (from->canAssignTo(to)) + { + undoSubexpr(); + loadConst(queenBee->defBool, 1); + } + else if (from->isAnyState() || from->isVariant()) + { + stkPop(); + addOp(queenBee->defBool, opIsType, to); + } + else + { + undoSubexpr(); + loadConst(queenBee->defBool, 0); + } +} + + +void CodeGen::mkRange() +{ + Type* left = stkType(2); + if (!left->isAnyOrd()) + error("Non-ordinal range bounds"); + implicitCast(left, "Incompatible range bounds"); + stkPop(); + stkPop(); + addOp(POrdinal(left)->getRangeType(), opMkRange); +} + + +void CodeGen::toStr() + { addOp(queenBee->defStr, opToStr, stkPop()); } + + +void CodeGen::deinitLocalVar(Variable* var) +{ + // TODO: don't generate POPs if at the end of a function in RELEASE mode + assert(var->isStkVar()); + assert(locals == getStackLevel()); + if (var->id != locals - 1) + fatal(0x6002, "Invalid local var id"); + popValue(); + locals--; +} + + +void CodeGen::deinitFrame(memint baseLevel) +{ + memint topLevel = getStackLevel(); + for (memint i = topLevel; i > baseLevel; i--) + { + bool isPod = stkType(topLevel - i + 1)->isPod(); + addOp(isPod ? opPopPod : opPop); + } +} + + +void CodeGen::popValue() +{ + bool isPod = stkPop()->isPod(); + addOp(isPod ? 
opPopPod : opPop); +} + + +Type* CodeGen::undoTypeRef() +{ + memint offs = stkLoaderOffs(); + if (codeseg.opAt(offs) != opLoadTypeRef) + error("Const type reference expected"); + Type* type = codeseg.typeArgAt(offs); + undoSubexpr(); + return type; +} + + +State* CodeGen::undoStateRef() +{ + Type* type = undoTypeRef(); + if (!type->isAnyState()) + error("State/function type expected"); + return PState(type); +} + + +Ordinal* CodeGen::undoOrdTypeRef() +{ + Type* type = undoTypeRef(); + if (!type->isAnyOrd()) + error("Ordinal type reference expected"); + return POrdinal(type); +} + + +bool CodeGen::deref() +{ + Type* type = stkType(); + if (type->isReference()) + { + type = type->getValueType(); + if (type->isDerefable()) + { + stkPop(); + addOp(type, opDeref); + } + else + notimpl(); + return true; + } + return false; +} + + +void CodeGen::mkref() +{ + Type* type = stkType(); + if (!type->isReference()) + { + if (codeseg.opAt(stkLoaderOffs()) == opDeref) + error("Superfluous automatic dereference"); + if (type->isDerefable()) + { + stkPop(); + addOp(type->getRefType(), opMkRef); + } + else + error("Can't convert to reference"); + } +} + + +void CodeGen::nonEmpty() +{ + Type* type = stkType(); + if (!type->isBool()) + { + stkPop(); + addOp(queenBee->defBool, opNonEmpty); + } +} + + +void CodeGen::loadTypeRefConst(Type* type) +{ + addOp(defTypeRef, opLoadTypeRef, type); +} + + +void CodeGen::loadConst(Type* type, const variant& value) +{ + // NOTE: compound consts should be held by a smart pointer somewhere else + switch(value.getType()) + { + case variant::VOID: + addOp(type, opLoadNull); + return; + case variant::ORD: + { + assert(type->isAnyOrd()); + integer i = value._int(); + if (i == 0) + addOp(type, opLoad0); + else if (i == 1) + addOp(type, opLoad1); + else if (uinteger(i) <= 255) + addOp(type, opLoadByte, i); + else + addOp(type, opLoadOrd, i); + } + return; + case variant::REAL: + notimpl(); + break; + case variant::VARPTR: + break; + case variant::STR: + 
assert(type->isByteVec()); + addOp(type, opLoadStr, value._str().obj); + return; + case variant::RANGE: + case variant::VEC: + case variant::SET: + case variant::ORDSET: + case variant::DICT: + addOp(type, opLoadConstObj, value.getType()); + add(value._anyobj()); + return; + case variant::REF: + case variant::RTOBJ: + break; + } + error("Can not load constants of this type"); +} + + +void CodeGen::loadTypeRef(Type* type) +{ + if (type->isAnyState()) + { + // A state definition by default is transformed into a function pointer + // to preserve the object subexpression that may have preceded it (see + // loadMember(State*, Symbol*)). Later though, one of the following may occur: + // (1) it's a function call, then most likely opLoad*FuncPtr will be + // replaced with an op*Call (see call()); (2) typecast is requested to + // TypeRef, in which case the preceding subexpression is discarded; + // (3) member constant selection or scope override is requested: same + // as (2); or (4) otherwise the function pointer is left "as is". 
+ State* stateType = PState(type); + if (stateType->isStatic()) + { + addOp(stateType->prototype, opLoadStaticFuncPtr, stateType); + } + else if (isCompileTime()) + { + addOp(stateType->prototype, opLoadFuncPtrErr, stateType); + } + else if (stateType->parent == codeOwner->parent) + { + codeOwner->useOutsideObject(); + addOp(stateType->prototype, opLoadOuterFuncPtr, stateType); + } + else if (stateType->parent == codeOwner) + { + codeOwner->useOutsideObject(); // uses dataseg + codeOwner->useInnerObj(); + addOp(stateType->prototype, opLoadInnerFuncPtr, stateType); + } + else if (stateType->parent == codeOwner->parentModule) // near top-level func + { + loadDataSeg(); + stkPop(); + addOp(stateType->prototype, opMkFuncPtr, stateType); + } + // TODO: far call, see loadMember(State*, Symbol*) + else + error("Invalid context for a function pointer"); + } + else + loadTypeRefConst(type); +} + + +void CodeGen::loadDefinition(Definition* def) +{ + Type* aliasedType = def->getAliasedType(); + if (aliasedType) + loadTypeRef(aliasedType); + else + loadConst(def->type, def->value); +} + + +static variant::Type typeToVarType(Type* t) +{ + // TYPEREF, VOID, VARIANT, REF, + // BOOL, CHAR, INT, ENUM, + // NULLCONT, VEC, SET, DICT, + // FIFO, PROTOTYPE, SELFSTUB, STATE + // VOID, ORD, REAL, VARPTR, + // STR, VEC, SET, ORDSET, DICT, REF, RTOBJ + switch (t->typeId) + { + case Type::TYPEREF: + return variant::RTOBJ; + case Type::VOID: + case Type::NULLCONT: + case Type::VARIANT: + return variant::VOID; + case Type::REF: + return variant::REF; + case Type::RANGE: + return variant::RANGE; + case Type::BOOL: + case Type::CHAR: + case Type::INT: + case Type::ENUM: + return variant::ORD; + case Type::VEC: + return t->isByteVec() ? variant::STR : variant::VEC; + case Type::SET: + return t->isByteSet() ? variant::ORDSET : variant::SET; + case Type::DICT: + return t->isByteDict() ? 
variant::VEC : variant::DICT; + case Type::FUNCPTR: + case Type::FIFO: + case Type::STATE: + case Type::MODULE: + return variant::RTOBJ; + case Type::SELFSTUB: + throw emessage("'self' incomplete"); + } + return variant::VOID; +} + + +void CodeGen::loadEmptyConst(Type* type) + { addOp(type, opLoadEmptyVar, typeToVarType(type)); } + + +void CodeGen::loadSymbol(Symbol* sym) +{ + if (sym->isDef()) + loadDefinition(PDefinition(sym)); + else if (sym->isAnyVar()) + loadVariable(PVariable(sym)); + else + notimpl(); +} + + +void CodeGen::_loadVar(Variable* var, OpCode op) +{ + assert(var->id >= 0 && var->id < 255); + if (isCompileTime()) + // Load an error message generator in case it gets executed; however + // this may be useful in expressions like typeof, where the value + // is not needed: + addOp(var->type, opLoadVarErr); + else + addOp(var->type, op, var->id); +} + + +void CodeGen::loadInnerVar(InnerVar* var) +{ + // In ordinary (non-ctor) functions innerobj may not be available because + // of optimizations, so we use stack reference whenever possible + if (codeOwner && codeOwner->isCtor) + { + // codeOwner->useInnerObj(); -- done in State::State() + _loadVar(var, opLoadInnerVar); + } + else + _loadVar(var, opLoadStkVar); +} + + +void CodeGen::loadResultVar(ResultVar* var) +{ + assert(var->id == 0); + addOp(var->type, isCompileTime() ? 
opLoadVarErr : opLoadResultVar); +} + + +static void varNotAccessible(const str& name) + { throw emessage("'" + name + "' is not accessible within this context"); } + + +void CodeGen::loadVariable(Variable* var) +{ + assert(var->host != NULL); + if (isCompileTime()) + addOp(var->type, opLoadVarErr); + else if (var->host == codeOwner) + { + if (var->isStkVar()) + loadStkVar(PStkVar(var)); + else if (var->isArgVar()) + loadArgVar(PArgVar(var)); + else if (var->isPtrVar()) + loadPtrVar(PPtrVar(var)); + else if (var->isResultVar()) + loadResultVar(PResultVar(var)); + else if (var->isInnerVar()) + loadInnerVar(PInnerVar(var)); + else + varNotAccessible(var->name); + } + else if (var->isInnerVar() && var->host == codeOwner->parent) + { + codeOwner->useOutsideObject(); + _loadVar(var, opLoadOuterVar); + } + else if (var->isInnerVar() && var->host == module) + { + loadDataSeg(); + loadMember(module, var); + } + else + varNotAccessible(var->name); +} + + +void CodeGen::loadMember(State* hostStateType, Symbol* sym) +{ + assert(hostStateType == stkType()); + if (sym->host != hostStateType) // shouldn't happen + fatal(0x600c, "Invalid member selection"); + if (sym->isAnyVar()) + loadMember(hostStateType, PVariable(sym)); + else if (sym->isDef()) + { + Definition* def = PDefinition(sym); + Type* type = def->getAliasedType(); + if (type && type->isAnyState()) + loadMember(hostStateType, PState(type)); + else + { + undoSubexpr(); + loadDefinition(def); + } + } + else + notimpl(); +} + + +void CodeGen::loadMember(State* hostStateType, State* stateType) +{ + assert(hostStateType == stkType()); + if (stateType->parent != hostStateType) // shouldn't happen + fatal(0x600d, "Invalid member state selection"); + if (stateType->isStatic()) + { + undoSubexpr(); + addOp(stateType->prototype, opLoadStaticFuncPtr, stateType); + } + else if (isCompileTime()) + { + undoSubexpr(); + addOp(stateType->prototype, opLoadFuncPtrErr, stateType); + } + else + { + stkPop(); // host + 
codeOwner->useOutsideObject(); + Module* targetModule = stateType->parentModule; + if (targetModule == codeOwner->parentModule) // near call + addOp(stateType->prototype, opMkFuncPtr, stateType); + else + { + // For far calls/funcptrs a data segment object should be provided + // as well: we do this by providing the ID of a corresponding + // module instance variable: + InnerVar* moduleVar = codeOwner->parentModule->findUsedModuleVar(targetModule); + if (moduleVar == NULL) + error("Function call impossible within this context"); + addOp(stateType->prototype, opMkFarFuncPtr, stateType); + add(moduleVar->id); + } + } +} + + +void CodeGen::loadMember(State* stateType, Variable* var) +{ + // This variant of loadMember() is called when (1) loading a global/static + // variable which is not accessible other than through the dataseg object, + // or (2) from loadMember(Symbol*) + assert(stateType == stkType()); + if (var->host != stateType || !var->isInnerVar()) + varNotAccessible(var->name); + if (isCompileTime()) + { + undoSubexpr(); + addOp(var->type, opLoadVarErr); + } + else + { + assert(var->id >= 0 && var->id < 255); + stkPop(); + addOp(var->type, opLoadMember, var->id); + } +} + + +void CodeGen::loadThis() +{ + if (isCompileTime()) + error("'this' is not available in const expressions"); + else if (codeOwner->parent && codeOwner->parent->isCtor) + { + codeOwner->useOutsideObject(); + addOp(codeOwner->parent, opLoadOuterObj); + } + else + error("'this' is not available within this context"); +} + + +void CodeGen::loadDataSeg() +{ + if (isCompileTime()) + error("Static data can not be accessed in const expressions"); + codeOwner->useOutsideObject(); + addOp(module, opLoadDataSeg); +} + + +void CodeGen::initStkVar(StkVar* var) +{ + if (var->host != codeOwner) + fatal(0x6005, "initLocalVar(): not my var"); + // Local var simply remains on the stack, so just check the types. 
+ assert(var->id >= 0 && var->id < 255); + assert(locals == getStackLevel() - 1 && var->id == locals); + locals++; + implicitCast(var->type, "Variable type mismatch"); +} + + +void CodeGen::initInnerVar(InnerVar* var) +{ + assert(var->id >= 0 && var->id < 255); + assert(codeOwner); + if (var->host != codeOwner) + fatal(0x6005, "initInnerVar(): not my var"); + implicitCast(var->type, "Variable type mismatch"); + if (codeOwner->isCtor) + { + // codeOwner->useInnerObj(); -- done in State::State() + stkPop(); + addOp(opInitInnerVar, var->id); + } + else + { + assert(getStackLevel() - 1 == var->id); + locals++; + // addOp(opInitStkVar, var->id); + } +} + + +void CodeGen::incStkVar(StkVar* var) +{ + assert(var->id >= 0 && var->id < 255); + addOp(opIncStkVar, var->id); +} + + +void CodeGen::loadContainerElem() +{ + // This is square brackets op - can be string, vector, dictionary or set. + OpCode op = opInv; + Type* contType = stkType(2); + if (contType->isAnyVec()) + { + implicitCast(queenBee->defInt, "Vector index must be integer"); + op = contType->isByteVec() ? opStrElem : opVecElem; + } + else if (contType->isAnyDict()) + { + implicitCast(PContainer(contType)->index, "Dictionary key type mismatch"); + op = contType->isByteDict() ? opByteDictElem : opDictElem; + } + else if (contType->isAnySet()) + { + // Selecting a set element through [] returns void, because that's the + // element type for sets. However, [] selection is used with operator del, + // that's why we need the opcode opSetElem, which actually does nothing. + // (see CodeGen::deleteContainerElem()) + implicitCast(PContainer(contType)->index, "Set element type mismatch"); + op = contType->isByteSet() ? 
opByteSetElem : opSetElem;
+    }
+    else
+        error("Vector/dictionary/set expected");
+    stkPop();
+    stkPop();
+    addOp(PContainer(contType)->elem, op);
+}
+
+
+// Replaces the (container, ordinal index) pair on top of the simulation stack
+// with the key stored at that position: opDictKeyByIdx for non-byte
+// dictionaries, opSetKey for non-byte sets. The pushed type is the
+// container's index (key) type. Any other operand combination is an internal
+// error and is reported through fatal().
+void CodeGen::loadKeyByIndex()
+{
+    // For non-byte dicts and sets, used internally by the for loop parser
+    Type* contType = stkType(2);
+    if (!stkType()->isAnyOrd())
+        fatal(0x6008, "loadKeyByIndex(): invalid index");
+    stkPop();
+    stkPop();
+    if (contType->isAnyDict() && !contType->isByteDict())
+        addOp(PContainer(contType)->index, opDictKeyByIdx);
+    else if (contType->isAnySet() && !contType->isByteSet())
+        addOp(PContainer(contType)->index, opSetKey);
+    else
+        fatal(0x6009, "loadKeyByIndex(): invalid container type");
+}
+
+
+// Replaces the (dictionary, ordinal index) pair on top of the simulation
+// stack with the element stored at that position (opDictElemByIdx). Only
+// non-byte dictionaries are accepted; anything else is an internal error.
+void CodeGen::loadDictElemByIndex()
+{
+    // Used internally by the for loop parser
+    Type* contType = stkType(2);
+    if (!stkType()->isAnyOrd())
+        fatal(0x6008, "loadDictElemByIndex(): invalid index");
+    stkPop();
+    stkPop();
+    if (contType->isAnyDict() && !contType->isByteDict())
+        addOp(PContainer(contType)->elem, opDictElemByIdx);
+    else
+        fatal(0x6009, "loadDictElemByIndex(): invalid container type");
+}
+
+
+// Sub-vector / substring selection. Expects the container at depth 3, the
+// left bound at depth 2 and the right bound on top. A void right bound is
+// accepted without a cast (NOTE(review): presumably an open-ended slice --
+// confirm in the parser); otherwise it is cast to the left bound's type.
+void CodeGen::loadSubvec()
+{
+    Type* contType = stkType(3);
+    Type* left = stkType(2);
+    Type* right = stkType();
+    bool tail = right->isVoid();
+    if (!tail)
+        implicitCast(left);
+    if (contType->isAnyVec())
+    {
+        if (!left->isInt())
+            error("Vector index type mismatch");
+        stkPop();
+        stkPop();
+        stkPop();
+        addOp(contType, contType->isByteVec() ?
opSubstr : opSubvec); + } + else + error("Vector/string type expected"); +} + + +void CodeGen::length() +{ + // NOTE: len() for sets and dicts is not a language feature, it's needed for 'for' loops + Type* type = stkType(); + if (type->isNullCont()) + { + undoSubexpr(); + loadConst(queenBee->defInt, 0); + } + else if (type->isByteSet()) + { + undoSubexpr(); + loadConst(queenBee->defInt, POrdinal(PContainer(type)->index)->getRange()); + } + else + { + OpCode op = opInv; + if (type->isAnySet()) + op = opSetLen; + else if (type->isAnyVec() || type->isByteDict()) + op = type->isByteVec() ? opStrLen : opVecLen; + else if (type->isAnyDict()) + op = opDictLen; + else + error("len() expects vector or string"); + stkPop(); + addOp(queenBee->defInt, op); + } +} + + +void CodeGen::lo() +{ + Type* type = stkType(); + if (type->isTypeRef()) + loadConst(queenBee->defInt, undoOrdTypeRef()->left); + else if (type->isNullCont() || type->isAnyVec()) + { + undoSubexpr(); + loadConst(queenBee->defInt, 0); + } + else if (type->isRange()) + { + stkPop(); + addOp(queenBee->defInt, opRangeLo); + } + else + error("lo() expects vector, string or ordinal type reference"); +} + + +void CodeGen::hi() +{ + Type* type = stkType(); + if (type->isTypeRef()) + loadConst(queenBee->defInt, undoOrdTypeRef()->right); + else if (type->isNullCont()) + { + undoSubexpr(); + loadConst(queenBee->defInt, -1); + } + else if (type->isAnyVec()) + { + stkPop(); + addOp(queenBee->defInt, type->isByteVec() ? 
opStrHi : opVecHi); + } + else if (type->isRange()) + { + stkPop(); + addOp(queenBee->defInt, opRangeHi); + } + else + error("hi() expects vector, string or ordinal type reference"); +} + + +Container* CodeGen::elemToVec(Container* vecType) +{ + Type* elemType = stkType(); + if (vecType) + { + if (!vecType->isAnyVec()) + error("Vector type expected"); + implicitCast(vecType->elem, "Vector/string element type mismatch"); + } + else + vecType = elemType->deriveVec(typeReg); + stkPop(); + addOp(vecType, vecType->isByteVec() ? opChrToStr : opVarToVec); + return vecType; +} + + +void CodeGen::elemCat() +{ + Type* vecType = stkType(2); + if (!vecType->isAnyVec()) + error("Vector/string type expected"); + implicitCast(PContainer(vecType)->elem, "Vector/string element type mismatch"); + stkPop(); + addOp(vecType->isByteVec() ? opChrCat: opVarCat); +} + + +void CodeGen::cat() +{ + Type* vecType = stkType(2); + if (!vecType->isAnyVec()) + error("Left operand is not a vector"); + implicitCast(vecType, "Vector/string types do not match"); + stkPop(); + addOp(vecType->isByteVec() ? opStrCat : opVecCat); +} + + +Container* CodeGen::elemToSet() +{ + Type* elemType = stkType(); + Container* setType = elemType->deriveSet(typeReg); + stkPop(); + addOp(setType, setType->isByteSet() ? opElemToByteSet : opElemToSet); + return setType; +} + + +Container* CodeGen::rangeToSet() +{ + Type* left = stkType(2); + if (!left->isAnyOrd()) + error("Non-ordinal range bounds"); + if (!left->canAssignTo(stkType())) + error("Incompatible range bounds"); + Container* setType = left->deriveSet(typeReg); + if (!setType->isByteSet()) + error("Invalid element type for ordinal set"); + stkPop(); + stkPop(); + addOp(setType, opRngToByteSet); + return setType; +} + + +void CodeGen::setAddElem() +{ + Type* setType = stkType(2); + if (!setType->isAnySet()) + error("Set type expected"); + implicitCast(PContainer(setType)->index, "Set element type mismatch"); + stkPop(); + addOp(setType->isByteSet() ? 
opByteSetAddElem : opSetAddElem); +} + + +void CodeGen::checkRangeLeft() +{ + Type* setType = stkType(2); + if (!setType->isByteSet()) + error("Byte set type expected"); + implicitCast(PContainer(setType)->index, "Set element type mismatch"); +} + + +void CodeGen::setAddRange() +{ + Type* setType = stkType(3); + if (!setType->isByteSet()) + error("Byte set type expected"); + implicitCast(PContainer(setType)->index, "Set element type mismatch"); + stkPop(); + stkPop(); + addOp(opByteSetAddRng); +} + + +Container* CodeGen::pairToDict() +{ + Type* val = stkType(); + Type* key = stkType(2); + Container* dictType = val->deriveContainer(typeReg, key); + stkPop(); + stkPop(); + addOp(dictType, dictType->isByteDict() ? opPairToByteDict : opPairToDict); + return dictType; +} + + +void CodeGen::checkDictKey() +{ + Type* dictType = stkType(2); + if (!dictType->isAnyDict()) + error("Dictionary type expected"); + implicitCast(PContainer(dictType)->index, "Dictionary key type mismatch"); +} + + +void CodeGen::dictAddPair() +{ + Type* dictType = stkType(3); + if (!dictType->isAnyDict()) + error("Dictionary type expected"); + implicitCast(PContainer(dictType)->elem, "Dictionary element type mismatch"); + stkPop(); + stkPop(); + addOp(dictType->isByteDict() ? opByteDictAddPair : opDictAddPair); +} + + +void CodeGen::inCont() +{ + Type* contType = stkPop(); + Type* elemType = stkPop(); + OpCode op = opInv; + if (contType->isAnySet()) + op = contType->isByteSet() ? opInByteSet : opInSet; + else if (contType->isAnyDict()) + op = contType->isByteDict() ? 
opInByteDict : opInDict; + else + error("Set/dict type expected"); + if (!elemType->canAssignTo(PContainer(contType)->index)) + error("Key type mismatch"); + addOp(queenBee->defBool, op); +} + + +void CodeGen::inBounds() +{ + Type* type = undoOrdTypeRef(); + Type* elemType = stkPop(); + if (!elemType->isAnyOrd()) + error("Ordinal type expected"); + addOp(queenBee->defBool, opInBounds, POrdinal(type)); +} + + +void CodeGen::inRange() +{ + Type* right = stkPop(); + Type* left = stkPop(); + if (!right->isRange()) + error("Range type expected"); + if (!left->canAssignTo(PRange(right)->elem)) + error("Range element type mismatch"); + addOp(queenBee->defBool, opInRange); +} + + +void CodeGen::inRange2(bool isCaseLabel) +{ + Type* right = stkPop(); + Type* left = stkPop(); + Type* elem = isCaseLabel ? stkType() : stkPop(); + if (!left->canAssignTo(right)) + error("Incompatible range bounds"); + if (!elem->canAssignTo(left)) + error("Element type mismatch"); + if (!elem->isAnyOrd() || !left->isAnyOrd() || !right->isAnyOrd()) + error("Ordinal type expected"); + addOp(queenBee->defBool, isCaseLabel ? opCaseRange : opInRange2); +} + + +void CodeGen::loadFifo(Fifo* type) +{ + addOp(type, type->isByteFifo() ? opLoadCharFifo : opLoadVarFifo, type); +} + + +Fifo* CodeGen::elemToFifo() +{ + Type* elem = stkPop(); + Fifo* fifoType = elem->deriveFifo(codeOwner); + addOp(fifoType, opElemToFifo, fifoType); + return fifoType; +} + + +void CodeGen::fifoEnq() +{ + Type* fifoType = stkType(2); + if (!fifoType->isAnyFifo()) + error("Fifo type expected"); + implicitCast(PFifo(fifoType)->elem, "Fifo element type mismatch"); + stkPop(); + stkPop(); + addOp(fifoType, fifoType->isByteFifo() ? opFifoEnqChar : opFifoEnqVar); +} + + +void CodeGen::fifoPush() +{ + Type* fifoType = stkType(2); + if (!fifoType->isAnyFifo()) + error("'<<' expects FIFO type"); + Type* right = stkType(); + // TODO: what about conversions like in C++? probably Nah. 
+ if (right->isVectorOf(PFifo(fifoType)->elem)) + { + stkPop(); + stkPop(); + addOp(fifoType, fifoType->isByteFifo() ? opFifoEnqChars : opFifoEnqVars); + } + else if (tryImplicitCast(PFifo(fifoType)->elem)) + fifoEnq(); + else + error("FIFO element type mismatch"); +} + + +void CodeGen::fifoDeq() +{ + Type* fifoType = stkType(); + if (!fifoType->isAnyFifo()) + error("Fifo type expected"); + stkPop(); + addOp(PFifo(fifoType)->elem, + fifoType->isByteFifo() ? opFifoDeqChar : opFifoDeqVar); +} + + +void CodeGen::fifoToken() +{ + Type* setType = stkType(); + if (!setType->isByteSet()) + error("Small ordinal set expected"); + Type* fifoType = stkType(2); + if (!fifoType->isByteFifo()) + error("Small ordinal FIFO expected"); + if (!PContainer(setType)->index->canAssignTo(PFifo(fifoType)->elem)) + error("Set and FIFO element type mismatch"); + stkPop(); + stkPop(); + addOp(PFifo(fifoType)->elem->deriveVec(typeReg), opFifoCharToken); +} + + +void CodeGen::arithmBinary(OpCode op) +{ + assert(op >= opAdd && op <= opBitShr); + Type* right = stkPop(); + Type* left = stkPop(); + if (!right->isInt() || !left->isInt()) + error("Operand types do not match binary operator"); + addOp(left->identicalTo(right) ? 
left : queenBee->defInt, op); +} + + +void CodeGen::arithmUnary(OpCode op) +{ + assert(op >= opNeg && op <= opNot); + Type* type = stkType(); + if (!type->isInt()) + error("Operand type doesn't match unary operator"); + addOp(op); +} + + +void CodeGen::cmp(OpCode op) +{ + assert(isCmpOp(op)); + Type* left = stkType(2); + implicitCast(left, "Type mismatch in comparison"); + Type* right = stkType(); + if (left->isAnyOrd() && right->isAnyOrd()) + addOp(opCmpOrd); + else if (left->isByteVec() && right->isByteVec()) + addOp(opCmpStr); + else + { + if (op != opEqual && op != opNotEq) + error("Only equality can be tested for this type"); + addOp(opCmpVar); + } + stkPop(); + stkPop(); + addOp(queenBee->defBool, op); +} + + +void CodeGen::caseCmp() +{ + Type* left = stkType(2); + implicitCast(left, "Type mismatch in comparison"); + Type* right = stkPop(); + if (left->isAnyOrd() && right->isAnyOrd()) + addOp(queenBee->defBool, opCaseOrd); + else if (left->isByteVec() && right->isByteVec()) + addOp(queenBee->defBool, opCaseStr); + else + addOp(queenBee->defBool, opCaseVar); +} + + +void CodeGen::_not() +{ + Type* type = stkType(); + if (type->isInt()) + addOp(opBitNot); + else + { + implicitCast(queenBee->defBool, "Boolean or integer operand expected"); + addOp(opNot); + } +} + + +void CodeGen::stkVarCmp(StkVar* var, OpCode op) +{ + // implicitCast(var->type, "Type mismatch in comparison"); + if (!stkType()->isAnyOrd() || !var->type->isAnyOrd()) + fatal(0x6007, "localVarCmp(): unsupported type"); + stkPop(); + if (op == opGreaterThan) + op = opStkVarGt; + else if (op == opGreaterEq) + op = opStkVarGe; + else + fatal(0x6007, "localVarCmp(): unsupported opcode"); + assert(var->id >= 0 && var->id < 255); + addOp(queenBee->defBool, op, var->id); +} + + +void CodeGen::stkVarCmpLength(StkVar* var, StkVar* contVar) +{ + // TODO: optimize (single instruction?) 
+ loadStkVar(contVar); + length(); + stkVarCmp(var, opGreaterEq); +} + + +void CodeGen::boolJump(memint target, OpCode op) +{ + assert(isBoolJump(op)); + implicitCast(queenBee->defBool, "Boolean expression expected"); + stkPop(); + _jump(target, op); +} + + +memint CodeGen::boolJumpForward(OpCode op) +{ + assert(isBoolJump(op)); + implicitCast(queenBee->defBool, "Boolean expression expected"); + stkPop(); + return jumpForward(op); +} + + +memint CodeGen::jumpForward(OpCode op) +{ + assert(isJump(op)); + memint pos = getCurrentOffs(); + addOp(op, 0); + return pos; +} + + +void CodeGen::resolveJump(memint target) +{ + assert(target <= getCurrentOffs() - 1 - memint(sizeof(jumpoffs))); + memint offs = getCurrentOffs() - (target + codeseg.opLenAt(target)); + if (offs > 32767) + error("Jump target is too far away"); + codeseg.jumpOffsAt(target) = offs; +} + + +void CodeGen::_jump(memint target, OpCode op) +{ + assert(target <= getCurrentOffs() - 1 - memint(sizeof(jumpoffs))); + memint offs = target - (getCurrentOffs() + codeseg.opLen(op)); + if (offs < -32768) + error("Jump target is too far away"); + addOp(op, jumpoffs(offs)); +} + + +void CodeGen::linenum(integer n) +{ + addOp(opLineNum, n); +} + + +void CodeGen::assertion(integer ln, const str& cond) +{ + implicitCast(queenBee->defBool, "Boolean expression expected for 'assert'"); + stkPop(); + addOp(opAssert); + add(ln); + add(cond); +} + + +void CodeGen::dumpVar(const str& expr) +{ + Type* type = stkPop(); + addOp(opDump, expr.obj); + add(type); +} + + +void CodeGen::programExit() +{ + stkPop(); + addOp(opExit); +} + + +// --- ASSIGNMENTS --------------------------------------------------------- // + + +static void errorLValue() + { throw emessage("Not an l-value"); } + +static void errorNotAddressableElem() + { throw emessage("Not an addressable container element"); } + +static void errorNotInsertableElem() + { throw emessage("Not an insertable location"); } + + +static OpCode loaderToStorer(OpCode op) +{ + switch 
(op) + { + case opLoadInnerVar: return opStoreInnerVar; + case opLoadOuterVar: return opStoreOuterVar; + case opLoadStkVar: return opStoreStkVar; + case opLoadArgVar: return opStoreArgVar; + case opLoadPtrVar: return opStorePtrVar; + case opLoadResultVar: return opStoreResultVar; + case opLoadMember: return opStoreMember; + case opDeref: return opStoreRef; + // end grounded loaders + case opStrElem: return opStoreStrElem; + case opVecElem: return opStoreVecElem; + case opDictElem: return opStoreDictElem; + case opByteDictElem: return opStoreByteDictElem; + default: + errorLValue(); + return opInv; + } +} + + +static OpCode loaderToLea(OpCode op) +{ + switch (op) + { + case opLoadInnerVar: return opLeaInnerVar; + case opLoadOuterVar: return opLeaOuterVar; + case opLoadStkVar: return opLeaStkVar; + case opLoadArgVar: return opLeaArgVar; + case opLoadPtrVar: return opLeaPtrVar; + case opLoadResultVar: return opLeaResultVar; + case opLoadMember: return opLeaMember; + case opDeref: return opLeaRef; + default: + errorLValue(); + return opInv; + } +} + + +static OpCode loaderToInserter(OpCode op) +{ + switch (op) + { + case opStrElem: return opStrIns; + case opVecElem: return opVecIns; + case opSubstr: return opSubstrReplace; + case opSubvec: return opSubvecReplace; + default: + errorNotInsertableElem(); + return opInv; + } +} + + +static OpCode loaderToDeleter(OpCode op) +{ + switch (op) + { + case opStrElem: return opDelStrElem; + case opVecElem: return opDelVecElem; + case opSubstr: return opDelSubstr; + case opSubvec: return opDelSubvec; + case opDictElem: return opDelDictElem; + case opByteDictElem: return opDelByteDictElem; + case opSetElem: return opDelSetElem; + case opByteSetElem: return opDelByteSetElem; + default: + errorNotAddressableElem(); + return opInv; + } +} + + +void CodeGen::toLea() +{ + // Note that the sim stack doesn't change even though the value is an + // effective address (pointer) now + memint offs = stkLoaderOffs(); + codeseg.replaceOpAt(offs, 
loaderToLea(codeseg.opAt(offs))); +} + + +void CodeGen::prevToLea() +{ + memint offs = stkPrevLoaderOffs(); + codeseg.replaceOpAt(offs, loaderToLea(codeseg.opAt(offs))); +} + + +str CodeGen::lvalue() +{ + memint offs = stkLoaderOffs(); + OpCode loader = codeseg.opAt(offs); + if (isGroundedLoader(loader)) + { + // Plain assignment to a "grounded" variant: remove the loader and + // return the corresponding storer to be appended later at the end + // of the assignment statement. + } + else + { + // A more complex assignment case: look at the previous loader - it + // should be a grounded one, transform it to its LEA equivalent, then + // transform/move the last loader like in the previous case. + prevToLea(); + } + OpCode storer = loaderToStorer(loader); + codeseg.replaceOpAt(offs, storer); + return codeseg.cutOp(offs); +} + + +void CodeGen::assign(const str& storerCode) +{ + assert(!storerCode.empty()); + Type* dest = stkType(2); + if (dest->isVoid()) // Don't remember why it's here. Possibly because of set elem selection + error("Destination is void type"); + implicitCast(dest, "Type mismatch in assignment"); + codeseg.append(storerCode); + stkPop(); + stkPop(); +} + + +str CodeGen::arithmLvalue(Token tok) +{ + // Like with lvalue(), returns the storer code to be processed by assign() + assert(tok >= tokAddAssign && tok <= tokModAssign); + toLea(); + OpCode op = OpCode(opAddAssign + (tok - tokAddAssign)); + memint offs = getCurrentOffs(); + codeseg.append(op); + return codeseg.cutOp(offs); +} + + +void CodeGen::catLvalue() +{ + if (!stkType()->isAnyVec()) + error("'|=' expects vector/string type"); + toLea(); +} + + +void CodeGen::catAssign() +{ + Type* left = stkType(2); + if (!left->isAnyVec()) + error("'|=' expects vector/string type"); + Type* right = stkType(); + if (right->canAssignTo(PContainer(left)->elem)) + addOp(left->isByteVec() ? 
opChrCatAssign : opVarCatAssign); + else + { + implicitCast(left, "Type mismatch in in-place concatenation"); + addOp(left->isByteVec() ? opStrCatAssign : opVecCatAssign); + } + stkPop(); + stkPop(); +} + + +str CodeGen::insLvalue() +{ + prevToLea(); + memint offs = stkLoaderOffs(); + OpCode inserter = loaderToInserter(codeseg.opAt(offs)); + codeseg.replaceOpAt(offs, inserter); + return codeseg.cutOp(offs); +} + + +void CodeGen::insAssign(const str& storerCode) +{ + assert(!storerCode.empty()); + Type* left = stkType(2); + Type* right = stkType(); + // This one is a little bit messy. If the lvalue is an element selection + // then 'left' is the element type; otherwise if it's a subvec/substr + // selection (s[i..j]), then 'left' is vector/string type. At the same time, + // we need to support both vector and element cases on the right. The below + // code somehow works correctly but I don't like all this. + if (!right->isVectorOf(left)) + implicitCast(left, "Type mismatch in 'ins'"); + codeseg.append(storerCode); + stkPop(); + stkPop(); +} + + +void CodeGen::deleteContainerElem() +{ + prevToLea(); + memint offs = stkLoaderOffs(); + OpCode deleter = loaderToDeleter(codeseg.opAt(offs)); + codeseg.replaceOpAt(offs, deleter); + stkPop(); +} + + +// --- FUNCTIONS, CALLS ---------------------------------------------------- // + + +void CodeGen::_popArgs(FuncPtr* proto) +{ + // Pop arguments off the simulation stack + for (memint i = proto->formalArgs.size(); i--; ) + { +#ifdef DEBUG + Type* argType = proto->formalArgs[i]->type; + if (argType && !stkType()->canAssignTo(argType)) + error("Argument type mismatch"); // shouldn't happen, checked by the compiler earlier +#endif + stkPop(); + } +} + + +void CodeGen::call(FuncPtr* proto) +{ + _popArgs(proto); + + // Remove the opMk*FuncPtr and append a corresponding caller. Note that + // opcode arguments for funcptr loaders and their respective callers + // should match. 
+ assert(stkType()->isFuncPtr()); + OpCode op = opInv; + memint offs = stkLoaderOffs(); + switch (codeseg.opAt(offs)) + { + case opLoadOuterFuncPtr: op = opSiblingCall; break; + case opLoadInnerFuncPtr: op = opChildCall; break; + case opLoadStaticFuncPtr: op = opStaticCall; break; + case opMkFuncPtr: op = opMethodCall; break; + case opMkFarFuncPtr: op = opFarMethodCall; break; + default: ; // leave op = opInv + } + + stkPop(); // funcptr; arguments are gone already + if (op != opInv) + { + codeseg.replaceOpAt(offs, op); // replace funcptr loader with a call op + str callCode = codeseg.cutOp(offs); // and move it to the end (after the actual args) + if (proto->returns) + addOp(proto->returnType, callCode); + else + { + codeseg.append(callCode); + throw evoidfunc(); + } + } + + else // indirect call + { + if (proto->returns) + addOp(proto->returnType, opCall, proto->popArgCount); + else + { + addOp(opCall, proto->popArgCount); + throw evoidfunc(); + } + } +} + + +void CodeGen::staticCall(State* callee) +{ + _popArgs(callee->prototype); + if (callee->prototype->returns) + addOp(callee->prototype->returnType, opStaticCall, callee); + else + { + addOp(opStaticCall, callee); + throw evoidfunc(); + } +} + + +void CodeGen::end() +{ + codeseg.close(); + assert(getStackLevel() == locals); +} + diff --git a/src/vminfo.cpp b/src/vminfo.cpp new file mode 100644 index 0000000..f9ed64e --- /dev/null +++ b/src/vminfo.cpp @@ -0,0 +1,336 @@ + +#include "vm.h" + + +#define OP(o,a) { #o, op##o, arg##a } + + +umemint opArgSizes[argMax] = + { + 0, + sizeof(Type*), sizeof(State*), sizeof(State*) + sizeof(uchar), sizeof(Fifo*), + sizeof(uchar), sizeof(integer), sizeof(str), + sizeof(uchar), sizeof(uchar) + sizeof(object*), + sizeof(uchar), sizeof(uchar), sizeof(uchar), sizeof(uchar), sizeof(uchar), + sizeof(jumpoffs), sizeof(integer), + sizeof(integer) + sizeof(str), // argAssert + sizeof(str) + sizeof(Type*), // argDump + }; + + +OpInfo opTable[] = +{ + OP(Inv0, None), // + OP(End, 
None), // + OP(Exit, None), // + + // --- 2. CONST LOADERS + // sync with isUndoableLoadOp() + OP(LoadTypeRef, Type), // [Type*] +obj + OP(LoadNull, None), // +null + OP(Load0, None), // +int + OP(Load1, None), // +int + OP(LoadByte, UInt8), // [int:u8] +int + OP(LoadOrd, Int), // [int] +int + OP(LoadStr, Str), // [str] +str + OP(LoadEmptyVar, VarType8), // [variant::Type:8] + var + OP(LoadConstObj, VarTypeObj), // [variant::Type:u8, object*] +var + OP(LoadOuterObj, None), // +stateobj + OP(LoadDataSeg, None), // +module-obj + OP(LoadOuterFuncPtr, State),// [State*] +funcptr + OP(LoadInnerFuncPtr, State),// [State*] +funcptr + OP(LoadStaticFuncPtr, State),// [State*] +funcptr + OP(LoadFuncPtrErr, State), // [State*] +funcptr + OP(LoadCharFifo, Fifo), // [Fifo*] +fifo + OP(LoadVarFifo, Fifo), // [Fifo*] +fifo + + // --- 3. DESIGNATOR LOADERS + OP(LoadInnerVar, InnerIdx), // [inner.idx:u8] +var + OP(LoadOuterVar, OuterIdx), // [outer.idx:u8] +var + OP(LoadStkVar, StkIdx), // [stk.idx:u8] +var + OP(LoadArgVar, ArgIdx), // [arg.idx:u8] +var + OP(LoadPtrVar, ArgIdx), // [arg.idx:u8] +var + OP(LoadResultVar, None), // +var + OP(LoadVarErr, None), // + // --- end undoable loaders + OP(LoadMember, StateIdx), // [stateobj.idx:u8] -stateobj +var + OP(Deref, None), // -ref +var + + OP(LeaInnerVar, InnerIdx), // [inner.idx:u8] +obj(0) +ptr + OP(LeaOuterVar, OuterIdx), // [outer.idx:u8] +obj(0) +ptr + OP(LeaStkVar, StkIdx), // [stk.idx:u8] +obj(0) +ptr + OP(LeaArgVar, ArgIdx), // [arg.idx:u8] +obj(0) +ptr + OP(LeaPtrVar, ArgIdx), // [arg.idx:u8] +obj +ptr + OP(LeaResultVar, None), // +var + OP(LeaMember, StateIdx), // [stateobj.idx:u8] -stateobj +stateobj +ptr + OP(LeaRef, None), // -ref +ref +ptr + + // --- 4. 
STORERS + OP(InitInnerVar, InnerIdx), // [inner.idx:u8] -var + // OP(InitStkVar, StkIdx), // [stk.idx:u8] -var + // --- begin grounded storers + OP(StoreInnerVar, InnerIdx),// [inner.idx:u8] -var + OP(StoreOuterVar, OuterIdx),// [outer.idx:u8] -var + OP(StoreStkVar, StkIdx), // [stk.idx:u8] -var + OP(StoreArgVar, ArgIdx), // [arg.idx:u8] -var + OP(StorePtrVar, ArgIdx), // [arg.idx:u8] -var + OP(StoreResultVar, None), // -var + OP(StoreMember, StateIdx), // [stateobj.idx:u8] -var -stateobj + OP(StoreRef, None), // -var -ref + // --- end grounded storers + OP(IncStkVar, StkIdx), // [stk.idx:u8] + + // --- 5. DESIGNATOR OPS, MISC + OP(MkRange, None), // -int -int +range + OP(MkRef, None), // -var +ref + OP(MkFuncPtr, State), // [State*] -obj +funcptr + OP(MkFarFuncPtr, FarState), // [State*, datasegidx:u8] -obj +funcptr + OP(NonEmpty, None), // -var +bool + OP(Pop, None), // -var + OP(PopPod, None), // -int + OP(Cast, Type), // [Type*] -var +var + OP(IsType, Type), // [Type*] -var +bool + OP(ToStr, Type), // [Type*] -var +str + + // --- 6. 
STRINGS, VECTORS + OP(ChrToStr, None), // -int +str + OP(ChrCat, None), // -int -str +str + OP(StrCat, None), // -str -str +str + OP(VarToVec, None), // -var +vec + OP(VarCat, None), // -var -vec +vec + OP(VecCat, None), // -vec -vec +vec + OP(StrLen, None), // -str +int + OP(VecLen, None), // -str +int + OP(StrHi, None), // -str +int + OP(VecHi, None), // -str +int + OP(StrElem, None), // -int -str +int + OP(VecElem, None), // -int -vec +var + OP(Substr, None), // -{int,void} -int -str +str + OP(Subvec, None), // -{int,void} -int -vec +vec + OP(StoreStrElem, None), // -char -int -ptr -obj + OP(StoreVecElem, None), // -var -int -ptr -obj + OP(DelStrElem, None), // -int -ptr -obj + OP(DelVecElem, None), // -int -ptr -obj + OP(DelSubstr, None), // -{int,void} -int -ptr -obj + OP(DelSubvec, None), // -{int,void} -int -ptr -obj + OP(StrIns, None), // -char -int -ptr -obj + OP(VecIns, None), // -var -int -ptr -obj + OP(SubstrReplace, None), // -str -void -int -ptr -obj + OP(SubvecReplace, None), // -vec -void -int -ptr -obj + OP(ChrCatAssign, None), // -char -ptr -obj + OP(StrCatAssign, None), // -str -ptr -obj + OP(VarCatAssign, None), // -var -ptr -obj + OP(VecCatAssign, None), // -vec -ptr -obj + + // --- 7. 
SETS + OP(ElemToSet, None), // -var +set + OP(SetAddElem, None), // -var -set + set + OP(ElemToByteSet, None), // -int +set + OP(RngToByteSet, None), // -int -int +set + OP(ByteSetAddElem, None), // -int -set +set + OP(ByteSetAddRng, None), // -int -int -set +set + OP(InSet, None), // -set -var +bool + OP(InByteSet, None), // -set -int +bool + OP(InBounds, Type), // [Ordinal*] -int +bool + OP(InRange, None), // -range -int +bool + OP(RangeLo, None), // -range +int + OP(RangeHi, None), // -range +int + OP(InRange2, None), // -int -int -int +bool + OP(SetElem, None), // -var -set +void + OP(ByteSetElem, None), // -int -set +void + OP(DelSetElem, None), // -var -ptr -obj + OP(DelByteSetElem, None), // -int -ptr -obj + OP(SetLen, None), // -set +int + OP(SetKey, None), // -int -set +var + + // --- 8. DICTIONARIES + OP(PairToDict, None), // -var -var +dict + OP(DictAddPair, None), // -var -var -dict +dict + OP(PairToByteDict, None), // -var -int +vec + OP(ByteDictAddPair, None), // -var -int -vec +vec + OP(DictElem, None), // -var -dict +var + OP(ByteDictElem, None), // -int -dict +var + OP(InDict, None), // -dict -var +bool + OP(InByteDict, None), // -dict -int +bool + OP(StoreDictElem, None), // -var -var -ptr -obj + OP(StoreByteDictElem, None),// -var -int -ptr -obj + OP(DelDictElem, None), // -var -ptr -obj + OP(DelByteDictElem, None), // -int -ptr -obj + OP(DictLen, None), // -dict +int + OP(DictElemByIdx, None), // -int -dict +var + OP(DictKeyByIdx, None), // -int -dict +var + + // --- 9. FIFOS + OP(ElemToFifo, Fifo), // [Fifo*] -var +fifo + OP(FifoEnqChar, None), // -char -fifo +fifo + OP(FifoEnqVar, None), // -var -fifo +fifo + OP(FifoEnqChars, None), // -str -fifo +fifo + OP(FifoEnqVars, None), // -vec -fifo +fifo + OP(FifoDeqChar, None), // -fifo +char + OP(FifoDeqVar, None), // -fifo +char + OP(FifoCharToken, None), // -charset -fifo +str + + // --- 10. 
ARITHMETIC + OP(Add, None), // -int -int +int + OP(Sub, None), // -int -int +int + OP(Mul, None), // -int -int +int + OP(Div, None), // -int -int +int + OP(Mod, None), // -int -int +int + OP(BitAnd, None), // -int -int +int + OP(BitOr, None), // -int -int +int + OP(BitXor, None), // -int -int +int + OP(BitShl, None), // -int -int +int + OP(BitShr, None), // -int -int +int + OP(Neg, None), // -int +int + OP(BitNot, None), // -int +int + OP(Not, None), // -bool +bool + OP(AddAssign, None), // -int -ptr -obj + OP(SubAssign, None), // -int -ptr -obj + OP(MulAssign, None), // -int -ptr -obj + OP(DivAssign, None), // -int -ptr -obj + OP(ModAssign, None), // -int -ptr -obj + + // --- 11. BOOLEAN + OP(CmpOrd, None), // -int, -int, +{-1,0,1} + OP(CmpStr, None), // -str, -str, +{-1,0,1} + OP(CmpVar, None), // -var, -var, +{0,1} + OP(Equal, None), // -int, +bool + OP(NotEq, None), // -int, +bool + OP(LessThan, None), // -int, +bool + OP(LessEq, None), // -int, +bool + OP(GreaterThan, None), // -int, +bool + OP(GreaterEq, None), // -int, +bool + OP(CaseOrd, None), // -int -int +int +bool + OP(CaseRange, None), // -int -int -int +int +bool + OP(CaseStr, None), // -str -str +str +bool + OP(CaseVar, None), // -var -var +var +bool + OP(StkVarGt, StkIdx), // [stk.idx:u8] -int +bool + OP(StkVarGe, StkIdx), // [stk.idx:u8] -int +bool + + // --- 12. JUMPS, CALLS + OP(Jump, Jump16), // [dst:s16] + OP(JumpFalse, Jump16), // [dst:s16] -bool + OP(JumpTrue, Jump16), // [dst:s16] -bool + OP(JumpAnd, Jump16), // [dst:s16] (-)bool + OP(JumpOr, Jump16), // [dst:s16] (-)bool + + OP(ChildCall, State), // [State*] -var -var ... {+var} + OP(SiblingCall, State), // [State*] -var -var ... {+var} + OP(StaticCall, State), // [State*] -var -var ... {+var} + OP(MethodCall, State), // [State*] -var -var -obj ... {+var} + OP(FarMethodCall, FarState),// [State*, datasegidx:u8] -var -var -obj ... {+var} + OP(Call, UInt8), // [argcount:u8] -var -var -funcptr {+var} + + // --- 13. 
DEBUGGING, DIAGNOSTICS + OP(LineNum, LineNum), // [linenum:int] + OP(Assert, Assert), // [linenum:int, cond:str] -bool + OP(Dump, Dump), // [expr:str, type:Type*] -var + OP(Inv, None), // not used +}; + + +#ifdef DEBUG +static struct vmdebuginit +{ + vmdebuginit() + { + for (int i = 0; i <= opInv; i++) + { + if (opTable[i].op != i) + { + fprintf(stderr, "VMInfo table inconsistency"); + exit(201); + } + } + } +} _vmdebuginit; +#endif + + +#define ADV(T) \ + (ip += sizeof(T), *(T*)(ip - sizeof(T))) + + +static const char* varTypeStr(variant::Type type) +{ +#define _C(t) case variant::t: return #t; + switch(type) + { + _C(VOID) + _C(ORD) + _C(REAL) + _C(VARPTR) + _C(STR) + _C(RANGE) + _C(VEC) + _C(SET) + _C(ORDSET) + _C(DICT) + _C(REF) + _C(RTOBJ) + } + return false; +} + + +void CodeSeg::dump(fifo& stm) const +{ + if (code.empty()) + return; + const uchar* beginip = (const uchar*)code.data(); + const uchar* ip = beginip; + const uchar* endip = beginip + code.size(); + while (ip < endip) + { + if (*ip >= opMaxCode) + fatal(0x5101, "Corrupt code"); + const OpInfo& info = opTable[*ip]; + if (*ip == opLineNum) + { + ip++; + stm << "#LINENUM " << ADV(integer); + } + else + { + stm << to_string(ip - beginip, 16, 4, '0') << ":\t"; + ip++; + stm << info.name; + if (info.arg != argNone) + { + stm << '\t'; + if (strlen(info.name) < 8) + stm << '\t'; + } + switch (info.arg) + { + case argNone: break; + case argType: + case argFifo: ADV(Type*)->dumpDef(stm); break; + case argState: ADV(State*)->fqName(stm); break; + case argFarState: ADV(State*)->fqName(stm); stm << "[ds:" << ADV(uchar) << ']'; break; + case argUInt8: stm << to_quoted(*ip); stm << " (" << int(ADV(uchar)) << ')'; break; + case argInt: stm << ADV(integer); break; + case argStr: stm << to_quoted(ADV(str)); break; + case argVarType8: stm << varTypeStr(variant::Type(ADV(uchar))); break; + case argVarTypeObj: stm << "const "; + { uchar t = ADV(uchar); dumpVariant(stm, variant(variant::Type(t), ADV(object*)), NULL); } 
break; + case argInnerIdx: stm << "inner." << int(ADV(uchar)); break; + case argOuterIdx: stm << "outer." << int(ADV(uchar)); break; + case argStkIdx: stm << "local." << int(ADV(uchar)); break; + case argArgIdx: stm << "arg." << int(ADV(uchar)); break; + case argStateIdx: stm << "state." << int(ADV(uchar)); break; + case argJump16: stm << to_string(ip - beginip + ADV(jumpoffs), 16, 4, '0'); + case argLineNum: break; // handled above + case argAssert: + stm << state->parentModule->filePath; + stm << " (" << ADV(integer) << "): "; + stm << " \"" << ADV(str) << '"'; + break; + case argDump: stm << ADV(str) << ": "; ADV(Type*)->dumpDef(stm); break; + case argMax: break; + } + } + stm << endl; + } +} +