root/trunk/YamlReference/yaml2yeast/Main.hs

Revision 6, 10.7 KB (checked in by oren, 9 months ago)

Version 0.9 - compatible with the April 06, 2008 draft.

Line 
1-------------------------------------------------------------------------------
2-- |
3-- Module      :  yaml2yeast
4-- Copyright   :  (c) Oren Ben-Kiki 2007
5-- License     :  LGPL
6--
7-- Maintainer  :  yaml-oren@ben-kiki.org
8-- Stability   :  alpha
9-- Portability :  portable
10--
11-- Convert an input YAML file to a YEAST (YAML Elaborate Atomic Syntax Tokens).
12-- Command line options are:
13--
14--  [@-h@ @-?@ @--help@] Prints short usage message
15--
16--  [@-o@ @--output@ @file@] Specify output file (default is \"@-@\" for
17--  stdout)
18--
19--  [@-u@ @--unbuffered@] Disable @stdout@ buffering.
20--
21--  [@-p@ @--production@ @production@] Specify the top production (default is
22--  full YAML stream).
23--
24--  [@-n@ @--n-value@ @n@] Specify the /n/ parameter of the production.
25--
26--  [@-c@ @--c-value@ @c@] Specify the /c/ parameter of the production.
27--
28--  [@-s@ @--s-value@ @s@] Specify the /s/ parameter of the production.
29--
30--  [@-t@ @--t-value@ @t@] Specify the /t/ parameter of the production.
31--
32--  [@input@] Specify the input file (default is \"@-@\" for stdin).
33--
34-- The YEAST format is designed to allow trivial post-processing. Since YEAST
35-- contains all the syntactical information contained in the original YAML
36-- file, such processing can perform tasks which are impossible or difficult
37-- using higher-level YAML parsers, such as pretty-printing. For example, the
38-- @yeast2html@ program generates an HTML visualization of the syntactical
39-- structure of the original YAML file.
40--
41-- YEAST contains one line per token, where the first character is the token
42-- code and the following characters are the input characters contained in the
43-- token, if any. Non printable\/non ASCII characters (including line breaks)
44-- are escaped using either \"@\\x@/XX/\", \"@\\u@/XXXX/\" or
45-- \"@\\U@/XXXXXXXX/\" notation. Therefore YEAST files are restricted to 7-bit
46-- printable ASCII, making them sufficiently human-readable for debugging
47-- purposes.
48--
49-- The YEAST token codes are:
50--
51--  [@U@] BOM, contains \"@TF8@\", \"@TF16LE@\" or \"@TF16BE@\"
52--
53--  [@T@] Contains presrved content text characters
54--
55--  [@t@] Contains non-content (meta) text characters
56--
57--  [@B@] Contains preserved content line break
58--
59--  [@b@] Contains separation line break
60--
61--  [@L@] Contains line break normalized to content line feed
62--
63--  [@l@] Contains line break folded to content space
64--
65--  [@I@] Contains character indicating structure
66--
67--  [@w@] Contains separation white space
68--
69--  [@i@] Contains indentation spaces
70--
71--  [@K@] Document start marker
72--
73--  [@k@] Document end marker
74--
75--  [@E@] Begins escape sequence
76--
77--  [@e@] Ends escape sequence
78--
79--  [@C@] Begins comment
80--
81--  [@c@] Ends comment
82--
83--  [@D@] Begins directive
84--
85--  [@d@] Ends directive
86--
87--  [@G@] Begins tag
88--
89--  [@g@] Ends tag
90--
91--  [@H@] Begins tag handle
92--
93--  [@h@] Ends tag handle
94--
95--  [@A@] Begins anchor
96--
97--  [@a@] Ends anchor
98--
99--  [@P@] Begins node properties
100--
101--  [@p@] Ends node properties
102--
103--  [@R@] Begins alias (reference)
104--
105--  [@r@] Ends alias (reference)
106--
107--  [@S@] Begins scalar content
108--
109--  [@s@] Ends scalar content
110--
111--  [@Q@] Begins sequence content
112--
113--  [@q@] Ends sequence content
114--
115--  [@M@] Begins mapping content
116--
117--  [@m@] Ends mapping content
118--
119--  [@N@] Begins complete node
120--
121--  [@n@] Ends complete node
122--
123--  [@X@] Begins mapping key:value pair
124--
125--  [@x@] Ends mapping key:value pair
126--
127--  [@O@] Begins document
128--
129--  [@o@] Ends document
130--
131--  [@Y@] Begins YAML stream
132--
133--  [@y@] Ends YAML stream
134--
135--  [@!@] Parsing error at this point.
136--
137--  [@-@] Unparsed text following error point.
138--
139-- In addition, the following codes are used for testing partial productions
140-- and do not appear when parsing a complete YAML stream:
141--
142--  [@?@] Contains test characters otherwise unassigned
143--
144--  [@\/@] Value of detected parameters
145--
146-------------------------------------------------------------------------------
147module Main (main) where
148
149import           Control.Monad
150import qualified Data.ByteString.Lazy.Char8 as C
151import           System.Console.GetOpt
152import           System.Environment
153import           System.IO
154import           Text.Regex
155import           Text.Yaml.Reference
156
157
158-- | Command line flag.
159data Flag = Help                   -- ^ Request printing usage.
160          | Output String          -- ^ Specify output file name.
161          | Production String      -- ^ Specify start production name.
162          | Unbuffered             -- ^ Disable @stdout@ bufferting.
163          | Following              -- ^ Emit unparsed text following an error.
164          | ParamN (Maybe Int)     -- ^ Specify $n$ parameter.
165          | ParamT (Maybe Chomp)   -- ^ Specify $t$ parameter.
166          | ParamC (Maybe Context) -- ^ Specify $c$ parameter.
167          | Input String           -- ^ Specify input file name.
168    deriving Show
169
170-- | Command line options.
171optionDescriptions :: [OptDescr Flag]
172optionDescriptions = [
173      Option ['h', '?'] ["help"]
174             (NoArg Help)
175             "print usage and exit",
176      Option ['o'] ["output"]
177             (ReqArg Output "file")
178             "output file",
179      Option ['u'] ["unbuffered"]
180             (NoArg Unbuffered)
181             "disable stdout buffering",
182      Option ['f'] ["following"]
183             (NoArg Following)
184             "emit unparsed input following an error",
185      Option ['p'] ["production"]
186             (ReqArg Production "production")
187             "top production",
188      Option ['n'] ["n-value"]
189             (ReqArg (ParamN . readParameter) "indentation")
190             "n parameter value (indentation)",
191      Option ['c'] ["context"]
192             (ReqArg (ParamC . readParameter) "context")
193             "c parameter value (context)",
194      Option ['t'] ["chomp"]
195             (ReqArg (ParamT . readParameter) "chomp")
196             "t parameter value (chomp)"
197  ]
198
199-- | @testParameter parameter@ converts the /parameter/ to a typed value. Patch
200-- the @-@ characters in the /c/ parameter into @_@ to make it possible for the
201-- built-in lexer to handle them.
202readParameter :: (Read t) => String -> Maybe t
203readParameter text = Just $ read $ subRegex (mkRegex "([a-z])-") text "\\1_"
204
205-- | @usage@ returns the usage string for the program.
206usage :: String
207usage = usageInfo "yamlSyntax: [options] [file]" optionDescriptions
208
209-- | @collectFlags args@ converts the command line /args/ to list of 'Flag'.
210collectFlags :: [String] -> IO [Flag]
211collectFlags args =
212  case getOpt Permute optionDescriptions args of
213       (_,     _,     [error:errors]) -> ioError $ userError $ (concat [error:errors]) ++ usage
214       (flags, [],    [])             -> do return flags
215       (flags, [arg], [])             -> do return $ [Input arg] ++ flags
216       (flags, _,     [])             -> ioError $ userError "more than one input file"
217
218-- | Options controlling program behavior
219data Options = Options {
220      oToHelp        :: Bool,          -- ^ Whether to just print the usage.
221      oToUnbuffer    :: Bool,          -- ^ Whether to disable @stdout@ buffering.
222      oWithFollowing :: Bool,          -- ^ Whether to emit unparsed text following an error.
223      oInput         :: String,        -- ^ Name of input file ("-": @stdin@).
224      oOutput        :: String,        -- ^ Name of output file ("-": @stdout@).
225      oProduction    :: String,        -- ^ Name of start production.
226      oN             :: Maybe Int,     -- ^ N parameter, if any.
227      oC             :: Maybe Context, -- ^ C parameter, if any.
228      oT             :: Maybe Chomp    -- ^ T parameter, if any.
229  }
230
231-- | Default options if no flags are specified.
232defaultOptions = Options {
233      oToHelp        = False,
234      oToUnbuffer    = False,
235      oWithFollowing = False,
236      oInput         = "-",
237      oOutput        = "-",
238      oProduction    = "l-yaml-stream",
239      oN             = Nothing,
240      oC             = Nothing,
241      oT             = Nothing
242  }
243
244-- | @applyFlags flags options@ applies the specified /flags/ to the /options/.
245applyFlags :: [Flag] -> Options -> IO Options
246applyFlags []           options = return options
247applyFlags (flag:flags) options =
248  case flag of
249       Help            -> applyFlags flags options { oToHelp = True }
250       Unbuffered      -> applyFlags flags options { oToUnbuffer = True }
251       Following       -> applyFlags flags options { oWithFollowing = True }
252       Output name     -> applyFlags flags options { oOutput = name }
253       Input name      -> applyFlags flags options { oInput = name }
254       ParamN value    -> applyFlags flags options { oN = value }
255       ParamC value    -> applyFlags flags options { oC = value }
256       ParamT value    -> applyFlags flags options { oT = value }
257       Production name -> applyFlags flags options { oProduction = name }
258
259-- | @fromFile name@ returns the contents of the specified input file called
260-- /name/ (may be "-" for @stdin@).
261fromFile :: String -> IO C.ByteString
262fromFile name =
263  case name of
264       "-"  -> C.getContents
265       path -> C.readFile path
266
267-- | @intoFile name text@ writes the /text/ into the specified output file
268-- called /name/ (may be "-" for @stdout@).
269intoFile :: String -> String -> IO ()
270intoFile name text =
271  case name of
272       "-"  -> putStr text
273       path -> writeFile path $ text
274
275-- | @runWith options@ runs the program with the specified /options/.
276runWith :: Options -> IO ()
277runWith options =
278  do let resolved = case ((oN options), (oC options), (oT options)) of
279                         (Nothing,    Nothing,    Nothing) -> tokenizer       (oProduction options)
280                         (Just n,     Nothing,    Nothing) -> tokenizerWithN  (oProduction options) n
281                         (Nothing,    Just c,     Nothing) -> tokenizerWithC  (oProduction options) c
282                         (Nothing,    Nothing,    Just t)  -> tokenizerWithT  (oProduction options) t
283                         (Just n,     Just c,     Nothing) -> tokenizerWithNC (oProduction options) n c
284                         (Just n,     Nothing,    Just t)  -> tokenizerWithNT (oProduction options) n t
285                         _                                 -> error "No production with this combination of parameters"
286     case resolved of
287          Nothing     -> error $ "No production " ++ (oProduction options) ++ " or it doesn't take this combination of parameters"
288          Just parser -> do text <- fromFile (oInput options)
289                            let tokens = parser (oInput options) text (oWithFollowing options)
290                            intoFile (oOutput options) (showTokens tokens)
291
292-- | @main@ converts an input YAML file to YEAST tokens.
293main :: IO ()
294main = do args <- getArgs
295          flags <- collectFlags args
296          options <- applyFlags flags defaultOptions
297          when (oToUnbuffer options) $ hSetBuffering stdout NoBuffering
298          if oToHelp options
299             then putStrLn usage
300             else runWith options
Note: See TracBrowser for help on using the browser.