I have a little experience working with .hl7 data (a format for transmitting health data), so I decided to try writing an .hl7 parser using FParsec.
A standard .hl7 segment is a single line with:
- First 3 chars are the header
- Next 5 chars are the separators (typically |^~\&)
- Fields / repetitions delimited by pipes
A segment has fields or repetitions (repeating fields). A field can have components. A component can have sub-components.
Sample .hl7 message header:
MSH|^~\&|Location|SendingApp|DateTimeOfMessage
I am looking for feedback on data design (the types) and the functions.
/// Smallest unit of parsed text. `position` is the index of this item within
/// its parent list, as assigned by the parsers in this file (List.mapi, so it
/// starts at 0).
type Subcomponent =
    { value: string
      position: int }

/// A list of subcomponents plus the index of this component within its field.
type Component =
    { subcomponents: Subcomponent list
      position: int }

/// One occurrence of a field: its components plus the index of this
/// occurrence among the repetitions (0 when the field is not repeated).
type SingleField =
    { components: Component list
      position: int }

/// A field slot of a segment: either several repetitions or one occurrence.
type Field =
    | Repetitions of SingleField list
    | SingleField of SingleField

/// A segment: its name and its fields.
type Segment =
    { name: string
      fields: Field list }

/// A message: a list of segments.
type Hl7Message = { segments: Segment list }
/// The delimiter characters used by the parsers in this file: '|' between
/// fields, '^' between components, '~' between repetitions, '\' as the
/// escape character and '&' between subcomponents.
let hl7Seps = "|^~\\&"

/// Parses any single character that is not one of the characters in hl7Seps.
let normalChar = hl7Seps |> noneOf
/// Translates the letter of an escape sequence into the delimiter it stands
/// for: F -> '|', R -> '~', S -> '^', T -> '&', E -> '\'.
/// Any other character is returned unchanged.
let unescape =
    function
    | 'F' -> '|'
    | 'R' -> '~'
    | 'S' -> '^'
    | 'T' -> '&'
    | 'E' -> '\\'
    | other -> other
/// Parses one escaped character. First tries a full escape sequence
/// (backslash, any character, backslash) and yields `unescape` of the middle
/// character. If that does not match, `attempt` backtracks and a single
/// backslash is parsed as a literal '\'.
let escapedChar =
    let backslash = pchar '\\'
    let escapeSequence = backslash >>. anyChar |>> unescape .>> skipChar '\\'
    attempt escapeSequence <|> backslash
/// Parses the text of one element: zero or more characters, each either a
/// non-delimiter character or an escaped character. Yields the text as a
/// string (possibly empty).
let pHl7Element =
    let elementChar = normalChar <|> escapedChar
    manyChars elementChar
/// Parses one component: elements separated by '&'. Each element becomes a
/// Subcomponent tagged with its zero-based index in the list.
let pcomp =
    let toSubcomponent index text : Subcomponent = { value = text; position = index }
    sepBy pHl7Element (pchar '&') |>> List.mapi toSubcomponent
/// Parses one field occurrence: components separated by '^'. Each parsed
/// subcomponent list becomes a Component tagged with its zero-based index.
let pfield =
    let toComponent index subs : Component = { subcomponents = subs; position = index }
    sepBy pcomp (pchar '^') |>> List.mapi toComponent
/// Parses one field slot of a segment: field occurrences separated by '~'.
/// Exactly one occurrence yields `SingleField` (position 0); two or more
/// yield `Repetitions`, each occurrence tagged with its zero-based index.
/// An empty result list is mapped to an empty `SingleField` instead of
/// indexing into the list, so this mapping can never throw.
let pRepsOrField =
    let toSingleField index comps : SingleField = { components = comps; position = index }

    // Match on the list shape rather than on Length/Item: the compiler checks
    // exhaustiveness and the empty case is handled without indexing.
    let toField occurrences =
        match occurrences with
        | [] -> SingleField(toSingleField 0 [])
        | [ only ] -> SingleField(toSingleField 0 only)
        | several -> Repetitions(List.mapi toSingleField several)

    sepBy pfield (pchar '~') |>> toField
/// Parses the segment name: exactly the next three characters of the input,
/// whatever they are. Fails if fewer than three characters remain.
let pheader = anyString 3
/// Parses one segment: a three-character name followed by field slots
/// separated by '|'.
/// NOTE(review): `pheader` does not consume the '|' that follows the name, so
/// for input such as "EVN|A|B" the first entry of `fields` is the empty field
/// parsed before that first '|'. Confirm whether this is intended.
let pSegment =
    let pFields = sepBy pRepsOrField (pchar '|')
    let toSegment segmentName segmentFields : Segment = { name = segmentName; fields = segmentFields }
    pipe2 pheader pFields toSegment
// Sample run: an EVN segment whose first field has three components (the
// first with two subcomponents) and whose second field has three repetitions.
// NOTE(review): `test` is not defined in this excerpt; presumably a
// run-and-print helper such as the one in the FParsec tutorial. Confirm.
"EVN|A&1^B^C|123~456~789" |> test pSegment